From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:17:33 +0200 Subject: Adding upstream version 15.5. Signed-off-by: Daniel Baumann --- src/backend/commands/Makefile | 66 + src/backend/commands/aggregatecmds.c | 496 + src/backend/commands/alter.c | 1061 ++ src/backend/commands/amcmds.c | 269 + src/backend/commands/analyze.c | 3076 +++++ src/backend/commands/async.c | 2446 ++++ src/backend/commands/cluster.c | 1736 +++ src/backend/commands/collationcmds.c | 820 ++ src/backend/commands/comment.c | 459 + src/backend/commands/constraint.c | 205 + src/backend/commands/conversioncmds.c | 139 + src/backend/commands/copy.c | 798 ++ src/backend/commands/copyfrom.c | 1624 +++ src/backend/commands/copyfromparse.c | 1921 +++ src/backend/commands/copyto.c | 1310 ++ src/backend/commands/createas.c | 637 + src/backend/commands/dbcommands.c | 3285 +++++ src/backend/commands/define.c | 391 + src/backend/commands/discard.c | 78 + src/backend/commands/dropcmds.c | 493 + src/backend/commands/event_trigger.c | 2182 ++++ src/backend/commands/explain.c | 5022 ++++++++ src/backend/commands/extension.c | 3417 ++++++ src/backend/commands/foreigncmds.c | 1617 +++ src/backend/commands/functioncmds.c | 2374 ++++ src/backend/commands/indexcmds.c | 4355 +++++++ src/backend/commands/lockcmds.c | 306 + src/backend/commands/matview.c | 936 ++ src/backend/commands/opclasscmds.c | 1745 +++ src/backend/commands/operatorcmds.c | 552 + src/backend/commands/policy.c | 1285 ++ src/backend/commands/portalcmds.c | 496 + src/backend/commands/prepare.c | 729 ++ src/backend/commands/proclang.c | 239 + src/backend/commands/publicationcmds.c | 2006 +++ src/backend/commands/schemacmds.c | 441 + src/backend/commands/seclabel.c | 581 + src/backend/commands/sequence.c | 1917 +++ src/backend/commands/statscmds.c | 898 ++ src/backend/commands/subscriptioncmds.c | 1966 +++ src/backend/commands/tablecmds.c | 19402 ++++++++++++++++++++++++++++++ 
src/backend/commands/tablespace.c | 1595 +++ src/backend/commands/trigger.c | 6664 ++++++++++ src/backend/commands/tsearchcmds.c | 1759 +++ src/backend/commands/typecmds.c | 4495 +++++++ src/backend/commands/user.c | 1645 +++ src/backend/commands/vacuum.c | 2465 ++++ src/backend/commands/vacuumparallel.c | 1074 ++ src/backend/commands/variable.c | 935 ++ src/backend/commands/view.c | 604 + 50 files changed, 95012 insertions(+) create mode 100644 src/backend/commands/Makefile create mode 100644 src/backend/commands/aggregatecmds.c create mode 100644 src/backend/commands/alter.c create mode 100644 src/backend/commands/amcmds.c create mode 100644 src/backend/commands/analyze.c create mode 100644 src/backend/commands/async.c create mode 100644 src/backend/commands/cluster.c create mode 100644 src/backend/commands/collationcmds.c create mode 100644 src/backend/commands/comment.c create mode 100644 src/backend/commands/constraint.c create mode 100644 src/backend/commands/conversioncmds.c create mode 100644 src/backend/commands/copy.c create mode 100644 src/backend/commands/copyfrom.c create mode 100644 src/backend/commands/copyfromparse.c create mode 100644 src/backend/commands/copyto.c create mode 100644 src/backend/commands/createas.c create mode 100644 src/backend/commands/dbcommands.c create mode 100644 src/backend/commands/define.c create mode 100644 src/backend/commands/discard.c create mode 100644 src/backend/commands/dropcmds.c create mode 100644 src/backend/commands/event_trigger.c create mode 100644 src/backend/commands/explain.c create mode 100644 src/backend/commands/extension.c create mode 100644 src/backend/commands/foreigncmds.c create mode 100644 src/backend/commands/functioncmds.c create mode 100644 src/backend/commands/indexcmds.c create mode 100644 src/backend/commands/lockcmds.c create mode 100644 src/backend/commands/matview.c create mode 100644 src/backend/commands/opclasscmds.c create mode 100644 src/backend/commands/operatorcmds.c create mode 
100644 src/backend/commands/policy.c create mode 100644 src/backend/commands/portalcmds.c create mode 100644 src/backend/commands/prepare.c create mode 100644 src/backend/commands/proclang.c create mode 100644 src/backend/commands/publicationcmds.c create mode 100644 src/backend/commands/schemacmds.c create mode 100644 src/backend/commands/seclabel.c create mode 100644 src/backend/commands/sequence.c create mode 100644 src/backend/commands/statscmds.c create mode 100644 src/backend/commands/subscriptioncmds.c create mode 100644 src/backend/commands/tablecmds.c create mode 100644 src/backend/commands/tablespace.c create mode 100644 src/backend/commands/trigger.c create mode 100644 src/backend/commands/tsearchcmds.c create mode 100644 src/backend/commands/typecmds.c create mode 100644 src/backend/commands/user.c create mode 100644 src/backend/commands/vacuum.c create mode 100644 src/backend/commands/vacuumparallel.c create mode 100644 src/backend/commands/variable.c create mode 100644 src/backend/commands/view.c (limited to 'src/backend/commands') diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile new file mode 100644 index 0000000..48f7348 --- /dev/null +++ b/src/backend/commands/Makefile @@ -0,0 +1,66 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for backend/commands +# +# IDENTIFICATION +# src/backend/commands/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/commands +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +OBJS = \ + aggregatecmds.o \ + alter.o \ + amcmds.o \ + analyze.o \ + async.o \ + cluster.o \ + collationcmds.o \ + comment.o \ + constraint.o \ + conversioncmds.o \ + copy.o \ + copyfrom.o \ + copyfromparse.o \ + copyto.o \ + createas.o \ + dbcommands.o \ + define.o \ + discard.o \ + dropcmds.o \ + event_trigger.o \ + explain.o \ + extension.o \ + foreigncmds.o \ + functioncmds.o \ + indexcmds.o \ + lockcmds.o \ + matview.o \ + opclasscmds.o \ + operatorcmds.o \ + policy.o \ + portalcmds.o \ + prepare.o \ + proclang.o \ + publicationcmds.o \ + schemacmds.o \ + seclabel.o \ + sequence.o \ + statscmds.o \ + subscriptioncmds.o \ + tablecmds.o \ + tablespace.o \ + trigger.o \ + tsearchcmds.o \ + typecmds.o \ + user.o \ + vacuum.o \ + vacuumparallel.o \ + variable.o \ + view.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/aggregatecmds.c b/src/backend/commands/aggregatecmds.c new file mode 100644 index 0000000..010eca7 --- /dev/null +++ b/src/backend/commands/aggregatecmds.c @@ -0,0 +1,496 @@ +/*------------------------------------------------------------------------- + * + * aggregatecmds.c + * + * Routines for aggregate-manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/aggregatecmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/dependency.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +static char extractModify(DefElem *defel); + + +/* + * DefineAggregate + * + * "oldstyle" signals the old (pre-8.2) style where the aggregate input type + * is specified by a BASETYPE element in the parameters. Otherwise, + * "args" is a pair, whose first element is a list of FunctionParameter structs + * defining the agg's arguments (both direct and aggregated), and whose second + * element is an Integer node with the number of direct args, or -1 if this + * isn't an ordered-set aggregate. + * "parameters" is a list of DefElem representing the agg's definition clauses. 
+ */ +ObjectAddress +DefineAggregate(ParseState *pstate, + List *name, + List *args, + bool oldstyle, + List *parameters, + bool replace) +{ + char *aggName; + Oid aggNamespace; + AclResult aclresult; + char aggKind = AGGKIND_NORMAL; + List *transfuncName = NIL; + List *finalfuncName = NIL; + List *combinefuncName = NIL; + List *serialfuncName = NIL; + List *deserialfuncName = NIL; + List *mtransfuncName = NIL; + List *minvtransfuncName = NIL; + List *mfinalfuncName = NIL; + bool finalfuncExtraArgs = false; + bool mfinalfuncExtraArgs = false; + char finalfuncModify = 0; + char mfinalfuncModify = 0; + List *sortoperatorName = NIL; + TypeName *baseType = NULL; + TypeName *transType = NULL; + TypeName *mtransType = NULL; + int32 transSpace = 0; + int32 mtransSpace = 0; + char *initval = NULL; + char *minitval = NULL; + char *parallel = NULL; + int numArgs; + int numDirectArgs = 0; + oidvector *parameterTypes; + ArrayType *allParameterTypes; + ArrayType *parameterModes; + ArrayType *parameterNames; + List *parameterDefaults; + Oid variadicArgType; + Oid transTypeId; + Oid mtransTypeId = InvalidOid; + char transTypeType; + char mtransTypeType = 0; + char proparallel = PROPARALLEL_UNSAFE; + ListCell *pl; + + /* Convert list of names to a name and namespace */ + aggNamespace = QualifiedNameGetCreationNamespace(name, &aggName); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(aggNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(aggNamespace)); + + /* Deconstruct the output of the aggr_args grammar production */ + if (!oldstyle) + { + Assert(list_length(args) == 2); + numDirectArgs = intVal(lsecond(args)); + if (numDirectArgs >= 0) + aggKind = AGGKIND_ORDERED_SET; + else + numDirectArgs = 0; + args = linitial_node(List, args); + } + + /* Examine aggregate's definition clauses */ + foreach(pl, parameters) + { + DefElem *defel = 
lfirst_node(DefElem, pl); + + /* + * sfunc1, stype1, and initcond1 are accepted as obsolete spellings + * for sfunc, stype, initcond. + */ + if (strcmp(defel->defname, "sfunc") == 0) + transfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "sfunc1") == 0) + transfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "finalfunc") == 0) + finalfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "combinefunc") == 0) + combinefuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "serialfunc") == 0) + serialfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "deserialfunc") == 0) + deserialfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "msfunc") == 0) + mtransfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "minvfunc") == 0) + minvtransfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "mfinalfunc") == 0) + mfinalfuncName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "finalfunc_extra") == 0) + finalfuncExtraArgs = defGetBoolean(defel); + else if (strcmp(defel->defname, "mfinalfunc_extra") == 0) + mfinalfuncExtraArgs = defGetBoolean(defel); + else if (strcmp(defel->defname, "finalfunc_modify") == 0) + finalfuncModify = extractModify(defel); + else if (strcmp(defel->defname, "mfinalfunc_modify") == 0) + mfinalfuncModify = extractModify(defel); + else if (strcmp(defel->defname, "sortop") == 0) + sortoperatorName = defGetQualifiedName(defel); + else if (strcmp(defel->defname, "basetype") == 0) + baseType = defGetTypeName(defel); + else if (strcmp(defel->defname, "hypothetical") == 0) + { + if (defGetBoolean(defel)) + { + if (aggKind == AGGKIND_NORMAL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only ordered-set aggregates can be hypothetical"))); + aggKind = AGGKIND_HYPOTHETICAL; + } + } + else if (strcmp(defel->defname, "stype") == 0) + transType = 
defGetTypeName(defel); + else if (strcmp(defel->defname, "stype1") == 0) + transType = defGetTypeName(defel); + else if (strcmp(defel->defname, "sspace") == 0) + transSpace = defGetInt32(defel); + else if (strcmp(defel->defname, "mstype") == 0) + mtransType = defGetTypeName(defel); + else if (strcmp(defel->defname, "msspace") == 0) + mtransSpace = defGetInt32(defel); + else if (strcmp(defel->defname, "initcond") == 0) + initval = defGetString(defel); + else if (strcmp(defel->defname, "initcond1") == 0) + initval = defGetString(defel); + else if (strcmp(defel->defname, "minitcond") == 0) + minitval = defGetString(defel); + else if (strcmp(defel->defname, "parallel") == 0) + parallel = defGetString(defel); + else + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("aggregate attribute \"%s\" not recognized", + defel->defname))); + } + + /* + * make sure we have our required definitions + */ + if (transType == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate stype must be specified"))); + if (transfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate sfunc must be specified"))); + + /* + * if mtransType is given, mtransfuncName and minvtransfuncName must be as + * well; if not, then none of the moving-aggregate options should have + * been given. 
+ */ + if (mtransType != NULL) + { + if (mtransfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msfunc must be specified when mstype is specified"))); + if (minvtransfuncName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minvfunc must be specified when mstype is specified"))); + } + else + { + if (mtransfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msfunc must not be specified without mstype"))); + if (minvtransfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minvfunc must not be specified without mstype"))); + if (mfinalfuncName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate mfinalfunc must not be specified without mstype"))); + if (mtransSpace != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate msspace must not be specified without mstype"))); + if (minitval != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate minitcond must not be specified without mstype"))); + } + + /* + * Default values for modify flags can only be determined once we know the + * aggKind. + */ + if (finalfuncModify == 0) + finalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE; + if (mfinalfuncModify == 0) + mfinalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE; + + /* + * look up the aggregate's input datatype(s). + */ + if (oldstyle) + { + /* + * Old style: use basetype parameter. This supports aggregates of + * zero or one input, with input type ANY meaning zero inputs. + * + * Historically we allowed the command to look like basetype = 'ANY' + * so we must do a case-insensitive comparison for the name ANY. Ugh. 
+ */ + Oid aggArgTypes[1]; + + if (baseType == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate input type must be specified"))); + + if (pg_strcasecmp(TypeNameToString(baseType), "ANY") == 0) + { + numArgs = 0; + aggArgTypes[0] = InvalidOid; + } + else + { + numArgs = 1; + aggArgTypes[0] = typenameTypeId(NULL, baseType); + } + parameterTypes = buildoidvector(aggArgTypes, numArgs); + allParameterTypes = NULL; + parameterModes = NULL; + parameterNames = NULL; + parameterDefaults = NIL; + variadicArgType = InvalidOid; + } + else + { + /* + * New style: args is a list of FunctionParameters (possibly zero of + * 'em). We share functioncmds.c's code for processing them. + */ + Oid requiredResultType; + + if (baseType != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("basetype is redundant with aggregate input type specification"))); + + numArgs = list_length(args); + interpret_function_parameter_list(pstate, + args, + InvalidOid, + OBJECT_AGGREGATE, + ¶meterTypes, + NULL, + &allParameterTypes, + ¶meterModes, + ¶meterNames, + NULL, + ¶meterDefaults, + &variadicArgType, + &requiredResultType); + /* Parameter defaults are not currently allowed by the grammar */ + Assert(parameterDefaults == NIL); + /* There shouldn't have been any OUT parameters, either */ + Assert(requiredResultType == InvalidOid); + } + + /* + * look up the aggregate's transtype. + * + * transtype can't be a pseudo-type, since we need to be able to store + * values of the transtype. However, we can allow polymorphic transtype + * in some cases (AggregateCreate will check). Also, we allow "internal" + * for functions that want to pass pointers to private data structures; + * but allow that only to superusers, since you could crash the system (or + * worse) by connecting up incompatible internal-using functions in an + * aggregate. 
+ */ + transTypeId = typenameTypeId(NULL, transType); + transTypeType = get_typtype(transTypeId); + if (transTypeType == TYPTYPE_PSEUDO && + !IsPolymorphicType(transTypeId)) + { + if (transTypeId == INTERNALOID && superuser()) + /* okay */ ; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate transition data type cannot be %s", + format_type_be(transTypeId)))); + } + + if (serialfuncName && deserialfuncName) + { + /* + * Serialization is only needed/allowed for transtype INTERNAL. + */ + if (transTypeId != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("serialization functions may be specified only when the aggregate transition data type is %s", + format_type_be(INTERNALOID)))); + } + else if (serialfuncName || deserialfuncName) + { + /* + * Cannot specify one function without the other. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("must specify both or neither of serialization and deserialization functions"))); + } + + /* + * If a moving-aggregate transtype is specified, look that up. Same + * restrictions as for transtype. + */ + if (mtransType) + { + mtransTypeId = typenameTypeId(NULL, mtransType); + mtransTypeType = get_typtype(mtransTypeId); + if (mtransTypeType == TYPTYPE_PSEUDO && + !IsPolymorphicType(mtransTypeId)) + { + if (mtransTypeId == INTERNALOID && superuser()) + /* okay */ ; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("aggregate transition data type cannot be %s", + format_type_be(mtransTypeId)))); + } + } + + /* + * If we have an initval, and it's not for a pseudotype (particularly a + * polymorphic type), make sure it's acceptable to the type's input + * function. We will store the initval as text, because the input + * function isn't necessarily immutable (consider "now" for timestamp), + * and we want to use the runtime not creation-time interpretation of the + * value. 
However, if it's an incorrect value it seems much more + * user-friendly to complain at CREATE AGGREGATE time. + */ + if (initval && transTypeType != TYPTYPE_PSEUDO) + { + Oid typinput, + typioparam; + + getTypeInputInfo(transTypeId, &typinput, &typioparam); + (void) OidInputFunctionCall(typinput, initval, typioparam, -1); + } + + /* + * Likewise for moving-aggregate initval. + */ + if (minitval && mtransTypeType != TYPTYPE_PSEUDO) + { + Oid typinput, + typioparam; + + getTypeInputInfo(mtransTypeId, &typinput, &typioparam); + (void) OidInputFunctionCall(typinput, minitval, typioparam, -1); + } + + if (parallel) + { + if (strcmp(parallel, "safe") == 0) + proparallel = PROPARALLEL_SAFE; + else if (strcmp(parallel, "restricted") == 0) + proparallel = PROPARALLEL_RESTRICTED; + else if (strcmp(parallel, "unsafe") == 0) + proparallel = PROPARALLEL_UNSAFE; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE"))); + } + + /* + * Most of the argument-checking is done inside of AggregateCreate + */ + return AggregateCreate(aggName, /* aggregate name */ + aggNamespace, /* namespace */ + replace, + aggKind, + numArgs, + numDirectArgs, + parameterTypes, + PointerGetDatum(allParameterTypes), + PointerGetDatum(parameterModes), + PointerGetDatum(parameterNames), + parameterDefaults, + variadicArgType, + transfuncName, /* step function name */ + finalfuncName, /* final function name */ + combinefuncName, /* combine function name */ + serialfuncName, /* serial function name */ + deserialfuncName, /* deserial function name */ + mtransfuncName, /* fwd trans function name */ + minvtransfuncName, /* inv trans function name */ + mfinalfuncName, /* final function name */ + finalfuncExtraArgs, + mfinalfuncExtraArgs, + finalfuncModify, + mfinalfuncModify, + sortoperatorName, /* sort operator name */ + transTypeId, /* transition data type */ + transSpace, /* transition space */ + mtransTypeId, /* transition data type 
*/ + mtransSpace, /* transition space */ + initval, /* initial condition */ + minitval, /* initial condition */ + proparallel); /* parallel safe? */ +} + +/* + * Convert the string form of [m]finalfunc_modify to the catalog representation + */ +static char +extractModify(DefElem *defel) +{ + char *val = defGetString(defel); + + if (strcmp(val, "read_only") == 0) + return AGGMODIFY_READ_ONLY; + if (strcmp(val, "shareable") == 0) + return AGGMODIFY_SHAREABLE; + if (strcmp(val, "read_write") == 0) + return AGGMODIFY_READ_WRITE; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" must be READ_ONLY, SHAREABLE, or READ_WRITE", + defel->defname))); + return 0; /* keep compiler quiet */ +} diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c new file mode 100644 index 0000000..5456b82 --- /dev/null +++ b/src/backend/commands/alter.c @@ -0,0 +1,1061 @@ +/*------------------------------------------------------------------------- + * + * alter.c + * Drivers for generic alter commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/alter.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_language.h" +#include "catalog/pg_largeobject.h" +#include "catalog/pg_largeobject_metadata.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include 
"catalog/pg_opfamily.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "commands/alter.h" +#include "commands/collationcmds.h" +#include "commands/conversioncmds.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/extension.h" +#include "commands/policy.h" +#include "commands/proclang.h" +#include "commands/publicationcmds.h" +#include "commands/schemacmds.h" +#include "commands/subscriptioncmds.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "commands/trigger.h" +#include "commands/typecmds.h" +#include "commands/user.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "rewrite/rewriteDefine.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static Oid AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid); + +/* + * Raise an error to the effect that an object of the given name is already + * present in the given namespace. 
+ */ +static void +report_name_conflict(Oid classId, const char *name) +{ + char *msgfmt; + + switch (classId) + { + case EventTriggerRelationId: + msgfmt = gettext_noop("event trigger \"%s\" already exists"); + break; + case ForeignDataWrapperRelationId: + msgfmt = gettext_noop("foreign-data wrapper \"%s\" already exists"); + break; + case ForeignServerRelationId: + msgfmt = gettext_noop("server \"%s\" already exists"); + break; + case LanguageRelationId: + msgfmt = gettext_noop("language \"%s\" already exists"); + break; + case PublicationRelationId: + msgfmt = gettext_noop("publication \"%s\" already exists"); + break; + case SubscriptionRelationId: + msgfmt = gettext_noop("subscription \"%s\" already exists"); + break; + default: + elog(ERROR, "unsupported object class %u", classId); + break; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg(msgfmt, name))); +} + +static void +report_namespace_conflict(Oid classId, const char *name, Oid nspOid) +{ + char *msgfmt; + + Assert(OidIsValid(nspOid)); + + switch (classId) + { + case ConversionRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\""); + break; + case StatisticExtRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("statistics object \"%s\" already exists in schema \"%s\""); + break; + case TSParserRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\""); + break; + case TSDictionaryRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search dictionary \"%s\" already exists in schema \"%s\""); + break; + case TSTemplateRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search template \"%s\" already exists in schema \"%s\""); + break; + case TSConfigRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("text search configuration \"%s\" already exists in schema \"%s\""); + break; + default: + 
elog(ERROR, "unsupported object class %u", classId); + break; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg(msgfmt, name, get_namespace_name(nspOid)))); +} + +/* + * AlterObjectRename_internal + * + * Generic function to rename the given object, for simple cases (won't + * work for tables, nor other cases where we need to do more than change + * the name column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objectId: OID of object to be renamed + * new_name: CString representation of new name + */ +static void +AlterObjectRename_internal(Relation rel, Oid objectId, const char *new_name) +{ + Oid classId = RelationGetRelid(rel); + int oidCacheId = get_object_catcache_oid(classId); + int nameCacheId = get_object_catcache_name(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + HeapTuple oldtup; + HeapTuple newtup; + Datum datum; + bool isnull; + Oid namespaceId; + Oid ownerId; + char *old_name; + AclResult aclresult; + Datum *values; + bool *nulls; + bool *replaces; + NameData nameattrdata; + + oldtup = SearchSysCache1(oidCacheId, ObjectIdGetDatum(objectId)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objectId, RelationGetRelationName(rel)); + + datum = heap_getattr(oldtup, Anum_name, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + old_name = NameStr(*(DatumGetName(datum))); + + /* Get OID of namespace */ + if (Anum_namespace > 0) + { + datum = heap_getattr(oldtup, Anum_namespace, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespaceId = DatumGetObjectId(datum); + } + else + namespaceId = InvalidOid; + + /* Permission checks ... 
superusers can always do it */ + if (!superuser()) + { + /* Fail if object does not have an explicit owner */ + if (Anum_owner <= 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename %s", + getObjectDescriptionOids(classId, objectId)))); + + /* Otherwise, must be owner of the existing object */ + datum = heap_getattr(oldtup, Anum_owner, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + ownerId = DatumGetObjectId(datum); + + if (!has_privs_of_role(GetUserId(), DatumGetObjectId(ownerId))) + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), + old_name); + + /* User must have CREATE privilege on the namespace */ + if (OidIsValid(namespaceId)) + { + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + } + + /* + * Check for duplicate name (more friendly than unique-index failure). + * Since this is just a friendliness check, we can just skip it in cases + * where there isn't suitable support. 
+ */ + if (classId == ProcedureRelationId) + { + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(oldtup); + + IsThereFunctionInNamespace(new_name, proc->pronargs, + &proc->proargtypes, proc->pronamespace); + } + else if (classId == CollationRelationId) + { + Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(oldtup); + + IsThereCollationInNamespace(new_name, coll->collnamespace); + } + else if (classId == OperatorClassRelationId) + { + Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(oldtup); + + IsThereOpClassInNamespace(new_name, opc->opcmethod, + opc->opcnamespace); + } + else if (classId == OperatorFamilyRelationId) + { + Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(oldtup); + + IsThereOpFamilyInNamespace(new_name, opf->opfmethod, + opf->opfnamespace); + } + else if (classId == SubscriptionRelationId) + { + if (SearchSysCacheExists2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(new_name))) + report_name_conflict(classId, new_name); + + /* Also enforce regression testing naming rules, if enabled */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(new_name, "regress_", 8) != 0) + elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\""); +#endif + } + else if (nameCacheId >= 0) + { + if (OidIsValid(namespaceId)) + { + if (SearchSysCacheExists2(nameCacheId, + CStringGetDatum(new_name), + ObjectIdGetDatum(namespaceId))) + report_namespace_conflict(classId, new_name, namespaceId); + } + else + { + if (SearchSysCacheExists1(nameCacheId, + CStringGetDatum(new_name))) + report_name_conflict(classId, new_name); + } + } + + /* Build modified tuple */ + values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum)); + nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + namestrcpy(&nameattrdata, new_name); + values[Anum_name - 1] = NameGetDatum(&nameattrdata); + replaces[Anum_name - 1] = true; + 
newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &oldtup->t_self, newtup); + + InvokeObjectPostAlterHook(classId, objectId, 0); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + heap_freetuple(newtup); + + ReleaseSysCache(oldtup); +} + +/* + * Executes an ALTER OBJECT / RENAME TO statement. Based on the object + * type, the function appropriate to that type is executed. + * + * Return value is the address of the renamed object. + */ +ObjectAddress +ExecRenameStmt(RenameStmt *stmt) +{ + switch (stmt->renameType) + { + case OBJECT_TABCONSTRAINT: + case OBJECT_DOMCONSTRAINT: + return RenameConstraint(stmt); + + case OBJECT_DATABASE: + return RenameDatabase(stmt->subname, stmt->newname); + + case OBJECT_ROLE: + return RenameRole(stmt->subname, stmt->newname); + + case OBJECT_SCHEMA: + return RenameSchema(stmt->subname, stmt->newname); + + case OBJECT_TABLESPACE: + return RenameTableSpace(stmt->subname, stmt->newname); + + case OBJECT_TABLE: + case OBJECT_SEQUENCE: + case OBJECT_VIEW: + case OBJECT_MATVIEW: + case OBJECT_INDEX: + case OBJECT_FOREIGN_TABLE: + return RenameRelation(stmt); + + case OBJECT_COLUMN: + case OBJECT_ATTRIBUTE: + return renameatt(stmt); + + case OBJECT_RULE: + return RenameRewriteRule(stmt->relation, stmt->subname, + stmt->newname); + + case OBJECT_TRIGGER: + return renametrig(stmt); + + case OBJECT_POLICY: + return rename_policy(stmt); + + case OBJECT_DOMAIN: + case OBJECT_TYPE: + return RenameType(stmt); + + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_EVENT_TRIGGER: + case OBJECT_FDW: + case OBJECT_FOREIGN_SERVER: + case OBJECT_FUNCTION: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_LANGUAGE: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case 
OBJECT_TSTEMPLATE: + case OBJECT_PUBLICATION: + case OBJECT_SUBSCRIPTION: + { + ObjectAddress address; + Relation catalog; + Relation relation; + + address = get_object_address(stmt->renameType, + stmt->object, + &relation, + AccessExclusiveLock, false); + Assert(relation == NULL); + + catalog = table_open(address.classId, RowExclusiveLock); + AlterObjectRename_internal(catalog, + address.objectId, + stmt->newname); + table_close(catalog, RowExclusiveLock); + + return address; + } + + default: + elog(ERROR, "unrecognized rename stmt type: %d", + (int) stmt->renameType); + return InvalidObjectAddress; /* keep compiler happy */ + } +} + +/* + * Executes an ALTER OBJECT / [NO] DEPENDS ON EXTENSION statement. + * + * Return value is the address of the altered object. refAddress is an output + * argument which, if not null, receives the address of the object that the + * altered object now depends on. + */ +ObjectAddress +ExecAlterObjectDependsStmt(AlterObjectDependsStmt *stmt, ObjectAddress *refAddress) +{ + ObjectAddress address; + ObjectAddress refAddr; + Relation rel; + + address = + get_object_address_rv(stmt->objectType, stmt->relation, (List *) stmt->object, + &rel, AccessExclusiveLock, false); + + /* + * Verify that the user is entitled to run the command. + * + * We don't check any privileges on the extension, because that's not + * needed. The object owner is stipulating, by running this command, that + * the extension owner can drop the object whenever they feel like it, + * which is not considered a problem. + */ + check_object_ownership(GetUserId(), + stmt->objectType, address, stmt->object, rel); + + /* + * If a relation was involved, it would have been opened and locked. We + * don't need the relation here, but we'll retain the lock until commit. 
+ */ + if (rel) + table_close(rel, NoLock); + + refAddr = get_object_address(OBJECT_EXTENSION, (Node *) stmt->extname, + &rel, AccessExclusiveLock, false); + Assert(rel == NULL); + if (refAddress) + *refAddress = refAddr; + + if (stmt->remove) + { + deleteDependencyRecordsForSpecific(address.classId, address.objectId, + DEPENDENCY_AUTO_EXTENSION, + refAddr.classId, refAddr.objectId); + } + else + { + List *currexts; + + /* Avoid duplicates */ + currexts = getAutoExtensionsOfObject(address.classId, + address.objectId); + if (!list_member_oid(currexts, refAddr.objectId)) + recordDependencyOn(&address, &refAddr, DEPENDENCY_AUTO_EXTENSION); + } + + return address; +} + +/* + * Executes an ALTER OBJECT / SET SCHEMA statement. Based on the object + * type, the function appropriate to that type is executed. + * + * Return value is that of the altered object. + * + * oldSchemaAddr is an output argument which, if not NULL, is set to the object + * address of the original schema. + */ +ObjectAddress +ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt, + ObjectAddress *oldSchemaAddr) +{ + ObjectAddress address; + Oid oldNspOid; + + switch (stmt->objectType) + { + case OBJECT_EXTENSION: + address = AlterExtensionNamespace(strVal(stmt->object), stmt->newschema, + oldSchemaAddr ? &oldNspOid : NULL); + break; + + case OBJECT_FOREIGN_TABLE: + case OBJECT_SEQUENCE: + case OBJECT_TABLE: + case OBJECT_VIEW: + case OBJECT_MATVIEW: + address = AlterTableNamespace(stmt, + oldSchemaAddr ? &oldNspOid : NULL); + break; + + case OBJECT_DOMAIN: + case OBJECT_TYPE: + address = AlterTypeNamespace(castNode(List, stmt->object), stmt->newschema, + stmt->objectType, + oldSchemaAddr ? 
&oldNspOid : NULL); + break; + + /* generic code path */ + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_FUNCTION: + case OBJECT_OPERATOR: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + { + Relation catalog; + Relation relation; + Oid classId; + Oid nspOid; + + address = get_object_address(stmt->objectType, + stmt->object, + &relation, + AccessExclusiveLock, + false); + Assert(relation == NULL); + classId = address.classId; + catalog = table_open(classId, RowExclusiveLock); + nspOid = LookupCreationNamespace(stmt->newschema); + + oldNspOid = AlterObjectNamespace_internal(catalog, address.objectId, + nspOid); + table_close(catalog, RowExclusiveLock); + } + break; + + default: + elog(ERROR, "unrecognized AlterObjectSchemaStmt type: %d", + (int) stmt->objectType); + return InvalidObjectAddress; /* keep compiler happy */ + } + + if (oldSchemaAddr) + ObjectAddressSet(*oldSchemaAddr, NamespaceRelationId, oldNspOid); + + return address; +} + +/* + * Change an object's namespace given its classOid and object Oid. + * + * Objects that don't have a namespace should be ignored. + * + * This function is currently used only by ALTER EXTENSION SET SCHEMA, + * so it only needs to cover object types that can be members of an + * extension, and it doesn't have to deal with certain special cases + * such as not wanting to process array types --- those should never + * be direct members of an extension anyway. Nonetheless, we insist + * on listing all OCLASS types in the switch. + * + * Returns the OID of the object's previous namespace, or InvalidOid if + * object doesn't have a schema. 
+ */ +Oid +AlterObjectNamespace_oid(Oid classId, Oid objid, Oid nspOid, + ObjectAddresses *objsMoved) +{ + Oid oldNspOid = InvalidOid; + ObjectAddress dep; + + dep.classId = classId; + dep.objectId = objid; + dep.objectSubId = 0; + + switch (getObjectClass(&dep)) + { + case OCLASS_CLASS: + { + Relation rel; + + rel = relation_open(objid, AccessExclusiveLock); + oldNspOid = RelationGetNamespace(rel); + + AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved); + + relation_close(rel, NoLock); + break; + } + + case OCLASS_TYPE: + oldNspOid = AlterTypeNamespace_oid(objid, nspOid, objsMoved); + break; + + case OCLASS_PROC: + case OCLASS_COLLATION: + case OCLASS_CONVERSION: + case OCLASS_OPERATOR: + case OCLASS_OPCLASS: + case OCLASS_OPFAMILY: + case OCLASS_STATISTIC_EXT: + case OCLASS_TSPARSER: + case OCLASS_TSDICT: + case OCLASS_TSTEMPLATE: + case OCLASS_TSCONFIG: + { + Relation catalog; + + catalog = table_open(classId, RowExclusiveLock); + + oldNspOid = AlterObjectNamespace_internal(catalog, objid, + nspOid); + + table_close(catalog, RowExclusiveLock); + } + break; + + case OCLASS_CAST: + case OCLASS_CONSTRAINT: + case OCLASS_DEFAULT: + case OCLASS_LANGUAGE: + case OCLASS_LARGEOBJECT: + case OCLASS_AM: + case OCLASS_AMOP: + case OCLASS_AMPROC: + case OCLASS_REWRITE: + case OCLASS_TRIGGER: + case OCLASS_SCHEMA: + case OCLASS_ROLE: + case OCLASS_DATABASE: + case OCLASS_TBLSPACE: + case OCLASS_FDW: + case OCLASS_FOREIGN_SERVER: + case OCLASS_USER_MAPPING: + case OCLASS_DEFACL: + case OCLASS_EXTENSION: + case OCLASS_EVENT_TRIGGER: + case OCLASS_PARAMETER_ACL: + case OCLASS_POLICY: + case OCLASS_PUBLICATION: + case OCLASS_PUBLICATION_NAMESPACE: + case OCLASS_PUBLICATION_REL: + case OCLASS_SUBSCRIPTION: + case OCLASS_TRANSFORM: + /* ignore object types that don't have schema-qualified names */ + break; + + /* + * There's intentionally no default: case here; we want the + * compiler to warn if a new OCLASS hasn't been handled above. 
+ */ + } + + return oldNspOid; +} + +/* + * Generic function to change the namespace of a given object, for simple + * cases (won't work for tables, nor other cases where we need to do more + * than change the namespace column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objid: OID of object to change the namespace of + * nspOid: OID of new namespace + * + * Returns the OID of the object's previous namespace. + */ +static Oid +AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid) +{ + Oid classId = RelationGetRelid(rel); + int oidCacheId = get_object_catcache_oid(classId); + int nameCacheId = get_object_catcache_name(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + Oid oldNspOid; + Datum name, + namespace; + bool isnull; + HeapTuple tup, + newtup; + Datum *values; + bool *nulls; + bool *replaces; + + tup = SearchSysCacheCopy1(oidCacheId, ObjectIdGetDatum(objid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objid, RelationGetRelationName(rel)); + + name = heap_getattr(tup, Anum_name, RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespace = heap_getattr(tup, Anum_namespace, RelationGetDescr(rel), + &isnull); + Assert(!isnull); + oldNspOid = DatumGetObjectId(namespace); + + /* + * If the object is already in the correct namespace, we don't need to do + * anything except fire the object access hook. + */ + if (oldNspOid == nspOid) + { + InvokeObjectPostAlterHook(classId, objid, 0); + return oldNspOid; + } + + /* Check basic namespace related issues */ + CheckSetNamespace(oldNspOid, nspOid); + + /* Permission checks ... 
superusers can always do it */ + if (!superuser()) + { + Datum owner; + Oid ownerId; + AclResult aclresult; + + /* Fail if object does not have an explicit owner */ + if (Anum_owner <= 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to set schema of %s", + getObjectDescriptionOids(classId, objid)))); + + /* Otherwise, must be owner of the existing object */ + owner = heap_getattr(tup, Anum_owner, RelationGetDescr(rel), &isnull); + Assert(!isnull); + ownerId = DatumGetObjectId(owner); + + if (!has_privs_of_role(GetUserId(), ownerId)) + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objid), + NameStr(*(DatumGetName(name)))); + + /* User must have CREATE privilege on new namespace */ + aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(nspOid)); + } + + /* + * Check for duplicate name (more friendly than unique-index failure). + * Since this is just a friendliness check, we can just skip it in cases + * where there isn't suitable support. 
+ */ + if (classId == ProcedureRelationId) + { + Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(tup); + + IsThereFunctionInNamespace(NameStr(proc->proname), proc->pronargs, + &proc->proargtypes, nspOid); + } + else if (classId == CollationRelationId) + { + Form_pg_collation coll = (Form_pg_collation) GETSTRUCT(tup); + + IsThereCollationInNamespace(NameStr(coll->collname), nspOid); + } + else if (classId == OperatorClassRelationId) + { + Form_pg_opclass opc = (Form_pg_opclass) GETSTRUCT(tup); + + IsThereOpClassInNamespace(NameStr(opc->opcname), + opc->opcmethod, nspOid); + } + else if (classId == OperatorFamilyRelationId) + { + Form_pg_opfamily opf = (Form_pg_opfamily) GETSTRUCT(tup); + + IsThereOpFamilyInNamespace(NameStr(opf->opfname), + opf->opfmethod, nspOid); + } + else if (nameCacheId >= 0 && + SearchSysCacheExists2(nameCacheId, name, + ObjectIdGetDatum(nspOid))) + report_namespace_conflict(classId, + NameStr(*(DatumGetName(name))), + nspOid); + + /* Build modified tuple */ + values = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(Datum)); + nulls = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + replaces = palloc0(RelationGetNumberOfAttributes(rel) * sizeof(bool)); + values[Anum_namespace - 1] = ObjectIdGetDatum(nspOid); + replaces[Anum_namespace - 1] = true; + newtup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &tup->t_self, newtup); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + + /* update dependencies to point to the new schema */ + changeDependencyFor(classId, objid, + NamespaceRelationId, oldNspOid, nspOid); + + InvokeObjectPostAlterHook(classId, objid, 0); + + return oldNspOid; +} + +/* + * Executes an ALTER OBJECT / OWNER TO statement. Based on the object + * type, the function appropriate to that type is executed. 
+ */ +ObjectAddress +ExecAlterOwnerStmt(AlterOwnerStmt *stmt) +{ + Oid newowner = get_rolespec_oid(stmt->newowner, false); + + switch (stmt->objectType) + { + case OBJECT_DATABASE: + return AlterDatabaseOwner(strVal(stmt->object), newowner); + + case OBJECT_SCHEMA: + return AlterSchemaOwner(strVal(stmt->object), newowner); + + case OBJECT_TYPE: + case OBJECT_DOMAIN: /* same as TYPE */ + return AlterTypeOwner(castNode(List, stmt->object), newowner, stmt->objectType); + break; + + case OBJECT_FDW: + return AlterForeignDataWrapperOwner(strVal(stmt->object), + newowner); + + case OBJECT_FOREIGN_SERVER: + return AlterForeignServerOwner(strVal(stmt->object), + newowner); + + case OBJECT_EVENT_TRIGGER: + return AlterEventTriggerOwner(strVal(stmt->object), + newowner); + + case OBJECT_PUBLICATION: + return AlterPublicationOwner(strVal(stmt->object), + newowner); + + case OBJECT_SUBSCRIPTION: + return AlterSubscriptionOwner(strVal(stmt->object), + newowner); + + /* Generic cases */ + case OBJECT_AGGREGATE: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_FUNCTION: + case OBJECT_LANGUAGE: + case OBJECT_LARGEOBJECT: + case OBJECT_OPERATOR: + case OBJECT_OPCLASS: + case OBJECT_OPFAMILY: + case OBJECT_PROCEDURE: + case OBJECT_ROUTINE: + case OBJECT_STATISTIC_EXT: + case OBJECT_TABLESPACE: + case OBJECT_TSDICTIONARY: + case OBJECT_TSCONFIGURATION: + { + Relation catalog; + Relation relation; + Oid classId; + ObjectAddress address; + + address = get_object_address(stmt->objectType, + stmt->object, + &relation, + AccessExclusiveLock, + false); + Assert(relation == NULL); + classId = address.classId; + + /* + * XXX - get_object_address returns Oid of pg_largeobject + * catalog for OBJECT_LARGEOBJECT because of historical + * reasons. Fix up it here. 
+ */ + if (classId == LargeObjectRelationId) + classId = LargeObjectMetadataRelationId; + + catalog = table_open(classId, RowExclusiveLock); + + AlterObjectOwner_internal(catalog, address.objectId, newowner); + table_close(catalog, RowExclusiveLock); + + return address; + } + break; + + default: + elog(ERROR, "unrecognized AlterOwnerStmt type: %d", + (int) stmt->objectType); + return InvalidObjectAddress; /* keep compiler happy */ + } +} + +/* + * Generic function to change the ownership of a given object, for simple + * cases (won't work for tables, nor other cases where we need to do more than + * change the ownership column of a single catalog entry). + * + * rel: catalog relation containing object (RowExclusiveLock'd by caller) + * objectId: OID of object to change the ownership of + * new_ownerId: OID of new object owner + */ +void +AlterObjectOwner_internal(Relation rel, Oid objectId, Oid new_ownerId) +{ + Oid classId = RelationGetRelid(rel); + AttrNumber Anum_oid = get_object_attnum_oid(classId); + AttrNumber Anum_owner = get_object_attnum_owner(classId); + AttrNumber Anum_namespace = get_object_attnum_namespace(classId); + AttrNumber Anum_acl = get_object_attnum_acl(classId); + AttrNumber Anum_name = get_object_attnum_name(classId); + HeapTuple oldtup; + Datum datum; + bool isnull; + Oid old_ownerId; + Oid namespaceId = InvalidOid; + + oldtup = get_catalog_object_by_oid(rel, Anum_oid, objectId); + if (oldtup == NULL) + elog(ERROR, "cache lookup failed for object %u of catalog \"%s\"", + objectId, RelationGetRelationName(rel)); + + datum = heap_getattr(oldtup, Anum_owner, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + old_ownerId = DatumGetObjectId(datum); + + if (Anum_namespace != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, Anum_namespace, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + namespaceId = DatumGetObjectId(datum); + } + + if (old_ownerId != new_ownerId) + { + AttrNumber nattrs; + HeapTuple newtup; + Datum *values; + 
bool *nulls; + bool *replaces; + + /* Superusers can bypass permission checks */ + if (!superuser()) + { + /* must be owner */ + if (!has_privs_of_role(GetUserId(), old_ownerId)) + { + char *objname; + char namebuf[NAMEDATALEN]; + + if (Anum_name != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, Anum_name, + RelationGetDescr(rel), &isnull); + Assert(!isnull); + objname = NameStr(*DatumGetName(datum)); + } + else + { + snprintf(namebuf, sizeof(namebuf), "%u", objectId); + objname = namebuf; + } + aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), + objname); + } + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), new_ownerId); + + /* New owner must have CREATE privilege on namespace */ + if (OidIsValid(namespaceId)) + { + AclResult aclresult; + + aclresult = pg_namespace_aclcheck(namespaceId, new_ownerId, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + } + + /* Build a modified tuple */ + nattrs = RelationGetNumberOfAttributes(rel); + values = palloc0(nattrs * sizeof(Datum)); + nulls = palloc0(nattrs * sizeof(bool)); + replaces = palloc0(nattrs * sizeof(bool)); + values[Anum_owner - 1] = ObjectIdGetDatum(new_ownerId); + replaces[Anum_owner - 1] = true; + + /* + * Determine the modified ACL for the new owner. This is only + * necessary when the ACL is non-null. 
+ */ + if (Anum_acl != InvalidAttrNumber) + { + datum = heap_getattr(oldtup, + Anum_acl, RelationGetDescr(rel), &isnull); + if (!isnull) + { + Acl *newAcl; + + newAcl = aclnewowner(DatumGetAclP(datum), + old_ownerId, new_ownerId); + values[Anum_acl - 1] = PointerGetDatum(newAcl); + replaces[Anum_acl - 1] = true; + } + } + + newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + values, nulls, replaces); + + /* Perform actual update */ + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + /* Update owner dependency reference */ + if (classId == LargeObjectMetadataRelationId) + classId = LargeObjectRelationId; + changeDependencyOnOwner(classId, objectId, new_ownerId); + + /* Release memory */ + pfree(values); + pfree(nulls); + pfree(replaces); + } + + InvokeObjectPostAlterHook(classId, objectId, 0); +} diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c new file mode 100644 index 0000000..914cfa4 --- /dev/null +++ b/src/backend/commands/amcmds.c @@ -0,0 +1,269 @@ +/*------------------------------------------------------------------------- + * + * amcmds.c + * Routines for SQL commands that manipulate access methods. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/amcmds.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +static Oid lookup_am_handler_func(List *handler_name, char amtype); +static const char *get_am_type_string(char amtype); + + +/* + * CreateAccessMethod + * Registers a new access method. + */ +ObjectAddress +CreateAccessMethod(CreateAmStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + ObjectAddress referenced; + Oid amoid; + Oid amhandler; + bool nulls[Natts_pg_am]; + Datum values[Natts_pg_am]; + HeapTuple tup; + + rel = table_open(AccessMethodRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create access method \"%s\"", + stmt->amname), + errhint("Must be superuser to create an access method."))); + + /* Check if name is used */ + amoid = GetSysCacheOid1(AMNAME, Anum_pg_am_oid, + CStringGetDatum(stmt->amname)); + if (OidIsValid(amoid)) + { + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("access method \"%s\" already exists", + stmt->amname))); + } + + /* + * Get the handler function oid, verifying the AM type while at it. + */ + amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype); + + /* + * Insert tuple into pg_am. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + amoid = GetNewOidWithIndex(rel, AmOidIndexId, Anum_pg_am_oid); + values[Anum_pg_am_oid - 1] = ObjectIdGetDatum(amoid); + values[Anum_pg_am_amname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->amname)); + values[Anum_pg_am_amhandler - 1] = ObjectIdGetDatum(amhandler); + values[Anum_pg_am_amtype - 1] = CharGetDatum(stmt->amtype); + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + CatalogTupleInsert(rel, tup); + heap_freetuple(tup); + + myself.classId = AccessMethodRelationId; + myself.objectId = amoid; + myself.objectSubId = 0; + + /* Record dependency on handler function */ + referenced.classId = ProcedureRelationId; + referenced.objectId = amhandler; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + recordDependencyOnCurrentExtension(&myself, false); + + InvokeObjectPostCreateHook(AccessMethodRelationId, amoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * get_am_type_oid + * Worker for various get_am_*_oid variants + * + * If missing_ok is false, throw an error if access method not found. If + * true, just return InvalidOid. + * + * If amtype is not '\0', an error is raised if the AM found is not of the + * given type. 
+ */ +static Oid +get_am_type_oid(const char *amname, char amtype, bool missing_ok) +{ + HeapTuple tup; + Oid oid = InvalidOid; + + tup = SearchSysCache1(AMNAME, CStringGetDatum(amname)); + if (HeapTupleIsValid(tup)) + { + Form_pg_am amform = (Form_pg_am) GETSTRUCT(tup); + + if (amtype != '\0' && + amform->amtype != amtype) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("access method \"%s\" is not of type %s", + NameStr(amform->amname), + get_am_type_string(amtype)))); + + oid = amform->oid; + ReleaseSysCache(tup); + } + + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", amname))); + return oid; +} + +/* + * get_index_am_oid - given an access method name, look up its OID + * and verify it corresponds to an index AM. + */ +Oid +get_index_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, AMTYPE_INDEX, missing_ok); +} + +/* + * get_table_am_oid - given an access method name, look up its OID + * and verify it corresponds to an table AM. + */ +Oid +get_table_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, AMTYPE_TABLE, missing_ok); +} + +/* + * get_am_oid - given an access method name, look up its OID. + * The type is not checked. + */ +Oid +get_am_oid(const char *amname, bool missing_ok) +{ + return get_am_type_oid(amname, '\0', missing_ok); +} + +/* + * get_am_name - given an access method OID, look up its name. + */ +char * +get_am_name(Oid amOid) +{ + HeapTuple tup; + char *result = NULL; + + tup = SearchSysCache1(AMOID, ObjectIdGetDatum(amOid)); + if (HeapTupleIsValid(tup)) + { + Form_pg_am amform = (Form_pg_am) GETSTRUCT(tup); + + result = pstrdup(NameStr(amform->amname)); + ReleaseSysCache(tup); + } + return result; +} + +/* + * Convert single-character access method type into string for error reporting. 
+ */ +static const char * +get_am_type_string(char amtype) +{ + switch (amtype) + { + case AMTYPE_INDEX: + return "INDEX"; + case AMTYPE_TABLE: + return "TABLE"; + default: + /* shouldn't happen */ + elog(ERROR, "invalid access method type '%c'", amtype); + return NULL; /* keep compiler quiet */ + } +} + +/* + * Convert a handler function name to an Oid. If the return type of the + * function doesn't match the given AM type, an error is raised. + * + * This function either return valid function Oid or throw an error. + */ +static Oid +lookup_am_handler_func(List *handler_name, char amtype) +{ + Oid handlerOid; + Oid funcargtypes[1] = {INTERNALOID}; + Oid expectedType = InvalidOid; + + if (handler_name == NIL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("handler function is not specified"))); + + /* handlers have one argument of type internal */ + handlerOid = LookupFuncName(handler_name, 1, funcargtypes, false); + + /* check that handler has the correct return type */ + switch (amtype) + { + case AMTYPE_INDEX: + expectedType = INDEX_AM_HANDLEROID; + break; + case AMTYPE_TABLE: + expectedType = TABLE_AM_HANDLEROID; + break; + default: + elog(ERROR, "unrecognized access method type \"%c\"", amtype); + } + + if (get_func_rettype(handlerOid) != expectedType) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + get_func_name(handlerOid), + format_type_extended(expectedType, -1, 0)))); + + return handlerOid; +} diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index 0000000..01efdd5 --- /dev/null +++ b/src/backend/commands/analyze.c @@ -0,0 +1,3076 @@ +/*------------------------------------------------------------------------- + * + * analyze.c + * the Postgres statistics generator + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * 
IDENTIFICATION + * src/backend/commands/analyze.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/detoast.h" +#include "access/genam.h" +#include "access/multixact.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "access/tupconvert.h" +#include "access/visibilitymap.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "commands/dbcommands.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/vacuum.h" +#include "common/pg_prng.h" +#include "executor/executor.h" +#include "foreign/fdwapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include "utils/attoptcache.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/sampling.h" +#include "utils/sortsupport.h" +#include "utils/spccache.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" + + +/* Per-index data for ANALYZE */ +typedef struct AnlIndexData +{ + IndexInfo *indexInfo; /* BuildIndexInfo result */ + double tupleFract; /* fraction of rows for partial index */ + VacAttrStats **vacattrstats; /* index attrs to analyze */ + int attr_cnt; +} 
AnlIndexData; + + +/* Default statistics target (GUC parameter) */ +int default_statistics_target = 100; + +/* A few variables that don't seem worth passing around as parameters */ +static MemoryContext anl_context = NULL; +static BufferAccessStrategy vac_strategy; + + +static void do_analyze_rel(Relation onerel, + VacuumParams *params, List *va_cols, + AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, + bool inh, bool in_outer_xact, int elevel); +static void compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context); +static VacAttrStats *examine_attribute(Relation onerel, int attnum, + Node *index_expr); +static int acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); +static int compare_rows(const void *a, const void *b, void *arg); +static int acquire_inherited_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); +static void update_attstats(Oid relid, bool inh, + int natts, VacAttrStats **vacattrstats); +static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); +static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); + + +/* + * analyze_rel() -- analyze one relation + * + * relid identifies the relation to analyze. If relation is supplied, use + * the name therein for reporting any failure to open/lock the rel; do not + * use it once we've successfully opened the rel, since it might be stale. 
+ */ +void +analyze_rel(Oid relid, RangeVar *relation, + VacuumParams *params, List *va_cols, bool in_outer_xact, + BufferAccessStrategy bstrategy) +{ + Relation onerel; + int elevel; + AcquireSampleRowsFunc acquirefunc = NULL; + BlockNumber relpages = 0; + + /* Select logging level */ + if (params->options & VACOPT_VERBOSE) + elevel = INFO; + else + elevel = DEBUG2; + + /* Set up static variables */ + vac_strategy = bstrategy; + + /* + * Check for user-requested abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Open the relation, getting ShareUpdateExclusiveLock to ensure that two + * ANALYZEs don't run on it concurrently. (This also locks out a + * concurrent VACUUM, which doesn't matter much at the moment but might + * matter if we ever try to accumulate stats on dead tuples.) If the rel + * has been dropped since we last saw it, we don't need to process it. + * + * Make sure to generate only logs for ANALYZE in this case. + */ + onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM), + params->log_min_duration >= 0, + ShareUpdateExclusiveLock); + + /* leave if relation could not be opened or locked */ + if (!onerel) + return; + + /* + * Check if relation needs to be skipped based on ownership. This check + * happens also when building the relation list to analyze for a manual + * operation, and needs to be done additionally here as ANALYZE could + * happen across multiple transactions where relation ownership could have + * changed in-between. Make sure to generate only logs for ANALYZE in + * this case. + */ + if (!vacuum_is_relation_owner(RelationGetRelid(onerel), + onerel->rd_rel, + params->options & VACOPT_ANALYZE)) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * Silently ignore tables that are temp tables of other backends --- + * trying to analyze these is rather pointless, since their contents are + * probably not up-to-date on disk. 
(We don't throw a warning here; it + * would just lead to chatter during a database-wide ANALYZE.) + */ + if (RELATION_IS_OTHER_TEMP(onerel)) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * We can ANALYZE any table except pg_statistic. See update_attstats + */ + if (RelationGetRelid(onerel) == StatisticRelationId) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * Check that it's of an analyzable relkind, and set up appropriately. + */ + if (onerel->rd_rel->relkind == RELKIND_RELATION || + onerel->rd_rel->relkind == RELKIND_MATVIEW) + { + /* Regular table, so we'll use the regular row acquisition function */ + acquirefunc = acquire_sample_rows; + /* Also get regular table's size */ + relpages = RelationGetNumberOfBlocks(onerel); + } + else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + /* + * For a foreign table, call the FDW's hook function to see whether it + * supports analysis. + */ + FdwRoutine *fdwroutine; + bool ok = false; + + fdwroutine = GetFdwRoutineForRelation(onerel, false); + + if (fdwroutine->AnalyzeForeignTable != NULL) + ok = fdwroutine->AnalyzeForeignTable(onerel, + &acquirefunc, + &relpages); + + if (!ok) + { + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot analyze this foreign table", + RelationGetRelationName(onerel)))); + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + } + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * For partitioned tables, we want to do the recursive ANALYZE below. + */ + } + else + { + /* No need for a WARNING if we already complained during VACUUM */ + if (!(params->options & VACOPT_VACUUM)) + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables", + RelationGetRelationName(onerel)))); + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * OK, let's do it. First, initialize progress reporting. 
+ */ + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); + + /* + * Do the normal non-recursive ANALYZE. We can skip this for partitioned + * tables, which don't contain any rows. + */ + if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + do_analyze_rel(onerel, params, va_cols, acquirefunc, + relpages, false, in_outer_xact, elevel); + + /* + * If there are child tables, do recursive ANALYZE. + */ + if (onerel->rd_rel->relhassubclass) + do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages, + true, in_outer_xact, elevel); + + /* + * Close source relation now, but keep lock so that no one deletes it + * before we commit. (If someone did, they'd fail to clean up the entries + * we made in pg_statistic. Also, releasing the lock before commit would + * expose us to concurrent-update failures in update_attstats.) + */ + relation_close(onerel, NoLock); + + pgstat_progress_end_command(); +} + +/* + * do_analyze_rel() -- analyze one relation, recursively or not + * + * Note that "acquirefunc" is only relevant for the non-inherited case. + * For the inherited case, acquire_inherited_sample_rows() determines the + * appropriate acquirefunc for each child table. 
+ */ +static void +do_analyze_rel(Relation onerel, VacuumParams *params, + List *va_cols, AcquireSampleRowsFunc acquirefunc, + BlockNumber relpages, bool inh, bool in_outer_xact, + int elevel) +{ + int attr_cnt, + tcnt, + i, + ind; + Relation *Irel; + int nindexes; + bool hasindex; + VacAttrStats **vacattrstats; + AnlIndexData *indexdata; + int targrows, + numrows, + minrows; + double totalrows, + totaldeadrows; + HeapTuple *rows; + PGRUsage ru0; + TimestampTz starttime = 0; + MemoryContext caller_context; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + int64 AnalyzePageHit = VacuumPageHit; + int64 AnalyzePageMiss = VacuumPageMiss; + int64 AnalyzePageDirty = VacuumPageDirty; + PgStat_Counter startreadtime = 0; + PgStat_Counter startwritetime = 0; + + if (inh) + ereport(elevel, + (errmsg("analyzing \"%s.%s\" inheritance tree", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + else + ereport(elevel, + (errmsg("analyzing \"%s.%s\"", + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)))); + + /* + * Set up a working context so that we can easily free whatever junk gets + * created. + */ + anl_context = AllocSetContextCreate(CurrentMemoryContext, + "Analyze", + ALLOCSET_DEFAULT_SIZES); + caller_context = MemoryContextSwitchTo(anl_context); + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. 
+ */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(onerel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* measure elapsed time iff autovacuum logging requires it */ + if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) + { + if (track_io_timing) + { + startreadtime = pgStatBlockReadTime; + startwritetime = pgStatBlockWriteTime; + } + + pg_rusage_init(&ru0); + if (params->log_min_duration >= 0) + starttime = GetCurrentTimestamp(); + } + + /* + * Determine which columns to analyze + * + * Note that system attributes are never analyzed, so we just reject them + * at the lookup stage. We also reject duplicate column mentions. (We + * could alternatively ignore duplicates, but analyzing a column twice + * won't work; we'd end up making a conflicting update in pg_statistic.) + */ + if (va_cols != NIL) + { + Bitmapset *unique_cols = NULL; + ListCell *le; + + vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) * + sizeof(VacAttrStats *)); + tcnt = 0; + foreach(le, va_cols) + { + char *col = strVal(lfirst(le)); + + i = attnameAttNum(onerel, col, false); + if (i == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + col, RelationGetRelationName(onerel)))); + if (bms_is_member(i, unique_cols)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" of relation \"%s\" appears more than once", + col, RelationGetRelationName(onerel)))); + unique_cols = bms_add_member(unique_cols, i); + + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); + if (vacattrstats[tcnt] != NULL) + tcnt++; + } + attr_cnt = tcnt; + } + else + { + attr_cnt = onerel->rd_att->natts; + vacattrstats = (VacAttrStats **) + palloc(attr_cnt * sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 1; i <= attr_cnt; i++) + { + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); + 
if (vacattrstats[tcnt] != NULL) + tcnt++; + } + attr_cnt = tcnt; + } + + /* + * Open all indexes of the relation, and see if there are any analyzable + * columns in the indexes. We do not analyze index columns if there was + * an explicit column list in the ANALYZE command, however. + * + * If we are doing a recursive scan, we don't want to touch the parent's + * indexes at all. If we're processing a partitioned table, we need to + * know if there are any indexes, but we don't want to process them. + */ + if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + List *idxs = RelationGetIndexList(onerel); + + Irel = NULL; + nindexes = 0; + hasindex = idxs != NIL; + list_free(idxs); + } + else if (!inh) + { + vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + hasindex = nindexes > 0; + } + else + { + Irel = NULL; + nindexes = 0; + hasindex = false; + } + indexdata = NULL; + if (nindexes > 0) + { + indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData)); + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo; + + thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]); + thisdata->tupleFract = 1.0; /* fix later if partial */ + if (indexInfo->ii_Expressions != NIL && va_cols == NIL) + { + ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); + + thisdata->vacattrstats = (VacAttrStats **) + palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + int keycol = indexInfo->ii_IndexAttrNumbers[i]; + + if (keycol == 0) + { + /* Found an index expression */ + Node *indexkey; + + if (indexpr_item == NULL) /* shouldn't happen */ + elog(ERROR, "too few entries in indexprs list"); + indexkey = (Node *) lfirst(indexpr_item); + indexpr_item = lnext(indexInfo->ii_Expressions, + indexpr_item); + thisdata->vacattrstats[tcnt] = + examine_attribute(Irel[ind], i + 1, indexkey); + if (thisdata->vacattrstats[tcnt] != 
NULL) + tcnt++; + } + } + thisdata->attr_cnt = tcnt; + } + } + } + + /* + * Determine how many rows we need to sample, using the worst case from + * all analyzable columns. We use a lower bound of 100 rows to avoid + * possible overflow in Vitter's algorithm. (Note: that will also be the + * target in the corner case where there are no analyzable columns.) + */ + targrows = 100; + for (i = 0; i < attr_cnt; i++) + { + if (targrows < vacattrstats[i]->minrows) + targrows = vacattrstats[i]->minrows; + } + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + for (i = 0; i < thisdata->attr_cnt; i++) + { + if (targrows < thisdata->vacattrstats[i]->minrows) + targrows = thisdata->vacattrstats[i]->minrows; + } + } + + /* + * Look at extended statistics objects too, as those may define custom + * statistics target. So we may need to sample more rows and then build + * the statistics with enough detail. + */ + minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats); + + if (targrows < minrows) + targrows = minrows; + + /* + * Acquire the sample rows + */ + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH : + PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS); + if (inh) + numrows = acquire_inherited_sample_rows(onerel, elevel, + rows, targrows, + &totalrows, &totaldeadrows); + else + numrows = (*acquirefunc) (onerel, elevel, + rows, targrows, + &totalrows, &totaldeadrows); + + /* + * Compute the statistics. Temporary results during the calculations for + * each column are stored in a child context. The calc routines are + * responsible to make sure that whatever they store into the VacAttrStats + * structure is allocated in anl_context. 
+ */ + if (numrows > 0) + { + MemoryContext col_context, + old_context; + + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); + + col_context = AllocSetContextCreate(anl_context, + "Analyze Column", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(col_context); + + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = vacattrstats[i]; + AttributeOpts *aopt; + + stats->rows = rows; + stats->tupDesc = onerel->rd_att; + stats->compute_stats(stats, + std_fetch_func, + numrows, + totalrows); + + /* + * If the appropriate flavor of the n_distinct option is + * specified, override with the corresponding value. + */ + aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum); + if (aopt != NULL) + { + float8 n_distinct; + + n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct; + if (n_distinct != 0.0) + stats->stadistinct = n_distinct; + } + + MemoryContextResetAndDeleteChildren(col_context); + } + + if (nindexes > 0) + compute_index_stats(onerel, totalrows, + indexdata, nindexes, + rows, numrows, + col_context); + + MemoryContextSwitchTo(old_context); + MemoryContextDelete(col_context); + + /* + * Emit the completed stats rows into pg_statistic, replacing any + * previous statistics for the target columns. (If there are stats in + * pg_statistic for columns we didn't process, we leave them alone.) + */ + update_attstats(RelationGetRelid(onerel), inh, + attr_cnt, vacattrstats); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + update_attstats(RelationGetRelid(Irel[ind]), false, + thisdata->attr_cnt, thisdata->vacattrstats); + } + + /* Build extended statistics (if there are any). */ + BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows, + attr_cnt, vacattrstats); + } + + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE); + + /* + * Update pages/tuples stats in pg_class ... 
but not if we're doing + * inherited stats. + * + * We assume that VACUUM hasn't set pg_class.reltuples already, even + * during a VACUUM ANALYZE. Although VACUUM often updates pg_class, + * exceptions exist. A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will + * never update pg_class entries for index relations. It's also possible + * that an individual index's pg_class entry won't be updated during + * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine. + */ + if (!inh) + { + BlockNumber relallvisible; + + visibilitymap_count(onerel, &relallvisible, NULL); + + /* Update pg_class for table relation */ + vac_update_relstats(onerel, + relpages, + totalrows, + relallvisible, + hasindex, + InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + + /* Same for indexes */ + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + double totalindexrows; + + totalindexrows = ceil(thisdata->tupleFract * totalrows); + vac_update_relstats(Irel[ind], + RelationGetNumberOfBlocks(Irel[ind]), + totalindexrows, + 0, + false, + InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + } + } + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * Partitioned tables don't have storage, so we don't set any fields + * in their pg_class entries except for reltuples and relhasindex. + */ + vac_update_relstats(onerel, -1, totalrows, + 0, hasindex, InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + } + + /* + * Now report ANALYZE to the cumulative stats system. For regular tables, + * we do it only if not doing inherited stats. For partitioned tables, we + * only do it for inherited stats. (We're never called for not-inherited + * stats on partitioned tables anyway.) + * + * Reset the changes_since_analyze counter only if we analyzed all + * columns; otherwise, there is still work for auto-analyze to do. 
+ */ + if (!inh) + pgstat_report_analyze(onerel, totalrows, totaldeadrows, + (va_cols == NIL)); + else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL)); + + /* + * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup. + * + * Note that most index AMs perform a no-op as a matter of policy for + * amvacuumcleanup() when called in ANALYZE-only mode. The only exception + * among core index AMs is GIN/ginvacuumcleanup(). + */ + if (!(params->options & VACOPT_VACUUM)) + { + for (ind = 0; ind < nindexes; ind++) + { + IndexBulkDeleteResult *stats; + IndexVacuumInfo ivinfo; + + ivinfo.index = Irel[ind]; + ivinfo.analyze_only = true; + ivinfo.estimated_count = true; + ivinfo.message_level = elevel; + ivinfo.num_heap_tuples = onerel->rd_rel->reltuples; + ivinfo.strategy = vac_strategy; + + stats = index_vacuum_cleanup(&ivinfo, NULL); + + if (stats) + pfree(stats); + } + } + + /* Done with indexes */ + vac_close_indexes(nindexes, Irel, NoLock); + + /* Log the action if appropriate */ + if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) + { + TimestampTz endtime = GetCurrentTimestamp(); + + if (params->log_min_duration == 0 || + TimestampDifferenceExceeds(starttime, endtime, + params->log_min_duration)) + { + long delay_in_ms; + double read_rate = 0; + double write_rate = 0; + StringInfoData buf; + + /* + * Calculate the difference in the Page Hit/Miss/Dirty that + * happened as part of the analyze by subtracting out the + * pre-analyze values which we saved above. + */ + AnalyzePageHit = VacuumPageHit - AnalyzePageHit; + AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss; + AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty; + + /* + * We do not expect an analyze to take > 25 days and it simplifies + * things a bit to use TimestampDifferenceMilliseconds. 
+ */ + delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime); + + /* + * Note that we are reporting these read/write rates in the same + * manner as VACUUM does, which means that while the 'average read + * rate' here actually corresponds to page misses and resulting + * reads which are also picked up by track_io_timing, if enabled, + * the 'average write rate' is actually talking about the rate of + * pages being dirtied, not being written out, so it's typical to + * have a non-zero 'avg write rate' while I/O timings only reports + * reads. + * + * It's not clear that an ANALYZE will ever result in + * FlushBuffer() being called, but we track and support reporting + * on I/O write time in case that changes as it's practically free + * to do so anyway. + */ + + if (delay_in_ms > 0) + { + read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) / + (delay_in_ms / 1000.0); + write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) / + (delay_in_ms / 1000.0); + } + + /* + * We split this up so we don't emit empty I/O timing values when + * track_io_timing isn't enabled. 
+ */ + + initStringInfo(&buf); + appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"), + get_database_name(MyDatabaseId), + get_namespace_name(RelationGetNamespace(onerel)), + RelationGetRelationName(onerel)); + if (track_io_timing) + { + double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000; + double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000; + + appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"), + read_ms, write_ms); + } + appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"), + read_rate, write_rate); + appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"), + (long long) AnalyzePageHit, + (long long) AnalyzePageMiss, + (long long) AnalyzePageDirty); + appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0)); + + ereport(LOG, + (errmsg_internal("%s", buf.data))); + + pfree(buf.data); + } + } + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* Restore current context and release memory */ + MemoryContextSwitchTo(caller_context); + MemoryContextDelete(anl_context); + anl_context = NULL; +} + +/* + * Compute statistics about indexes of a relation + */ +static void +compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context) +{ + MemoryContext ind_context, + old_context; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + int ind, + i; + + ind_context = AllocSetContextCreate(anl_context, + "Analyze Index", + ALLOCSET_DEFAULT_SIZES); + old_context = MemoryContextSwitchTo(ind_context); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo = thisdata->indexInfo; + int attr_cnt = thisdata->attr_cnt; + 
TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + ExprState *predicate; + Datum *exprvals; + bool *exprnulls; + int numindexrows, + tcnt, + rowno; + double totalindexrows; + + /* Ignore index if no columns to analyze and not partial */ + if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL) + continue; + + /* + * Need an EState for evaluation of index expressions and + * partial-index predicates. Create it in the per-index context to be + * sure it gets cleaned up at the bottom of the loop. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + /* Need a slot to hold the current heap tuple, too */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate. */ + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + /* Compute and save index expression values */ + exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum)); + exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool)); + numindexrows = 0; + tcnt = 0; + for (rowno = 0; rowno < numrows; rowno++) + { + HeapTuple heapTuple = rows[rowno]; + + vacuum_delay_point(); + + /* + * Reset the per-tuple context each time, to reclaim any cruft + * left behind by evaluating the predicate or index expressions. + */ + ResetExprContext(econtext); + + /* Set up for predicate or expression evaluation */ + ExecStoreHeapTuple(heapTuple, slot, false); + + /* If index is partial, check predicate */ + if (predicate != NULL) + { + if (!ExecQual(predicate, econtext)) + continue; + } + numindexrows++; + + if (attr_cnt > 0) + { + /* + * Evaluate the index row to compute expression values. We + * could do this by hand, but FormIndexDatum is convenient. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * Save just the columns we care about. 
We copy the values + * into ind_context from the estate's per-tuple context. + */ + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + int attnum = stats->attr->attnum; + + if (isnull[attnum - 1]) + { + exprvals[tcnt] = (Datum) 0; + exprnulls[tcnt] = true; + } + else + { + exprvals[tcnt] = datumCopy(values[attnum - 1], + stats->attrtype->typbyval, + stats->attrtype->typlen); + exprnulls[tcnt] = false; + } + tcnt++; + } + } + } + + /* + * Having counted the number of rows that pass the predicate in the + * sample, we can estimate the total number of rows in the index. + */ + thisdata->tupleFract = (double) numindexrows / (double) numrows; + totalindexrows = ceil(thisdata->tupleFract * totalrows); + + /* + * Now we can compute the statistics for the expression columns. + */ + if (numindexrows > 0) + { + MemoryContextSwitchTo(col_context); + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + + stats->exprvals = exprvals + i; + stats->exprnulls = exprnulls + i; + stats->rowstride = attr_cnt; + stats->compute_stats(stats, + ind_fetch_func, + numindexrows, + totalindexrows); + + MemoryContextResetAndDeleteChildren(col_context); + } + } + + /* And clean up */ + MemoryContextSwitchTo(ind_context); + + ExecDropSingleTupleTableSlot(slot); + FreeExecutorState(estate); + MemoryContextResetAndDeleteChildren(ind_context); + } + + MemoryContextSwitchTo(old_context); + MemoryContextDelete(ind_context); +} + +/* + * examine_attribute -- pre-analysis of a single column + * + * Determine whether the column is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. + * + * If index_expr isn't NULL, then we're trying to analyze an expression index, + * and index_expr is the expression tree representing the column's data. 
+ */ +static VacAttrStats * +examine_attribute(Relation onerel, int attnum, Node *index_expr) +{ + Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); + HeapTuple typtuple; + VacAttrStats *stats; + int i; + bool ok; + + /* Never analyze dropped columns */ + if (attr->attisdropped) + return NULL; + + /* Don't analyze column if user has specified not to */ + if (attr->attstattarget == 0) + return NULL; + + /* + * Create the VacAttrStats struct. Note that we only have a copy of the + * fixed fields of the pg_attribute tuple. + */ + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); + memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); + + /* + * When analyzing an expression index, believe the expression tree's type + * not the column datatype --- the latter might be the opckeytype storage + * type of the opclass, which is not interesting for our purposes. (Note: + * if we did anything with non-expression index columns, we'd need to + * figure out where to get the correct type info from, but for now that's + * not a problem.) It's not clear whether anyone will care about the + * typmod, but we store that too just in case. + */ + if (index_expr) + { + stats->attrtypid = exprType(index_expr); + stats->attrtypmod = exprTypmod(index_expr); + + /* + * If a collation has been specified for the index column, use that in + * preference to anything else; but if not, fall back to whatever we + * can get from the expression. 
+ */ + if (OidIsValid(onerel->rd_indcollation[attnum - 1])) + stats->attrcollid = onerel->rd_indcollation[attnum - 1]; + else + stats->attrcollid = exprCollation(index_expr); + } + else + { + stats->attrtypid = attr->atttypid; + stats->attrtypmod = attr->atttypmod; + stats->attrcollid = attr->attcollation; + } + + typtuple = SearchSysCacheCopy1(TYPEOID, + ObjectIdGetDatum(stats->attrtypid)); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); + stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + stats->anl_context = anl_context; + stats->tupattnum = attnum; + + /* + * The fields describing the stats->stavalues[n] element types default to + * the type of the data being analyzed, but the type-specific typanalyze + * function can change them if it wants to store something else. + */ + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + + /* + * Call the type-specific typanalyze function. If none is specified, use + * std_typanalyze(). + */ + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + + if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) + { + heap_freetuple(typtuple); + pfree(stats->attr); + pfree(stats); + return NULL; + } + + return stats; +} + +/* + * acquire_sample_rows -- acquire a random sample of rows from the table + * + * Selected rows are returned in the caller-allocated array rows[], which + * must have at least targrows entries. + * The actual number of rows selected is returned as the function result. + * We also estimate the total numbers of live and dead rows in the table, + * and return them into *totalrows and *totaldeadrows, respectively. 
+ * + * The returned list of tuples is in order by physical position in the table. + * (We will rely on this later to derive correlation estimates.) + * + * As of May 2004 we use a new two-stage method: Stage one selects up + * to targrows random blocks (or all blocks, if there aren't so many). + * Stage two scans these blocks and uses the Vitter algorithm to create + * a random sample of targrows rows (or less, if there are less in the + * sample of blocks). The two stages are executed simultaneously: each + * block is processed as soon as stage one returns its number and while + * the rows are read stage two controls which ones are to be inserted + * into the sample. + * + * Although every row has an equal chance of ending up in the final + * sample, this sampling method is not perfect: not every possible + * sample has an equal chance of being selected. For large relations + * the number of different blocks represented by the sample tends to be + * too small. We can live with that for now. Improvements are welcome. + * + * An important property of this sampling method is that because we do + * look at a statistically unbiased set of blocks, we should get + * unbiased estimates of the average numbers of live and dead rows per + * block. The previous sampling method put too much credence in the row + * density near the start of the table. 
+ */ +static int +acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows) +{ + int numrows = 0; /* # rows now in reservoir */ + double samplerows = 0; /* total # rows collected */ + double liverows = 0; /* # live rows seen */ + double deadrows = 0; /* # dead rows seen */ + double rowstoskip = -1; /* -1 means not set yet */ + uint32 randseed; /* Seed for block sampler(s) */ + BlockNumber totalblocks; + TransactionId OldestXmin; + BlockSamplerData bs; + ReservoirStateData rstate; + TupleTableSlot *slot; + TableScanDesc scan; + BlockNumber nblocks; + BlockNumber blksdone = 0; +#ifdef USE_PREFETCH + int prefetch_maximum = 0; /* blocks to prefetch if enabled */ + BlockSamplerData prefetch_bs; +#endif + + Assert(targrows > 0); + + totalblocks = RelationGetNumberOfBlocks(onerel); + + /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ + OldestXmin = GetOldestNonRemovableTransactionId(onerel); + + /* Prepare for sampling block numbers */ + randseed = pg_prng_uint32(&pg_global_prng_state); + nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed); + +#ifdef USE_PREFETCH + prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace); + /* Create another BlockSampler, using the same seed, for prefetching */ + if (prefetch_maximum) + (void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed); +#endif + + /* Report sampling block numbers */ + pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL, + nblocks); + + /* Prepare for sampling rows */ + reservoir_init_selection_state(&rstate, targrows); + + scan = table_beginscan_analyze(onerel); + slot = table_slot_create(onerel, NULL); + +#ifdef USE_PREFETCH + + /* + * If we are doing prefetching, then go ahead and tell the kernel about + * the first set of pages we are going to want. 
This also moves our + * iterator out ahead of the main one being used, where we will keep it so + * that we're always pre-fetching out prefetch_maximum number of blocks + * ahead. + */ + if (prefetch_maximum) + { + for (int i = 0; i < prefetch_maximum; i++) + { + BlockNumber prefetch_block; + + if (!BlockSampler_HasMore(&prefetch_bs)) + break; + + prefetch_block = BlockSampler_Next(&prefetch_bs); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block); + } + } +#endif + + /* Outer loop over blocks to sample */ + while (BlockSampler_HasMore(&bs)) + { + bool block_accepted; + BlockNumber targblock = BlockSampler_Next(&bs); +#ifdef USE_PREFETCH + BlockNumber prefetch_targblock = InvalidBlockNumber; + + /* + * Make sure that every time the main BlockSampler is moved forward + * that our prefetch BlockSampler also gets moved forward, so that we + * always stay out ahead. + */ + if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs)) + prefetch_targblock = BlockSampler_Next(&prefetch_bs); +#endif + + vacuum_delay_point(); + + block_accepted = table_scan_analyze_next_block(scan, targblock, vac_strategy); + +#ifdef USE_PREFETCH + + /* + * When pre-fetching, after we get a block, tell the kernel about the + * next one we will want, if there's any left. + * + * We want to do this even if the table_scan_analyze_next_block() call + * above decides against analyzing the block it picked. + */ + if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber) + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock); +#endif + + /* + * Don't analyze if table_scan_analyze_next_block() indicated this + * block is unsuitable for analyzing. + */ + if (!block_accepted) + continue; + + while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) + { + /* + * The first targrows sample rows are simply copied into the + * reservoir. Then we start replacing tuples in the sample until + * we reach the end of the relation. 
This algorithm is from Jeff + * Vitter's paper (see full citation in utils/misc/sampling.c). It + * works by repeatedly computing the number of tuples to skip + * before selecting a tuple, which replaces a randomly chosen + * element of the reservoir (current set of tuples). At all times + * the reservoir is a true random sample of the tuples we've + * passed over so far, so when we fall off the end of the relation + * we're done. + */ + if (numrows < targrows) + rows[numrows++] = ExecCopySlotHeapTuple(slot); + else + { + /* + * t in Vitter's paper is the number of records already + * processed. If we need to compute a new S value, we must + * use the not-yet-incremented value of samplerows as t. + */ + if (rowstoskip < 0) + rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + + if (rowstoskip <= 0) + { + /* + * Found a suitable tuple, so save it, replacing one old + * tuple at random + */ + int k = (int) (targrows * sampler_random_fract(&rstate.randstate)); + + Assert(k >= 0 && k < targrows); + heap_freetuple(rows[k]); + rows[k] = ExecCopySlotHeapTuple(slot); + } + + rowstoskip -= 1; + } + + samplerows += 1; + } + + pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE, + ++blksdone); + } + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + + /* + * If we didn't find as many tuples as we wanted then we're done. No sort + * is needed, since they're already in order. + * + * Otherwise we need to sort the collected tuples by position + * (itempointer). It's not worth worrying about corner cases where the + * tuples are already sorted. + */ + if (numrows == targrows) + qsort_interruptible((void *) rows, numrows, sizeof(HeapTuple), + compare_rows, NULL); + + /* + * Estimate total numbers of live and dead rows in relation, extrapolating + * on the assumption that the average tuple density in pages we didn't + * scan is the same as in the pages we did scan. 
Since what we scanned is + * a random sample of the pages in the relation, this should be a good + * assumption. + */ + if (bs.m > 0) + { + *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); + *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); + } + else + { + *totalrows = 0.0; + *totaldeadrows = 0.0; + } + + /* + * Emit some interesting relation info + */ + ereport(elevel, + (errmsg("\"%s\": scanned %d of %u pages, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f estimated total rows", + RelationGetRelationName(onerel), + bs.m, totalblocks, + liverows, deadrows, + numrows, *totalrows))); + + return numrows; +} + +/* + * Comparator for sorting rows[] array + */ +static int +compare_rows(const void *a, const void *b, void *arg) +{ + HeapTuple ha = *(const HeapTuple *) a; + HeapTuple hb = *(const HeapTuple *) b; + BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self); + OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self); + BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self); + OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self); + + if (ba < bb) + return -1; + if (ba > bb) + return 1; + if (oa < ob) + return -1; + if (oa > ob) + return 1; + return 0; +} + + +/* + * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree + * + * This has the same API as acquire_sample_rows, except that rows are + * collected from all inheritance children as well as the specified table. + * We fail and return zero if there are no inheritance children, or if all + * children are foreign tables that don't support ANALYZE. 
 */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
							  HeapTuple *rows, int targrows,
							  double *totalrows, double *totaldeadrows)
{
	List	   *tableOIDs;
	Relation   *rels;
	AcquireSampleRowsFunc *acquirefuncs;
	double	   *relblocks;
	double		totalblocks;
	int			numrows,
				nrels,
				i;
	ListCell   *lc;
	bool		has_child;

	/* Initialize output parameters to zero now, in case we exit early */
	*totalrows = 0;
	*totaldeadrows = 0;

	/*
	 * Find all members of inheritance set.  We only need AccessShareLock on
	 * the children.
	 */
	tableOIDs =
		find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

	/*
	 * Check that there's at least one descendant, else fail.  This could
	 * happen despite analyze_rel's relhassubclass check, if table once had a
	 * child but no longer does.  In that case, we can clear the
	 * relhassubclass field so as not to make the same mistake again later.
	 * (This is safe because we hold ShareUpdateExclusiveLock.)
	 */
	if (list_length(tableOIDs) < 2)
	{
		/* CCI because we already updated the pg_class row in this command */
		CommandCounterIncrement();
		SetRelationHasSubclass(RelationGetRelid(onerel), false);
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Identify acquirefuncs to use, and count blocks in all the relations.
	 * The result could overflow BlockNumber, so we use double arithmetic.
	 */
	rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
	acquirefuncs = (AcquireSampleRowsFunc *)
		palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
	relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
	totalblocks = 0;
	nrels = 0;
	has_child = false;
	foreach(lc, tableOIDs)
	{
		Oid			childOID = lfirst_oid(lc);
		Relation	childrel;
		AcquireSampleRowsFunc acquirefunc = NULL;
		BlockNumber relpages = 0;

		/* We already got the needed lock */
		childrel = table_open(childOID, NoLock);

		/* Ignore if temp table of another backend */
		if (RELATION_IS_OTHER_TEMP(childrel))
		{
			/* ... but release the lock on it */
			Assert(childrel != onerel);
			table_close(childrel, AccessShareLock);
			continue;
		}

		/* Check table type (MATVIEW can't happen, but might as well allow) */
		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
			childrel->rd_rel->relkind == RELKIND_MATVIEW)
		{
			/* Regular table, so use the regular row acquisition function */
			acquirefunc = acquire_sample_rows;
			relpages = RelationGetNumberOfBlocks(childrel);
		}
		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		{
			/*
			 * For a foreign table, call the FDW's hook function to see
			 * whether it supports analysis.  On success the FDW fills in
			 * acquirefunc and relpages for us.
			 */
			FdwRoutine *fdwroutine;
			bool		ok = false;

			fdwroutine = GetFdwRoutineForRelation(childrel, false);

			if (fdwroutine->AnalyzeForeignTable != NULL)
				ok = fdwroutine->AnalyzeForeignTable(childrel,
													 &acquirefunc,
													 &relpages);

			if (!ok)
			{
				/* ignore, but release the lock on it */
				Assert(childrel != onerel);
				table_close(childrel, AccessShareLock);
				continue;
			}
		}
		else
		{
			/*
			 * ignore, but release the lock on it.  don't try to unlock the
			 * passed-in relation
			 */
			Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
			if (childrel != onerel)
				table_close(childrel, AccessShareLock);
			else
				table_close(childrel, NoLock);
			continue;
		}

		/* OK, we'll process this child */
		has_child = true;
		rels[nrels] = childrel;
		acquirefuncs[nrels] = acquirefunc;
		relblocks[nrels] = (double) relpages;
		totalblocks += (double) relpages;
		nrels++;
	}

	/*
	 * If we don't have at least one child table to consider, fail.  If the
	 * relation is a partitioned table, it's not counted as a child table.
	 */
	if (!has_child)
	{
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Now sample rows from each relation, proportionally to its fraction of
	 * the total block count.  (This might be less than desirable if the child
	 * rels have radically different free-space percentages, but it's not
	 * clear that it's worth working harder.)
	 */
	pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
								 nrels);
	/* Each child's sample is appended consecutively into rows[] */
	numrows = 0;
	for (i = 0; i < nrels; i++)
	{
		Relation	childrel = rels[i];
		AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
		double		childblocks = relblocks[i];

		/*
		 * Report progress.  The sampling function will normally report blocks
		 * done/total, but we need to reset them to 0 here, so that they don't
		 * show stale values from the previous child until the sampling
		 * function reports again.
		 */
		{
			const int	progress_index[] = {
				PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
				PROGRESS_ANALYZE_BLOCKS_DONE,
				PROGRESS_ANALYZE_BLOCKS_TOTAL
			};
			const int64 progress_vals[] = {
				RelationGetRelid(childrel),
				0,
				0,
			};

			pgstat_progress_update_multi_param(3, progress_index, progress_vals);
		}

		if (childblocks > 0)
		{
			int			childtargrows;

			childtargrows = (int) rint(targrows * childblocks / totalblocks);
			/* Make sure we don't overrun due to roundoff error */
			childtargrows = Min(childtargrows, targrows - numrows);
			if (childtargrows > 0)
			{
				int			childrows;
				double		trows,
							tdrows;

				/* Fetch a random sample of the child's rows */
				childrows = (*acquirefunc) (childrel, elevel,
											rows + numrows, childtargrows,
											&trows, &tdrows);

				/* We may need to convert from child's rowtype to parent's */
				if (childrows > 0 &&
					!equalTupleDescs(RelationGetDescr(childrel),
									 RelationGetDescr(onerel)))
				{
					TupleConversionMap *map;

					map = convert_tuples_by_name(RelationGetDescr(childrel),
												 RelationGetDescr(onerel));
					if (map != NULL)
					{
						int			j;

						for (j = 0; j < childrows; j++)
						{
							HeapTuple	newtup;

							newtup = execute_attr_map_tuple(rows[numrows + j], map);
							heap_freetuple(rows[numrows + j]);
							rows[numrows + j] = newtup;
						}
						free_conversion_map(map);
					}
				}

				/* And add to counts */
				numrows += childrows;
				*totalrows += trows;
				*totaldeadrows += tdrows;
			}
		}

		/*
		 * Note: we cannot release the child-table locks, since we may have
		 * pointers to their TOAST tables in the sampled rows.
		 */
		table_close(childrel, NoLock);
		pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
									 i + 1);
	}

	return numrows;
}


/*
 * update_attstats() -- update attribute statistics for one relation
 *
 *		Statistics are stored in several places: the pg_class row for the
 *		relation has stats about the whole relation, and there is a
 *		pg_statistic row for each (non-system) attribute that has ever
 *		been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 *		pg_statistic rows are just added or updated normally.  This means
 *		that pg_statistic will probably contain some deleted rows at the
 *		completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 *		To keep things simple, we punt for pg_statistic, and don't try
 *		to compute or store rows for pg_statistic itself in pg_statistic.
 *		This could possibly be made to work, but it's not worth the trouble.
 *		Note analyze_rel() has seen to it that we won't come here when
 *		vacuuming pg_statistic itself.
 *
 *		Note: there would be a race condition here if two backends could
 *		ANALYZE the same table concurrently.  Presently, we lock that out
 *		by taking a self-exclusive lock on the relation in analyze_rel().
 */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
	Relation	sd;
	int			attno;

	if (natts <= 0)
		return;					/* nothing to do */

	sd = table_open(StatisticRelationId, RowExclusiveLock);

	for (attno = 0; attno < natts; attno++)
	{
		VacAttrStats *stats = vacattrstats[attno];
		HeapTuple	stup,
					oldtup;
		int			i,
					k,
					n;
		Datum		values[Natts_pg_statistic];
		bool		nulls[Natts_pg_statistic];
		bool		replaces[Natts_pg_statistic];

		/* Ignore attr if we weren't able to collect stats */
		if (!stats->stats_valid)
			continue;

		/*
		 * Construct a new pg_statistic tuple.  We always build a full set of
		 * values; replaces[] is consulted only by heap_modify_tuple in the
		 * update path below, but it's simplest to set every entry.
		 */
		for (i = 0; i < Natts_pg_statistic; ++i)
		{
			nulls[i] = false;
			replaces[i] = true;
		}

		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
		i = Anum_pg_statistic_stakind1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
		}
		i = Anum_pg_statistic_staop1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
		}
		i = Anum_pg_statistic_stacoll1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
		}
		i = Anum_pg_statistic_stanumbers1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			int			nnum = stats->numnumbers[k];

			if (nnum > 0)
			{
				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
				ArrayType  *arry;

				for (n = 0; n < nnum; n++)
					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
				/* XXX knows more than it should about type float4: */
				arry = construct_array(numdatums, nnum,
									   FLOAT4OID,
									   sizeof(float4), true, TYPALIGN_INT);
				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
			}
			else
			{
				/* slot unused: store a NULL */
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}
		i = Anum_pg_statistic_stavalues1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			if (stats->numvalues[k] > 0)
			{
				ArrayType  *arry;

				arry = construct_array(stats->stavalues[k],
									   stats->numvalues[k],
									   stats->statypid[k],
									   stats->statyplen[k],
									   stats->statypbyval[k],
									   stats->statypalign[k]);
				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
			}
			else
			{
				/* slot unused: store a NULL */
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}

		/* Is there already a pg_statistic tuple for this attribute? */
		oldtup = SearchSysCache3(STATRELATTINH,
								 ObjectIdGetDatum(relid),
								 Int16GetDatum(stats->attr->attnum),
								 BoolGetDatum(inh));

		if (HeapTupleIsValid(oldtup))
		{
			/* Yes, replace it */
			stup = heap_modify_tuple(oldtup,
									 RelationGetDescr(sd),
									 values,
									 nulls,
									 replaces);
			ReleaseSysCache(oldtup);
			CatalogTupleUpdate(sd, &stup->t_self, stup);
		}
		else
		{
			/* No, insert new tuple */
			stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
			CatalogTupleInsert(sd, stup);
		}

		heap_freetuple(stup);
	}

	table_close(sd, RowExclusiveLock);
}

/*
 * Standard fetch function for use by compute_stats subroutines.
 *
 * This exists to provide some insulation between compute_stats routines
 * and the actual storage of the sample data.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	int			attnum = stats->tupattnum;
	HeapTuple	tuple = stats->rows[rownum];
	TupleDesc	tupDesc = stats->tupDesc;

	return heap_getattr(tuple, attnum, tupDesc, isNull);
}

/*
 * Fetch function for analyzing index expressions.
 *
 * We have not bothered to construct index tuples, instead the data is
 * just in Datum arrays.
 */
static Datum
ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
	int			i;

	/* exprvals and exprnulls are already offset for proper column */
	i = rownum * stats->rowstride;
	*isNull = stats->exprnulls[i];
	return stats->exprvals[i];
}


/*==========================================================================
 *
 * Code below this point represents the "standard" type-specific statistics
 * analysis algorithms.  This code can be replaced on a per-data-type basis
 * by setting a nonzero value in pg_type.typanalyze.
 *
 *==========================================================================
 */


/*
 * To avoid consuming too much memory during analysis and/or too much space
 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
 * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
 * and distinct-value calculations since a wide value is unlikely to be
 * duplicated at all, much less be a most-common value.  For the same reason,
 * ignoring wide values will not affect our estimates of histogram bin
 * boundaries very much.
 */
#define WIDTH_THRESHOLD  1024

/*
 * Simple swap helpers.  NB: these are macros, so the arguments are
 * evaluated more than once -- they must be side-effect-free.
 */
#define swapInt(a,b)	do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
#define swapDatum(a,b)	do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)

/*
 * Extra information used by the default analysis routines
 */
typedef struct
{
	int			count;			/* # of duplicates */
	int			first;			/* values[] index of first occurrence */
} ScalarMCVItem;

typedef struct
{
	SortSupport ssup;
	int		   *tupnoLink;
} CompareScalarsContext;


/* Forward declarations for the standard stats algorithms and helpers */
static void compute_trivial_stats(VacAttrStatsP stats,
								  AnalyzeAttrFetchFunc fetchfunc,
								  int samplerows,
								  double totalrows);
static void compute_distinct_stats(VacAttrStatsP stats,
								   AnalyzeAttrFetchFunc fetchfunc,
								   int samplerows,
								   double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
								 AnalyzeAttrFetchFunc fetchfunc,
								 int samplerows,
								 double totalrows);
static int	compare_scalars(const void *a, const void *b, void *arg);
static int	compare_mcvs(const void *a, const void *b, void *arg);
static int	analyze_mcv_list(int *mcv_counts,
							 int num_mcv,
							 double stadistinct,
							 double stanullfrac,
							 int samplerows,
							 double totalrows);


/*
 * std_typanalyze -- the default type-specific typanalyze function
 *
 * Chooses one of the compute_*_stats routines depending on what operators
 * are available for the column's type, and sets the minimum number of
 * sample rows that routine wants to see.
 */
bool
std_typanalyze(VacAttrStats *stats)
{
	Form_pg_attribute attr = stats->attr;
	Oid			ltopr;
	Oid			eqopr;
	StdAnalyzeData *mystats;

	/* If the attstattarget column is negative, use the default value */
	/* NB: it is okay to scribble on stats->attr since it's a copy */
	if (attr->attstattarget < 0)
		attr->attstattarget = default_statistics_target;

	/* Look for default "<" and "=" operators for column's type */
	get_sort_group_operators(stats->attrtypid,
							 false, false, false,
							 &ltopr, &eqopr, NULL,
							 NULL);

	/* Save the operator info for compute_stats routines */
	mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
	mystats->eqopr = eqopr;
	/* resolve the "=" operator's implementation function once, up front */
	mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
	mystats->ltopr = ltopr;
	stats->extra_data = mystats;

	/*
	 * Determine which standard statistics algorithm to use
	 */
	if (OidIsValid(eqopr) && OidIsValid(ltopr))
	{
		/* Seems to be a scalar datatype */
		stats->compute_stats = compute_scalar_stats;
		/*--------------------
		 * The following choice of minrows is based on the paper
		 * "Random sampling for histogram construction: how much is enough?"
		 * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
		 * Proceedings of ACM SIGMOD International Conference on Management
		 * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
		 * says that for table size n, histogram size k, maximum relative
		 * error in bin size f, and error probability gamma, the minimum
		 * random sample size is
		 *		r = 4 * k * ln(2*n/gamma) / f^2
		 * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
		 *		r = 305.82 * k
		 * Note that because of the log function, the dependence on n is
		 * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
		 * bin size error with probability 0.99.  So there's no real need to
		 * scale for n, which is a good thing because we don't necessarily
		 * know it at this point.
		 *--------------------
		 */
		stats->minrows = 300 * attr->attstattarget;
	}
	else if (OidIsValid(eqopr))
	{
		/* We can still recognize distinct values */
		stats->compute_stats = compute_distinct_stats;
		/* Might as well use the same minrows as above */
		stats->minrows = 300 * attr->attstattarget;
	}
	else
	{
		/* Can't do much but the trivial stuff */
		stats->compute_stats = compute_trivial_stats;
		/* Might as well use the same minrows as above */
		stats->minrows = 300 * attr->attstattarget;
	}

	return true;
}


/*
 * compute_trivial_stats() -- compute very basic column statistics
 *
 * We use this when we cannot find a hash "=" operator for the datatype.
 *
 * We determine the fraction of non-null rows and the average datum width.
+ */ +static void +compute_trivial_stats(VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows) +{ + int i; + int null_cnt = 0; + int nonnull_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); + + for (i = 0; i < samplerows; i++) + { + Datum value; + bool isnull; + + vacuum_delay_point(); + + value = fetchfunc(stats, i, &isnull); + + /* Check for null/nonnull */ + if (isnull) + { + null_cnt++; + continue; + } + nonnull_cnt++; + + /* + * If it's a variable-width field, add up widths for average width + * calculation. Note that if the value is toasted, we use the toasted + * width. We don't bother with this calculation if it's a fixed-width + * type. + */ + if (is_varlena) + { + total_width += VARSIZE_ANY(DatumGetPointer(value)); + } + else if (is_varwidth) + { + /* must be cstring */ + total_width += strlen(DatumGetCString(value)) + 1; + } + } + + /* We can only compute average width if we found some non-null values. */ + if (nonnull_cnt > 0) + { + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } +} + + +/* + * compute_distinct_stats() -- compute column statistics including ndistinct + * + * We use this when we can find only an "=" operator for the datatype. 
 *
 * We determine the fraction of non-null rows, the average width, the
 * most common values, and the (estimated) number of distinct values.
 *
 * The most common values are determined by brute force: we keep a list
 * of previously seen values, ordered by number of times seen, as we scan
 * the samples.  A newly seen value is inserted just after the last
 * multiply-seen value, causing the bottommost (oldest) singly-seen value
 * to drop off the list.  The accuracy of this method, and also its cost,
 * depend mainly on the length of the list we are willing to keep.
 */
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;		/* tracked sample value */
		int			count;		/* # of times seen so far */
	} TrackItem;
	TrackItem  *track;
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attr->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.  As we
		 * scan, remember in firstcount1 the index of the first singly-seen
		 * entry preceding the match point; that's where a new value will be
		 * inserted, per the algorithm described in the header comment.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/*
			 * No match.  Insert at head of count-1 list; the shift below
			 * drops the oldest singly-seen entry when the list is full.
			 */
			if (track_cnt < track_max)
				track_cnt++;
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,
					summultiple;

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/* Count the number of values we found multiple times */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others.
They have + * been set before we were called (see vacuum.h) + */ + } + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + + /* We don't need to bother cleaning up any of our temporary palloc's */ +} + + +/* + * compute_scalar_stats() -- compute column statistics + * + * We use this when we can find "=" and "<" operators for the datatype. + * + * We determine the fraction of non-null rows, the average width, the + * most common values, the (estimated) number of distinct values, the + * distribution histogram, and the correlation of physical to logical order. + * + * The desired stats can be determined fairly easily after sorting the + * data values into order. + */ +static void +compute_scalar_stats(VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows) +{ + int i; + int null_cnt = 0; + int nonnull_cnt = 0; + int toowide_cnt = 0; + double total_width = 0; + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); + double corr_xysum; + SortSupportData ssup; + ScalarItem *values; + int values_cnt = 0; + int *tupnoLink; + ScalarMCVItem *track; + int track_cnt = 0; + int num_mcv = stats->attr->attstattarget; + int num_bins = stats->attr->attstattarget; + StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data; + + values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem)); + tupnoLink = (int *) palloc(samplerows * sizeof(int)); + track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem)); + + memset(&ssup, 0, sizeof(ssup)); + ssup.ssup_cxt = CurrentMemoryContext; + ssup.ssup_collation = stats->attrcollid; + ssup.ssup_nulls_first = false; + + /* + * 
For now, don't perform abbreviated key conversion, because full values + * are required for MCV slot generation. Supporting that optimization + * would necessitate teaching compare_scalars() to call a tie-breaker. + */ + ssup.abbreviate = false; + + PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); + + /* Initial scan to find sortable values */ + for (i = 0; i < samplerows; i++) + { + Datum value; + bool isnull; + + vacuum_delay_point(); + + value = fetchfunc(stats, i, &isnull); + + /* Check for null/nonnull */ + if (isnull) + { + null_cnt++; + continue; + } + nonnull_cnt++; + + /* + * If it's a variable-width field, add up widths for average width + * calculation. Note that if the value is toasted, we use the toasted + * width. We don't bother with this calculation if it's a fixed-width + * type. + */ + if (is_varlena) + { + total_width += VARSIZE_ANY(DatumGetPointer(value)); + + /* + * If the value is toasted, we want to detoast it just once to + * avoid repeated detoastings and resultant excess memory usage + * during the comparisons. Also, check to see if the value is + * excessively wide, and if so don't detoast at all --- just + * ignore the value. + */ + if (toast_raw_datum_size(value) > WIDTH_THRESHOLD) + { + toowide_cnt++; + continue; + } + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + } + else if (is_varwidth) + { + /* must be cstring */ + total_width += strlen(DatumGetCString(value)) + 1; + } + + /* Add it to the list to be sorted */ + values[values_cnt].value = value; + values[values_cnt].tupno = values_cnt; + tupnoLink[values_cnt] = values_cnt; + values_cnt++; + } + + /* We can only compute real stats if we found some sortable values. 
*/ + if (values_cnt > 0) + { + int ndistinct, /* # distinct values in sample */ + nmultiple, /* # that appear multiple times */ + num_hist, + dups_cnt; + int slot_idx = 0; + CompareScalarsContext cxt; + + /* Sort the collected values */ + cxt.ssup = &ssup; + cxt.tupnoLink = tupnoLink; + qsort_interruptible((void *) values, values_cnt, sizeof(ScalarItem), + compare_scalars, (void *) &cxt); + + /* + * Now scan the values in order, find the most common ones, and also + * accumulate ordering-correlation statistics. + * + * To determine which are most common, we first have to count the + * number of duplicates of each value. The duplicates are adjacent in + * the sorted list, so a brute-force approach is to compare successive + * datum values until we find two that are not equal. However, that + * requires N-1 invocations of the datum comparison routine, which are + * completely redundant with work that was done during the sort. (The + * sort algorithm must at some point have compared each pair of items + * that are adjacent in the sorted order; otherwise it could not know + * that it's ordered the pair correctly.) We exploit this by having + * compare_scalars remember the highest tupno index that each + * ScalarItem has been found equal to. At the end of the sort, a + * ScalarItem's tupnoLink will still point to itself if and only if it + * is the last item of its group of duplicates (since the group will + * be ordered by tupno). + */ + corr_xysum = 0; + ndistinct = 0; + nmultiple = 0; + dups_cnt = 0; + for (i = 0; i < values_cnt; i++) + { + int tupno = values[i].tupno; + + corr_xysum += ((double) i) * ((double) tupno); + dups_cnt++; + if (tupnoLink[tupno] == tupno) + { + /* Reached end of duplicates of this value */ + ndistinct++; + if (dups_cnt > 1) + { + nmultiple++; + if (track_cnt < num_mcv || + dups_cnt > track[track_cnt - 1].count) + { + /* + * Found a new item for the mcv list; find its + * position, bubbling down old items if needed. 
Loop + * invariant is that j points at an empty/ replaceable + * slot. + */ + int j; + + if (track_cnt < num_mcv) + track_cnt++; + for (j = track_cnt - 1; j > 0; j--) + { + if (dups_cnt <= track[j - 1].count) + break; + track[j].count = track[j - 1].count; + track[j].first = track[j - 1].first; + } + track[j].count = dups_cnt; + track[j].first = i + 1 - dups_cnt; + } + } + dups_cnt = 0; + } + } + + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + + if (nmultiple == 0) + { + /* + * If we found no repeated non-null values, assume it's a unique + * column; but be sure to discount for any nulls we found. + */ + stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); + } + else if (toowide_cnt == 0 && nmultiple == ndistinct) + { + /* + * Every value in the sample appeared more than once. Assume the + * column has just these values. (This case is meant to address + * columns with small, fixed sets of possible values, such as + * boolean or enum columns. If there are any values that appear + * just once in the sample, including too-wide values, we should + * assume that that's not what we're dealing with.) + */ + stats->stadistinct = ndistinct; + } + else + { + /*---------- + * Estimate the number of distinct values using the estimator + * proposed by Haas and Stokes in IBM Research Report RJ 10025: + * n*d / (n - f1 + f1*n/N) + * where f1 is the number of distinct values that occurred + * exactly once in our sample of n rows (from a total of N), + * and d is the total number of distinct values in the sample. + * This is their Duj1 estimator; the other estimators they + * recommend are considerably more complex, and are numerically + * very unstable when n is much smaller than N. + * + * In this calculation, we consider only non-nulls. 
We used to + * include rows with null values in the n and N counts, but that + * leads to inaccurate answers in columns with many nulls, and + * it's intuitively bogus anyway considering the desired result is + * the number of distinct non-null values. + * + * Overwidth values are assumed to have been distinct. + *---------- + */ + int f1 = ndistinct - nmultiple + toowide_cnt; + int d = f1 + nmultiple; + double n = samplerows - null_cnt; + double N = totalrows * (1.0 - stats->stanullfrac); + double stadistinct; + + /* N == 0 shouldn't happen, but just in case ... */ + if (N > 0) + stadistinct = (n * d) / ((n - f1) + f1 * n / N); + else + stadistinct = 0; + + /* Clamp to sane range in case of roundoff error */ + if (stadistinct < d) + stadistinct = d; + if (stadistinct > N) + stadistinct = N; + /* And round to integer */ + stats->stadistinct = floor(stadistinct + 0.5); + } + + /* + * If we estimated the number of distinct values at more than 10% of + * the total row count (a very arbitrary limit), then assume that + * stadistinct should scale with the row count rather than be a fixed + * value. + */ + if (stats->stadistinct > 0.1 * totalrows) + stats->stadistinct = -(stats->stadistinct / totalrows); + + /* + * Decide how many values are worth storing as most-common values. If + * we are able to generate a complete MCV list (all the values in the + * sample will fit, and we think these are all the ones in the table), + * then do so. Otherwise, store only those values that are + * significantly more common than the values not in the list. + * + * Note: the first of these cases is meant to address columns with + * small, fixed sets of possible values, such as boolean or enum + * columns. If we can *completely* represent the column population by + * an MCV list that will fit into the stats target, then we should do + * so and thus provide the planner with complete information. 
But if + * the MCV list is not complete, it's generally worth being more + * selective, and not just filling it all the way up to the stats + * target. + */ + if (track_cnt == ndistinct && toowide_cnt == 0 && + stats->stadistinct > 0 && + track_cnt <= num_mcv) + { + /* Track list includes all values seen, and all will fit */ + num_mcv = track_cnt; + } + else + { + int *mcv_counts; + + /* Incomplete list; decide how many values are worth keeping */ + if (num_mcv > track_cnt) + num_mcv = track_cnt; + + if (num_mcv > 0) + { + mcv_counts = (int *) palloc(num_mcv * sizeof(int)); + for (i = 0; i < num_mcv; i++) + mcv_counts[i] = track[i].count; + + num_mcv = analyze_mcv_list(mcv_counts, num_mcv, + stats->stadistinct, + stats->stanullfrac, + samplerows, totalrows); + } + } + + /* Generate MCV slot entry */ + if (num_mcv > 0) + { + MemoryContext old_context; + Datum *mcv_values; + float4 *mcv_freqs; + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum)); + mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4)); + for (i = 0; i < num_mcv; i++) + { + mcv_values[i] = datumCopy(values[track[i].first].value, + stats->attrtype->typbyval, + stats->attrtype->typlen); + mcv_freqs[i] = (double) track[i].count / (double) samplerows; + } + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_MCV; + stats->staop[slot_idx] = mystats->eqopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stanumbers[slot_idx] = mcv_freqs; + stats->numnumbers[slot_idx] = num_mcv; + stats->stavalues[slot_idx] = mcv_values; + stats->numvalues[slot_idx] = num_mcv; + + /* + * Accept the defaults for stats->statypid and others. They have + * been set before we were called (see vacuum.h) + */ + slot_idx++; + } + + /* + * Generate a histogram slot entry if there are at least two distinct + * values not accounted for in the MCV list. 
(This ensures the + * histogram won't collapse to empty or a singleton.) + */ + num_hist = ndistinct - num_mcv; + if (num_hist > num_bins) + num_hist = num_bins + 1; + if (num_hist >= 2) + { + MemoryContext old_context; + Datum *hist_values; + int nvals; + int pos, + posfrac, + delta, + deltafrac; + + /* Sort the MCV items into position order to speed next loop */ + qsort_interruptible((void *) track, num_mcv, sizeof(ScalarMCVItem), + compare_mcvs, NULL); + + /* + * Collapse out the MCV items from the values[] array. + * + * Note we destroy the values[] array here... but we don't need it + * for anything more. We do, however, still need values_cnt. + * nvals will be the number of remaining entries in values[]. + */ + if (num_mcv > 0) + { + int src, + dest; + int j; + + src = dest = 0; + j = 0; /* index of next interesting MCV item */ + while (src < values_cnt) + { + int ncopy; + + if (j < num_mcv) + { + int first = track[j].first; + + if (src >= first) + { + /* advance past this MCV item */ + src = first + track[j].count; + j++; + continue; + } + ncopy = first - src; + } + else + ncopy = values_cnt - src; + memmove(&values[dest], &values[src], + ncopy * sizeof(ScalarItem)); + src += ncopy; + dest += ncopy; + } + nvals = dest; + } + else + nvals = values_cnt; + Assert(nvals >= num_hist); + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + + /* + * The object of this loop is to copy the first and last values[] + * entries along with evenly-spaced values in between. So the + * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But + * computing that subscript directly risks integer overflow when + * the stats target is more than a couple thousand. Instead we + * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking + * the integral and fractional parts of the sum separately. 
+ */ + delta = (nvals - 1) / (num_hist - 1); + deltafrac = (nvals - 1) % (num_hist - 1); + pos = posfrac = 0; + + for (i = 0; i < num_hist; i++) + { + hist_values[i] = datumCopy(values[pos].value, + stats->attrtype->typbyval, + stats->attrtype->typlen); + pos += delta; + posfrac += deltafrac; + if (posfrac >= (num_hist - 1)) + { + /* fractional part exceeds 1, carry to integer part */ + pos++; + posfrac -= (num_hist - 1); + } + } + + MemoryContextSwitchTo(old_context); + + stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM; + stats->staop[slot_idx] = mystats->ltopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stavalues[slot_idx] = hist_values; + stats->numvalues[slot_idx] = num_hist; + + /* + * Accept the defaults for stats->statypid and others. They have + * been set before we were called (see vacuum.h) + */ + slot_idx++; + } + + /* Generate a correlation entry if there are multiple values */ + if (values_cnt > 1) + { + MemoryContext old_context; + float4 *corrs; + double corr_xsum, + corr_x2sum; + + /* Must copy the target values into anl_context */ + old_context = MemoryContextSwitchTo(stats->anl_context); + corrs = (float4 *) palloc(sizeof(float4)); + MemoryContextSwitchTo(old_context); + + /*---------- + * Since we know the x and y value sets are both + * 0, 1, ..., values_cnt-1 + * we have sum(x) = sum(y) = + * (values_cnt-1)*values_cnt / 2 + * and sum(x^2) = sum(y^2) = + * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6. 
+ *---------- + */ + corr_xsum = ((double) (values_cnt - 1)) * + ((double) values_cnt) / 2.0; + corr_x2sum = ((double) (values_cnt - 1)) * + ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0; + + /* And the correlation coefficient reduces to */ + corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) / + (values_cnt * corr_x2sum - corr_xsum * corr_xsum); + + stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION; + stats->staop[slot_idx] = mystats->ltopr; + stats->stacoll[slot_idx] = stats->attrcollid; + stats->stanumbers[slot_idx] = corrs; + stats->numnumbers[slot_idx] = 1; + slot_idx++; + } + } + else if (nonnull_cnt > 0) + { + /* We found some non-null values, but they were all too wide */ + Assert(nonnull_cnt == toowide_cnt); + stats->stats_valid = true; + /* Do the simple null-frac and width stats */ + stats->stanullfrac = (double) null_cnt / (double) samplerows; + if (is_varwidth) + stats->stawidth = total_width / (double) nonnull_cnt; + else + stats->stawidth = stats->attrtype->typlen; + /* Assume all too-wide values are distinct, so it's a unique column */ + stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac); + } + else if (null_cnt > 0) + { + /* We found only nulls; assume the column is entirely null */ + stats->stats_valid = true; + stats->stanullfrac = 1.0; + if (is_varwidth) + stats->stawidth = 0; /* "unknown" */ + else + stats->stawidth = stats->attrtype->typlen; + stats->stadistinct = 0.0; /* "unknown" */ + } + + /* We don't need to bother cleaning up any of our temporary palloc's */ +} + +/* + * Comparator for sorting ScalarItems + * + * Aside from sorting the items, we update the tupnoLink[] array + * whenever two ScalarItems are found to contain equal datums. The array + * is indexed by tupno; for each ScalarItem, it contains the highest + * tupno that that item's datum has been found to be equal to. This allows + * us to avoid additional comparisons in compute_scalar_stats(). 
+ */ +static int +compare_scalars(const void *a, const void *b, void *arg) +{ + Datum da = ((const ScalarItem *) a)->value; + int ta = ((const ScalarItem *) a)->tupno; + Datum db = ((const ScalarItem *) b)->value; + int tb = ((const ScalarItem *) b)->tupno; + CompareScalarsContext *cxt = (CompareScalarsContext *) arg; + int compare; + + compare = ApplySortComparator(da, false, db, false, cxt->ssup); + if (compare != 0) + return compare; + + /* + * The two datums are equal, so update cxt->tupnoLink[]. + */ + if (cxt->tupnoLink[ta] < tb) + cxt->tupnoLink[ta] = tb; + if (cxt->tupnoLink[tb] < ta) + cxt->tupnoLink[tb] = ta; + + /* + * For equal datums, sort by tupno + */ + return ta - tb; +} + +/* + * Comparator for sorting ScalarMCVItems by position + */ +static int +compare_mcvs(const void *a, const void *b, void *arg) +{ + int da = ((const ScalarMCVItem *) a)->first; + int db = ((const ScalarMCVItem *) b)->first; + + return da - db; +} + +/* + * Analyze the list of common values in the sample and decide how many are + * worth storing in the table's MCV list. + * + * mcv_counts is assumed to be a list of the counts of the most common values + * seen in the sample, starting with the most common. The return value is the + * number that are significantly more common than the values not in the list, + * and which are therefore deemed worth storing in the table's MCV list. + */ +static int +analyze_mcv_list(int *mcv_counts, + int num_mcv, + double stadistinct, + double stanullfrac, + int samplerows, + double totalrows) +{ + double ndistinct_table; + double sumcount; + int i; + + /* + * If the entire table was sampled, keep the whole list. This also + * protects us against division by zero in the code below. 
+ */ + if (samplerows == totalrows || totalrows <= 1.0) + return num_mcv; + + /* Re-extract the estimated number of distinct nonnull values in table */ + ndistinct_table = stadistinct; + if (ndistinct_table < 0) + ndistinct_table = -ndistinct_table * totalrows; + + /* + * Exclude the least common values from the MCV list, if they are not + * significantly more common than the estimated selectivity they would + * have if they weren't in the list. All non-MCV values are assumed to be + * equally common, after taking into account the frequencies of all the + * values in the MCV list and the number of nulls (c.f. eqsel()). + * + * Here sumcount tracks the total count of all but the last (least common) + * value in the MCV list, allowing us to determine the effect of excluding + * that value from the list. + * + * Note that we deliberately do this by removing values from the full + * list, rather than starting with an empty list and adding values, + * because the latter approach can fail to add any values if all the most + * common values have around the same frequency and make up the majority + * of the table, so that the overall average frequency of all values is + * roughly the same as that of the common values. This would lead to any + * uncommon values being significantly overestimated. + */ + sumcount = 0.0; + for (i = 0; i < num_mcv - 1; i++) + sumcount += mcv_counts[i]; + + while (num_mcv > 0) + { + double selec, + otherdistinct, + N, + n, + K, + variance, + stddev; + + /* + * Estimated selectivity the least common value would have if it + * wasn't in the MCV list (c.f. eqsel()). + */ + selec = 1.0 - sumcount / samplerows - stanullfrac; + if (selec < 0.0) + selec = 0.0; + if (selec > 1.0) + selec = 1.0; + otherdistinct = ndistinct_table - (num_mcv - 1); + if (otherdistinct > 1) + selec /= otherdistinct; + + /* + * If the value is kept in the MCV list, its population frequency is + * assumed to equal its sample frequency. 
We use the lower end of a + * textbook continuity-corrected Wald-type confidence interval to + * determine if that is significantly more common than the non-MCV + * frequency --- specifically we assume the population frequency is + * highly likely to be within around 2 standard errors of the sample + * frequency, which equates to an interval of 2 standard deviations + * either side of the sample count, plus an additional 0.5 for the + * continuity correction. Since we are sampling without replacement, + * this is a hypergeometric distribution. + * + * XXX: Empirically, this approach seems to work quite well, but it + * may be worth considering more advanced techniques for estimating + * the confidence interval of the hypergeometric distribution. + */ + N = totalrows; + n = samplerows; + K = N * mcv_counts[num_mcv - 1] / n; + variance = n * K * (N - K) * (N - n) / (N * N * (N - 1)); + stddev = sqrt(variance); + + if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5) + { + /* + * The value is significantly more common than the non-MCV + * selectivity would suggest. Keep it, and all the other more + * common values in the list. 
+ */ + break; + } + else + { + /* Discard this value and consider the next least common value */ + num_mcv--; + if (num_mcv == 0) + break; + sumcount -= mcv_counts[num_mcv - 1]; + } + } + return num_mcv; +} diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c new file mode 100644 index 0000000..3e1b92d --- /dev/null +++ b/src/backend/commands/async.c @@ -0,0 +1,2446 @@ +/*------------------------------------------------------------------------- + * + * async.c + * Asynchronous notification: NOTIFY, LISTEN, UNLISTEN + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/async.c + * + *------------------------------------------------------------------------- + */ + +/*------------------------------------------------------------------------- + * Async Notification Model as of 9.0: + * + * 1. Multiple backends on same machine. Multiple backends listening on + * several channels. (Channels are also called "conditions" in other + * parts of the code.) + * + * 2. There is one central queue in disk-based storage (directory pg_notify/), + * with actively-used pages mapped into shared memory by the slru.c module. + * All notification messages are placed in the queue and later read out + * by listening backends. + * + * There is no central knowledge of which backend listens on which channel; + * every backend has its own list of interesting channels. + * + * Although there is only one queue, notifications are treated as being + * database-local; this is done by including the sender's database OID + * in each notification message. Listening backends ignore messages + * that don't match their database OID. This is important because it + * ensures senders and receivers have the same database encoding and won't + * misinterpret non-ASCII text in the channel name or payload string. 
+ * + * Since notifications are not expected to survive database crashes, + * we can simply clean out the pg_notify data at any reboot, and there + * is no need for WAL support or fsync'ing. + * + * 3. Every backend that is listening on at least one channel registers by + * entering its PID into the array in AsyncQueueControl. It then scans all + * incoming notifications in the central queue and first compares the + * database OID of the notification with its own database OID and then + * compares the notified channel with the list of channels that it listens + * to. In case there is a match it delivers the notification event to its + * frontend. Non-matching events are simply skipped. + * + * 4. The NOTIFY statement (routine Async_Notify) stores the notification in + * a backend-local list which will not be processed until transaction end. + * + * Duplicate notifications from the same transaction are sent out as one + * notification only. This is done to save work when for example a trigger + * on a 2 million row table fires a notification for each row that has been + * changed. If the application needs to receive every single notification + * that has been sent, it can easily add some unique string into the extra + * payload parameter. + * + * When the transaction is ready to commit, PreCommit_Notify() adds the + * pending notifications to the head of the queue. The head pointer of the + * queue always points to the next free position and a position is just a + * page number and the offset in that page. This is done before marking the + * transaction as committed in clog. If we run into problems writing the + * notifications, we can still call elog(ERROR, ...) and the transaction + * will roll back. + * + * Once we have put all of the notifications into the queue, we return to + * CommitTransaction() which will then do the actual transaction commit. + * + * After commit we are called another time (AtCommit_Notify()). 
Here we + * make any actual updates to the effective listen state (listenChannels). + * Then we signal any backends that may be interested in our messages + * (including our own backend, if listening). This is done by + * SignalBackends(), which scans the list of listening backends and sends a + * PROCSIG_NOTIFY_INTERRUPT signal to every listening backend (we don't + * know which backend is listening on which channel so we must signal them + * all). We can exclude backends that are already up to date, though, and + * we can also exclude backends that are in other databases (unless they + * are way behind and should be kicked to make them advance their + * pointers). + * + * Finally, after we are out of the transaction altogether and about to go + * idle, we scan the queue for messages that need to be sent to our + * frontend (which might be notifies from other backends, or self-notifies + * from our own). This step is not part of the CommitTransaction sequence + * for two important reasons. First, we could get errors while sending + * data to our frontend, and it's really bad for errors to happen in + * post-commit cleanup. Second, in cases where a procedure issues commits + * within a single frontend command, we don't want to send notifies to our + * frontend until the command is done; but notifies to other backends + * should go out immediately after each commit. + * + * 5. Upon receipt of a PROCSIG_NOTIFY_INTERRUPT signal, the signal handler + * sets the process's latch, which triggers the event to be processed + * immediately if this backend is idle (i.e., it is waiting for a frontend + * command and is not within a transaction block. C.f. + * ProcessClientReadInterrupt()). Otherwise the handler may only set a + * flag, which will cause the processing to occur just before we next go + * idle. + * + * Inbound-notify processing consists of reading all of the notifications + * that have arrived since scanning last time. 
We read every notification + * until we reach either a notification from an uncommitted transaction or + * the head pointer's position. + * + * 6. To avoid SLRU wraparound and limit disk space consumption, the tail + * pointer needs to be advanced so that old pages can be truncated. + * This is relatively expensive (notably, it requires an exclusive lock), + * so we don't want to do it often. We make sending backends do this work + * if they advanced the queue head into a new page, but only once every + * QUEUE_CLEANUP_DELAY pages. + * + * An application that listens on the same channel it notifies will get + * NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful, + * by comparing be_pid in the NOTIFY message to the application's own backend's + * PID. (As of FE/BE protocol 2.0, the backend's PID is provided to the + * frontend during startup.) The above design guarantees that notifies from + * other backends will never be missed by ignoring self-notifies. + * + * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS) + * can be varied without affecting anything but performance. The maximum + * amount of notification data that can be queued at one time is determined + * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below. 
+ *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include +#include +#include + +#include "access/parallel.h" +#include "access/slru.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/pg_database.h" +#include "commands/async.h" +#include "common/hashfn.h" +#include "funcapi.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/procsignal.h" +#include "storage/sinval.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/ps_status.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" + + +/* + * Maximum size of a NOTIFY payload, including terminating NULL. This + * must be kept small enough so that a notification message fits on one + * SLRU page. The magic fudge factor here is noncritical as long as it's + * more than AsyncQueueEntryEmptySize --- we make it significantly bigger + * than that, so changes in that data structure won't affect user-visible + * restrictions. + */ +#define NOTIFY_PAYLOAD_MAX_LENGTH (BLCKSZ - NAMEDATALEN - 128) + +/* + * Struct representing an entry in the global notify queue + * + * This struct declaration has the maximal length, but in a real queue entry + * the data area is only big enough for the actual channel and payload strings + * (each null-terminated). AsyncQueueEntryEmptySize is the minimum possible + * entry size, if both channel and payload strings are empty (but note it + * doesn't include alignment padding). + * + * The "length" field should always be rounded up to the next QUEUEALIGN + * multiple so that all fields are properly aligned. 
+ */ +typedef struct AsyncQueueEntry +{ + int length; /* total allocated length of entry */ + Oid dboid; /* sender's database OID */ + TransactionId xid; /* sender's XID */ + int32 srcPid; /* sender's PID */ + char data[NAMEDATALEN + NOTIFY_PAYLOAD_MAX_LENGTH]; +} AsyncQueueEntry; + +/* Currently, no field of AsyncQueueEntry requires more than int alignment */ +#define QUEUEALIGN(len) INTALIGN(len) + +#define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) + +/* + * Struct describing a queue position, and assorted macros for working with it + */ +typedef struct QueuePosition +{ + int page; /* SLRU page number */ + int offset; /* byte offset within page */ +} QueuePosition; + +#define QUEUE_POS_PAGE(x) ((x).page) +#define QUEUE_POS_OFFSET(x) ((x).offset) + +#define SET_QUEUE_POS(x,y,z) \ + do { \ + (x).page = (y); \ + (x).offset = (z); \ + } while (0) + +#define QUEUE_POS_EQUAL(x,y) \ + ((x).page == (y).page && (x).offset == (y).offset) + +#define QUEUE_POS_IS_ZERO(x) \ + ((x).page == 0 && (x).offset == 0) + +/* choose logically smaller QueuePosition */ +#define QUEUE_POS_MIN(x,y) \ + (asyncQueuePagePrecedes((x).page, (y).page) ? (x) : \ + (x).page != (y).page ? (y) : \ + (x).offset < (y).offset ? (x) : (y)) + +/* choose logically larger QueuePosition */ +#define QUEUE_POS_MAX(x,y) \ + (asyncQueuePagePrecedes((x).page, (y).page) ? (y) : \ + (x).page != (y).page ? (x) : \ + (x).offset > (y).offset ? (x) : (y)) + +/* + * Parameter determining how often we try to advance the tail pointer: + * we do that after every QUEUE_CLEANUP_DELAY pages of NOTIFY data. This is + * also the distance by which a backend in another database needs to be + * behind before we'll decide we need to wake it up to advance its pointer. + * + * Resist the temptation to make this really large. While that would save + * work in some places, it would add cost in others. 
In particular, this + * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends + * catch up before the pages they'll need to read fall out of SLRU cache. + */ +#define QUEUE_CLEANUP_DELAY 4 + +/* + * Struct describing a listening backend's status + */ +typedef struct QueueBackendStatus +{ + int32 pid; /* either a PID or InvalidPid */ + Oid dboid; /* backend's database OID, or InvalidOid */ + BackendId nextListener; /* id of next listener, or InvalidBackendId */ + QueuePosition pos; /* backend has read queue up to here */ +} QueueBackendStatus; + +/* + * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) + * + * The AsyncQueueControl structure is protected by the NotifyQueueLock and + * NotifyQueueTailLock. + * + * When holding NotifyQueueLock in SHARED mode, backends may only inspect + * their own entries as well as the head and tail pointers. Consequently we + * can allow a backend to update its own record while holding only SHARED lock + * (since no other backend will inspect it). + * + * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the + * entries of other backends and also change the head pointer. When holding + * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends + * can change the tail pointers. + * + * NotifySLRULock is used as the control lock for the pg_notify SLRU buffers. + * In order to avoid deadlocks, whenever we need multiple locks, we first get + * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock. + * + * Each backend uses the backend[] array entry with index equal to its + * BackendId (which can range from 1 to MaxBackends). We rely on this to make + * SendProcSignal fast. + * + * The backend[] array entries for actively-listening backends are threaded + * together using firstListener and the nextListener links, so that we can + * scan them without having to iterate over inactive entries. 
We keep this
+ * list in order by BackendId so that the scan is cache-friendly when there
+ * are many active entries.
+ */
+typedef struct AsyncQueueControl
+{
+	QueuePosition head;			/* head points to the next free location */
+	QueuePosition tail;			/* tail must be <= the queue position of every
+								 * listening backend */
+	int			stopPage;		/* oldest unrecycled page; must be <=
+								 * tail.page */
+	BackendId	firstListener;	/* id of first listener, or InvalidBackendId */
+	TimestampTz lastQueueFillWarn;	/* time of last queue-full msg */
+	QueueBackendStatus backend[FLEXIBLE_ARRAY_MEMBER];
+	/* backend[0] is not used; used entries are from [1] to [MaxBackends] */
+} AsyncQueueControl;
+
+static AsyncQueueControl *asyncQueueControl;
+
+/* Shorthand accessors for fields of the shared AsyncQueueControl struct */
+#define QUEUE_HEAD					(asyncQueueControl->head)
+#define QUEUE_TAIL					(asyncQueueControl->tail)
+#define QUEUE_STOP_PAGE				(asyncQueueControl->stopPage)
+#define QUEUE_FIRST_LISTENER		(asyncQueueControl->firstListener)
+#define QUEUE_BACKEND_PID(i)		(asyncQueueControl->backend[i].pid)
+#define QUEUE_BACKEND_DBOID(i)		(asyncQueueControl->backend[i].dboid)
+#define QUEUE_NEXT_LISTENER(i)		(asyncQueueControl->backend[i].nextListener)
+#define QUEUE_BACKEND_POS(i)		(asyncQueueControl->backend[i].pos)
+
+/*
+ * The SLRU buffer area through which we access the notification queue
+ */
+static SlruCtlData NotifyCtlData;
+
+#define NotifyCtl					(&NotifyCtlData)
+#define QUEUE_PAGESIZE				BLCKSZ
+#define QUEUE_FULL_WARN_INTERVAL	5000	/* warn at most once every 5s */
+
+/*
+ * Use segments 0000 through FFFF.  Each contains SLRU_PAGES_PER_SEGMENT pages
+ * which gives us the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1.
+ * We could use as many segments as SlruScanDirectory() allows, but this gives
+ * us so much space already that it doesn't seem worth the trouble.
+ *
+ * The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2
+ * pages, because more than that would confuse slru.c into thinking there
+ * was a wraparound condition.  With the default BLCKSZ this means there
+ * can be up to 8GB of queued-and-not-read data.
+ *
+ * Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of
+ * SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour.
+ */
+#define QUEUE_MAX_PAGE			(SLRU_PAGES_PER_SEGMENT * 0x10000 - 1)
+
+/*
+ * listenChannels identifies the channels we are actually listening to
+ * (ie, have committed a LISTEN on).  It is a simple list of channel names,
+ * allocated in TopMemoryContext.
+ */
+static List *listenChannels = NIL;	/* list of C strings */
+
+/*
+ * State for pending LISTEN/UNLISTEN actions consists of an ordered list of
+ * all actions requested in the current transaction.  As explained above,
+ * we don't actually change listenChannels until we reach transaction commit.
+ *
+ * The list is kept in CurTransactionContext.  In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions attach their lists to their parent's list.
+ * Failed subtransactions simply discard their lists.
+ */
+typedef enum
+{
+	LISTEN_LISTEN,
+	LISTEN_UNLISTEN,
+	LISTEN_UNLISTEN_ALL
+} ListenActionKind;
+
+typedef struct
+{
+	ListenActionKind action;
+	char		channel[FLEXIBLE_ARRAY_MEMBER]; /* nul-terminated string */
+} ListenAction;
+
+typedef struct ActionList
+{
+	int			nestingLevel;	/* current transaction nesting depth */
+	List	   *actions;		/* list of ListenAction structs */
+	struct ActionList *upper;	/* details for upper transaction levels */
+} ActionList;
+
+static ActionList *pendingActions = NULL;
+
+/*
+ * State for outbound notifies consists of a list of all channels+payloads
+ * NOTIFYed in the current transaction.  We do not actually perform a NOTIFY
+ * until and unless the transaction commits.  pendingNotifies is NULL if no
+ * NOTIFYs have been done in the current (sub) transaction.
+ *
+ * We discard duplicate notify events issued in the same transaction.
+ * Hence, in addition to the list proper (which we need to track the order
+ * of the events, since we guarantee to deliver them in order), we build a
+ * hash table which we can probe to detect duplicates.  Since building the
+ * hash table is somewhat expensive, we do so only once we have at least
+ * MIN_HASHABLE_NOTIFIES events queued in the current (sub) transaction;
+ * before that we just scan the events linearly.
+ *
+ * The list is kept in CurTransactionContext.  In subtransactions, each
+ * subtransaction has its own list in its own CurTransactionContext, but
+ * successful subtransactions add their entries to their parent's list.
+ * Failed subtransactions simply discard their lists.  Since these lists
+ * are independent, there may be notify events in a subtransaction's list
+ * that duplicate events in some ancestor (sub) transaction; we get rid of
+ * the dups when merging the subtransaction's list into its parent's.
+ *
+ * Note: the action and notify lists do not interact within a transaction.
+ * In particular, if a transaction does NOTIFY and then LISTEN on the same
+ * condition name, it will get a self-notify at commit.  This is a bit odd
+ * but is consistent with our historical behavior.
+ */
+typedef struct Notification
+{
+	uint16		channel_len;	/* length of channel-name string */
+	uint16		payload_len;	/* length of payload string */
+	/* null-terminated channel name, then null-terminated payload follow */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} Notification;
+
+typedef struct NotificationList
+{
+	int			nestingLevel;	/* current transaction nesting depth */
+	List	   *events;			/* list of Notification structs */
+	HTAB	   *hashtab;		/* hash of NotificationHash structs, or NULL */
+	struct NotificationList *upper; /* details for upper transaction levels */
+} NotificationList;
+
+#define MIN_HASHABLE_NOTIFIES 16	/* threshold to build hashtab */
+
+/* Dup-detection hash table entry: just a pointer to the Notification */
+typedef struct NotificationHash
+{
+	Notification *event;		/* => the actual Notification struct */
+} NotificationHash;
+
+static NotificationList *pendingNotifies = NULL;
+
+/*
+ * Inbound notifications are initially processed by HandleNotifyInterrupt(),
+ * called from inside a signal handler.  That just sets the
+ * notifyInterruptPending flag and sets the process
+ * latch.  ProcessNotifyInterrupt() will then be called whenever it's safe to
+ * actually deal with the interrupt.
+ */
+volatile sig_atomic_t notifyInterruptPending = false;
+
+/* True if we've registered an on_shmem_exit cleanup */
+static bool unlistenExitRegistered = false;
+
+/* True if we're currently registered as a listener in asyncQueueControl */
+static bool amRegisteredListener = false;
+
+/* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY?
 */
+static bool tryAdvanceTail = false;
+
+/* GUC parameter */
+bool		Trace_notify = false;
+
+/* local function prototypes */
+static int	asyncQueuePageDiff(int p, int q);
+static bool asyncQueuePagePrecedes(int p, int q);
+static void queue_listen(ListenActionKind action, const char *channel);
+static void Async_UnlistenOnExit(int code, Datum arg);
+static void Exec_ListenPreCommit(void);
+static void Exec_ListenCommit(const char *channel);
+static void Exec_UnlistenCommit(const char *channel);
+static void Exec_UnlistenAllCommit(void);
+static bool IsListeningOn(const char *channel);
+static void asyncQueueUnregister(void);
+static bool asyncQueueIsFull(void);
+static bool asyncQueueAdvance(volatile QueuePosition *position, int entryLength);
+static void asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe);
+static ListCell *asyncQueueAddEntries(ListCell *nextNotify);
+static double asyncQueueUsage(void);
+static void asyncQueueFillWarning(void);
+static void SignalBackends(void);
+static void asyncQueueReadAllNotifications(void);
+static bool asyncQueueProcessPageEntries(volatile QueuePosition *current,
+										 QueuePosition stop,
+										 char *page_buffer,
+										 Snapshot snapshot);
+static void asyncQueueAdvanceTail(void);
+static void ProcessIncomingNotify(bool flush);
+static bool AsyncExistsPendingNotify(Notification *n);
+static void AddEventToPendingNotifies(Notification *n);
+static uint32 notification_hash(const void *key, Size keysize);
+static int	notification_match(const void *key1, const void *key2, Size keysize);
+static void ClearPendingActionsAndNotifies(void);
+
+/*
+ * Compute the difference between two queue page numbers (i.e., p - q),
+ * accounting for wraparound.
+ */
+static int
+asyncQueuePageDiff(int p, int q)
+{
+	int			diff;
+
+	/*
+	 * We have to compare modulo (QUEUE_MAX_PAGE+1)/2.  Both inputs should be
+	 * in the range 0..QUEUE_MAX_PAGE.
+	 */
+	Assert(p >= 0 && p <= QUEUE_MAX_PAGE);
+	Assert(q >= 0 && q <= QUEUE_MAX_PAGE);
+
+	diff = p - q;
+	if (diff >= ((QUEUE_MAX_PAGE + 1) / 2))
+		diff -= QUEUE_MAX_PAGE + 1;
+	else if (diff < -((QUEUE_MAX_PAGE + 1) / 2))
+		diff += QUEUE_MAX_PAGE + 1;
+	return diff;
+}
+
+/*
+ * Is p < q, accounting for wraparound?
+ *
+ * Since asyncQueueIsFull() blocks creation of a page that could precede any
+ * extant page, we need not assess entries within a page.
+ */
+static bool
+asyncQueuePagePrecedes(int p, int q)
+{
+	return asyncQueuePageDiff(p, q) < 0;
+}
+
+/*
+ * Report space needed for our shared memory area
+ */
+Size
+AsyncShmemSize(void)
+{
+	Size		size;
+
+	/* This had better match AsyncShmemInit */
+	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
+	size = add_size(size, offsetof(AsyncQueueControl, backend));
+
+	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+
+	return size;
+}
+
+/*
+ * Initialize our shared memory area
+ */
+void
+AsyncShmemInit(void)
+{
+	bool		found;
+	Size		size;
+
+	/*
+	 * Create or attach to the AsyncQueueControl structure.
+	 *
+	 * The used entries in the backend[] array run from 1 to MaxBackends; the
+	 * zero'th entry is unused but must be allocated.
+	 */
+	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
+	size = add_size(size, offsetof(AsyncQueueControl, backend));
+
+	asyncQueueControl = (AsyncQueueControl *)
+		ShmemInitStruct("Async Queue Control", size, &found);
+
+	if (!found)
+	{
+		/* First time through, so initialize it */
+		SET_QUEUE_POS(QUEUE_HEAD, 0, 0);
+		SET_QUEUE_POS(QUEUE_TAIL, 0, 0);
+		QUEUE_STOP_PAGE = 0;
+		QUEUE_FIRST_LISTENER = InvalidBackendId;
+		asyncQueueControl->lastQueueFillWarn = 0;
+		/* zero'th entry won't be used, but let's initialize it anyway */
+		for (int i = 0; i <= MaxBackends; i++)
+		{
+			QUEUE_BACKEND_PID(i) = InvalidPid;
+			QUEUE_BACKEND_DBOID(i) = InvalidOid;
+			QUEUE_NEXT_LISTENER(i) = InvalidBackendId;
+			SET_QUEUE_POS(QUEUE_BACKEND_POS(i), 0, 0);
+		}
+	}
+
+	/*
+	 * Set up SLRU management of the pg_notify data.
+	 */
+	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
+	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
+				  SYNC_HANDLER_NONE);
+
+	if (!found)
+	{
+		/*
+		 * During start or reboot, clean out the pg_notify directory.
+		 */
+		(void) SlruScanDirectory(NotifyCtl, SlruScanDirCbDeleteAll, NULL);
+	}
+}
+
+
+/*
+ * pg_notify -
+ *	  SQL function to send a notification event
+ */
+Datum
+pg_notify(PG_FUNCTION_ARGS)
+{
+	const char *channel;
+	const char *payload;
+
+	/* A NULL channel or payload argument is treated as an empty string */
+	if (PG_ARGISNULL(0))
+		channel = "";
+	else
+		channel = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+	if (PG_ARGISNULL(1))
+		payload = "";
+	else
+		payload = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+	/* For NOTIFY as a statement, this is checked in ProcessUtility */
+	PreventCommandDuringRecovery("NOTIFY");
+
+	Async_Notify(channel, payload);
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * Async_Notify
+ *
+ *		This is executed by the SQL notify command.
+ *
+ *		Adds the message to the list of pending notifies.
+ *		Actual notification happens during transaction commit.
+ *		^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ */
+void
+Async_Notify(const char *channel, const char *payload)
+{
+	int			my_level = GetCurrentTransactionNestLevel();
+	size_t		channel_len;
+	size_t		payload_len;
+	Notification *n;
+	MemoryContext oldcontext;
+
+	if (IsParallelWorker())
+		elog(ERROR, "cannot send notifications from a parallel worker");
+
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Notify(%s)", channel);
+
+	channel_len = channel ? strlen(channel) : 0;
+	payload_len = payload ? strlen(payload) : 0;
+
+	/* a channel name must be specified */
+	if (channel_len == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name cannot be empty")));
+
+	/* enforce length limits */
+	if (channel_len >= NAMEDATALEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("channel name too long")));
+
+	if (payload_len >= NOTIFY_PAYLOAD_MAX_LENGTH)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("payload string too long")));
+
+	/*
+	 * We must construct the Notification entry, even if we end up not using
+	 * it, in order to compare it cheaply to existing list entries.
+	 *
+	 * The notification list needs to live until end of transaction, so store
+	 * it in the transaction context.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/* the "+ 2" covers the two nul terminators stored in data[] */
+	n = (Notification *) palloc(offsetof(Notification, data) +
+								channel_len + payload_len + 2);
+	n->channel_len = channel_len;
+	n->payload_len = payload_len;
+	strcpy(n->data, channel);
+	if (payload)
+		strcpy(n->data + channel_len + 1, payload);
+	else
+		n->data[channel_len + 1] = '\0';
+
+	if (pendingNotifies == NULL || my_level > pendingNotifies->nestingLevel)
+	{
+		NotificationList *notifies;
+
+		/*
+		 * First notify event in current (sub)xact. Note that we allocate the
+		 * NotificationList in TopTransactionContext; the nestingLevel might
+		 * get changed later by AtSubCommit_Notify.
+		 */
+		notifies = (NotificationList *)
+			MemoryContextAlloc(TopTransactionContext,
+							   sizeof(NotificationList));
+		notifies->nestingLevel = my_level;
+		notifies->events = list_make1(n);
+		/* We certainly don't need a hashtable yet */
+		notifies->hashtab = NULL;
+		notifies->upper = pendingNotifies;
+		pendingNotifies = notifies;
+	}
+	else
+	{
+		/* Now check for duplicates */
+		if (AsyncExistsPendingNotify(n))
+		{
+			/* It's a dup, so forget it */
+			pfree(n);
+			MemoryContextSwitchTo(oldcontext);
+			return;
+		}
+
+		/* Append more events to existing list */
+		AddEventToPendingNotifies(n);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * queue_listen
+ *		Common code for listen, unlisten, unlisten all commands.
+ *
+ *		Adds the request to the list of pending actions.
+ *		Actual update of the listenChannels list happens during transaction
+ *		commit.
+ */
+static void
+queue_listen(ListenActionKind action, const char *channel)
+{
+	MemoryContext oldcontext;
+	ListenAction *actrec;
+	int			my_level = GetCurrentTransactionNestLevel();
+
+	/*
+	 * Unlike Async_Notify, we don't try to collapse out duplicates. It would
+	 * be too complicated to ensure we get the right interactions of
+	 * conflicting LISTEN/UNLISTEN/UNLISTEN_ALL, and it's unlikely that there
+	 * would be any performance benefit anyway in sane applications.
+	 */
+	oldcontext = MemoryContextSwitchTo(CurTransactionContext);
+
+	/* space for terminating null is included in sizeof(ListenAction) */
+	actrec = (ListenAction *) palloc(offsetof(ListenAction, channel) +
+									 strlen(channel) + 1);
+	actrec->action = action;
+	strcpy(actrec->channel, channel);
+
+	if (pendingActions == NULL || my_level > pendingActions->nestingLevel)
+	{
+		ActionList *actions;
+
+		/*
+		 * First action in current sub(xact). Note that we allocate the
+		 * ActionList in TopTransactionContext; the nestingLevel might get
+		 * changed later by AtSubCommit_Notify.
+		 */
+		actions = (ActionList *)
+			MemoryContextAlloc(TopTransactionContext, sizeof(ActionList));
+		actions->nestingLevel = my_level;
+		actions->actions = list_make1(actrec);
+		actions->upper = pendingActions;
+		pendingActions = actions;
+	}
+	else
+		pendingActions->actions = lappend(pendingActions->actions, actrec);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Async_Listen
+ *
+ *		This is executed by the SQL listen command.
+ */
+void
+Async_Listen(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Listen(%s,%d)", channel, MyProcPid);
+
+	queue_listen(LISTEN_LISTEN, channel);
+}
+
+/*
+ * Async_Unlisten
+ *
+ *		This is executed by the SQL unlisten command.
+ */
+void
+Async_Unlisten(const char *channel)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_Unlisten(%s,%d)", channel, MyProcPid);
+
+	/* If we couldn't possibly be listening, no need to queue anything */
+	if (pendingActions == NULL && !unlistenExitRegistered)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN, channel);
+}
+
+/*
+ * Async_UnlistenAll
+ *
+ *		This is invoked by UNLISTEN * command, and also at backend exit.
+ */
+void
+Async_UnlistenAll(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Async_UnlistenAll(%d)", MyProcPid);
+
+	/* If we couldn't possibly be listening, no need to queue anything */
+	if (pendingActions == NULL && !unlistenExitRegistered)
+		return;
+
+	queue_listen(LISTEN_UNLISTEN_ALL, "");
+}
+
+/*
+ * SQL function: return a set of the channel names this backend is actively
+ * listening to.
+ *
+ * Note: this coding relies on the fact that the listenChannels list cannot
+ * change within a transaction.
+ */
+Datum
+pg_listening_channels(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	/* emit one row per listenChannels entry, in list order */
+	if (funcctx->call_cntr < list_length(listenChannels))
+	{
+		char	   *channel = (char *) list_nth(listenChannels,
+												funcctx->call_cntr);
+
+		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(channel));
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Async_UnlistenOnExit
+ *
+ * This is executed at backend exit if we have done any LISTENs in this
+ * backend.  It might not be necessary anymore, if the user UNLISTENed
+ * everything, but we don't try to detect that case.
+ */
+static void
+Async_UnlistenOnExit(int code, Datum arg)
+{
+	Exec_UnlistenAllCommit();
+	asyncQueueUnregister();
+}
+
+/*
+ * AtPrepare_Notify
+ *
+ *		This is called at the prepare phase of a two-phase
+ *		transaction.  Save the state for possible commit later.
+ */
+void
+AtPrepare_Notify(void)
+{
+	/* It's not allowed to have any pending LISTEN/UNLISTEN/NOTIFY actions */
+	if (pendingActions || pendingNotifies)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot PREPARE a transaction that has executed LISTEN, UNLISTEN, or NOTIFY")));
+}
+
+/*
+ * PreCommit_Notify
+ *
+ *		This is called at transaction commit, before actually committing to
+ *		clog.
+ *
+ *		If there are pending LISTEN actions, make sure we are listed in the
+ *		shared-memory listener array.  This must happen before commit to
+ *		ensure we don't miss any notifies from transactions that commit
+ *		just after ours.
+ *
+ *		If there are outbound notify requests in the pendingNotifies list,
+ *		add them to the global queue.  We do that before commit so that
+ *		we can still throw error if we run out of queue space.
+ */
+void
+PreCommit_Notify(void)
+{
+	ListCell   *p;
+
+	if (!pendingActions && !pendingNotifies)
+		return;					/* no relevant statements in this xact */
+
+	if (Trace_notify)
+		elog(DEBUG1, "PreCommit_Notify");
+
+	/* Preflight for any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					Exec_ListenPreCommit();
+					break;
+				case LISTEN_UNLISTEN:
+					/* there is no Exec_UnlistenPreCommit() */
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					/* there is no Exec_UnlistenAllPreCommit() */
+					break;
+			}
+		}
+	}
+
+	/* Queue any pending notifies (must happen after the above) */
+	if (pendingNotifies)
+	{
+		ListCell   *nextNotify;
+
+		/*
+		 * Make sure that we have an XID assigned to the current transaction.
+		 * GetCurrentTransactionId is cheap if we already have an XID, but not
+		 * so cheap if we don't, and we'd prefer not to do that work while
+		 * holding NotifyQueueLock.
+		 */
+		(void) GetCurrentTransactionId();
+
+		/*
+		 * Serialize writers by acquiring a special lock that we hold till
+		 * after commit.  This ensures that queue entries appear in commit
+		 * order, and in particular that there are never uncommitted queue
+		 * entries ahead of committed ones, so an uncommitted transaction
+		 * can't block delivery of deliverable notifications.
+		 *
+		 * We use a heavyweight lock so that it'll automatically be released
+		 * after either commit or abort.  This also allows deadlocks to be
+		 * detected, though really a deadlock shouldn't be possible here.
+		 *
+		 * The lock is on "database 0", which is pretty ugly but it doesn't
+		 * seem worth inventing a special locktag category just for this.
+		 * (Historical note: before PG 9.0, a similar lock on "database 0" was
+		 * used by the flatfiles mechanism.)
+		 */
+		LockSharedObject(DatabaseRelationId, InvalidOid, 0,
+						 AccessExclusiveLock);
+
+		/* Now push the notifications into the queue */
+		nextNotify = list_head(pendingNotifies->events);
+		while (nextNotify != NULL)
+		{
+			/*
+			 * Add the pending notifications to the queue.  We acquire and
+			 * release NotifyQueueLock once per page, which might be overkill
+			 * but it does allow readers to get in while we're doing this.
+			 *
+			 * A full queue is very uncommon and should really not happen,
+			 * given that we have so much space available in the SLRU pages.
+			 * Nevertheless we need to deal with this possibility.  Note that
+			 * when we get here we are in the process of committing our
+			 * transaction, but we have not yet committed to clog, so at this
+			 * point in time we can still roll the transaction back.
+			 */
+			LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+			asyncQueueFillWarning();
+			if (asyncQueueIsFull())
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("too many notifications in the NOTIFY queue")));
+			nextNotify = asyncQueueAddEntries(nextNotify);
+			LWLockRelease(NotifyQueueLock);
+		}
+
+		/* Note that we don't clear pendingNotifies; AtCommit_Notify will. */
+	}
+}
+
+/*
+ * AtCommit_Notify
+ *
+ *		This is called at transaction commit, after committing to clog.
+ *
+ *		Update listenChannels and clear transaction-local state.
+ *
+ *		If we issued any notifications in the transaction, send signals to
+ *		listening backends (possibly including ourselves) to process them.
+ *		Also, if we filled enough queue pages with new notifies, try to
+ *		advance the queue tail pointer.
+ */
+void
+AtCommit_Notify(void)
+{
+	ListCell   *p;
+
+	/*
+	 * Allow transactions that have not executed LISTEN/UNLISTEN/NOTIFY to
+	 * return as soon as possible
+	 */
+	if (!pendingActions && !pendingNotifies)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "AtCommit_Notify");
+
+	/* Perform any pending listen/unlisten actions */
+	if (pendingActions != NULL)
+	{
+		foreach(p, pendingActions->actions)
+		{
+			ListenAction *actrec = (ListenAction *) lfirst(p);
+
+			switch (actrec->action)
+			{
+				case LISTEN_LISTEN:
+					Exec_ListenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN:
+					Exec_UnlistenCommit(actrec->channel);
+					break;
+				case LISTEN_UNLISTEN_ALL:
+					Exec_UnlistenAllCommit();
+					break;
+			}
+		}
+	}
+
+	/* If no longer listening to anything, get out of listener array */
+	if (amRegisteredListener && listenChannels == NIL)
+		asyncQueueUnregister();
+
+	/*
+	 * Send signals to listening backends.  We need do this only if there are
+	 * pending notifies, which were previously added to the shared queue by
+	 * PreCommit_Notify().
+	 */
+	if (pendingNotifies != NULL)
+		SignalBackends();
+
+	/*
+	 * If it's time to try to advance the global tail pointer, do that.
+	 *
+	 * (It might seem odd to do this in the sender, when more than likely the
+	 * listeners won't yet have read the messages we just sent.  However,
+	 * there's less contention if only the sender does it, and there is little
+	 * need for urgency in advancing the global tail.  So this typically will
+	 * be clearing out messages that were sent some time ago.)
+	 */
+	if (tryAdvanceTail)
+	{
+		tryAdvanceTail = false;
+		asyncQueueAdvanceTail();
+	}
+
+	/* And clean up */
+	ClearPendingActionsAndNotifies();
+}
+
+/*
+ * Exec_ListenPreCommit --- subroutine for PreCommit_Notify
+ *
+ * This function must make sure we are ready to catch any incoming messages.
+ */
+static void
+Exec_ListenPreCommit(void)
+{
+	QueuePosition head;
+	QueuePosition max;
+	BackendId	prevListener;
+
+	/*
+	 * Nothing to do if we are already listening to something, nor if we
+	 * already ran this routine in this transaction.
+	 */
+	if (amRegisteredListener)
+		return;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_ListenPreCommit(%d)", MyProcPid);
+
+	/*
+	 * Before registering, make sure we will unlisten before dying. (Note:
+	 * this action does not get undone if we abort later.)
+	 */
+	if (!unlistenExitRegistered)
+	{
+		before_shmem_exit(Async_UnlistenOnExit, 0);
+		unlistenExitRegistered = true;
+	}
+
+	/*
+	 * This is our first LISTEN, so establish our pointer.
+	 *
+	 * We set our pointer to the global tail pointer and then move it forward
+	 * over already-committed notifications.  This ensures we cannot miss any
+	 * not-yet-committed notifications.  We might get a few more but that
+	 * doesn't hurt.
+	 *
+	 * In some scenarios there might be a lot of committed notifications that
+	 * have not yet been pruned away (because some backend is being lazy about
+	 * reading them).  To reduce our startup time, we can look at other
+	 * backends and adopt the maximum "pos" pointer of any backend that's in
+	 * our database; any notifications it's already advanced over are surely
+	 * committed and need not be re-examined by us.  (We must consider only
+	 * backends connected to our DB, because others will not have bothered to
+	 * check committed-ness of notifications in our DB.)
+	 *
+	 * We need exclusive lock here so we can look at other backends' entries
+	 * and manipulate the list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	/* snapshot current queue state while we hold the lock */
+	head = QUEUE_HEAD;
+	max = QUEUE_TAIL;
+	prevListener = InvalidBackendId;
+	for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+	{
+		if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId)
+			max = QUEUE_POS_MAX(max, QUEUE_BACKEND_POS(i));
+		/* Also find last listening backend before this one */
+		if (i < MyBackendId)
+			prevListener = i;
+	}
+	QUEUE_BACKEND_POS(MyBackendId) = max;
+	QUEUE_BACKEND_PID(MyBackendId) = MyProcPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = MyDatabaseId;
+	/* Insert backend into list of listeners at correct position */
+	if (prevListener > 0)
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_NEXT_LISTENER(prevListener);
+		QUEUE_NEXT_LISTENER(prevListener) = MyBackendId;
+	}
+	else
+	{
+		QUEUE_NEXT_LISTENER(MyBackendId) = QUEUE_FIRST_LISTENER;
+		QUEUE_FIRST_LISTENER = MyBackendId;
+	}
+	LWLockRelease(NotifyQueueLock);
+
+	/* Now we are listed in the global array, so remember we're listening */
+	amRegisteredListener = true;
+
+	/*
+	 * Try to move our pointer forward as far as possible.  This will skip
+	 * over already-committed notifications, which we want to do because they
+	 * might be quite stale.  Note that we are not yet listening on anything,
+	 * so we won't deliver such notifications to our frontend.  Also, although
+	 * our transaction might have executed NOTIFY, those message(s) aren't
+	 * queued yet so we won't skip them here.
+	 */
+	if (!QUEUE_POS_EQUAL(max, head))
+		asyncQueueReadAllNotifications();
+}
+
+/*
+ * Exec_ListenCommit --- subroutine for AtCommit_Notify
+ *
+ * Add the channel to the list of channels we are listening on.
+ */
+static void
+Exec_ListenCommit(const char *channel)
+{
+	MemoryContext oldcontext;
+
+	/* Do nothing if we are already listening on this channel */
+	if (IsListeningOn(channel))
+		return;
+
+	/*
+	 * Add the new channel name to listenChannels.
+	 *
+	 * XXX It is theoretically possible to get an out-of-memory failure here,
+	 * which would be bad because we already committed.  For the moment it
+	 * doesn't seem worth trying to guard against that, but maybe improve this
+	 * later.
+	 */
+	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+	listenChannels = lappend(listenChannels, pstrdup(channel));
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Exec_UnlistenCommit --- subroutine for AtCommit_Notify
+ *
+ * Remove the specified channel name from listenChannels.
+ * Only the first matching entry is removed; entries are unique since
+ * Exec_ListenCommit refuses to add duplicates.
+ */
+static void
+Exec_UnlistenCommit(const char *channel)
+{
+	ListCell   *q;
+
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenCommit(%s,%d)", channel, MyProcPid);
+
+	foreach(q, listenChannels)
+	{
+		char	   *lchan = (char *) lfirst(q);
+
+		if (strcmp(lchan, channel) == 0)
+		{
+			listenChannels = foreach_delete_current(listenChannels, q);
+			pfree(lchan);
+			break;
+		}
+	}
+
+	/*
+	 * We do not complain about unlistening something not being listened;
+	 * should we?
+	 */
+}
+
+/*
+ * Exec_UnlistenAllCommit --- subroutine for AtCommit_Notify
+ *
+ *		Unlisten on all channels for this backend.
+ */
+static void
+Exec_UnlistenAllCommit(void)
+{
+	if (Trace_notify)
+		elog(DEBUG1, "Exec_UnlistenAllCommit(%d)", MyProcPid);
+
+	list_free_deep(listenChannels);
+	listenChannels = NIL;
+}
+
+/*
+ * Test whether we are actively listening on the given channel name.
+ *
+ * Note: this function is executed for every notification found in the queue.
+ * Perhaps it is worth further optimization, eg convert the list to a sorted
+ * array so we can binary-search it.  In practice the list is likely to be
+ * fairly short, though.
+ */
+static bool
+IsListeningOn(const char *channel)
+{
+	ListCell   *p;
+
+	foreach(p, listenChannels)
+	{
+		char	   *lchan = (char *) lfirst(p);
+
+		if (strcmp(lchan, channel) == 0)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Remove our entry from the listeners array when we are no longer listening
+ * on any channel.  NB: must not fail if we're already not listening.
+ */
+static void
+asyncQueueUnregister(void)
+{
+	Assert(listenChannels == NIL);	/* else caller error */
+
+	if (!amRegisteredListener)	/* nothing to do */
+		return;
+
+	/*
+	 * Need exclusive lock here to manipulate list links.
+	 */
+	LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+	/* Mark our entry as invalid */
+	QUEUE_BACKEND_PID(MyBackendId) = InvalidPid;
+	QUEUE_BACKEND_DBOID(MyBackendId) = InvalidOid;
+	/* and remove it from the list */
+	if (QUEUE_FIRST_LISTENER == MyBackendId)
+		QUEUE_FIRST_LISTENER = QUEUE_NEXT_LISTENER(MyBackendId);
+	else
+	{
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			if (QUEUE_NEXT_LISTENER(i) == MyBackendId)
+			{
+				QUEUE_NEXT_LISTENER(i) = QUEUE_NEXT_LISTENER(MyBackendId);
+				break;
+			}
+		}
+	}
+	QUEUE_NEXT_LISTENER(MyBackendId) = InvalidBackendId;
+	LWLockRelease(NotifyQueueLock);
+
+	/* mark ourselves as no longer listed in the global array */
+	amRegisteredListener = false;
+}
+
+/*
+ * Test whether there is room to insert more notification messages.
+ *
+ * Caller must hold at least shared NotifyQueueLock.
+ */
+static bool
+asyncQueueIsFull(void)
+{
+	int			nexthead;
+	int			boundary;
+
+	/*
+	 * The queue is full if creating a new head page would create a page that
+	 * logically precedes the current global tail pointer, ie, the head
+	 * pointer would wrap around compared to the tail.  We cannot create such
+	 * a head page for fear of confusing slru.c.  For safety we round the tail
+	 * pointer back to a segment boundary (truncation logic in
+	 * asyncQueueAdvanceTail does not do this, so doing it here is optional).
+	 *
+	 * Note that this test is *not* dependent on how much space there is on
+	 * the current head page.  This is necessary because asyncQueueAddEntries
+	 * might try to create the next head page in any case.
+	 */
+	nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
+	if (nexthead > QUEUE_MAX_PAGE)
+		nexthead = 0;			/* wrap around */
+	boundary = QUEUE_STOP_PAGE;
+	boundary -= boundary % SLRU_PAGES_PER_SEGMENT;
+	return asyncQueuePagePrecedes(nexthead, boundary);
+}
+
+/*
+ * Advance the QueuePosition to the next entry, assuming that the current
+ * entry is of length entryLength.  If we jump to a new page the function
+ * returns true, else false.
+ */
+static bool
+asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
+{
+	int			pageno = QUEUE_POS_PAGE(*position);
+	int			offset = QUEUE_POS_OFFSET(*position);
+	bool		pageJump = false;
+
+	/*
+	 * Move to the next writing position: First jump over what we have just
+	 * written or read.
+	 */
+	offset += entryLength;
+	Assert(offset <= QUEUE_PAGESIZE);	/* entries never cross page bounds */
+
+	/*
+	 * In a second step check if another entry can possibly be written to the
+	 * page. If so, stay here, we have reached the next position. If not, then
+	 * we need to move on to the next page.
+	 */
+	if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE)
+	{
+		pageno++;
+		if (pageno > QUEUE_MAX_PAGE)
+			pageno = 0;			/* wrap around */
+		offset = 0;
+		pageJump = true;
+	}
+
+	SET_QUEUE_POS(*position, pageno, offset);
+	return pageJump;
+}
+
+/*
+ * Fill the AsyncQueueEntry at *qe with an outbound notification message.
+ */
+static void
+asyncQueueNotificationToEntry(Notification *n, AsyncQueueEntry *qe)
+{
+	size_t		channellen = n->channel_len;
+	size_t		payloadlen = n->payload_len;
+	int			entryLength;
+
+	Assert(channellen < NAMEDATALEN);
+	Assert(payloadlen < NOTIFY_PAYLOAD_MAX_LENGTH);
+
+	/* The terminators are already included in AsyncQueueEntryEmptySize */
+	entryLength = AsyncQueueEntryEmptySize + payloadlen + channellen;
+	entryLength = QUEUEALIGN(entryLength);
+	qe->length = entryLength;
+	qe->dboid = MyDatabaseId;
+	qe->xid = GetCurrentTransactionId();
+	qe->srcPid = MyProcPid;
+	memcpy(qe->data, n->data, channellen + payloadlen + 2);
+}
+
+/*
+ * Add pending notifications to the queue.
+ *
+ * We go page by page here, i.e. we stop once we have to go to a new page but
+ * we will be called again and then fill that next page. If an entry does not
+ * fit into the current page, we write a dummy entry with an InvalidOid as the
+ * database OID in order to fill the page. So every page is always used up to
+ * the last byte which simplifies reading the page later.
+ *
+ * We are passed the list cell (in pendingNotifies->events) containing the next
+ * notification to write and return the first still-unwritten cell back.
+ * Eventually we will return NULL indicating all is done.
+ *
+ * We are holding NotifyQueueLock already from the caller and grab
+ * NotifySLRULock locally in this function.
+ */
+static ListCell *
+asyncQueueAddEntries(ListCell *nextNotify)
+{
+	AsyncQueueEntry qe;
+	QueuePosition queue_head;
+	int			pageno;
+	int			offset;
+	int			slotno;
+
+	/* We hold both NotifyQueueLock and NotifySLRULock during this operation */
+	LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);
+
+	/*
+	 * We work with a local copy of QUEUE_HEAD, which we write back to shared
+	 * memory upon exiting.  The reason for this is that if we have to advance
+	 * to a new page, SimpleLruZeroPage might fail (out of disk space, for
+	 * instance), and we must not advance QUEUE_HEAD if it does.  (Otherwise,
+	 * subsequent insertions would try to put entries into a page that slru.c
+	 * thinks doesn't exist yet.)  So, use a local position variable.  Note
+	 * that if we do fail, any already-inserted queue entries are forgotten;
+	 * this is okay, since they'd be useless anyway after our transaction
+	 * rolls back.
+	 */
+	queue_head = QUEUE_HEAD;
+
+	/*
+	 * If this is the first write since the postmaster started, we need to
+	 * initialize the first page of the async SLRU.  Otherwise, the current
+	 * page should be initialized already, so just fetch it.
+	 *
+	 * (We could also take the first path when the SLRU position has just
+	 * wrapped around, but re-zeroing the page is harmless in that case.)
+	 */
+	pageno = QUEUE_POS_PAGE(queue_head);
+	if (QUEUE_POS_IS_ZERO(queue_head))
+		slotno = SimpleLruZeroPage(NotifyCtl, pageno);
+	else
+		slotno = SimpleLruReadPage(NotifyCtl, pageno, true,
+								   InvalidTransactionId);
+
+	/* Note we mark the page dirty before writing in it */
+	NotifyCtl->shared->page_dirty[slotno] = true;
+
+	while (nextNotify != NULL)
+	{
+		Notification *n = (Notification *) lfirst(nextNotify);
+
+		/* Construct a valid queue entry in local variable qe */
+		asyncQueueNotificationToEntry(n, &qe);
+
+		offset = QUEUE_POS_OFFSET(queue_head);
+
+		/* Check whether the entry really fits on the current page */
+		if (offset + qe.length <= QUEUE_PAGESIZE)
+		{
+			/* OK, so advance nextNotify past this item */
+			nextNotify = lnext(pendingNotifies->events, nextNotify);
+		}
+		else
+		{
+			/*
+			 * Write a dummy entry to fill up the page. Actually readers will
+			 * only check dboid and since it won't match any reader's database
+			 * OID, they will ignore this entry and move on.
+			 */
+			qe.length = QUEUE_PAGESIZE - offset;	/* covers rest of page */
+			qe.dboid = InvalidOid;
+			qe.data[0] = '\0';	/* empty channel */
+			qe.data[1] = '\0';	/* empty payload */
+		}
+
+		/* Now copy qe into the shared buffer page */
+		memcpy(NotifyCtl->shared->page_buffer[slotno] + offset,
+			   &qe,
+			   qe.length);
+
+		/* Advance queue_head appropriately, and detect if page is full */
+		if (asyncQueueAdvance(&(queue_head), qe.length))
+		{
+			/*
+			 * Page is full, so we're done here, but first fill the next page
+			 * with zeroes.  The reason to do this is to ensure that slru.c's
+			 * idea of the head page is always the same as ours, which avoids
+			 * boundary problems in SimpleLruTruncate.  The test in
+			 * asyncQueueIsFull() ensured that there is room to create this
+			 * page without overrunning the queue.
+			 */
+			slotno = SimpleLruZeroPage(NotifyCtl, QUEUE_POS_PAGE(queue_head));
+
+			/*
+			 * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
+			 * set flag to remember that we should try to advance the tail
+			 * pointer (we don't want to actually do that right here).
+			 */
+			if (QUEUE_POS_PAGE(queue_head) % QUEUE_CLEANUP_DELAY == 0)
+				tryAdvanceTail = true;
+
+			/* And exit the loop */
+			break;
+		}
+	}
+
+	/* Success, so update the global QUEUE_HEAD */
+	QUEUE_HEAD = queue_head;
+
+	LWLockRelease(NotifySLRULock);
+
+	return nextNotify;
+}
+
+/*
+ * SQL function to return the fraction of the notification queue currently
+ * occupied.
+ */
+Datum
+pg_notification_queue_usage(PG_FUNCTION_ARGS)
+{
+	double		usage;
+
+	/* Advance the queue tail so we don't report a too-large result */
+	asyncQueueAdvanceTail();
+
+	LWLockAcquire(NotifyQueueLock, LW_SHARED);
+	usage = asyncQueueUsage();
+	LWLockRelease(NotifyQueueLock);
+
+	PG_RETURN_FLOAT8(usage);
+}
+
+/*
+ * Return the fraction of the queue that is currently occupied.
+ *
+ * The caller must hold NotifyQueueLock in (at least) shared mode.
+ *
+ * Note: we measure the distance to the logical tail page, not the physical
+ * tail page.  In some sense that's wrong, but the relative position of the
+ * physical tail is affected by details such as SLRU segment boundaries,
+ * so that a result based on that is unpleasantly unstable.
+ */
+static double
+asyncQueueUsage(void)
+{
+	int			headPage = QUEUE_POS_PAGE(QUEUE_HEAD);
+	int			tailPage = QUEUE_POS_PAGE(QUEUE_TAIL);
+	int			occupied;
+
+	occupied = headPage - tailPage;
+
+	if (occupied == 0)
+		return (double) 0;		/* fast exit for common case */
+
+	if (occupied < 0)
+	{
+		/* head has wrapped around, tail not yet */
+		occupied += QUEUE_MAX_PAGE + 1;
+	}
+
+	return (double) occupied / (double) ((QUEUE_MAX_PAGE + 1) / 2);
+}
+
+/*
+ * Check whether the queue is at least half full, and emit a warning if so.
+ *
+ * This is unlikely given the size of the queue, but possible.
+ * The warnings show up at most once every QUEUE_FULL_WARN_INTERVAL.
+ *
+ * Caller must hold exclusive NotifyQueueLock.
+ */
+static void
+asyncQueueFillWarning(void)
+{
+	double		fillDegree;
+	TimestampTz t;
+
+	fillDegree = asyncQueueUsage();
+	if (fillDegree < 0.5)
+		return;
+
+	t = GetCurrentTimestamp();
+
+	if (TimestampDifferenceExceeds(asyncQueueControl->lastQueueFillWarn,
+								   t, QUEUE_FULL_WARN_INTERVAL))
+	{
+		QueuePosition min = QUEUE_HEAD;
+		int32		minPid = InvalidPid;
+
+		/* find the listener furthest behind; it is what holds back the tail */
+		for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
+		{
+			Assert(QUEUE_BACKEND_PID(i) != InvalidPid);
+			min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
+			if (QUEUE_POS_EQUAL(min, QUEUE_BACKEND_POS(i)))
+				minPid = QUEUE_BACKEND_PID(i);
+		}
+
+		ereport(WARNING,
+				(errmsg("NOTIFY queue is %.0f%% full", fillDegree * 100),
+				 (minPid != InvalidPid ?
+				  errdetail("The server process with PID %d is among those with the oldest transactions.", minPid)
+				  : 0),
+				 (minPid != InvalidPid ?
+				  errhint("The NOTIFY queue cannot be emptied until that process ends its current transaction.")
+				  : 0)));
+
+		asyncQueueControl->lastQueueFillWarn = t;
+	}
+}
+
+/*
+ * Send signals to listening backends.
+ * + * Normally we signal only backends in our own database, since only those + * backends could be interested in notifies we send. However, if there's + * notify traffic in our database but no traffic in another database that + * does have listener(s), those listeners will fall further and further + * behind. Waken them anyway if they're far enough behind, so that they'll + * advance their queue position pointers, allowing the global tail to advance. + * + * Since we know the BackendId and the Pid the signaling is quite cheap. + * + * This is called during CommitTransaction(), so it's important for it + * to have very low probability of failure. + */ +static void +SignalBackends(void) +{ + int32 *pids; + BackendId *ids; + int count; + + /* + * Identify backends that we need to signal. We don't want to send + * signals while holding the NotifyQueueLock, so this loop just builds a + * list of target PIDs. + * + * XXX in principle these pallocs could fail, which would be bad. Maybe + * preallocate the arrays? They're not that large, though. + */ + pids = (int32 *) palloc(MaxBackends * sizeof(int32)); + ids = (BackendId *) palloc(MaxBackends * sizeof(BackendId)); + count = 0; + + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i)) + { + int32 pid = QUEUE_BACKEND_PID(i); + QueuePosition pos; + + Assert(pid != InvalidPid); + pos = QUEUE_BACKEND_POS(i); + if (QUEUE_BACKEND_DBOID(i) == MyDatabaseId) + { + /* + * Always signal listeners in our own database, unless they're + * already caught up (unlikely, but possible). + */ + if (QUEUE_POS_EQUAL(pos, QUEUE_HEAD)) + continue; + } + else + { + /* + * Listeners in other databases should be signaled only if they + * are far behind. 
+ */ + if (asyncQueuePageDiff(QUEUE_POS_PAGE(QUEUE_HEAD), + QUEUE_POS_PAGE(pos)) < QUEUE_CLEANUP_DELAY) + continue; + } + /* OK, need to signal this one */ + pids[count] = pid; + ids[count] = i; + count++; + } + LWLockRelease(NotifyQueueLock); + + /* Now send signals */ + for (int i = 0; i < count; i++) + { + int32 pid = pids[i]; + + /* + * If we are signaling our own process, no need to involve the kernel; + * just set the flag directly. + */ + if (pid == MyProcPid) + { + notifyInterruptPending = true; + continue; + } + + /* + * Note: assuming things aren't broken, a signal failure here could + * only occur if the target backend exited since we released + * NotifyQueueLock; which is unlikely but certainly possible. So we + * just log a low-level debug message if it happens. + */ + if (SendProcSignal(pid, PROCSIG_NOTIFY_INTERRUPT, ids[i]) < 0) + elog(DEBUG3, "could not signal backend with PID %d: %m", pid); + } + + pfree(pids); + pfree(ids); +} + +/* + * AtAbort_Notify + * + * This is called at transaction abort. + * + * Gets rid of pending actions and outbound notifies that we would have + * executed if the transaction got committed. + */ +void +AtAbort_Notify(void) +{ + /* + * If we LISTEN but then roll back the transaction after PreCommit_Notify, + * we have registered as a listener but have not made any entry in + * listenChannels. In that case, deregister again. + */ + if (amRegisteredListener && listenChannels == NIL) + asyncQueueUnregister(); + + /* And clean up */ + ClearPendingActionsAndNotifies(); +} + +/* + * AtSubCommit_Notify() --- Take care of subtransaction commit. + * + * Reassign all items in the pending lists to the parent transaction. + */ +void +AtSubCommit_Notify(void) +{ + int my_level = GetCurrentTransactionNestLevel(); + + /* If there are actions at our nesting level, we must reparent them. 
*/ + if (pendingActions != NULL && + pendingActions->nestingLevel >= my_level) + { + if (pendingActions->upper == NULL || + pendingActions->upper->nestingLevel < my_level - 1) + { + /* nothing to merge; give the whole thing to the parent */ + --pendingActions->nestingLevel; + } + else + { + ActionList *childPendingActions = pendingActions; + + pendingActions = pendingActions->upper; + + /* + * Mustn't try to eliminate duplicates here --- see queue_listen() + */ + pendingActions->actions = + list_concat(pendingActions->actions, + childPendingActions->actions); + pfree(childPendingActions); + } + } + + /* If there are notifies at our nesting level, we must reparent them. */ + if (pendingNotifies != NULL && + pendingNotifies->nestingLevel >= my_level) + { + Assert(pendingNotifies->nestingLevel == my_level); + + if (pendingNotifies->upper == NULL || + pendingNotifies->upper->nestingLevel < my_level - 1) + { + /* nothing to merge; give the whole thing to the parent */ + --pendingNotifies->nestingLevel; + } + else + { + /* + * Formerly, we didn't bother to eliminate duplicates here, but + * now we must, else we fall foul of "Assert(!found)", either here + * or during a later attempt to build the parent-level hashtable. + */ + NotificationList *childPendingNotifies = pendingNotifies; + ListCell *l; + + pendingNotifies = pendingNotifies->upper; + /* Insert all the subxact's events into parent, except for dups */ + foreach(l, childPendingNotifies->events) + { + Notification *childn = (Notification *) lfirst(l); + + if (!AsyncExistsPendingNotify(childn)) + AddEventToPendingNotifies(childn); + } + pfree(childPendingNotifies); + } + } +} + +/* + * AtSubAbort_Notify() --- Take care of subtransaction abort. 
+ */ +void +AtSubAbort_Notify(void) +{ + int my_level = GetCurrentTransactionNestLevel(); + + /* + * All we have to do is pop the stack --- the actions/notifies made in + * this subxact are no longer interesting, and the space will be freed + * when CurTransactionContext is recycled. We still have to free the + * ActionList and NotificationList objects themselves, though, because + * those are allocated in TopTransactionContext. + * + * Note that there might be no entries at all, or no entries for the + * current subtransaction level, either because none were ever created, or + * because we reentered this routine due to trouble during subxact abort. + */ + while (pendingActions != NULL && + pendingActions->nestingLevel >= my_level) + { + ActionList *childPendingActions = pendingActions; + + pendingActions = pendingActions->upper; + pfree(childPendingActions); + } + + while (pendingNotifies != NULL && + pendingNotifies->nestingLevel >= my_level) + { + NotificationList *childPendingNotifies = pendingNotifies; + + pendingNotifies = pendingNotifies->upper; + pfree(childPendingNotifies); + } +} + +/* + * HandleNotifyInterrupt + * + * Signal handler portion of interrupt handling. Let the backend know + * that there's a pending notify interrupt. If we're currently reading + * from the client, this will interrupt the read and + * ProcessClientReadInterrupt() will call ProcessNotifyInterrupt(). + */ +void +HandleNotifyInterrupt(void) +{ + /* + * Note: this is called by a SIGNAL HANDLER. You must be very wary what + * you do here. + */ + + /* signal that work needs to be done */ + notifyInterruptPending = true; + + /* make sure the event is processed in due course */ + SetLatch(MyLatch); +} + +/* + * ProcessNotifyInterrupt + * + * This is called if we see notifyInterruptPending set, just before + * transmitting ReadyForQuery at the end of a frontend command, and + * also if a notify signal occurs while reading from the frontend. 
+ * HandleNotifyInterrupt() will cause the read to be interrupted + * via the process's latch, and this routine will get called. + * If we are truly idle (ie, *not* inside a transaction block), + * process the incoming notifies. + * + * If "flush" is true, force any frontend messages out immediately. + * This can be false when being called at the end of a frontend command, + * since we'll flush after sending ReadyForQuery. + */ +void +ProcessNotifyInterrupt(bool flush) +{ + if (IsTransactionOrTransactionBlock()) + return; /* not really idle */ + + /* Loop in case another signal arrives while sending messages */ + while (notifyInterruptPending) + ProcessIncomingNotify(flush); +} + + +/* + * Read all pending notifications from the queue, and deliver appropriate + * ones to my frontend. Stop when we reach queue head or an uncommitted + * notification. + */ +static void +asyncQueueReadAllNotifications(void) +{ + volatile QueuePosition pos; + QueuePosition head; + Snapshot snapshot; + + /* page_buffer must be adequately aligned, so use a union */ + union + { + char buf[QUEUE_PAGESIZE]; + AsyncQueueEntry align; + } page_buffer; + + /* Fetch current state */ + LWLockAcquire(NotifyQueueLock, LW_SHARED); + /* Assert checks that we have a valid state entry */ + Assert(MyProcPid == QUEUE_BACKEND_PID(MyBackendId)); + pos = QUEUE_BACKEND_POS(MyBackendId); + head = QUEUE_HEAD; + LWLockRelease(NotifyQueueLock); + + if (QUEUE_POS_EQUAL(pos, head)) + { + /* Nothing to do, we have read all notifications already. */ + return; + } + + /*---------- + * Get snapshot we'll use to decide which xacts are still in progress. + * This is trickier than it might seem, because of race conditions. 
+ * Consider the following example: + * + * Backend 1: Backend 2: + * + * transaction starts + * UPDATE foo SET ...; + * NOTIFY foo; + * commit starts + * queue the notify message + * transaction starts + * LISTEN foo; -- first LISTEN in session + * SELECT * FROM foo WHERE ...; + * commit to clog + * commit starts + * add backend 2 to array of listeners + * advance to queue head (this code) + * commit to clog + * + * Transaction 2's SELECT has not seen the UPDATE's effects, since that + * wasn't committed yet. Ideally we'd ensure that client 2 would + * eventually get transaction 1's notify message, but there's no way + * to do that; until we're in the listener array, there's no guarantee + * that the notify message doesn't get removed from the queue. + * + * Therefore the coding technique transaction 2 is using is unsafe: + * applications must commit a LISTEN before inspecting database state, + * if they want to ensure they will see notifications about subsequent + * changes to that state. + * + * What we do guarantee is that we'll see all notifications from + * transactions committing after the snapshot we take here. + * Exec_ListenPreCommit has already added us to the listener array, + * so no not-yet-committed messages can be removed from the queue + * before we see them. + *---------- + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + + /* + * It is possible that we fail while trying to send a message to our + * frontend (for example, because of encoding conversion failure). If + * that happens it is critical that we not try to send the same message + * over and over again. Therefore, we place a PG_TRY block here that will + * forcibly advance our queue position before we lose control to an error. + * (We could alternatively retake NotifyQueueLock and move the position + * before handling each individual message, but that seems like too much + * lock traffic.) 
+ */ + PG_TRY(); + { + bool reachedStop; + + do + { + int curpage = QUEUE_POS_PAGE(pos); + int curoffset = QUEUE_POS_OFFSET(pos); + int slotno; + int copysize; + + /* + * We copy the data from SLRU into a local buffer, so as to avoid + * holding the NotifySLRULock while we are examining the entries + * and possibly transmitting them to our frontend. Copy only the + * part of the page we will actually inspect. + */ + slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage, + InvalidTransactionId); + if (curpage == QUEUE_POS_PAGE(head)) + { + /* we only want to read as far as head */ + copysize = QUEUE_POS_OFFSET(head) - curoffset; + if (copysize < 0) + copysize = 0; /* just for safety */ + } + else + { + /* fetch all the rest of the page */ + copysize = QUEUE_PAGESIZE - curoffset; + } + memcpy(page_buffer.buf + curoffset, + NotifyCtl->shared->page_buffer[slotno] + curoffset, + copysize); + /* Release lock that we got from SimpleLruReadPage_ReadOnly() */ + LWLockRelease(NotifySLRULock); + + /* + * Process messages up to the stop position, end of page, or an + * uncommitted message. + * + * Our stop position is what we found to be the head's position + * when we entered this function. It might have changed already. + * But if it has, we will receive (or have already received and + * queued) another signal and come here again. + * + * We are not holding NotifyQueueLock here! The queue can only + * extend beyond the head pointer (see above) and we leave our + * backend's pointer where it is so nobody will truncate or + * rewrite pages under us. Especially we don't want to hold a lock + * while sending the notifications to the frontend. 
+ */ + reachedStop = asyncQueueProcessPageEntries(&pos, head, + page_buffer.buf, + snapshot); + } while (!reachedStop); + } + PG_FINALLY(); + { + /* Update shared state */ + LWLockAcquire(NotifyQueueLock, LW_SHARED); + QUEUE_BACKEND_POS(MyBackendId) = pos; + LWLockRelease(NotifyQueueLock); + } + PG_END_TRY(); + + /* Done with snapshot */ + UnregisterSnapshot(snapshot); +} + +/* + * Fetch notifications from the shared queue, beginning at position current, + * and deliver relevant ones to my frontend. + * + * The current page must have been fetched into page_buffer from shared + * memory. (We could access the page right in shared memory, but that + * would imply holding the NotifySLRULock throughout this routine.) + * + * We stop if we reach the "stop" position, or reach a notification from an + * uncommitted transaction, or reach the end of the page. + * + * The function returns true once we have reached the stop position or an + * uncommitted notification, and false if we have finished with the page. + * In other words: once it returns true there is no need to look further. + * The QueuePosition *current is advanced past all processed messages. + */ +static bool +asyncQueueProcessPageEntries(volatile QueuePosition *current, + QueuePosition stop, + char *page_buffer, + Snapshot snapshot) +{ + bool reachedStop = false; + bool reachedEndOfPage; + AsyncQueueEntry *qe; + + do + { + QueuePosition thisentry = *current; + + if (QUEUE_POS_EQUAL(thisentry, stop)) + break; + + qe = (AsyncQueueEntry *) (page_buffer + QUEUE_POS_OFFSET(thisentry)); + + /* + * Advance *current over this message, possibly to the next page. As + * noted in the comments for asyncQueueReadAllNotifications, we must + * do this before possibly failing while processing the message. 
+ */ + reachedEndOfPage = asyncQueueAdvance(current, qe->length); + + /* Ignore messages destined for other databases */ + if (qe->dboid == MyDatabaseId) + { + if (XidInMVCCSnapshot(qe->xid, snapshot)) + { + /* + * The source transaction is still in progress, so we can't + * process this message yet. Break out of the loop, but first + * back up *current so we will reprocess the message next + * time. (Note: it is unlikely but not impossible for + * TransactionIdDidCommit to fail, so we can't really avoid + * this advance-then-back-up behavior when dealing with an + * uncommitted message.) + * + * Note that we must test XidInMVCCSnapshot before we test + * TransactionIdDidCommit, else we might return a message from + * a transaction that is not yet visible to snapshots; compare + * the comments at the head of heapam_visibility.c. + * + * Also, while our own xact won't be listed in the snapshot, + * we need not check for TransactionIdIsCurrentTransactionId + * because our transaction cannot (yet) have queued any + * messages. + */ + *current = thisentry; + reachedStop = true; + break; + } + else if (TransactionIdDidCommit(qe->xid)) + { + /* qe->data is the null-terminated channel name */ + char *channel = qe->data; + + if (IsListeningOn(channel)) + { + /* payload follows channel name */ + char *payload = qe->data + strlen(channel) + 1; + + NotifyMyFrontEnd(channel, payload, qe->srcPid); + } + } + else + { + /* + * The source transaction aborted or crashed, so we just + * ignore its notifications. + */ + } + } + + /* Loop back if we're not at end of page */ + } while (!reachedEndOfPage); + + if (QUEUE_POS_EQUAL(*current, stop)) + reachedStop = true; + + return reachedStop; +} + +/* + * Advance the shared queue tail variable to the minimum of all the + * per-backend tail pointers. Truncate pg_notify space if possible. + * + * This is (usually) called during CommitTransaction(), so it's important for + * it to have very low probability of failure. 
+ */ +static void +asyncQueueAdvanceTail(void) +{ + QueuePosition min; + int oldtailpage; + int newtailpage; + int boundary; + + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE); + + /* + * Compute the new tail. Pre-v13, it's essential that QUEUE_TAIL be exact + * (ie, exactly match at least one backend's queue position), so it must + * be updated atomically with the actual computation. Since v13, we could + * get away with not doing it like that, but it seems prudent to keep it + * so. + * + * Also, because incoming backends will scan forward from QUEUE_TAIL, that + * must be advanced before we can truncate any data. Thus, QUEUE_TAIL is + * the logical tail, while QUEUE_STOP_PAGE is the physical tail, or oldest + * un-truncated page. When QUEUE_STOP_PAGE != QUEUE_POS_PAGE(QUEUE_TAIL), + * there are pages we can truncate but haven't yet finished doing so. + * + * For concurrency's sake, we don't want to hold NotifyQueueLock while + * performing SimpleLruTruncate. This is OK because no backend will try + * to access the pages we are in the midst of truncating. + */ + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + min = QUEUE_HEAD; + for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i)) + { + Assert(QUEUE_BACKEND_PID(i) != InvalidPid); + min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i)); + } + QUEUE_TAIL = min; + oldtailpage = QUEUE_STOP_PAGE; + LWLockRelease(NotifyQueueLock); + + /* + * We can truncate something if the global tail advanced across an SLRU + * segment boundary. + * + * XXX it might be better to truncate only once every several segments, to + * reduce the number of directory scans. + */ + newtailpage = QUEUE_POS_PAGE(min); + boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT); + if (asyncQueuePagePrecedes(oldtailpage, boundary)) + { + /* + * SimpleLruTruncate() will ask for NotifySLRULock but will also + * release the lock again. 
+ */ + SimpleLruTruncate(NotifyCtl, newtailpage); + + /* + * Update QUEUE_STOP_PAGE. This changes asyncQueueIsFull()'s verdict + * for the segment immediately prior to the old tail, allowing fresh + * data into that segment. + */ + LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); + QUEUE_STOP_PAGE = newtailpage; + LWLockRelease(NotifyQueueLock); + } + + LWLockRelease(NotifyQueueTailLock); +} + +/* + * ProcessIncomingNotify + * + * Scan the queue for arriving notifications and report them to the front + * end. The notifications might be from other sessions, or our own; + * there's no need to distinguish here. + * + * If "flush" is true, force any frontend messages out immediately. + * + * NOTE: since we are outside any transaction, we must create our own. + */ +static void +ProcessIncomingNotify(bool flush) +{ + /* We *must* reset the flag */ + notifyInterruptPending = false; + + /* Do nothing else if we aren't actively listening */ + if (listenChannels == NIL) + return; + + if (Trace_notify) + elog(DEBUG1, "ProcessIncomingNotify"); + + set_ps_display("notify interrupt"); + + /* + * We must run asyncQueueReadAllNotifications inside a transaction, else + * bad things happen if it gets an error. + */ + StartTransactionCommand(); + + asyncQueueReadAllNotifications(); + + CommitTransactionCommand(); + + /* + * If this isn't an end-of-command case, we must flush the notify messages + * to ensure frontend gets them promptly. + */ + if (flush) + pq_flush(); + + set_ps_display("idle"); + + if (Trace_notify) + elog(DEBUG1, "ProcessIncomingNotify: done"); +} + +/* + * Send NOTIFY message to my front end. + */ +void +NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid) +{ + if (whereToSendOutput == DestRemote) + { + StringInfoData buf; + + pq_beginmessage(&buf, 'A'); + pq_sendint32(&buf, srcPid); + pq_sendstring(&buf, channel); + pq_sendstring(&buf, payload); + pq_endmessage(&buf); + + /* + * NOTE: we do not do pq_flush() here. 
Some level of caller will + * handle it later, allowing this message to be combined into a packet + * with other ones. + */ + } + else + elog(INFO, "NOTIFY for \"%s\" payload \"%s\"", channel, payload); +} + +/* Does pendingNotifies include a match for the given event? */ +static bool +AsyncExistsPendingNotify(Notification *n) +{ + if (pendingNotifies == NULL) + return false; + + if (pendingNotifies->hashtab != NULL) + { + /* Use the hash table to probe for a match */ + if (hash_search(pendingNotifies->hashtab, + &n, + HASH_FIND, + NULL)) + return true; + } + else + { + /* Must scan the event list */ + ListCell *l; + + foreach(l, pendingNotifies->events) + { + Notification *oldn = (Notification *) lfirst(l); + + if (n->channel_len == oldn->channel_len && + n->payload_len == oldn->payload_len && + memcmp(n->data, oldn->data, + n->channel_len + n->payload_len + 2) == 0) + return true; + } + } + + return false; +} + +/* + * Add a notification event to a pre-existing pendingNotifies list. + * + * Because pendingNotifies->events is already nonempty, this works + * correctly no matter what CurrentMemoryContext is. 
+ */ +static void +AddEventToPendingNotifies(Notification *n) +{ + Assert(pendingNotifies->events != NIL); + + /* Create the hash table if it's time to */ + if (list_length(pendingNotifies->events) >= MIN_HASHABLE_NOTIFIES && + pendingNotifies->hashtab == NULL) + { + HASHCTL hash_ctl; + ListCell *l; + + /* Create the hash table */ + hash_ctl.keysize = sizeof(Notification *); + hash_ctl.entrysize = sizeof(NotificationHash); + hash_ctl.hash = notification_hash; + hash_ctl.match = notification_match; + hash_ctl.hcxt = CurTransactionContext; + pendingNotifies->hashtab = + hash_create("Pending Notifies", + 256L, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* Insert all the already-existing events */ + foreach(l, pendingNotifies->events) + { + Notification *oldn = (Notification *) lfirst(l); + NotificationHash *hentry; + bool found; + + hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab, + &oldn, + HASH_ENTER, + &found); + Assert(!found); + hentry->event = oldn; + } + } + + /* Add new event to the list, in order */ + pendingNotifies->events = lappend(pendingNotifies->events, n); + + /* Add event to the hash table if needed */ + if (pendingNotifies->hashtab != NULL) + { + NotificationHash *hentry; + bool found; + + hentry = (NotificationHash *) hash_search(pendingNotifies->hashtab, + &n, + HASH_ENTER, + &found); + Assert(!found); + hentry->event = n; + } +} + +/* + * notification_hash: hash function for notification hash table + * + * The hash "keys" are pointers to Notification structs. 
+ */ +static uint32 +notification_hash(const void *key, Size keysize) +{ + const Notification *k = *(const Notification *const *) key; + + Assert(keysize == sizeof(Notification *)); + /* We don't bother to include the payload's trailing null in the hash */ + return DatumGetUInt32(hash_any((const unsigned char *) k->data, + k->channel_len + k->payload_len + 1)); +} + +/* + * notification_match: match function to use with notification_hash + */ +static int +notification_match(const void *key1, const void *key2, Size keysize) +{ + const Notification *k1 = *(const Notification *const *) key1; + const Notification *k2 = *(const Notification *const *) key2; + + Assert(keysize == sizeof(Notification *)); + if (k1->channel_len == k2->channel_len && + k1->payload_len == k2->payload_len && + memcmp(k1->data, k2->data, + k1->channel_len + k1->payload_len + 2) == 0) + return 0; /* equal */ + return 1; /* not equal */ +} + +/* Clear the pendingActions and pendingNotifies lists. */ +static void +ClearPendingActionsAndNotifies(void) +{ + /* + * Everything's allocated in either TopTransactionContext or the context + * for the subtransaction to which it corresponds. So, there's nothing to + * do here except reset the pointers; the space will be reclaimed when the + * contexts are deleted. + */ + pendingActions = NULL; + pendingNotifies = NULL; +} diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c new file mode 100644 index 0000000..e4b7ffd --- /dev/null +++ b/src/backend/commands/cluster.c @@ -0,0 +1,1736 @@ +/*------------------------------------------------------------------------- + * + * cluster.c + * CLUSTER a table on an index. This is now also used for VACUUM FULL. + * + * There is hardly anything left of Paul Brown's original implementation... 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/cluster.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "access/toast_internals.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" +#include "catalog/toasting.h" +#include "commands/cluster.h" +#include "commands/defrem.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/acl.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/tuplesort.h" + +/* + * This struct is used to pass around the information on tables to be + * clustered. We need this so we can make a list of them when invoked without + * a specific table/index pair. 
+ */ +typedef struct +{ + Oid tableOid; + Oid indexOid; +} RelToCluster; + + +static void cluster_multiple_rels(List *rtcs, ClusterParams *params); +static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); +static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, + bool verbose, bool *pSwapToastByContent, + TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); +static List *get_tables_to_cluster(MemoryContext cluster_context); +static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, + Oid indexOid); + + +/*--------------------------------------------------------------------------- + * This cluster code allows for clustering multiple tables at once. Because + * of this, we cannot just run everything on a single transaction, or we + * would be forced to acquire exclusive locks on all the tables being + * clustered, simultaneously --- very likely leading to deadlock. + * + * To solve this we follow a similar strategy to VACUUM code, + * clustering each relation in a separate transaction. For this to work, + * we need to: + * - provide a separate memory context so that we can pass information in + * a way that survives across transactions + * - start a new transaction every time a new relation is clustered + * - check for validity of the information on to-be-clustered relations, + * as someone might have deleted a relation behind our back, or + * clustered one on a different index + * - end the transaction + * + * The single-relation case does not have any such overhead. + * + * We also allow a relation to be specified without index. In that case, + * the indisclustered bit will be looked up, and an ERROR will be thrown + * if there is no index with the bit set. 
+ *---------------------------------------------------------------------------
+ */
+void
+cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
+{
+	ListCell   *lc;
+	ClusterParams params = {0};
+	bool		verbose = false;
+	Relation	rel = NULL;
+	Oid			indexOid = InvalidOid;
+	MemoryContext cluster_context;
+	List	   *rtcs;
+
+	/* Parse option list */
+	foreach(lc, stmt->params)
+	{
+		DefElem    *opt = (DefElem *) lfirst(lc);
+
+		if (strcmp(opt->defname, "verbose") == 0)
+			verbose = defGetBoolean(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized CLUSTER option \"%s\"",
+							opt->defname),
+					 parser_errposition(pstate, opt->location)));
+	}
+
+	/* Initial option bits; recheck bits may be OR'd in further down. */
+	params.options = (verbose ? CLUOPT_VERBOSE : 0);
+
+	if (stmt->relation != NULL)
+	{
+		/* This is the single-relation case. */
+		Oid			tableOid;
+
+		/*
+		 * Find, lock, and check permissions on the table.  We obtain
+		 * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
+		 * single-transaction case.
+		 */
+		tableOid = RangeVarGetRelidExtended(stmt->relation,
+											AccessExclusiveLock,
+											0,
+											RangeVarCallbackOwnsTable, NULL);
+		rel = table_open(tableOid, NoLock);
+
+		/*
+		 * Reject clustering a remote temp table ... their local buffer
+		 * manager is not going to cope.
+		 */
+		if (RELATION_IS_OTHER_TEMP(rel))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot cluster temporary tables of other sessions")));
+
+		if (stmt->indexname == NULL)
+		{
+			ListCell   *index;
+
+			/* We need to find the index that has indisclustered set. */
+			foreach(index, RelationGetIndexList(rel))
+			{
+				indexOid = lfirst_oid(index);
+				if (get_index_isclustered(indexOid))
+					break;
+				indexOid = InvalidOid;
+			}
+
+			if (!OidIsValid(indexOid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("there is no previously clustered index for table \"%s\"",
+								stmt->relation->relname)));
+		}
+		else
+		{
+			/*
+			 * The index is expected to be in the same namespace as the
+			 * relation.
+			 */
+			indexOid = get_relname_relid(stmt->indexname,
+										 rel->rd_rel->relnamespace);
+			if (!OidIsValid(indexOid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("index \"%s\" for table \"%s\" does not exist",
+								stmt->indexname, stmt->relation->relname)));
+		}
+
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* close relation, keep lock till commit */
+			table_close(rel, NoLock);
+
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, &params);
+
+			return;
+		}
+		/* Otherwise, partitioned table: fall through to multi-table path. */
+	}
+
+	/*
+	 * By here, we know we are in a multi-table situation.  In order to avoid
+	 * holding locks for too long, we want to process each table in its own
+	 * transaction.  This forces us to disallow running inside a user
+	 * transaction block.
+	 */
+	PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+	/* Also, we need a memory context to hold our list of relations */
+	cluster_context = AllocSetContextCreate(PortalContext,
+											"Cluster",
+											ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * Either we're processing a partitioned table, or we were not given any
+	 * table name at all.  In either case, obtain a list of relations to
+	 * process.
+	 *
+	 * In the former case, an index name must have been given, so we don't
+	 * need to recheck its "indisclustered" bit, but we have to check that it
+	 * is an index that we can cluster on.  In the latter case, we set the
+	 * option bit to have indisclustered verified.
+	 *
+	 * Rechecking the relation itself is necessary here in all cases.
+	 */
+	params.options |= CLUOPT_RECHECK;
+	if (rel != NULL)
+	{
+		Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+		check_index_is_clusterable(rel, indexOid, AccessShareLock);
+		rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+
+		/* close relation, releasing lock on parent table */
+		table_close(rel, AccessExclusiveLock);
+	}
+	else
+	{
+		rtcs = get_tables_to_cluster(cluster_context);
+		params.options |= CLUOPT_RECHECK_ISCLUSTERED;
+	}
+
+	/* Do the job. */
+	cluster_multiple_rels(rtcs, &params);
+
+	/* Start a new transaction for the cleanup work. */
+	StartTransactionCommand();
+
+	/* Clean up working storage */
+	MemoryContextDelete(cluster_context);
+}
+
+/*
+ * Given a list of relations to cluster, process each of them in a separate
+ * transaction.
+ *
+ * We expect to be in a transaction at start, but there isn't one when we
+ * return.
+ */
+static void
+cluster_multiple_rels(List *rtcs, ClusterParams *params)
+{
+	ListCell   *lc;
+
+	/* Commit to get out of starting transaction */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Cluster the tables, each in a separate transaction */
+	foreach(lc, rtcs)
+	{
+		RelToCluster *rtc = (RelToCluster *) lfirst(lc);
+
+		/* Start a new transaction for each relation. */
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* Do the job. */
+		cluster_rel(rtc->tableOid, rtc->indexOid, params);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+}
+
+/*
+ * cluster_rel
+ *
+ * This clusters the table by creating a new, clustered table and
+ * swapping the relfilenodes of the new table and the old table, so
+ * the OID of the original table is preserved.  Thus we do not lose
+ * GRANT, inheritance nor references to this table (this was a bug
+ * in releases through 7.3).
+ *
+ * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
+ *
+ * If indexOid is InvalidOid, the table will be rewritten in physical order
+ * instead of index order.  This is the new implementation of VACUUM FULL,
+ * and error messages should refer to the operation as VACUUM not CLUSTER.
+ */
+void
+cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
+{
+	Relation	OldHeap;
+	Oid			save_userid;
+	int			save_sec_context;
+	int			save_nestlevel;
+	bool		verbose = ((params->options & CLUOPT_VERBOSE) != 0);
+	bool		recheck = ((params->options & CLUOPT_RECHECK) != 0);
+
+	/* Check for user-requested abort. */
+	CHECK_FOR_INTERRUPTS();
+
+	pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
+	if (OidIsValid(indexOid))
+		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+									 PROGRESS_CLUSTER_COMMAND_CLUSTER);
+	else
+		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+									 PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
+
+	/*
+	 * We grab exclusive access to the target rel and index for the duration
+	 * of the transaction.  (This is redundant for the single-transaction
+	 * case, since cluster() already did it.)  The index lock is taken inside
+	 * check_index_is_clusterable.
+	 */
+	OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
+
+	/* If the table has gone away, we can skip processing it */
+	if (!OldHeap)
+	{
+		pgstat_progress_end_command();
+		return;
+	}
+
+	/*
+	 * Switch to the table owner's userid, so that any index functions are run
+	 * as that user.  Also lock down security-restricted operations and
+	 * arrange to make GUC variable changes local to this command.
+	 */
+	GetUserIdAndSecContext(&save_userid, &save_sec_context);
+	SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
+						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
+	save_nestlevel = NewGUCNestLevel();
+
+	/*
+	 * Since we may open a new transaction for each relation, we have to check
+	 * that the relation still is what we think it is.
+	 *
+	 * If this is a single-transaction CLUSTER, we can skip these tests. We
+	 * *must* skip the one on indisclustered since it would reject an attempt
+	 * to cluster a not-previously-clustered index.
+	 */
+	if (recheck)
+	{
+		/* Check that the user still owns the relation */
+		if (!pg_class_ownercheck(tableOid, save_userid))
+		{
+			relation_close(OldHeap, AccessExclusiveLock);
+			goto out;
+		}
+
+		/*
+		 * Silently skip a temp table for a remote session.  Only doing this
+		 * check in the "recheck" case is appropriate (which currently means
+		 * somebody is executing a database-wide CLUSTER or on a partitioned
+		 * table), because there is another check in cluster() which will stop
+		 * any attempt to cluster remote temp tables by name.  There is
+		 * another check in cluster_rel which is redundant, but we leave it
+		 * for extra safety.
+		 */
+		if (RELATION_IS_OTHER_TEMP(OldHeap))
+		{
+			relation_close(OldHeap, AccessExclusiveLock);
+			goto out;
+		}
+
+		if (OidIsValid(indexOid))
+		{
+			/*
+			 * Check that the index still exists
+			 */
+			if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
+			{
+				relation_close(OldHeap, AccessExclusiveLock);
+				goto out;
+			}
+
+			/*
+			 * Check that the index is still the one with indisclustered set,
+			 * if needed.
+			 */
+			if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+				!get_index_isclustered(indexOid))
+			{
+				relation_close(OldHeap, AccessExclusiveLock);
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
+	 * would work in most respects, but the index would only get marked as
+	 * indisclustered in the current database, leading to unexpected behavior
+	 * if CLUSTER were later invoked in another database.
+	 */
+	if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster a shared catalog")));
+
+	/*
+	 * Don't process temp tables of other backends ... their local buffer
+	 * manager is not going to cope.
+	 */
+	if (RELATION_IS_OTHER_TEMP(OldHeap))
+	{
+		if (OidIsValid(indexOid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot cluster temporary tables of other sessions")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot vacuum temporary tables of other sessions")));
+	}
+
+	/*
+	 * Also check for active uses of the relation in the current transaction,
+	 * including open scans and pending AFTER trigger events.
+	 */
+	CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
+
+	/* Check heap and index are valid to cluster on */
+	if (OidIsValid(indexOid))
+		check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
+
+	/*
+	 * Quietly ignore the request if this is a materialized view which has not
+	 * been populated from its query. No harm is done because there is no data
+	 * to deal with, and we don't want to throw an error if this is part of a
+	 * multi-relation request -- for example, CLUSTER was run on the entire
+	 * database.
+	 */
+	if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
+		!RelationIsPopulated(OldHeap))
+	{
+		relation_close(OldHeap, AccessExclusiveLock);
+		goto out;
+	}
+
+	Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
+		   OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
+		   OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
+
+	/*
+	 * All predicate locks on the tuples or pages are about to be made
+	 * invalid, because we move tuples around.  Promote them to relation
+	 * locks.  Predicate locks on indexes will be promoted when they are
+	 * reindexed.
+	 */
+	TransferPredicateLocksToHeapRelation(OldHeap);
+
+	/* rebuild_relation does all the dirty work */
+	rebuild_relation(OldHeap, indexOid, verbose);
+
+	/* NB: rebuild_relation does table_close() on OldHeap */
+
+	/* Common exit: undo userid/GUC changes and end progress reporting. */
+out:
+	/* Roll back any GUC changes executed by index functions */
+	AtEOXact_GUC(false, save_nestlevel);
+
+	/* Restore userid and security context */
+	SetUserIdAndSecContext(save_userid, save_sec_context);
+
+	pgstat_progress_end_command();
+}
+
+/*
+ * Verify that the specified heap and index are valid to cluster on
+ *
+ * Side effect: obtains lock on the index.  The caller may
+ * in some cases already have AccessExclusiveLock on the table, but
+ * not in all cases so we can't rely on the table-level lock for
+ * protection here.
+ */
+void
+check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
+{
+	Relation	OldIndex;
+
+	OldIndex = index_open(indexOid, lockmode);
+
+	/*
+	 * Check that index is in fact an index on the given relation
+	 */
+	if (OldIndex->rd_index == NULL ||
+		OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index for table \"%s\"",
+						RelationGetRelationName(OldIndex),
+						RelationGetRelationName(OldHeap))));
+
+	/* Index AM must allow clustering */
+	if (!OldIndex->rd_indam->amclusterable)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
+						RelationGetRelationName(OldIndex))));
+
+	/*
+	 * Disallow clustering on incomplete indexes (those that might not index
+	 * every row of the relation).  We could relax this by making a separate
+	 * seqscan pass over the table to copy the missing rows, but that seems
+	 * expensive and tedious.
+	 */
+	if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on partial index \"%s\"",
+						RelationGetRelationName(OldIndex))));
+
+	/*
+	 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
+	 * it might well not contain entries for every heap row, or might not even
+	 * be internally consistent.  (But note that we don't check indcheckxmin;
+	 * the worst consequence of following broken HOT chains would be that we
+	 * might put recently-dead tuples out-of-order in the new table, and there
+	 * is little harm in that.)
+	 */
+	if (!OldIndex->rd_index->indisvalid)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot cluster on invalid index \"%s\"",
+						RelationGetRelationName(OldIndex))));
+
+	/* Drop relcache refcnt on OldIndex, but keep lock */
+	index_close(OldIndex, NoLock);
+}
+
+/*
+ * mark_index_clustered: mark the specified index as the one clustered on
+ *
+ * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ */
+void
+mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
+{
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+	Relation	pg_index;
+	ListCell   *index;
+
+	/* Disallow applying to a partitioned table */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot mark index clustered in partitioned table")));
+
+	/*
+	 * If the index is already marked clustered, no need to do anything.
+	 */
+	if (OidIsValid(indexOid))
+	{
+		if (get_index_isclustered(indexOid))
+			return;
+	}
+
+	/*
+	 * Check each index of the relation and set/clear the bit as needed.
+	 */
+	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+	foreach(index, RelationGetIndexList(rel))
+	{
+		Oid			thisIndexOid = lfirst_oid(index);
+
+		indexTuple = SearchSysCacheCopy1(INDEXRELID,
+										 ObjectIdGetDatum(thisIndexOid));
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
+		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+		/*
+		 * Unset the bit if set.  We know it's wrong because we checked this
+		 * earlier.
+		 */
+		if (indexForm->indisclustered)
+		{
+			indexForm->indisclustered = false;
+			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+		}
+		else if (thisIndexOid == indexOid)
+		{
+			/* this was checked earlier, but let's be real sure */
+			if (!indexForm->indisvalid)
+				elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+			indexForm->indisclustered = true;
+			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+		}
+
+		InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+									 InvalidOid, is_internal);
+
+		heap_freetuple(indexTuple);
+	}
+
+	table_close(pg_index, RowExclusiveLock);
+}
+
+/*
+ * rebuild_relation: rebuild an existing relation in index or physical order
+ *
+ * OldHeap: table to rebuild --- must be opened and exclusive-locked!
+ * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ *
+ * NB: this routine closes OldHeap at the right time; caller should not.
+ */
+static void
+rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+{
+	Oid			tableOid = RelationGetRelid(OldHeap);
+	Oid			accessMethod = OldHeap->rd_rel->relam;
+	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
+	Oid			OIDNewHeap;
+	char		relpersistence;
+	bool		is_system_catalog;
+	bool		swap_toast_by_content;
+	TransactionId frozenXid;
+	MultiXactId cutoffMulti;
+
+	if (OidIsValid(indexOid))
+		/* Mark the correct index as clustered */
+		mark_index_clustered(OldHeap, indexOid, true);
+
+	/* Remember info about rel before closing OldHeap */
+	relpersistence = OldHeap->rd_rel->relpersistence;
+	is_system_catalog = IsSystemRelation(OldHeap);
+
+	/* Close relcache entry, but keep lock until transaction commit */
+	table_close(OldHeap, NoLock);
+
+	/* Create the transient table that will receive the re-ordered data */
+	OIDNewHeap = make_new_heap(tableOid, tableSpace,
+							   accessMethod,
+							   relpersistence,
+							   AccessExclusiveLock);
+
+	/*
+	 * Copy the heap data into the new table in the desired order.
+	 * copy_table_data also reports back the toast-swap mode and the
+	 * freeze/multixact cutoffs that finish_heap_swap installs below.
+	 */
+	copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+					&swap_toast_by_content, &frozenXid, &cutoffMulti);
+
+	/*
+	 * Swap the physical files of the target and transient tables, then
+	 * rebuild the target's indexes and throw away the transient table.
+	 */
+	finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
+					 swap_toast_by_content, false, true,
+					 frozenXid, cutoffMulti,
+					 relpersistence);
+}
+
+
+/*
+ * Create the transient table that will be filled with new data during
+ * CLUSTER, ALTER TABLE, and similar operations.  The transient table
+ * duplicates the logical structure of the OldHeap; but will have the
+ * specified physical storage properties NewTableSpace, NewAccessMethod, and
+ * relpersistence.
+ *
+ * After this, the caller should load the new heap with transferred/modified
+ * data, then call finish_heap_swap to complete the operation.
+ */
+Oid
+make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
+			  char relpersistence, LOCKMODE lockmode)
+{
+	TupleDesc	OldHeapDesc;
+	char		NewHeapName[NAMEDATALEN];
+	Oid			OIDNewHeap;
+	Oid			toastid;
+	Relation	OldHeap;
+	HeapTuple	tuple;
+	Datum		reloptions;
+	bool		isNull;
+	Oid			namespaceid;
+
+	OldHeap = table_open(OIDOldHeap, lockmode);
+	OldHeapDesc = RelationGetDescr(OldHeap);
+
+	/*
+	 * Note that the NewHeap will not receive any of the defaults or
+	 * constraints associated with the OldHeap; we don't need 'em, and there's
+	 * no reason to spend cycles inserting them into the catalogs only to
+	 * delete them.
+	 */
+
+	/*
+	 * But we do want to use reloptions of the old heap for new heap.
+	 */
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
+	reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+								 &isNull);
+	if (isNull)
+		reloptions = (Datum) 0;
+
+	if (relpersistence == RELPERSISTENCE_TEMP)
+		namespaceid = LookupCreationNamespace("pg_temp");
+	else
+		namespaceid = RelationGetNamespace(OldHeap);
+
+	/*
+	 * Create the new heap, using a temporary name in the same namespace as
+	 * the existing table.  NOTE: there is some risk of collision with user
+	 * relnames.  Working around this seems more trouble than it's worth; in
+	 * particular, we can't create the new heap in a different namespace from
+	 * the old, or we will have problems with the TEMP status of temp tables.
+	 *
+	 * Note: the new heap is not a shared relation, even if we are rebuilding
+	 * a shared rel.  However, we do make the new heap mapped if the source is
+	 * mapped.  This simplifies swap_relation_files, and is absolutely
+	 * necessary for rebuilding pg_class, for reasons explained there.
+	 */
+	snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
+
+	OIDNewHeap = heap_create_with_catalog(NewHeapName,
+										  namespaceid,
+										  NewTableSpace,
+										  InvalidOid,
+										  InvalidOid,
+										  InvalidOid,
+										  OldHeap->rd_rel->relowner,
+										  NewAccessMethod,
+										  OldHeapDesc,
+										  NIL,
+										  RELKIND_RELATION,
+										  relpersistence,
+										  false,
+										  RelationIsMapped(OldHeap),
+										  ONCOMMIT_NOOP,
+										  reloptions,
+										  false,
+										  true,
+										  true,
+										  OIDOldHeap,
+										  NULL);
+	Assert(OIDNewHeap != InvalidOid);
+
+	ReleaseSysCache(tuple);
+
+	/*
+	 * Advance command counter so that the newly-created relation's catalog
+	 * tuples will be visible to table_open.
+	 */
+	CommandCounterIncrement();
+
+	/*
+	 * If necessary, create a TOAST table for the new relation.
+	 *
+	 * If the relation doesn't have a TOAST table already, we can't need one
+	 * for the new relation.  The other way around is possible though: if some
+	 * wide columns have been dropped, NewHeapCreateToastTable can decide that
+	 * no TOAST table is needed for the new table.
+	 *
+	 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
+	 * that the TOAST table will be visible for insertion.
+	 */
+	toastid = OldHeap->rd_rel->reltoastrelid;
+	if (OidIsValid(toastid))
+	{
+		/* keep the existing toast table's reloptions, if any */
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for relation %u", toastid);
+		reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
+									 &isNull);
+		if (isNull)
+			reloptions = (Datum) 0;
+
+		NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
+
+		ReleaseSysCache(tuple);
+	}
+
+	/* Close old heap, but keep the caller-specified lock until commit */
+	table_close(OldHeap, NoLock);
+
+	return OIDNewHeap;
+}
+
+/*
+ * Do the physical copying of table data.
+ *
+ * There are three output parameters:
+ * *pSwapToastByContent is set true if toast tables must be swapped by content.
+ * *pFreezeXid receives the TransactionId used as freeze cutoff point.
+ * *pCutoffMulti receives the MultiXactId used as a cutoff point.
+ */
+static void
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+				bool *pSwapToastByContent, TransactionId *pFreezeXid,
+				MultiXactId *pCutoffMulti)
+{
+	Relation	NewHeap,
+				OldHeap,
+				OldIndex;
+	Relation	relRelation;
+	HeapTuple	reltup;
+	Form_pg_class relform;
+	TupleDesc	oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
+	TupleDesc	newTupDesc PG_USED_FOR_ASSERTS_ONLY;
+	TransactionId OldestXmin,
+				FreezeXid;
+	MultiXactId OldestMxact,
+				MultiXactCutoff;
+	bool		use_sort;
+	double		num_tuples = 0,
+				tups_vacuumed = 0,
+				tups_recently_dead = 0;
+	BlockNumber num_pages;
+	int			elevel = verbose ? INFO : DEBUG2;	/* messages reach client only if VERBOSE */
+	PGRUsage	ru0;
+	char	   *nspname;
+
+	pg_rusage_init(&ru0);
+
+	/*
+	 * Open the relations we need.
+	 */
+	NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
+	OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
+	if (OidIsValid(OIDOldIndex))
+		OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
+	else
+		OldIndex = NULL;
+
+	/* Store a copy of the namespace name for logging purposes */
+	nspname = get_namespace_name(RelationGetNamespace(OldHeap));
+
+	/*
+	 * Their tuple descriptors should be exactly alike, but here we only need
+	 * assume that they have the same number of columns.
+	 */
+	oldTupDesc = RelationGetDescr(OldHeap);
+	newTupDesc = RelationGetDescr(NewHeap);
+	Assert(newTupDesc->natts == oldTupDesc->natts);
+
+	/*
+	 * If the OldHeap has a toast table, get lock on the toast table to keep
+	 * it from being vacuumed.  This is needed because autovacuum processes
+	 * toast tables independently of their main tables, with no lock on the
+	 * latter.  If an autovacuum were to start on the toast table after we
+	 * compute our OldestXmin below, it would use a later OldestXmin, and then
+	 * possibly remove as DEAD toast tuples belonging to main tuples we think
+	 * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
+	 * tuples.
+	 *
+	 * We don't need to open the toast relation here, just lock it.  The lock
+	 * will be held till end of transaction.
+	 */
+	if (OldHeap->rd_rel->reltoastrelid)
+		LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
+
+	/*
+	 * If both tables have TOAST tables, perform toast swap by content.  It is
+	 * possible that the old table has a toast table but the new one doesn't,
+	 * if toastable columns have been dropped.  In that case we have to do
+	 * swap by links.  This is okay because swap by content is only essential
+	 * for system catalogs, and we don't support schema changes for them.
+	 */
+	if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
+	{
+		*pSwapToastByContent = true;
+
+		/*
+		 * When doing swap by content, any toast pointers written into NewHeap
+		 * must use the old toast table's OID, because that's where the toast
+		 * data will eventually be found.  Set this up by setting rd_toastoid.
+		 * This also tells toast_save_datum() to preserve the toast value
+		 * OIDs, which we want so as not to invalidate toast pointers in
+		 * system catalog caches, and to avoid making multiple copies of a
+		 * single toast value.
+		 *
+		 * Note that we must hold NewHeap open until we are done writing data,
+		 * since the relcache will not guarantee to remember this setting once
+		 * the relation is closed.  Also, this technique depends on the fact
+		 * that no one will try to read from the NewHeap until after we've
+		 * finished writing it and swapping the rels --- otherwise they could
+		 * follow the toast pointers to the wrong place.  (It would actually
+		 * work for values copied over from the old toast table, but not for
+		 * any values that we toast which were previously not toasted.)
+		 */
+		NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
+	}
+	else
+		*pSwapToastByContent = false;
+
+	/*
+	 * Compute xids used to freeze and weed out dead tuples and multixacts.
+	 * Since we're going to rewrite the whole table anyway, there's no reason
+	 * not to be aggressive about this.
+	 */
+	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &OldestMxact,
+						  &FreezeXid, &MultiXactCutoff);
+
+	/*
+	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
+	 * backwards, so take the max.
+	 */
+	if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
+		TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
+		FreezeXid = OldHeap->rd_rel->relfrozenxid;
+
+	/*
+	 * MultiXactCutoff, similarly, shouldn't go backwards either.
+	 */
+	if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
+		MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
+		MultiXactCutoff = OldHeap->rd_rel->relminmxid;
+
+	/*
+	 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
+	 * the OldHeap.  We know how to use a sort to duplicate the ordering of a
+	 * btree index, and will use seqscan-and-sort for that case if the planner
+	 * tells us it's cheaper.  Otherwise, always indexscan if an index is
+	 * provided, else plain seqscan.
+	 */
+	if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
+		use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
+	else
+		use_sort = false;
+
+	/* Log what we're doing */
+	if (OldIndex != NULL && !use_sort)
+		ereport(elevel,
+				(errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
+						nspname,
+						RelationGetRelationName(OldHeap),
+						RelationGetRelationName(OldIndex))));
+	else if (use_sort)
+		ereport(elevel,
+				(errmsg("clustering \"%s.%s\" using sequential scan and sort",
+						nspname,
+						RelationGetRelationName(OldHeap))));
+	else
+		ereport(elevel,
+				(errmsg("vacuuming \"%s.%s\"",
+						nspname,
+						RelationGetRelationName(OldHeap))));
+
+	/*
+	 * Hand off the actual copying to AM specific function, the generic code
+	 * cannot know how to deal with visibility across AMs. Note that this
+	 * routine is allowed to set FreezeXid / MultiXactCutoff to different
+	 * values (e.g. because the AM doesn't use freezing).
+	 */
+	table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
+									OldestXmin, &FreezeXid, &MultiXactCutoff,
+									&num_tuples, &tups_vacuumed,
+									&tups_recently_dead);
+
+	/* return selected values to caller, get set as relfrozenxid/minmxid */
+	*pFreezeXid = FreezeXid;
+	*pCutoffMulti = MultiXactCutoff;
+
+	/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
+	NewHeap->rd_toastoid = InvalidOid;
+
+	num_pages = RelationGetNumberOfBlocks(NewHeap);
+
+	/* Log what we did */
+	ereport(elevel,
+			(errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
+					nspname,
+					RelationGetRelationName(OldHeap),
+					tups_vacuumed, num_tuples,
+					RelationGetNumberOfBlocks(OldHeap)),
+			 errdetail("%.0f dead row versions cannot be removed yet.\n"
+					   "%s.",
+					   tups_recently_dead,
+					   pg_rusage_show(&ru0))));
+
+	if (OldIndex != NULL)
+		index_close(OldIndex, NoLock);
+	table_close(OldHeap, NoLock);
+	table_close(NewHeap, NoLock);
+
+	/* Update pg_class to reflect the correct values of pages and tuples. */
+	relRelation = table_open(RelationRelationId, RowExclusiveLock);
+
+	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
+	if (!HeapTupleIsValid(reltup))
+		elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
+	relform = (Form_pg_class) GETSTRUCT(reltup);
+
+	relform->relpages = num_pages;
+	relform->reltuples = num_tuples;
+
+	/* Don't update the stats for pg_class.  See swap_relation_files. */
+	if (OIDOldHeap != RelationRelationId)
+		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
+	else
+		CacheInvalidateRelcacheByTuple(reltup);
+
+	/* Clean up. */
+	heap_freetuple(reltup);
+	table_close(relRelation, RowExclusiveLock);
+
+	/* Make the update visible */
+	CommandCounterIncrement();
+}
+
+/*
+ * Swap the physical files of two given relations.
 *
 * We swap the physical identity (reltablespace, relfilenode) while keeping the
 * same logical identities of the two relations. relpersistence is also
 * swapped, which is critical since it determines where buffers live for each
 * relation.
 *
 * We can swap associated TOAST data in either of two ways: recursively swap
 * the physical content of the toast tables (and their indexes), or swap the
 * TOAST links in the given relations' pg_class entries. The former is needed
 * to manage rewrites of shared catalogs (where we cannot change the pg_class
 * links) while the latter is the only way to handle cases in which a toast
 * table is added or removed altogether.
 *
 * Additionally, the first relation is marked with relfrozenxid set to
 * frozenXid. It seems a bit ugly to have this here, but the caller would
 * have to do it anyway, so having it here saves a heap_update. Note: in
 * the swap-toast-links case, we assume we don't need to change the toast
 * table's relfrozenxid: the new version of the toast table should already
 * have relfrozenxid set to RecentXmin, which is good enough.
 *
 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
 * having to look the information up again later in finish_heap_swap.
 */
static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
					bool swap_toast_by_content,
					bool is_internal,
					TransactionId frozenXid,
					MultiXactId cutoffMulti,
					Oid *mapped_tables)
{
	Relation	relRelation;
	HeapTuple	reltup1,
				reltup2;
	Form_pg_class relform1,
				relform2;
	Oid			relfilenode1,
				relfilenode2;
	Oid			swaptemp;
	char		swptmpchr;
	Oid			relam1,
				relam2;

	/* We need writable copies of both pg_class tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
	if (!HeapTupleIsValid(reltup1))
		elog(ERROR, "cache lookup failed for relation %u", r1);
	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

	reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
	if (!HeapTupleIsValid(reltup2))
		elog(ERROR, "cache lookup failed for relation %u", r2);
	relform2 = (Form_pg_class) GETSTRUCT(reltup2);

	/* Remember the pre-swap values; relam1/relam2 are needed further below */
	relfilenode1 = relform1->relfilenode;
	relfilenode2 = relform2->relfilenode;
	relam1 = relform1->relam;
	relam2 = relform2->relam;

	if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
	{
		/*
		 * Normal non-mapped relations: swap relfilenodes, reltablespaces,
		 * relpersistence
		 */
		Assert(!target_is_pg_class);

		swaptemp = relform1->relfilenode;
		relform1->relfilenode = relform2->relfilenode;
		relform2->relfilenode = swaptemp;

		swaptemp = relform1->reltablespace;
		relform1->reltablespace = relform2->reltablespace;
		relform2->reltablespace = swaptemp;

		swaptemp = relform1->relam;
		relform1->relam = relform2->relam;
		relform2->relam = swaptemp;

		swptmpchr = relform1->relpersistence;
		relform1->relpersistence = relform2->relpersistence;
		relform2->relpersistence = swptmpchr;

		/* Also swap toast links, if we're swapping by links */
		if (!swap_toast_by_content)
		{
			swaptemp = relform1->reltoastrelid;
			relform1->reltoastrelid = relform2->reltoastrelid;
			relform2->reltoastrelid = swaptemp;
		}
	}
	else
	{
		/*
		 * Mapped-relation case. Here we have to swap the relation mappings
		 * instead of modifying the pg_class columns. Both must be mapped.
		 */
		if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
			elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
				 NameStr(relform1->relname));

		/*
		 * We can't change the tablespace nor persistence of a mapped rel, and
		 * we can't handle toast link swapping for one either, because we must
		 * not apply any critical changes to its pg_class row. These cases
		 * should be prevented by upstream permissions tests, so these checks
		 * are non-user-facing emergency backstop.
		 */
		if (relform1->reltablespace != relform2->reltablespace)
			elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relpersistence != relform2->relpersistence)
			elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relam != relform2->relam)
			elog(ERROR, "cannot change access method of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (!swap_toast_by_content &&
			(relform1->reltoastrelid || relform2->reltoastrelid))
			elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
				 NameStr(relform1->relname));

		/*
		 * Fetch the mappings --- shouldn't fail, but be paranoid
		 */
		relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
		if (!OidIsValid(relfilenode1))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform1->relname), r1);
		relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
		if (!OidIsValid(relfilenode2))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform2->relname), r2);

		/*
		 * Send replacement mappings to relmapper. Note these won't actually
		 * take effect until CommandCounterIncrement.
		 */
		RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
		RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);

		/* Pass OIDs of mapped r2 tables back to caller */
		*mapped_tables++ = r2;
	}

	/*
	 * Recognize that rel1's relfilenode (swapped from rel2) is new in this
	 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
	 * new.
	 */
	{
		Relation	rel1,
					rel2;

		rel1 = relation_open(r1, NoLock);
		rel2 = relation_open(r2, NoLock);
		rel2->rd_createSubid = rel1->rd_createSubid;
		rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
		rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
		RelationAssumeNewRelfilenode(rel1);
		relation_close(rel1, NoLock);
		relation_close(rel2, NoLock);
	}

	/*
	 * In the case of a shared catalog, these next few steps will only affect
	 * our own database's pg_class row; but that's okay, because they are all
	 * noncritical updates. That's also an important fact for the case of a
	 * mapped catalog, because it's possible that we'll commit the map change
	 * and then fail to commit the pg_class update.
	 */

	/* set rel1's frozen Xid and minimum MultiXid */
	if (relform1->relkind != RELKIND_INDEX)
	{
		Assert(!TransactionIdIsValid(frozenXid) ||
			   TransactionIdIsNormal(frozenXid));
		relform1->relfrozenxid = frozenXid;
		relform1->relminmxid = cutoffMulti;
	}

	/* swap size statistics too, since new rel has freshly-updated stats */
	{
		int32		swap_pages;
		float4		swap_tuples;
		int32		swap_allvisible;

		swap_pages = relform1->relpages;
		relform1->relpages = relform2->relpages;
		relform2->relpages = swap_pages;

		swap_tuples = relform1->reltuples;
		relform1->reltuples = relform2->reltuples;
		relform2->reltuples = swap_tuples;

		swap_allvisible = relform1->relallvisible;
		relform1->relallvisible = relform2->relallvisible;
		relform2->relallvisible = swap_allvisible;
	}

	/*
	 * Update the tuples in pg_class --- unless the target relation of the
	 * swap is pg_class itself. In that case, there is zero point in making
	 * changes because we'd be updating the old data that we're about to throw
	 * away. Because the real work being done here for a mapped relation is
	 * just to change the relation map settings, it's all right to not update
	 * the pg_class rows in this case. The most important changes will instead
	 * be performed later, in finish_heap_swap() itself.
	 */
	if (!target_is_pg_class)
	{
		CatalogIndexState indstate;

		indstate = CatalogOpenIndexes(relRelation);
		CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
								   indstate);
		CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
								   indstate);
		CatalogCloseIndexes(indstate);
	}
	else
	{
		/* no update ... but we do still need relcache inval */
		CacheInvalidateRelcacheByTuple(reltup1);
		CacheInvalidateRelcacheByTuple(reltup2);
	}

	/*
	 * Now that pg_class has been updated with its relevant information for
	 * the swap, update the dependency of the relations to point to their new
	 * table AM, if it has changed.
	 */
	if (relam1 != relam2)
	{
		if (changeDependencyFor(RelationRelationId,
								r1,
								AccessMethodRelationId,
								relam1,
								relam2) != 1)
			elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
				 get_namespace_name(get_rel_namespace(r1)),
				 get_rel_name(r1));
		if (changeDependencyFor(RelationRelationId,
								r2,
								AccessMethodRelationId,
								relam2,
								relam1) != 1)
			elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
				 get_namespace_name(get_rel_namespace(r2)),
				 get_rel_name(r2));
	}

	/*
	 * Post alter hook for modified relations. The change to r2 is always
	 * internal, but r1 depends on the invocation context.
	 */
	InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
								 InvalidOid, is_internal);
	InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
								 InvalidOid, true);

	/*
	 * If we have toast tables associated with the relations being swapped,
	 * deal with them too.
	 */
	if (relform1->reltoastrelid || relform2->reltoastrelid)
	{
		if (swap_toast_by_content)
		{
			if (relform1->reltoastrelid && relform2->reltoastrelid)
			{
				/* Recursively swap the contents of the toast tables */
				swap_relation_files(relform1->reltoastrelid,
									relform2->reltoastrelid,
									target_is_pg_class,
									swap_toast_by_content,
									is_internal,
									frozenXid,
									cutoffMulti,
									mapped_tables);
			}
			else
			{
				/* caller messed up */
				elog(ERROR, "cannot swap toast files by content when there's only one");
			}
		}
		else
		{
			/*
			 * We swapped the ownership links, so we need to change dependency
			 * data to match.
			 *
			 * NOTE: it is possible that only one table has a toast table.
			 *
			 * NOTE: at present, a TOAST table's only dependency is the one on
			 * its owning table. If more are ever created, we'd need to use
			 * something more selective than deleteDependencyRecordsFor() to
			 * get rid of just the link we want.
			 */
			ObjectAddress baseobject,
						toastobject;
			long		count;

			/*
			 * We disallow this case for system catalogs, to avoid the
			 * possibility that the catalog we're rebuilding is one of the
			 * ones the dependency changes would change. It's too late to be
			 * making any data changes to the target catalog.
			 */
			if (IsSystemClass(r1, relform1))
				elog(ERROR, "cannot swap toast files by links for system catalogs");

			/* Delete old dependencies */
			if (relform1->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform1->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}
			if (relform2->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform2->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}

			/* Register new dependencies */
			baseobject.classId = RelationRelationId;
			baseobject.objectSubId = 0;
			toastobject.classId = RelationRelationId;
			toastobject.objectSubId = 0;

			if (relform1->reltoastrelid)
			{
				baseobject.objectId = r1;
				toastobject.objectId = relform1->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}

			if (relform2->reltoastrelid)
			{
				baseobject.objectId = r2;
				toastobject.objectId = relform2->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}
		}
	}

	/*
	 * If we're swapping two toast tables by content, do the same for their
	 * valid index. The swap can actually be safely done only if the relations
	 * have indexes.
	 */
	if (swap_toast_by_content &&
		relform1->relkind == RELKIND_TOASTVALUE &&
		relform2->relkind == RELKIND_TOASTVALUE)
	{
		Oid			toastIndex1,
					toastIndex2;

		/* Get valid index for each relation */
		toastIndex1 = toast_get_valid_index(r1,
											AccessExclusiveLock);
		toastIndex2 = toast_get_valid_index(r2,
											AccessExclusiveLock);

		/* no need to re-freeze an index, hence the invalid cutoffs */
		swap_relation_files(toastIndex1,
							toastIndex2,
							target_is_pg_class,
							swap_toast_by_content,
							is_internal,
							InvalidTransactionId,
							InvalidMultiXactId,
							mapped_tables);
	}

	/* Clean up. */
	heap_freetuple(reltup1);
	heap_freetuple(reltup2);

	table_close(relRelation, RowExclusiveLock);

	/*
	 * Close both relcache entries' smgr links. We need this kluge because
	 * both links will be invalidated during upcoming CommandCounterIncrement.
	 * Whichever of the rels is the second to be cleared will have a dangling
	 * reference to the other's smgr entry. Rather than trying to avoid this
	 * by ordering operations just so, it's easiest to close the links first.
	 * (Fortunately, since one of the entries is local in our transaction,
	 * it's sufficient to clear out our own relcache this way; the problem
	 * cannot arise for other backends when they see our update on the
	 * non-transient relation.)
	 *
	 * Caution: the placement of this step interacts with the decision to
	 * handle toast rels by recursion. When we are trying to rebuild pg_class
	 * itself, the smgr close on pg_class must happen after all accesses in
	 * this function.
	 */
	RelationCloseSmgrByOid(r1);
	RelationCloseSmgrByOid(r2);
}

/*
 * Remove the transient table that was built by make_new_heap, and finish
 * cleaning up (including rebuilding all indexes on the old heap).
 */
void
finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
				 bool is_system_catalog,
				 bool swap_toast_by_content,
				 bool check_constraints,
				 bool is_internal,
				 TransactionId frozenXid,
				 MultiXactId cutoffMulti,
				 char newrelpersistence)
{
	ObjectAddress object;
	Oid			mapped_tables[4];
	int			reindex_flags;
	ReindexParams reindex_params = {0};
	int			i;

	/* Report that we are now swapping relation files */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);

	/* Zero out possible results from swap_relation_files */
	memset(mapped_tables, 0, sizeof(mapped_tables));

	/*
	 * Swap the contents of the heap relations (including any toast tables).
	 * Also set old heap's relfrozenxid to frozenXid.
	 */
	swap_relation_files(OIDOldHeap, OIDNewHeap,
						(OIDOldHeap == RelationRelationId),
						swap_toast_by_content, is_internal,
						frozenXid, cutoffMulti, mapped_tables);

	/*
	 * If it's a system catalog, queue a sinval message to flush all catcaches
	 * on the catalog when we reach CommandCounterIncrement.
	 */
	if (is_system_catalog)
		CacheInvalidateCatalog(OIDOldHeap);

	/*
	 * Rebuild each index on the relation (but not the toast table, which is
	 * all-new at this point). It is important to do this before the DROP
	 * step because if we are processing a system catalog that will be used
	 * during DROP, we want to have its indexes available. There is no
	 * advantage to the other order anyway because this is all transactional,
	 * so no chance to reclaim disk space before commit. We do not need a
	 * final CommandCounterIncrement() because reindex_relation does it.
	 *
	 * Note: because index_build is called via reindex_relation, it will never
	 * set indcheckxmin true for the indexes. This is OK even though in some
	 * sense we are building new indexes rather than rebuilding existing ones,
	 * because the new heap won't contain any HOT chains at all, let alone
	 * broken ones, so it can't be necessary to set indcheckxmin.
	 */
	reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
	if (check_constraints)
		reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;

	/*
	 * Ensure that the indexes have the same persistence as the parent
	 * relation.
	 */
	if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
	else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
		reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;

	/* Report that we are now reindexing relations */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);

	reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);

	/* Report that we are now doing clean up */
	pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
								 PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);

	/*
	 * If the relation being rebuilt is pg_class, swap_relation_files()
	 * couldn't update pg_class's own pg_class entry (check comments in
	 * swap_relation_files()), thus relfrozenxid was not updated. That's
	 * annoying because a potential reason for doing a VACUUM FULL is an
	 * imminent or actual anti-wraparound shutdown. So, now that we can
	 * access the new relation using its indices, update relfrozenxid.
	 * pg_class doesn't have a toast relation, so we don't need to update the
	 * corresponding toast relation. Note that there's little point moving all
	 * relfrozenxid updates here since swap_relation_files() needs to write to
	 * pg_class for non-mapped relations anyway.
	 */
	if (OIDOldHeap == RelationRelationId)
	{
		Relation	relRelation;
		HeapTuple	reltup;
		Form_pg_class relform;

		relRelation = table_open(RelationRelationId, RowExclusiveLock);

		reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
		if (!HeapTupleIsValid(reltup))
			elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
		relform = (Form_pg_class) GETSTRUCT(reltup);

		relform->relfrozenxid = frozenXid;
		relform->relminmxid = cutoffMulti;

		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);

		table_close(relRelation, RowExclusiveLock);
	}

	/* Destroy new heap with old filenode */
	object.classId = RelationRelationId;
	object.objectId = OIDNewHeap;
	object.objectSubId = 0;

	/*
	 * The new relation is local to our transaction and we know nothing
	 * depends on it, so DROP_RESTRICT should be OK.
	 */
	performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	/* performDeletion does CommandCounterIncrement at end */

	/*
	 * Now we must remove any relation mapping entries that we set up for the
	 * transient table, as well as its toast table and toast index if any. If
	 * we fail to do this before commit, the relmapper will complain about new
	 * permanent map entries being added post-bootstrap.
	 */
	for (i = 0; OidIsValid(mapped_tables[i]); i++)
		RelationMapRemoveMapping(mapped_tables[i]);

	/*
	 * At this point, everything is kosher except that, if we did toast swap
	 * by links, the toast table's name corresponds to the transient table.
	 * The name is irrelevant to the backend because it's referenced by OID,
	 * but users looking at the catalogs could be confused. Rename it to
	 * prevent this problem.
	 *
	 * Note no lock required on the relation, because we already hold an
	 * exclusive lock on it.
	 */
	if (!swap_toast_by_content)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		if (OidIsValid(newrel->rd_rel->reltoastrelid))
		{
			Oid			toastidx;
			char		NewToastName[NAMEDATALEN];

			/* Get the associated valid index to be renamed */
			toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
											 NoLock);

			/* rename the toast table ... */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
					 OIDOldHeap);
			RenameRelationInternal(newrel->rd_rel->reltoastrelid,
								   NewToastName, true, false);

			/* ... and its valid index too. */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
					 OIDOldHeap);

			RenameRelationInternal(toastidx,
								   NewToastName, true, true);

			/*
			 * Reset the relrewrite for the toast. The command-counter
			 * increment is required here as we are about to update the tuple
			 * that is updated as part of RenameRelationInternal.
			 */
			CommandCounterIncrement();
			ResetRelRewrite(newrel->rd_rel->reltoastrelid);
		}
		relation_close(newrel, NoLock);
	}

	/* if it's not a catalog table, clear any missing attribute settings */
	if (!is_system_catalog)
	{
		Relation	newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		RelationClearMissing(newrel);
		relation_close(newrel, NoLock);
	}
}


/*
 * Get a list of tables that the current user owns and
 * have indisclustered set. Return the list in a List * of RelToCluster
 * (stored in the specified memory context), each one giving the tableOid
 * and the indexOid on which the table is already clustered.
 */
static List *
get_tables_to_cluster(MemoryContext cluster_context)
{
	Relation	indRelation;
	TableScanDesc scan;
	ScanKeyData entry;
	HeapTuple	indexTuple;
	Form_pg_index index;
	MemoryContext old_context;
	List	   *rtcs = NIL;

	/*
	 * Get all indexes that have indisclustered set and are owned by
	 * appropriate user.
	 */
	indRelation = table_open(IndexRelationId, AccessShareLock);
	ScanKeyInit(&entry,
				Anum_pg_index_indisclustered,
				BTEqualStrategyNumber, F_BOOLEQ,
				BoolGetDatum(true));
	scan = table_beginscan_catalog(indRelation, 1, &entry);
	while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		RelToCluster *rtc;

		index = (Form_pg_index) GETSTRUCT(indexTuple);

		/* silently skip tables we don't own */
		if (!pg_class_ownercheck(index->indrelid, GetUserId()))
			continue;

		/* Use a permanent memory context for the result list */
		old_context = MemoryContextSwitchTo(cluster_context);

		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
		rtc->tableOid = index->indrelid;
		rtc->indexOid = index->indexrelid;
		rtcs = lappend(rtcs, rtc);

		MemoryContextSwitchTo(old_context);
	}
	table_endscan(scan);

	relation_close(indRelation, AccessShareLock);

	return rtcs;
}

/*
 * Given an index on a partitioned table, return a list of RelToCluster for
 * all the children leaves tables/indexes.
 *
 * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
 * on the table containing the index.
 */
static List *
get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
{
	List	   *inhoids;
	ListCell   *lc;
	List	   *rtcs = NIL;
	MemoryContext old_context;

	/* Do not lock the children until they're processed */
	inhoids = find_all_inheritors(indexOid, NoLock, NULL);

	foreach(lc, inhoids)
	{
		Oid			indexrelid = lfirst_oid(lc);
		Oid			relid = IndexGetRelation(indexrelid, false);
		RelToCluster *rtc;

		/* consider only leaf indexes */
		if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
			continue;

		/* Silently skip partitions which the user has no access to. */
		if (!pg_class_ownercheck(relid, GetUserId()) &&
			(!pg_database_ownercheck(MyDatabaseId, GetUserId()) ||
			 IsSharedRelation(relid)))
			continue;

		/* Use a permanent memory context for the result list */
		old_context = MemoryContextSwitchTo(cluster_context);

		rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
		rtc->tableOid = relid;
		rtc->indexOid = indexrelid;
		rtcs = lappend(rtcs, rtc);

		MemoryContextSwitchTo(old_context);
	}

	return rtcs;
}
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
new file mode 100644
index 0000000..fcfc02d
--- /dev/null
+++ b/src/backend/commands/collationcmds.c
@@ -0,0 +1,820 @@
/*-------------------------------------------------------------------------
 *
 * collationcmds.c
 *	  collation-related commands support code
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/collationcmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "common/string.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/* one candidate locale collected while scanning "locale -a" output */
typedef struct
{
	char	   *localename;		/* name of locale, as per "locale -a" */
	char	   *alias;			/* shortened alias for same */
	int			enc;			/* encoding */
} CollAliasData;


/*
 *
CREATE COLLATION + */ +ObjectAddress +DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists) +{ + char *collName; + Oid collNamespace; + AclResult aclresult; + ListCell *pl; + DefElem *fromEl = NULL; + DefElem *localeEl = NULL; + DefElem *lccollateEl = NULL; + DefElem *lcctypeEl = NULL; + DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; + DefElem *versionEl = NULL; + char *collcollate; + char *collctype; + char *colliculocale; + bool collisdeterministic; + int collencoding; + char collprovider; + char *collversion = NULL; + Oid newoid; + ObjectAddress address; + + collNamespace = QualifiedNameGetCreationNamespace(names, &collName); + + aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(collNamespace)); + + foreach(pl, parameters) + { + DefElem *defel = lfirst_node(DefElem, pl); + DefElem **defelp; + + if (strcmp(defel->defname, "from") == 0) + defelp = &fromEl; + else if (strcmp(defel->defname, "locale") == 0) + defelp = &localeEl; + else if (strcmp(defel->defname, "lc_collate") == 0) + defelp = &lccollateEl; + else if (strcmp(defel->defname, "lc_ctype") == 0) + defelp = &lcctypeEl; + else if (strcmp(defel->defname, "provider") == 0) + defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; + else if (strcmp(defel->defname, "version") == 0) + defelp = &versionEl; + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("collation attribute \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + break; + } + if (*defelp != NULL) + errorConflictingDefElem(defel, pstate); + *defelp = defel; + } + + if (localeEl && (lccollateEl || lcctypeEl)) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("LOCALE cannot be specified together with LC_COLLATE or 
LC_CTYPE.")); + + if (fromEl && list_length(parameters) != 1) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("FROM cannot be specified together with any other options.")); + + if (fromEl) + { + Oid collid; + HeapTuple tp; + Datum datum; + bool isnull; + + collid = get_collation_oid(defGetQualifiedName(fromEl), false); + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; + collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + if (!isnull) + collcollate = TextDatumGetCString(datum); + else + collcollate = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + if (!isnull) + collctype = TextDatumGetCString(datum); + else + collctype = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); + if (!isnull) + colliculocale = TextDatumGetCString(datum); + else + colliculocale = NULL; + + ReleaseSysCache(tp); + + /* + * Copying the "default" collation is not allowed because most code + * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT, + * and so having a second collation with COLLPROVIDER_DEFAULT would + * not work and potentially confuse or crash some code. This could be + * fixed with some legwork. 
+ */ + if (collprovider == COLLPROVIDER_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("collation \"default\" cannot be copied"))); + } + else + { + char *collproviderstr = NULL; + + collcollate = NULL; + collctype = NULL; + colliculocale = NULL; + + if (providerEl) + collproviderstr = defGetString(providerEl); + + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + else + collisdeterministic = true; + + if (versionEl) + collversion = defGetString(versionEl); + + if (collproviderstr) + { + if (pg_strcasecmp(collproviderstr, "icu") == 0) + collprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(collproviderstr, "libc") == 0) + collprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: %s", + collproviderstr))); + } + else + collprovider = COLLPROVIDER_LIBC; + + if (localeEl) + { + if (collprovider == COLLPROVIDER_LIBC) + { + collcollate = defGetString(localeEl); + collctype = defGetString(localeEl); + } + else + colliculocale = defGetString(localeEl); + } + + if (lccollateEl) + collcollate = defGetString(lccollateEl); + + if (lcctypeEl) + collctype = defGetString(lcctypeEl); + + if (collprovider == COLLPROVIDER_LIBC) + { + if (!collcollate) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"lc_collate\" must be specified"))); + + if (!collctype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"lc_ctype\" must be specified"))); + } + else if (collprovider == COLLPROVIDER_ICU) + { + if (!colliculocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"locale\" must be specified"))); + } + + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a + * difference. So we can save writing the code for the other + * providers. 
+ */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + /* + * We could create ICU collations with collencoding == database + * encoding, but it seems better to use -1 so that it matches the + * way initdb would create ICU collations. However, only allow + * one to be created when the current database's encoding is + * supported. Otherwise the collation is useless, plus we get + * surprising behaviors like not being able to drop the collation. + * + * Skip this test when !USE_ICU, because the error we want to + * throw for that isn't thrown till later. + */ + if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current database's encoding is not supported with this provider"))); +#endif + collencoding = -1; + } + else + { + collencoding = GetDatabaseEncoding(); + check_encoding_locale_matches(collencoding, collcollate, collctype); + } + } + + if (!collversion) + collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate); + + newoid = CollationCreate(collName, + collNamespace, + GetUserId(), + collprovider, + collisdeterministic, + collencoding, + collcollate, + collctype, + colliculocale, + collversion, + if_not_exists, + false); /* not quiet */ + + if (!OidIsValid(newoid)) + return InvalidObjectAddress; + + /* + * Check that the locales can be loaded. NB: pg_newlocale_from_collation + * is only supposed to be called on non-C-equivalent locales. 
+ */ + CommandCounterIncrement(); + if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid)) + (void) pg_newlocale_from_collation(newoid); + + ObjectAddressSet(address, CollationRelationId, newoid); + + return address; +} + +/* + * Subroutine for ALTER COLLATION SET SCHEMA and RENAME + * + * Is there a collation with the same name of the given collation already in + * the given namespace? If so, raise an appropriate error message. + */ +void +IsThereCollationInNamespace(const char *collname, Oid nspOid) +{ + /* make sure the name doesn't already exist in new schema */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(GetDatabaseEncoding()), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"", + collname, GetDatabaseEncodingName(), + get_namespace_name(nspOid)))); + + /* mustn't match an any-encoding entry, either */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(-1), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists in schema \"%s\"", + collname, get_namespace_name(nspOid)))); +} + +/* + * ALTER COLLATION + */ +ObjectAddress +AlterCollation(AlterCollationStmt *stmt) +{ + Relation rel; + Oid collOid; + HeapTuple tup; + Form_pg_collation collForm; + Datum datum; + bool isnull; + char *oldversion; + char *newversion; + ObjectAddress address; + + rel = table_open(CollationRelationId, RowExclusiveLock); + collOid = get_collation_oid(stmt->collname, false); + + if (!pg_collation_ownercheck(collOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION, + NameListToString(stmt->collname)); + + tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for collation %u", collOid); + + collForm = (Form_pg_collation) 
GETSTRUCT(tup); + datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(datum); + + datum = SysCacheGetAttr(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_collation"); + newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_collation]; + bool replaces[Natts_pg_collation]; + Datum values[Natts_pg_collation]; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_collation_collversion - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); + + ObjectAddressSet(address, CollationRelationId, collOid); + + heap_freetuple(tup); + table_close(rel, NoLock); + + return address; +} + + +Datum +pg_collation_actual_version(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GETARG_OID(0); + HeapTuple tp; + char collprovider; + Datum datum; + bool isnull; + char *version; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("collation with OID %u does not exist", collid))); + + collprovider = 
((Form_pg_collation) GETSTRUCT(tp))->collprovider;

	/*
	 * The default provider reports no version (NULL); for libc and ICU,
	 * read the provider-specific locale column and ask the provider.
	 */
	if (collprovider != COLLPROVIDER_DEFAULT)
	{
		datum = SysCacheGetAttr(COLLOID, tp, collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
		if (isnull)
			elog(ERROR, "unexpected null in pg_collation");
		version = get_collation_actual_version(collprovider, TextDatumGetCString(datum));
	}
	else
		version = NULL;

	ReleaseSysCache(tp);

	if (version)
		PG_RETURN_TEXT_P(cstring_to_text(version));
	else
		PG_RETURN_NULL();
}


/* will we use "locale -a" in pg_import_system_collations? */
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
#define READ_LOCALE_A_OUTPUT
#endif

#ifdef READ_LOCALE_A_OUTPUT
/*
 * "Normalize" a libc locale name, stripping off encoding tags such as
 * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
 * -> "br_FR@euro"). Return true if a new, different name was
 * generated.
 *
 * "new" must point to a buffer of at least strlen(old) + 1 bytes; the
 * output only ever drops characters, so it is never longer than the
 * input.  The result is always NUL-terminated.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	char	   *n = new;
	const char *o = old;
	bool		changed = false;

	while (*o)
	{
		if (*o == '.')
		{
			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
			o++;
			/* consume the alphanumeric/dash run forming the tag */
			while ((*o >= 'A' && *o <= 'Z')
				   || (*o >= 'a' && *o <= 'z')
				   || (*o >= '0' && *o <= '9')
				   || (*o == '-'))
				o++;
			changed = true;
		}
		else
			*n++ = *o++;
	}
	*n = '\0';

	return changed;
}

/*
 * qsort comparator for CollAliasData items
 */
static int
cmpaliases(const void *a, const void *b)
{
	const CollAliasData *ca = (const CollAliasData *) a;
	const CollAliasData *cb = (const CollAliasData *) b;

	/* comparing localename is enough because other fields are derived */
	return strcmp(ca->localename, cb->localename);
}
#endif							/* READ_LOCALE_A_OUTPUT */


#ifdef USE_ICU
/*
 * Get the ICU language tag for a locale name.
 * The result is a palloc'd string.
+ */ +static char * +get_icu_language_tag(const char *localename) +{ + char buf[ULOC_FULLNAME_CAPACITY]; + UErrorCode status; + + status = U_ZERO_ERROR; + uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not convert locale name \"%s\" to language tag: %s", + localename, u_errorName(status)))); + + return pstrdup(buf); +} + +/* + * Get a comment (specifically, the display name) for an ICU locale. + * The result is a palloc'd string, or NULL if we can't get a comment + * or find that it's not all ASCII. (We can *not* accept non-ASCII + * comments, because the contents of template0 must be encoding-agnostic.) + */ +static char * +get_icu_locale_comment(const char *localename) +{ + UErrorCode status; + UChar displayname[128]; + int32 len_uchar; + int32 i; + char *result; + + status = U_ZERO_ERROR; + len_uchar = uloc_getDisplayName(localename, "en", + displayname, lengthof(displayname), + &status); + if (U_FAILURE(status)) + return NULL; /* no good reason to raise an error */ + + /* Check for non-ASCII comment (can't use pg_is_ascii for this) */ + for (i = 0; i < len_uchar; i++) + { + if (displayname[i] > 127) + return NULL; + } + + /* OK, transcribe */ + result = palloc(len_uchar + 1); + for (i = 0; i < len_uchar; i++) + result[i] = displayname[i]; + result[len_uchar] = '\0'; + + return result; +} +#endif /* USE_ICU */ + + +/* + * pg_import_system_collations: add known system collations to pg_collation + */ +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ + Oid nspid = PG_GETARG_OID(0); + int ncreated = 0; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to import system collations"))); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", nspid))); + + /* Load collations known to libc, using "locale -a" to 
enumerate them */ +#ifdef READ_LOCALE_A_OUTPUT + { + FILE *locale_a_handle; + char localebuf[LOCALE_NAME_BUFLEN]; + int nvalid = 0; + Oid collid; + CollAliasData *aliases; + int naliases, + maxaliases, + i; + + /* expansible array of aliases */ + maxaliases = 100; + aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); + naliases = 0; + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + size_t len; + int enc; + char alias[LOCALE_NAME_BUFLEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. + */ + if (!pg_is_ascii(localebuf)) + { + elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", + localebuf); + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + { + elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf); + continue; + } + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + nvalid++; + + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. 
This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(localebuf, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + localebuf, localebuf, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" + * for ease of use. Note that collation names are unique per + * encoding only, so this doesn't clash with "en_US" for LATIN1, + * say. + * + * However, it might conflict with a name we'll see later in the + * "locale -a" output. So save up the aliases and try to add them + * after we've read all the output. + */ + if (normalize_libc_locale_name(alias, localebuf)) + { + if (naliases >= maxaliases) + { + maxaliases *= 2; + aliases = (CollAliasData *) + repalloc(aliases, maxaliases * sizeof(CollAliasData)); + } + aliases[naliases].localename = pstrdup(localebuf); + aliases[naliases].alias = pstrdup(alias); + aliases[naliases].enc = enc; + naliases++; + } + } + + ClosePipeStream(locale_a_handle); + + /* + * Before processing the aliases, sort them by locale name. The point + * here is that if "locale -a" gives us multiple locale names with the + * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we + * want to pick a deterministic one of them. First in ASCII sort + * order is a good enough rule. (Before PG 10, the code corresponding + * to this logic in initdb.c had an additional ordering rule, to + * prefer the locale name exactly matching the alias, if any. 
We + * don't need to consider that here, because we would have already + * created such a pg_collation entry above, and that one will win.) + */ + if (naliases > 1) + qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases); + + /* Now add aliases, ignoring any that match pre-existing entries */ + for (i = 0; i < naliases; i++) + { + char *locale = aliases[i].localename; + char *alias = aliases[i].alias; + int enc = aliases[i].enc; + + collid = CollationCreate(alias, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + } + } + + /* Give a warning if "locale -a" seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* READ_LOCALE_A_OUTPUT */ + + /* + * Load collations known to ICU + * + * We use uloc_countAvailable()/uloc_getAvailable() rather than + * ucol_countAvailable()/ucol_getAvailable(). The former returns a full + * set of language+region combinations, whereas the latter only returns + * language+region combinations if they are distinct from the language's + * base collation. So there might not be a de-DE or en-GB, which would be + * confusing. + */ +#ifdef USE_ICU + { + int i; + + /* + * Start the loop at -1 to sneak in the root locale without too much + * code duplication. + */ + for (i = -1; i < uloc_countAvailable(); i++) + { + const char *name; + char *langtag; + char *icucomment; + const char *iculocstr; + Oid collid; + + if (i == -1) + name = ""; /* ICU root locale */ + else + name = uloc_getAvailable(i); + + langtag = get_icu_language_tag(name); + iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? 
langtag : name; + + /* + * Be paranoid about not allowing any non-ASCII strings into + * pg_collation + */ + if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr)) + continue; + + collid = CollationCreate(psprintf("%s-x-icu", langtag), + nspid, GetUserId(), + COLLPROVIDER_ICU, true, -1, + NULL, NULL, iculocstr, + get_collation_actual_version(COLLPROVIDER_ICU, iculocstr), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + + icucomment = get_icu_locale_comment(name); + if (icucomment) + CreateComments(collid, CollationRelationId, 0, + icucomment); + } + } + } +#endif /* USE_ICU */ + + PG_RETURN_INT32(ncreated); +} diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c new file mode 100644 index 0000000..86985a9 --- /dev/null +++ b/src/backend/commands/comment.c @@ -0,0 +1,459 @@ +/*------------------------------------------------------------------------- + * + * comment.c + * + * PostgreSQL object comments utility code. + * + * Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/commands/comment.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/table.h" +#include "catalog/indexing.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_description.h" +#include "catalog/pg_shdescription.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" + + +/* + * CommentObject -- + * + * This routine is used to add the associated comment into + * pg_description for the object specified by the given SQL command. 
+ */ +ObjectAddress +CommentObject(CommentStmt *stmt) +{ + Relation relation; + ObjectAddress address = InvalidObjectAddress; + + /* + * When loading a dump, we may see a COMMENT ON DATABASE for the old name + * of the database. Erroring out would prevent pg_restore from completing + * (which is really pg_restore's fault, but for now we will work around + * the problem here). Consensus is that the best fix is to treat wrong + * database name as a WARNING not an ERROR; hence, the following special + * case. + */ + if (stmt->objtype == OBJECT_DATABASE) + { + char *database = strVal(stmt->object); + + if (!OidIsValid(get_database_oid(database, true))) + { + ereport(WARNING, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", database))); + return address; + } + } + + /* + * Translate the parser representation that identifies this object into an + * ObjectAddress. get_object_address() will throw an error if the object + * does not exist, and will also acquire a lock on the target to guard + * against concurrent DROP operations. + */ + address = get_object_address(stmt->objtype, stmt->object, + &relation, ShareUpdateExclusiveLock, false); + + /* Require ownership of the target object. */ + check_object_ownership(GetUserId(), stmt->objtype, address, + stmt->object, relation); + + /* Perform other integrity checks as needed. */ + switch (stmt->objtype) + { + case OBJECT_COLUMN: + + /* + * Allow comments only on columns of tables, views, materialized + * views, composite types, and foreign tables (which are the only + * relkinds for which pg_dump will dump per-column comments). In + * particular we wish to disallow comments on index columns, + * because the naming of an index's columns may change across PG + * versions, so dumping per-column comments could create reload + * failures. 
+ */ + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_VIEW && + relation->rd_rel->relkind != RELKIND_MATVIEW && + relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot set comment on relation \"%s\"", + RelationGetRelationName(relation)), + errdetail_relkind_not_supported(relation->rd_rel->relkind))); + break; + default: + break; + } + + /* + * Databases, tablespaces, and roles are cluster-wide objects, so any + * comments on those objects are recorded in the shared pg_shdescription + * catalog. Comments on all other objects are recorded in pg_description. + */ + if (stmt->objtype == OBJECT_DATABASE || stmt->objtype == OBJECT_TABLESPACE + || stmt->objtype == OBJECT_ROLE) + CreateSharedComments(address.objectId, address.classId, stmt->comment); + else + CreateComments(address.objectId, address.classId, address.objectSubId, + stmt->comment); + + /* + * If get_object_address() opened the relation for us, we close it to keep + * the reference count correct - but we retain any locks acquired by + * get_object_address() until commit time, to guard against concurrent + * activity. + */ + if (relation != NULL) + relation_close(relation, NoLock); + + return address; +} + +/* + * CreateComments -- + * + * Create a comment for the specified object descriptor. Inserts a new + * pg_description tuple, or replaces an existing one with the same key. + * + * If the comment given is null or an empty string, instead delete any + * existing comment for the specified key. 
 */
void
CreateComments(Oid oid, Oid classoid, int32 subid, const char *comment)
{
	Relation	description;
	ScanKeyData skey[3];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	HeapTuple	newtuple = NULL;	/* non-NULL once we form a replacement */
	Datum		values[Natts_pg_description];
	bool		nulls[Natts_pg_description];
	bool		replaces[Natts_pg_description];
	int			i;

	/* Reduce empty-string to NULL case */
	if (comment != NULL && strlen(comment) == 0)
		comment = NULL;

	/* Prepare to form or update a tuple, if necessary */
	if (comment != NULL)
	{
		/*
		 * replaces[] is all-true: only the description column actually
		 * changes on update, but overwriting the key columns with
		 * identical values is harmless.
		 */
		for (i = 0; i < Natts_pg_description; i++)
		{
			nulls[i] = false;
			replaces[i] = true;
		}
		values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(oid);
		values[Anum_pg_description_classoid - 1] = ObjectIdGetDatum(classoid);
		values[Anum_pg_description_objsubid - 1] = Int32GetDatum(subid);
		values[Anum_pg_description_description - 1] = CStringGetTextDatum(comment);
	}

	/* Use the index to search for a matching old tuple */

	ScanKeyInit(&skey[0],
				Anum_pg_description_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_description_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));
	ScanKeyInit(&skey[2],
				Anum_pg_description_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(subid));

	description = table_open(DescriptionRelationId, RowExclusiveLock);

	sd = systable_beginscan(description, DescriptionObjIndexId, true,
							NULL, 3, skey);

	while ((oldtuple = systable_getnext(sd)) != NULL)
	{
		/* Found the old tuple, so delete or update it */

		if (comment == NULL)
			CatalogTupleDelete(description, &oldtuple->t_self);
		else
		{
			newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(description), values,
										 nulls, replaces);
			CatalogTupleUpdate(description, &oldtuple->t_self, newtuple);
		}

		break;					/* Assume there can be only one match */
	}

	systable_endscan(sd);

	/* If we didn't find an old tuple, insert a new one */

	if (newtuple
== NULL && comment != NULL) + { + newtuple = heap_form_tuple(RelationGetDescr(description), + values, nulls); + CatalogTupleInsert(description, newtuple); + } + + if (newtuple != NULL) + heap_freetuple(newtuple); + + /* Done */ + + table_close(description, NoLock); +} + +/* + * CreateSharedComments -- + * + * Create a comment for the specified shared object descriptor. Inserts a + * new pg_shdescription tuple, or replaces an existing one with the same key. + * + * If the comment given is null or an empty string, instead delete any + * existing comment for the specified key. + */ +void +CreateSharedComments(Oid oid, Oid classoid, const char *comment) +{ + Relation shdescription; + ScanKeyData skey[2]; + SysScanDesc sd; + HeapTuple oldtuple; + HeapTuple newtuple = NULL; + Datum values[Natts_pg_shdescription]; + bool nulls[Natts_pg_shdescription]; + bool replaces[Natts_pg_shdescription]; + int i; + + /* Reduce empty-string to NULL case */ + if (comment != NULL && strlen(comment) == 0) + comment = NULL; + + /* Prepare to form or update a tuple, if necessary */ + if (comment != NULL) + { + for (i = 0; i < Natts_pg_shdescription; i++) + { + nulls[i] = false; + replaces[i] = true; + } + values[Anum_pg_shdescription_objoid - 1] = ObjectIdGetDatum(oid); + values[Anum_pg_shdescription_classoid - 1] = ObjectIdGetDatum(classoid); + values[Anum_pg_shdescription_description - 1] = CStringGetTextDatum(comment); + } + + /* Use the index to search for a matching old tuple */ + + ScanKeyInit(&skey[0], + Anum_pg_shdescription_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oid)); + ScanKeyInit(&skey[1], + Anum_pg_shdescription_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(classoid)); + + shdescription = table_open(SharedDescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true, + NULL, 2, skey); + + while ((oldtuple = systable_getnext(sd)) != NULL) + { + /* Found the old tuple, so delete or 
update it */ + + if (comment == NULL) + CatalogTupleDelete(shdescription, &oldtuple->t_self); + else + { + newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(shdescription), + values, nulls, replaces); + CatalogTupleUpdate(shdescription, &oldtuple->t_self, newtuple); + } + + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + + /* If we didn't find an old tuple, insert a new one */ + + if (newtuple == NULL && comment != NULL) + { + newtuple = heap_form_tuple(RelationGetDescr(shdescription), + values, nulls); + CatalogTupleInsert(shdescription, newtuple); + } + + if (newtuple != NULL) + heap_freetuple(newtuple); + + /* Done */ + + table_close(shdescription, NoLock); +} + +/* + * DeleteComments -- remove comments for an object + * + * If subid is nonzero then only comments matching it will be removed. + * If subid is zero, all comments matching the oid/classoid will be removed + * (this corresponds to deleting a whole object). + */ +void +DeleteComments(Oid oid, Oid classoid, int32 subid) +{ + Relation description; + ScanKeyData skey[3]; + int nkeys; + SysScanDesc sd; + HeapTuple oldtuple; + + /* Use the index to search for all matching old tuples */ + + ScanKeyInit(&skey[0], + Anum_pg_description_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oid)); + ScanKeyInit(&skey[1], + Anum_pg_description_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(classoid)); + + if (subid != 0) + { + ScanKeyInit(&skey[2], + Anum_pg_description_objsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(subid)); + nkeys = 3; + } + else + nkeys = 2; + + description = table_open(DescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(description, DescriptionObjIndexId, true, + NULL, nkeys, skey); + + while ((oldtuple = systable_getnext(sd)) != NULL) + CatalogTupleDelete(description, &oldtuple->t_self); + + /* Done */ + + systable_endscan(sd); + table_close(description, RowExclusiveLock); +} + +/* + * 
DeleteSharedComments -- remove comments for a shared object
 *
 * Removes all pg_shdescription rows matching the given oid/classoid
 * (shared objects have no sub-object IDs, so no subid filter exists).
 */
void
DeleteSharedComments(Oid oid, Oid classoid)
{
	Relation	shdescription;
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;

	/* Use the index to search for all matching old tuples */

	ScanKeyInit(&skey[0],
				Anum_pg_shdescription_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_shdescription_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));

	shdescription = table_open(SharedDescriptionRelationId, RowExclusiveLock);

	sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true,
							NULL, 2, skey);

	while ((oldtuple = systable_getnext(sd)) != NULL)
		CatalogTupleDelete(shdescription, &oldtuple->t_self);

	/* Done */

	systable_endscan(sd);
	table_close(shdescription, RowExclusiveLock);
}

/*
 * GetComment -- get the comment for an object, or null if not found.
 *
 * The result is a palloc'd copy of the description text, or NULL if no
 * matching pg_description row exists; the caller may pfree it.
 */
char *
GetComment(Oid oid, Oid classoid, int32 subid)
{
	Relation	description;
	ScanKeyData skey[3];
	SysScanDesc sd;
	TupleDesc	tupdesc;
	HeapTuple	tuple;
	char	   *comment;

	/* Use the index to search for a matching old tuple */

	ScanKeyInit(&skey[0],
				Anum_pg_description_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(oid));
	ScanKeyInit(&skey[1],
				Anum_pg_description_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classoid));
	ScanKeyInit(&skey[2],
				Anum_pg_description_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(subid));

	/* AccessShareLock suffices: this is a read-only lookup */
	description = table_open(DescriptionRelationId, AccessShareLock);
	tupdesc = RelationGetDescr(description);

	sd = systable_beginscan(description, DescriptionObjIndexId, true,
							NULL, 3, skey);

	comment = NULL;
	while ((tuple = systable_getnext(sd)) != NULL)
	{
		Datum		value;
		bool		isnull;

		/* Found the tuple, get description field */
		value = heap_getattr(tuple, Anum_pg_description_description, tupdesc, &isnull);
if (!isnull) + comment = TextDatumGetCString(value); + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + + /* Done */ + table_close(description, AccessShareLock); + + return comment; +} diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c new file mode 100644 index 0000000..721de17 --- /dev/null +++ b/src/backend/commands/constraint.c @@ -0,0 +1,205 @@ +/*------------------------------------------------------------------------- + * + * constraint.c + * PostgreSQL CONSTRAINT support code. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/constraint.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/tableam.h" +#include "catalog/index.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + + +/* + * unique_key_recheck - trigger function to do a deferred uniqueness check. + * + * This now also does deferred exclusion-constraint checks, so the name is + * somewhat historical. + * + * This is invoked as an AFTER ROW trigger for both INSERT and UPDATE, + * for any rows recorded as potentially violating a deferrable unique + * or exclusion constraint. + * + * This may be an end-of-statement check, a commit-time check, or a + * check triggered by a SET CONSTRAINTS command. 
+ */ +Datum +unique_key_recheck(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata = (TriggerData *) fcinfo->context; + const char *funcname = "unique_key_recheck"; + ItemPointerData checktid; + ItemPointerData tmptid; + Relation indexRel; + IndexInfo *indexInfo; + EState *estate; + ExprContext *econtext; + TupleTableSlot *slot; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + + /* + * Make sure this is being called as an AFTER ROW trigger. Note: + * translatable error strings are shared with ri_triggers.c, so resist the + * temptation to fold the function name into them. + */ + if (!CALLED_AS_TRIGGER(fcinfo)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" was not called by trigger manager", + funcname))); + + if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) || + !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event)) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired AFTER ROW", + funcname))); + + /* + * Get the new data that was inserted/updated. + */ + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + checktid = trigdata->tg_trigslot->tts_tid; + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + checktid = trigdata->tg_newslot->tts_tid; + else + { + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("function \"%s\" must be fired for INSERT or UPDATE", + funcname))); + ItemPointerSetInvalid(&checktid); /* keep compiler quiet */ + } + + slot = table_slot_create(trigdata->tg_relation, NULL); + + /* + * If the row pointed at by checktid is now dead (ie, inserted and then + * deleted within our transaction), we can skip the check. However, we + * have to be careful, because this trigger gets queued only in response + * to index insertions; which means it does not get queued e.g. for HOT + * updates. 
The row we are called for might now be dead, but have a live + * HOT child, in which case we still need to make the check --- + * effectively, we're applying the check against the live child row, + * although we can use the values from this row since by definition all + * columns of interest to us are the same. + * + * This might look like just an optimization, because the index AM will + * make this identical test before throwing an error. But it's actually + * needed for correctness, because the index AM will also throw an error + * if it doesn't find the index entry for the row. If the row's dead then + * it's possible the index entry has also been marked dead, and even + * removed. + */ + tmptid = checktid; + { + IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); + bool call_again = false; + + if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot, + &call_again, NULL)) + { + /* + * All rows referenced by the index entry are dead, so skip the + * check. + */ + ExecDropSingleTupleTableSlot(slot); + table_index_fetch_end(scan); + return PointerGetDatum(NULL); + } + table_index_fetch_end(scan); + } + + /* + * Open the index, acquiring a RowExclusiveLock, just as if we were going + * to update it. (This protects against possible changes of the index + * schema, not against concurrent updates.) + */ + indexRel = index_open(trigdata->tg_trigger->tgconstrindid, + RowExclusiveLock); + indexInfo = BuildIndexInfo(indexRel); + + /* + * Typically the index won't have expressions, but if it does we need an + * EState to evaluate them. We need it for exclusion constraints too, + * even if they are just on simple columns. + */ + if (indexInfo->ii_Expressions != NIL || + indexInfo->ii_ExclusionOps != NULL) + { + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + econtext->ecxt_scantuple = slot; + } + else + estate = NULL; + + /* + * Form the index values and isnull flags for the index entry that we need + * to check. 
+ * + * Note: if the index uses functions that are not as immutable as they are + * supposed to be, this could produce an index tuple different from the + * original. The index AM can catch such errors by verifying that it + * finds a matching index entry with the tuple's TID. For exclusion + * constraints we check this in check_exclusion_constraint(). + */ + FormIndexDatum(indexInfo, slot, estate, values, isnull); + + /* + * Now do the appropriate check. + */ + if (indexInfo->ii_ExclusionOps == NULL) + { + /* + * Note: this is not a real insert; it is a check that the index entry + * that has already been inserted is unique. Passing the tuple's tid + * (i.e. unmodified by table_index_fetch_tuple()) is correct even if + * the row is now dead, because that is the TID the index will know + * about. + */ + index_insert(indexRel, values, isnull, &checktid, + trigdata->tg_relation, UNIQUE_CHECK_EXISTING, + false, indexInfo); + } + else + { + /* + * For exclusion constraints we just do the normal check, but now it's + * okay to throw error. In the HOT-update case, we must use the live + * HOT child's TID here, else check_exclusion_constraint will think + * the child is a conflict. + */ + check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo, + &tmptid, values, isnull, + estate, false); + } + + /* + * If that worked, then this index entry is unique or non-excluded, and we + * are done. 
+ */ + if (estate != NULL) + FreeExecutorState(estate); + + ExecDropSingleTupleTableSlot(slot); + + index_close(indexRel, RowExclusiveLock); + + return PointerGetDatum(NULL); +} diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c new file mode 100644 index 0000000..67feda3 --- /dev/null +++ b/src/backend/commands/conversioncmds.c @@ -0,0 +1,139 @@ +/*------------------------------------------------------------------------- + * + * conversioncmds.c + * conversion creation command support code + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/conversioncmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/conversioncmds.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +/* + * CREATE CONVERSION + */ +ObjectAddress +CreateConversionCommand(CreateConversionStmt *stmt) +{ + Oid namespaceId; + char *conversion_name; + AclResult aclresult; + int from_encoding; + int to_encoding; + Oid funcoid; + const char *from_encoding_name = stmt->for_encoding_name; + const char *to_encoding_name = stmt->to_encoding_name; + List *func_name = stmt->func_name; + static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID}; + char result[1]; + Datum funcresult; + + /* Convert list of names to a name and namespace */ + namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name, + &conversion_name); + + /* Check we have 
creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + + /* Check the encoding names */ + from_encoding = pg_char_to_encoding(from_encoding_name); + if (from_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("source encoding \"%s\" does not exist", + from_encoding_name))); + + to_encoding = pg_char_to_encoding(to_encoding_name); + if (to_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("destination encoding \"%s\" does not exist", + to_encoding_name))); + + /* + * We consider conversions to or from SQL_ASCII to be meaningless. (If + * you wish to change this, note that pg_do_encoding_conversion() and its + * sister functions have hard-wired fast paths for any conversion in which + * the source or target encoding is SQL_ASCII, so that an encoding + * conversion function declared for such a case will never be used.) + */ + if (from_encoding == PG_SQL_ASCII || to_encoding == PG_SQL_ASCII) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion to or from \"SQL_ASCII\" is not supported"))); + + /* + * Check the existence of the conversion function. Function name could be + * a qualified name. 
+ */ + funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid), + funcargs, false); + + /* Check it returns int4, else it's probably the wrong function */ + if (get_func_rettype(funcoid) != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion function %s must return type %s", + NameListToString(func_name), "integer"))); + + /* Check we have EXECUTE rights for the function */ + aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(func_name)); + + /* + * Check that the conversion function is suitable for the requested source + * and target encodings. We do that by calling the function with an empty + * string; the conversion function should throw an error if it can't + * perform the requested conversion. + */ + funcresult = OidFunctionCall6(funcoid, + Int32GetDatum(from_encoding), + Int32GetDatum(to_encoding), + CStringGetDatum(""), + CStringGetDatum(result), + Int32GetDatum(0), + BoolGetDatum(false)); + + /* + * The function should return 0 for empty input. Might as well check that, + * too. 
+ */ + if (DatumGetInt32(funcresult) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion function %s returned incorrect result for empty input", + NameListToString(func_name)))); + + /* + * All seem ok, go ahead (possible failure would be a duplicate conversion + * name) + */ + return ConversionCreate(conversion_name, namespaceId, GetUserId(), + from_encoding, to_encoding, funcoid, stmt->def); +} diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c new file mode 100644 index 0000000..cc1909e --- /dev/null +++ b/src/backend/commands/copy.c @@ -0,0 +1,798 @@ +/*------------------------------------------------------------------------- + * + * copy.c + * Implements the COPY utility command + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copy.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/pg_authid.h" +#include "commands/copy.h" +#include "commands/defrem.h" +#include "executor/executor.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteHandler.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/rls.h" + +/* + * DoCopy executes the SQL COPY statement + * + * Either unload or reload contents of table , depending on . + * ( = true means we are inserting into the table.) 
In the "TO" case + * we also support copying the output of an arbitrary SELECT, INSERT, UPDATE + * or DELETE query. + * + * If is false, transfer is between the table and the file named + * . Otherwise, transfer is between the table and our regular + * input/output stream. The latter could be either stdin/stdout or a + * socket, depending on whether we're running under Postmaster control. + * + * Do not allow a Postgres user without the 'pg_read_server_files' or + * 'pg_write_server_files' role to read from or write to a file. + * + * Do not allow the copy if user doesn't have proper permission to access + * the table or the specifically requested columns. + */ +void +DoCopy(ParseState *pstate, const CopyStmt *stmt, + int stmt_location, int stmt_len, + uint64 *processed) +{ + bool is_from = stmt->is_from; + bool pipe = (stmt->filename == NULL); + Relation rel; + Oid relid; + RawStmt *query = NULL; + Node *whereClause = NULL; + + /* + * Disallow COPY to/from file or program except to users with the + * appropriate role. + */ + if (!pipe) + { + if (stmt->is_program) + { + if (!has_privs_of_role(GetUserId(), ROLE_PG_EXECUTE_SERVER_PROGRAM)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_execute_server_program role to COPY to or from an external program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + else + { + if (is_from && !has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_read_server_files role to COPY from a file"), + errhint("Anyone can COPY to stdout or from stdin. 
" + "psql's \\copy command also works for anyone."))); + + if (!is_from && !has_privs_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser or have privileges of the pg_write_server_files role to COPY to a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + } + + if (stmt->relation) + { + LOCKMODE lockmode = is_from ? RowExclusiveLock : AccessShareLock; + ParseNamespaceItem *nsitem; + RangeTblEntry *rte; + TupleDesc tupDesc; + List *attnums; + ListCell *cur; + + Assert(!stmt->query); + + /* Open and lock the relation, using the appropriate lock type. */ + rel = table_openrv(stmt->relation, lockmode); + + relid = RelationGetRelid(rel); + + nsitem = addRangeTableEntryForRelation(pstate, rel, lockmode, + NULL, false, false); + rte = nsitem->p_rte; + rte->requiredPerms = (is_from ? ACL_INSERT : ACL_SELECT); + + if (stmt->whereClause) + { + /* add nsitem to query namespace */ + addNSItemToQuery(pstate, nsitem, false, true, true); + + /* Transform the raw expression tree */ + whereClause = transformExpr(pstate, stmt->whereClause, EXPR_KIND_COPY_WHERE); + + /* Make sure it yields a boolean result. 
*/ + whereClause = coerce_to_boolean(pstate, whereClause, "WHERE"); + + /* we have to fix its collations too */ + assign_expr_collations(pstate, whereClause); + + whereClause = eval_const_expressions(NULL, whereClause); + + whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); + whereClause = (Node *) make_ands_implicit((Expr *) whereClause); + } + + tupDesc = RelationGetDescr(rel); + attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist); + foreach(cur, attnums) + { + int attno = lfirst_int(cur) - + FirstLowInvalidHeapAttributeNumber; + + if (is_from) + rte->insertedCols = bms_add_member(rte->insertedCols, attno); + else + rte->selectedCols = bms_add_member(rte->selectedCols, attno); + } + ExecCheckRTPerms(pstate->p_rtable, true); + + /* + * Permission check for row security policies. + * + * check_enable_rls will ereport(ERROR) if the user has requested + * something invalid and will otherwise indicate if we should enable + * RLS (returns RLS_ENABLED) or not for this COPY statement. + * + * If the relation has a row security policy and we are to apply it + * then perform a "query" copy and allow the normal query processing + * to handle the policies. + * + * If RLS is not enabled for this, then just fall through to the + * normal non-filtering relation handling. + */ + if (check_enable_rls(rte->relid, InvalidOid, false) == RLS_ENABLED) + { + SelectStmt *select; + ColumnRef *cr; + ResTarget *target; + RangeVar *from; + List *targetList = NIL; + + if (is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY FROM not supported with row-level security"), + errhint("Use INSERT statements instead."))); + + /* + * Build target list + * + * If no columns are specified in the attribute list of the COPY + * command, then the target list is 'all' columns. Therefore, '*' + * should be used as the target list for the resulting SELECT + * statement. 
+ * + * In the case that columns are specified in the attribute list, + * create a ColumnRef and ResTarget for each column and add them + * to the target list for the resulting SELECT statement. + */ + if (!stmt->attlist) + { + cr = makeNode(ColumnRef); + cr->fields = list_make1(makeNode(A_Star)); + cr->location = -1; + + target = makeNode(ResTarget); + target->name = NULL; + target->indirection = NIL; + target->val = (Node *) cr; + target->location = -1; + + targetList = list_make1(target); + } + else + { + ListCell *lc; + + foreach(lc, stmt->attlist) + { + /* + * Build the ColumnRef for each column. The ColumnRef + * 'fields' property is a String node that corresponds to + * the column name respectively. + */ + cr = makeNode(ColumnRef); + cr->fields = list_make1(lfirst(lc)); + cr->location = -1; + + /* Build the ResTarget and add the ColumnRef to it. */ + target = makeNode(ResTarget); + target->name = NULL; + target->indirection = NIL; + target->val = (Node *) cr; + target->location = -1; + + /* Add each column to the SELECT statement's target list */ + targetList = lappend(targetList, target); + } + } + + /* + * Build RangeVar for from clause, fully qualified based on the + * relation which we have opened and locked. Use "ONLY" so that + * COPY retrieves rows from only the target table not any + * inheritance children, the same as when RLS doesn't apply. + */ + from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)), + pstrdup(RelationGetRelationName(rel)), + -1); + from->inh = false; /* apply ONLY */ + + /* Build query */ + select = makeNode(SelectStmt); + select->targetList = targetList; + select->fromClause = list_make1(from); + + query = makeNode(RawStmt); + query->stmt = (Node *) select; + query->stmt_location = stmt_location; + query->stmt_len = stmt_len; + + /* + * Close the relation for now, but keep the lock on it to prevent + * changes between now and when we start the query-based COPY. 
+ * + * We'll reopen it later as part of the query-based COPY. + */ + table_close(rel, NoLock); + rel = NULL; + } + } + else + { + Assert(stmt->query); + + /* MERGE is allowed by parser, but unimplemented. Reject for now */ + if (IsA(stmt->query, MergeStmt)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("MERGE not supported in COPY")); + + query = makeNode(RawStmt); + query->stmt = stmt->query; + query->stmt_location = stmt_location; + query->stmt_len = stmt_len; + + relid = InvalidOid; + rel = NULL; + } + + if (is_from) + { + CopyFromState cstate; + + Assert(rel); + + /* check read-only transaction and parallel mode */ + if (XactReadOnly && !rel->rd_islocaltemp) + PreventCommandIfReadOnly("COPY FROM"); + + cstate = BeginCopyFrom(pstate, rel, whereClause, + stmt->filename, stmt->is_program, + NULL, stmt->attlist, stmt->options); + *processed = CopyFrom(cstate); /* copy from file to database */ + EndCopyFrom(cstate); + } + else + { + CopyToState cstate; + + cstate = BeginCopyTo(pstate, rel, query, relid, + stmt->filename, stmt->is_program, + stmt->attlist, stmt->options); + *processed = DoCopyTo(cstate); /* copy from database to file */ + EndCopyTo(cstate); + } + + if (rel != NULL) + table_close(rel, NoLock); +} + +/* + * Extract a CopyHeaderChoice value from a DefElem. This is like + * defGetBoolean() but also accepts the special value "match". + */ +static CopyHeaderChoice +defGetCopyHeaderChoice(DefElem *def, bool is_from) +{ + /* + * If no parameter given, assume "true" is meant. + */ + if (def->arg == NULL) + return COPY_HEADER_TRUE; + + /* + * Allow 0, 1, "true", "false", "on", "off", or "match". 
+ */ + switch (nodeTag(def->arg)) + { + case T_Integer: + switch (intVal(def->arg)) + { + case 0: + return COPY_HEADER_FALSE; + case 1: + return COPY_HEADER_TRUE; + default: + /* otherwise, error out below */ + break; + } + break; + default: + { + char *sval = defGetString(def); + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(sval, "true") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "false") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "on") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "off") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "match") == 0) + { + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use \"%s\" with HEADER in COPY TO", + sval))); + return COPY_HEADER_MATCH; + } + } + break; + } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value or \"match\"", + def->defname))); + return COPY_HEADER_FALSE; /* keep compiler quiet */ +} + +/* + * Process the statement option list for COPY. + * + * Scan the options list (a list of DefElem) and transpose the information + * into *opts_out, applying appropriate error checking. + * + * If 'opts_out' is not NULL, it is assumed to be filled with zeroes initially. + * + * This is exported so that external users of the COPY API can sanity-check + * a list of options. In that usage, 'opts_out' can be passed as NULL and + * the collected data is just leaked until CurrentMemoryContext is reset. + * + * Note that additional checking, such as whether column names listed in FORCE + * QUOTE actually exist, has to be applied later. This just checks for + * self-consistency of the options list. 
+ */ +void +ProcessCopyOptions(ParseState *pstate, + CopyFormatOptions *opts_out, + bool is_from, + List *options) +{ + bool format_specified = false; + bool freeze_specified = false; + bool header_specified = false; + ListCell *option; + + /* Support external use for option sanity checking */ + if (opts_out == NULL) + opts_out = (CopyFormatOptions *) palloc0(sizeof(CopyFormatOptions)); + + opts_out->file_encoding = -1; + + /* Extract options from the statement node tree */ + foreach(option, options) + { + DefElem *defel = lfirst_node(DefElem, option); + + if (strcmp(defel->defname, "format") == 0) + { + char *fmt = defGetString(defel); + + if (format_specified) + errorConflictingDefElem(defel, pstate); + format_specified = true; + if (strcmp(fmt, "text") == 0) + /* default format */ ; + else if (strcmp(fmt, "csv") == 0) + opts_out->csv_mode = true; + else if (strcmp(fmt, "binary") == 0) + opts_out->binary = true; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY format \"%s\" not recognized", fmt), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "freeze") == 0) + { + if (freeze_specified) + errorConflictingDefElem(defel, pstate); + freeze_specified = true; + opts_out->freeze = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "delimiter") == 0) + { + if (opts_out->delim) + errorConflictingDefElem(defel, pstate); + opts_out->delim = defGetString(defel); + } + else if (strcmp(defel->defname, "null") == 0) + { + if (opts_out->null_print) + errorConflictingDefElem(defel, pstate); + opts_out->null_print = defGetString(defel); + } + else if (strcmp(defel->defname, "header") == 0) + { + if (header_specified) + errorConflictingDefElem(defel, pstate); + header_specified = true; + opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); + } + else if (strcmp(defel->defname, "quote") == 0) + { + if (opts_out->quote) + errorConflictingDefElem(defel, pstate); + opts_out->quote = 
defGetString(defel); + } + else if (strcmp(defel->defname, "escape") == 0) + { + if (opts_out->escape) + errorConflictingDefElem(defel, pstate); + opts_out->escape = defGetString(defel); + } + else if (strcmp(defel->defname, "force_quote") == 0) + { + if (opts_out->force_quote || opts_out->force_quote_all) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, A_Star)) + opts_out->force_quote_all = true; + else if (defel->arg && IsA(defel->arg, List)) + opts_out->force_quote = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "force_not_null") == 0) + { + if (opts_out->force_notnull) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, List)) + opts_out->force_notnull = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "force_null") == 0) + { + if (opts_out->force_null) + errorConflictingDefElem(defel, pstate); + if (defel->arg && IsA(defel->arg, List)) + opts_out->force_null = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "convert_selectively") == 0) + { + /* + * Undocumented, not-accessible-from-SQL option: convert only the + * named columns to binary form, storing the rest as NULLs. It's + * allowed for the column list to be NIL. 
+ */ + if (opts_out->convert_selectively) + errorConflictingDefElem(defel, pstate); + opts_out->convert_selectively = true; + if (defel->arg == NULL || IsA(defel->arg, List)) + opts_out->convert_select = castNode(List, defel->arg); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a list of column names", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else if (strcmp(defel->defname, "encoding") == 0) + { + if (opts_out->file_encoding >= 0) + errorConflictingDefElem(defel, pstate); + opts_out->file_encoding = pg_char_to_encoding(defGetString(defel)); + if (opts_out->file_encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("argument to option \"%s\" must be a valid encoding name", + defel->defname), + parser_errposition(pstate, defel->location))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + } + + /* + * Check for incompatible options (must do these two before inserting + * defaults) + */ + if (opts_out->binary && opts_out->delim) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify DELIMITER in BINARY mode"))); + + if (opts_out->binary && opts_out->null_print) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify NULL in BINARY mode"))); + + /* Set defaults for omitted options */ + if (!opts_out->delim) + opts_out->delim = opts_out->csv_mode ? "," : "\t"; + + if (!opts_out->null_print) + opts_out->null_print = opts_out->csv_mode ? "" : "\\N"; + opts_out->null_print_len = strlen(opts_out->null_print); + + if (opts_out->csv_mode) + { + if (!opts_out->quote) + opts_out->quote = "\""; + if (!opts_out->escape) + opts_out->escape = opts_out->quote; + } + + /* Only single-byte delimiter strings are supported. 
*/ + if (strlen(opts_out->delim) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must be a single one-byte character"))); + + /* Disallow end-of-line characters */ + if (strchr(opts_out->delim, '\r') != NULL || + strchr(opts_out->delim, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be newline or carriage return"))); + + if (strchr(opts_out->null_print, '\r') != NULL || + strchr(opts_out->null_print, '\n') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY null representation cannot use newline or carriage return"))); + + /* + * Disallow unsafe delimiter characters in non-CSV mode. We can't allow + * backslash because it would be ambiguous. We can't allow the other + * cases because data characters matching the delimiter must be + * backslashed, and certain backslash combinations are interpreted + * non-literally by COPY IN. Disallowing all lower case ASCII letters is + * more than strictly necessary, but seems best for consistency and + * future-proofing. Likewise we disallow all digits though only octal + * digits are actually dangerous. 
+ */ + if (!opts_out->csv_mode && + strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789", + opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); + + /* Check header */ + if (opts_out->binary && opts_out->header_line) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify HEADER in BINARY mode"))); + + /* Check quote */ + if (!opts_out->csv_mode && opts_out->quote != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote available only in CSV mode"))); + + if (opts_out->csv_mode && strlen(opts_out->quote) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY quote must be a single one-byte character"))); + + if (opts_out->csv_mode && opts_out->delim[0] == opts_out->quote[0]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY delimiter and quote must be different"))); + + /* Check escape */ + if (!opts_out->csv_mode && opts_out->escape != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape available only in CSV mode"))); + + if (opts_out->csv_mode && strlen(opts_out->escape) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY escape must be a single one-byte character"))); + + /* Check force_quote */ + if (!opts_out->csv_mode && (opts_out->force_quote || opts_out->force_quote_all)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force quote available only in CSV mode"))); + if ((opts_out->force_quote || opts_out->force_quote_all) && is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force quote only available using COPY TO"))); + + /* Check force_notnull */ + if (!opts_out->csv_mode && opts_out->force_notnull != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force not null available only in CSV 
mode"))); + if (opts_out->force_notnull != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force not null only available using COPY FROM"))); + + /* Check force_null */ + if (!opts_out->csv_mode && opts_out->force_null != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null available only in CSV mode"))); + + if (opts_out->force_null != NIL && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY force null only available using COPY FROM"))); + + /* Don't allow the delimiter to appear in the null string. */ + if (strchr(opts_out->null_print, opts_out->delim[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY delimiter must not appear in the NULL specification"))); + + /* Don't allow the CSV quote char to appear in the null string. */ + if (opts_out->csv_mode && + strchr(opts_out->null_print, opts_out->quote[0]) != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("CSV quote character must not appear in the NULL specification"))); +} + +/* + * CopyGetAttnums - build an integer list of attnums to be copied + * + * The input attnamelist is either the user-specified column list, + * or NIL if there was none (in which case we want all the non-dropped + * columns). + * + * We don't include generated columns in the generated full list and we don't + * allow them to be specified explicitly. They don't make sense for COPY + * FROM, but we could possibly allow them for COPY TO. But this way it's at + * least ensured that whatever we copy out can be copied back in. + * + * rel can be NULL ... it's only used for error reports. 
+ */ +List * +CopyGetAttnums(TupleDesc tupDesc, Relation rel, List *attnamelist) +{ + List *attnums = NIL; + + if (attnamelist == NIL) + { + /* Generate default column list */ + int attr_count = tupDesc->natts; + int i; + + for (i = 0; i < attr_count; i++) + { + if (TupleDescAttr(tupDesc, i)->attisdropped) + continue; + if (TupleDescAttr(tupDesc, i)->attgenerated) + continue; + attnums = lappend_int(attnums, i + 1); + } + } + else + { + /* Validate the user-supplied list and extract attnums */ + ListCell *l; + + foreach(l, attnamelist) + { + char *name = strVal(lfirst(l)); + int attnum; + int i; + + /* Lookup column name */ + attnum = InvalidAttrNumber; + for (i = 0; i < tupDesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, i); + + if (att->attisdropped) + continue; + if (namestrcmp(&(att->attname), name) == 0) + { + if (att->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("column \"%s\" is a generated column", + name), + errdetail("Generated columns cannot be used in COPY."))); + attnum = att->attnum; + break; + } + } + if (attnum == InvalidAttrNumber) + { + if (rel != NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + name, RelationGetRelationName(rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + name))); + } + /* Check for duplicates */ + if (list_member_int(attnums, attnum)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + name))); + attnums = lappend_int(attnums, attnum); + } + } + + return attnums; +} diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c new file mode 100644 index 0000000..c6dbd97 --- /dev/null +++ b/src/backend/commands/copyfrom.c @@ -0,0 +1,1624 @@ +/*------------------------------------------------------------------------- + * + * copyfrom.c + * COPY FROM 
file/program/client + * + * This file contains routines needed to efficiently load tuples into a + * table. That includes looking up the correct partition, firing triggers, + * calling the table AM function to insert the data, and updating indexes. + * Reading data from the input file or client and parsing it into Datums + * is handled in copyfromparse.c. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyfrom.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "commands/copyfrom_internal.h" +#include "commands/progress.h" +#include "commands/trigger.h" +#include "executor/execPartition.h" +#include "executor/executor.h" +#include "executor/nodeModifyTable.h" +#include "executor/tuptable.h" +#include "foreign/fdwapi.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/fd.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/portal.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + +/* + * No more than this many tuples per CopyMultiInsertBuffer + * + * Caution: Don't make this too big, as we could end up with this many + * CopyMultiInsertBuffer items stored in CopyMultiInsertInfo's + * multiInsertBuffers list. Increasing this can cause quadratic growth in + * memory requirements during copies into partitioned tables with a large + * number of partitions. 
+ */ +#define MAX_BUFFERED_TUPLES 1000 + +/* + * Flush buffers if there are >= this many bytes, as counted by the input + * size, of tuples stored. + */ +#define MAX_BUFFERED_BYTES 65535 + +/* Trim the list of buffers back down to this number after flushing */ +#define MAX_PARTITION_BUFFERS 32 + +/* Stores multi-insert data related to a single relation in CopyFrom. */ +typedef struct CopyMultiInsertBuffer +{ + TupleTableSlot *slots[MAX_BUFFERED_TUPLES]; /* Array to store tuples */ + ResultRelInfo *resultRelInfo; /* ResultRelInfo for 'relid' */ + BulkInsertState bistate; /* BulkInsertState for this rel */ + int nused; /* number of 'slots' containing tuples */ + uint64 linenos[MAX_BUFFERED_TUPLES]; /* Line # of tuple in copy + * stream */ +} CopyMultiInsertBuffer; + +/* + * Stores one or many CopyMultiInsertBuffers and details about the size and + * number of tuples which are stored in them. This allows multiple buffers to + * exist at once when COPYing into a partitioned table. + */ +typedef struct CopyMultiInsertInfo +{ + List *multiInsertBuffers; /* List of tracked CopyMultiInsertBuffers */ + int bufferedTuples; /* number of tuples buffered over all buffers */ + int bufferedBytes; /* number of bytes from all buffered tuples */ + CopyFromState cstate; /* Copy state for this CopyMultiInsertInfo */ + EState *estate; /* Executor state used for COPY */ + CommandId mycid; /* Command Id used for COPY */ + int ti_options; /* table insert options */ +} CopyMultiInsertInfo; + + +/* non-export function prototypes */ +static char *limit_printout_length(const char *str); + +static void ClosePipeFromProgram(CopyFromState cstate); + +/* + * error context callback for COPY FROM + * + * The argument for the error context must be CopyFromState. 
+ */ +void +CopyFromErrorCallback(void *arg) +{ + CopyFromState cstate = (CopyFromState) arg; + + if (cstate->opts.binary) + { + /* can't usefully display the data */ + if (cstate->cur_attname) + errcontext("COPY %s, line %llu, column %s", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname); + else + errcontext("COPY %s, line %llu", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno); + } + else + { + if (cstate->cur_attname && cstate->cur_attval) + { + /* error is relevant to a particular column */ + char *attval; + + attval = limit_printout_length(cstate->cur_attval); + errcontext("COPY %s, line %llu, column %s: \"%s\"", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname, + attval); + pfree(attval); + } + else if (cstate->cur_attname) + { + /* error is relevant to a particular column, value is NULL */ + errcontext("COPY %s, line %llu, column %s: null input", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname); + } + else + { + /* + * Error is relevant to a particular line. + * + * If line_buf still contains the correct line, print it. + */ + if (cstate->line_buf_valid) + { + char *lineval; + + lineval = limit_printout_length(cstate->line_buf.data); + errcontext("COPY %s, line %llu: \"%s\"", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno, lineval); + pfree(lineval); + } + else + { + errcontext("COPY %s, line %llu", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno); + } + } + } +} + +/* + * Make sure we don't print an unreasonable amount of COPY data in a message. + * + * Returns a pstrdup'd copy of the input. 
+ */ +static char * +limit_printout_length(const char *str) +{ +#define MAX_COPY_DATA_DISPLAY 100 + + int slen = strlen(str); + int len; + char *res; + + /* Fast path if definitely okay */ + if (slen <= MAX_COPY_DATA_DISPLAY) + return pstrdup(str); + + /* Apply encoding-dependent truncation */ + len = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY); + + /* + * Truncate, and add "..." to show we truncated the input. + */ + res = (char *) palloc(len + 4); + memcpy(res, str, len); + strcpy(res + len, "..."); + + return res; +} + +/* + * Allocate memory and initialize a new CopyMultiInsertBuffer for this + * ResultRelInfo. + */ +static CopyMultiInsertBuffer * +CopyMultiInsertBufferInit(ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer; + + buffer = (CopyMultiInsertBuffer *) palloc(sizeof(CopyMultiInsertBuffer)); + memset(buffer->slots, 0, sizeof(TupleTableSlot *) * MAX_BUFFERED_TUPLES); + buffer->resultRelInfo = rri; + buffer->bistate = GetBulkInsertState(); + buffer->nused = 0; + + return buffer; +} + +/* + * Make a new buffer for this ResultRelInfo. + */ +static inline void +CopyMultiInsertInfoSetupBuffer(CopyMultiInsertInfo *miinfo, + ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer; + + buffer = CopyMultiInsertBufferInit(rri); + + /* Setup back-link so we can easily find this buffer again */ + rri->ri_CopyMultiInsertBuffer = buffer; + /* Record that we're tracking this buffer */ + miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer); +} + +/* + * Initialize an already allocated CopyMultiInsertInfo. + * + * If rri is a non-partitioned table then a CopyMultiInsertBuffer is set up + * for that table. 
+ */ +static void +CopyMultiInsertInfoInit(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri, + CopyFromState cstate, EState *estate, CommandId mycid, + int ti_options) +{ + miinfo->multiInsertBuffers = NIL; + miinfo->bufferedTuples = 0; + miinfo->bufferedBytes = 0; + miinfo->cstate = cstate; + miinfo->estate = estate; + miinfo->mycid = mycid; + miinfo->ti_options = ti_options; + + /* + * Only setup the buffer when not dealing with a partitioned table. + * Buffers for partitioned tables will just be setup when we need to send + * tuples their way for the first time. + */ + if (rri->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + CopyMultiInsertInfoSetupBuffer(miinfo, rri); +} + +/* + * Returns true if the buffers are full + */ +static inline bool +CopyMultiInsertInfoIsFull(CopyMultiInsertInfo *miinfo) +{ + if (miinfo->bufferedTuples >= MAX_BUFFERED_TUPLES || + miinfo->bufferedBytes >= MAX_BUFFERED_BYTES) + return true; + return false; +} + +/* + * Returns true if we have no buffered tuples + */ +static inline bool +CopyMultiInsertInfoIsEmpty(CopyMultiInsertInfo *miinfo) +{ + return miinfo->bufferedTuples == 0; +} + +/* + * Write the tuples stored in 'buffer' out to the table. + */ +static inline void +CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo, + CopyMultiInsertBuffer *buffer) +{ + MemoryContext oldcontext; + int i; + uint64 save_cur_lineno; + CopyFromState cstate = miinfo->cstate; + EState *estate = miinfo->estate; + CommandId mycid = miinfo->mycid; + int ti_options = miinfo->ti_options; + bool line_buf_valid = cstate->line_buf_valid; + int nused = buffer->nused; + ResultRelInfo *resultRelInfo = buffer->resultRelInfo; + TupleTableSlot **slots = buffer->slots; + + /* + * Print error context information correctly, if one of the operations + * below fails. 
+ */ + cstate->line_buf_valid = false; + save_cur_lineno = cstate->cur_lineno; + + /* + * table_multi_insert may leak memory, so switch to short-lived memory + * context before calling it. + */ + oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + table_multi_insert(resultRelInfo->ri_RelationDesc, + slots, + nused, + mycid, + ti_options, + buffer->bistate); + MemoryContextSwitchTo(oldcontext); + + for (i = 0; i < nused; i++) + { + /* + * If there are any indexes, update them for all the inserted tuples, + * and run AFTER ROW INSERT triggers. + */ + if (resultRelInfo->ri_NumIndices > 0) + { + List *recheckIndexes; + + cstate->cur_lineno = buffer->linenos[i]; + recheckIndexes = + ExecInsertIndexTuples(resultRelInfo, + buffer->slots[i], estate, false, false, + NULL, NIL); + ExecARInsertTriggers(estate, resultRelInfo, + slots[i], recheckIndexes, + cstate->transition_capture); + list_free(recheckIndexes); + } + + /* + * There's no indexes, but see if we need to run AFTER ROW INSERT + * triggers anyway. + */ + else if (resultRelInfo->ri_TrigDesc != NULL && + (resultRelInfo->ri_TrigDesc->trig_insert_after_row || + resultRelInfo->ri_TrigDesc->trig_insert_new_table)) + { + cstate->cur_lineno = buffer->linenos[i]; + ExecARInsertTriggers(estate, resultRelInfo, + slots[i], NIL, cstate->transition_capture); + } + + ExecClearTuple(slots[i]); + } + + /* Mark that all slots are free */ + buffer->nused = 0; + + /* reset cur_lineno and line_buf_valid to what they were */ + cstate->line_buf_valid = line_buf_valid; + cstate->cur_lineno = save_cur_lineno; +} + +/* + * Drop used slots and free member for this buffer. + * + * The buffer must be flushed before cleanup. 
+ */ +static inline void +CopyMultiInsertBufferCleanup(CopyMultiInsertInfo *miinfo, + CopyMultiInsertBuffer *buffer) +{ + int i; + + /* Ensure buffer was flushed */ + Assert(buffer->nused == 0); + + /* Remove back-link to ourself */ + buffer->resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + + FreeBulkInsertState(buffer->bistate); + + /* Since we only create slots on demand, just drop the non-null ones. */ + for (i = 0; i < MAX_BUFFERED_TUPLES && buffer->slots[i] != NULL; i++) + ExecDropSingleTupleTableSlot(buffer->slots[i]); + + table_finish_bulk_insert(buffer->resultRelInfo->ri_RelationDesc, + miinfo->ti_options); + + pfree(buffer); +} + +/* + * Write out all stored tuples in all buffers out to the tables. + * + * Once flushed we also trim the tracked buffers list down to size by removing + * the buffers created earliest first. + * + * Callers should pass 'curr_rri' as the ResultRelInfo that's currently being + * used. When cleaning up old buffers we'll never remove the one for + * 'curr_rri'. + */ +static inline void +CopyMultiInsertInfoFlush(CopyMultiInsertInfo *miinfo, ResultRelInfo *curr_rri) +{ + ListCell *lc; + + foreach(lc, miinfo->multiInsertBuffers) + { + CopyMultiInsertBuffer *buffer = (CopyMultiInsertBuffer *) lfirst(lc); + + CopyMultiInsertBufferFlush(miinfo, buffer); + } + + miinfo->bufferedTuples = 0; + miinfo->bufferedBytes = 0; + + /* + * Trim the list of tracked buffers down if it exceeds the limit. Here we + * remove buffers starting with the ones we created first. It seems less + * likely that these older ones will be needed than the ones that were + * just created. + */ + while (list_length(miinfo->multiInsertBuffers) > MAX_PARTITION_BUFFERS) + { + CopyMultiInsertBuffer *buffer; + + buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers); + + /* + * We never want to remove the buffer that's currently being used, so + * if we happen to find that then move it to the end of the list. 
+ */ + if (buffer->resultRelInfo == curr_rri) + { + miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers); + miinfo->multiInsertBuffers = lappend(miinfo->multiInsertBuffers, buffer); + buffer = (CopyMultiInsertBuffer *) linitial(miinfo->multiInsertBuffers); + } + + CopyMultiInsertBufferCleanup(miinfo, buffer); + miinfo->multiInsertBuffers = list_delete_first(miinfo->multiInsertBuffers); + } +} + +/* + * Cleanup allocated buffers and free memory + */ +static inline void +CopyMultiInsertInfoCleanup(CopyMultiInsertInfo *miinfo) +{ + ListCell *lc; + + foreach(lc, miinfo->multiInsertBuffers) + CopyMultiInsertBufferCleanup(miinfo, lfirst(lc)); + + list_free(miinfo->multiInsertBuffers); +} + +/* + * Get the next TupleTableSlot that the next tuple should be stored in. + * + * Callers must ensure that the buffer is not full. + * + * Note: 'miinfo' is unused but has been included for consistency with the + * other functions in this area. + */ +static inline TupleTableSlot * +CopyMultiInsertInfoNextFreeSlot(CopyMultiInsertInfo *miinfo, + ResultRelInfo *rri) +{ + CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer; + int nused = buffer->nused; + + Assert(buffer != NULL); + Assert(nused < MAX_BUFFERED_TUPLES); + + if (buffer->slots[nused] == NULL) + buffer->slots[nused] = table_slot_create(rri->ri_RelationDesc, NULL); + return buffer->slots[nused]; +} + +/* + * Record the previously reserved TupleTableSlot that was reserved by + * CopyMultiInsertInfoNextFreeSlot as being consumed. 
 */
static inline void
CopyMultiInsertInfoStore(CopyMultiInsertInfo *miinfo, ResultRelInfo *rri,
						 TupleTableSlot *slot, int tuplen, uint64 lineno)
{
	CopyMultiInsertBuffer *buffer = rri->ri_CopyMultiInsertBuffer;

	Assert(buffer != NULL);
	Assert(slot == buffer->slots[buffer->nused]);

	/* Store the line number so we can properly report any errors later */
	buffer->linenos[buffer->nused] = lineno;

	/* Record this slot as being used */
	buffer->nused++;

	/* Update how many tuples are stored and their size */
	miinfo->bufferedTuples++;
	miinfo->bufferedBytes += tuplen;
}

/*
 * Copy FROM file to relation.
 */
uint64
CopyFrom(CopyFromState cstate)
{
	ResultRelInfo *resultRelInfo;
	ResultRelInfo *target_resultRelInfo;
	ResultRelInfo *prevResultRelInfo = NULL;
	EState	   *estate = CreateExecutorState();	/* for ExecConstraints() */
	ModifyTableState *mtstate;
	ExprContext *econtext;
	TupleTableSlot *singleslot = NULL;
	MemoryContext oldcontext = CurrentMemoryContext;

	PartitionTupleRouting *proute = NULL;
	ErrorContextCallback errcallback;
	CommandId	mycid = GetCurrentCommandId(true);
	int			ti_options = 0; /* start with default options for insert */
	BulkInsertState bistate = NULL;
	CopyInsertMethod insertMethod;
	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
	int64		processed = 0;	/* # tuples actually inserted */
	int64		excluded = 0;	/* # tuples filtered out by WHERE */
	bool		has_before_insert_row_trig;
	bool		has_instead_insert_row_trig;
	bool		leafpart_use_multi_insert = false;

	Assert(cstate->rel);
	Assert(list_length(cstate->range_table) == 1);

	/*
	 * The target must be a plain, foreign, or partitioned relation, or have
	 * an INSTEAD OF INSERT row trigger.  (Currently, such triggers are only
	 * allowed on views, so we only hint about them in the view case.
	 */
	if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
		cstate->rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
		cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
		!(cstate->rel->trigdesc &&
		  cstate->rel->trigdesc->trig_insert_instead_row))
	{
		if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to view \"%s\"",
							RelationGetRelationName(cstate->rel)),
					 errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
		else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to materialized view \"%s\"",
							RelationGetRelationName(cstate->rel))));
		else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to sequence \"%s\"",
							RelationGetRelationName(cstate->rel))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot copy to non-table relation \"%s\"",
							RelationGetRelationName(cstate->rel))));
	}

	/*
	 * If the target file is new-in-transaction, we assume that checking FSM
	 * for free space is a waste of time.  This could possibly be wrong, but
	 * it's unlikely.
	 */
	if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
		(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
		 cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
		ti_options |= TABLE_INSERT_SKIP_FSM;

	/*
	 * Optimize if new relfilenode was created in this subxact or one of its
	 * committed children and we won't see those rows later as part of an
	 * earlier scan or command. The subxact test ensures that if this subxact
	 * aborts then the frozen rows won't be visible after xact cleanup.  Note
	 * that the stronger test of exactly which subtransaction created it is
	 * crucial for correctness of this optimization. The test for an earlier
	 * scan or command tolerates false negatives. FREEZE causes other sessions
	 * to see rows they would not see under MVCC, and a false negative merely
	 * spreads that anomaly to the current session.
	 */
	if (cstate->opts.freeze)
	{
		/*
		 * We currently disallow COPY FREEZE on partitioned tables.  The
		 * reason for this is that we've simply not yet opened the partitions
		 * to determine if the optimization can be applied to them.  We could
		 * go and open them all here, but doing so may be quite a costly
		 * overhead for small copies.  In any case, we may just end up routing
		 * tuples to a small number of partitions.  It seems better just to
		 * raise an ERROR for partitioned tables.
		 */
		if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot perform COPY FREEZE on a partitioned table")));
		}

		/*
		 * Tolerate one registration for the benefit of FirstXactSnapshot.
		 * Scan-bearing queries generally create at least two registrations,
		 * though relying on that is fragile, as is ignoring ActiveSnapshot.
		 * Clear CatalogSnapshot to avoid counting its registration.  We'll
		 * still detect ongoing catalog scans, each of which separately
		 * registers the snapshot it uses.
		 */
		InvalidateCatalogSnapshot();
		if (!ThereAreNoPriorRegisteredSnapshots() || !ThereAreNoReadyPortals())
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
					 errmsg("cannot perform COPY FREEZE because of prior transaction activity")));

		if (cstate->rel->rd_createSubid != GetCurrentSubTransactionId() &&
			cstate->rel->rd_newRelfilenodeSubid != GetCurrentSubTransactionId())
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot perform COPY FREEZE because the table was not created or truncated in the current subtransaction")));

		ti_options |= TABLE_INSERT_FROZEN;
	}

	/*
	 * We need a ResultRelInfo so we can use the regular executor's
	 * index-entry-making machinery.  (There used to be a huge amount of code
	 * here that basically duplicated execUtils.c ...)
	 */
	ExecInitRangeTable(estate, cstate->range_table);
	resultRelInfo = target_resultRelInfo = makeNode(ResultRelInfo);
	ExecInitResultRelation(estate, resultRelInfo, 1);

	/* Verify the named relation is a valid target for INSERT */
	CheckValidResultRel(resultRelInfo, CMD_INSERT);

	ExecOpenIndices(resultRelInfo, false);

	/*
	 * Set up a ModifyTableState so we can let FDW(s) init themselves for
	 * foreign-table result relation(s).
	 */
	mtstate = makeNode(ModifyTableState);
	mtstate->ps.plan = NULL;
	mtstate->ps.state = estate;
	mtstate->operation = CMD_INSERT;
	mtstate->mt_nrels = 1;
	mtstate->resultRelInfo = resultRelInfo;
	mtstate->rootResultRelInfo = resultRelInfo;

	if (resultRelInfo->ri_FdwRoutine != NULL &&
		resultRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
		resultRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate,
														 resultRelInfo);

	/* Prepare to catch AFTER triggers. */
	AfterTriggerBeginQuery();

	/*
	 * If there are any triggers with transition tables on the named relation,
	 * we need to be prepared to capture transition tuples.
	 *
	 * Because partition tuple routing would like to know about whether
	 * transition capture is active, we also set it in mtstate, which is
	 * passed to ExecFindPartition() below.
	 */
	cstate->transition_capture = mtstate->mt_transition_capture =
		MakeTransitionCaptureState(cstate->rel->trigdesc,
								   RelationGetRelid(cstate->rel),
								   CMD_INSERT);

	/*
	 * If the named relation is a partitioned table, initialize state for
	 * CopyFrom tuple routing.
	 */
	if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		proute = ExecSetupPartitionTupleRouting(estate, cstate->rel);

	if (cstate->whereClause)
		cstate->qualexpr = ExecInitQual(castNode(List, cstate->whereClause),
										&mtstate->ps);

	/*
	 * It's generally more efficient to prepare a bunch of tuples for
	 * insertion, and insert them in one table_multi_insert() call, than call
	 * table_tuple_insert() separately for every tuple. However, there are a
	 * number of reasons why we might not be able to do this.  These are
	 * explained below.
	 */
	if (resultRelInfo->ri_TrigDesc != NULL &&
		(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
		 resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
	{
		/*
		 * Can't support multi-inserts when there are any BEFORE/INSTEAD OF
		 * triggers on the table.  Such triggers might query the table we're
		 * inserting into and act differently if the tuples that have already
		 * been processed and prepared for insertion are not there.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (proute != NULL && resultRelInfo->ri_TrigDesc != NULL &&
			 resultRelInfo->ri_TrigDesc->trig_insert_new_table)
	{
		/*
		 * For partitioned tables we can't support multi-inserts when there
		 * are any statement level insert triggers. It might be possible to
		 * allow partitioned tables with such triggers in the future, but for
		 * now, CopyMultiInsertInfoFlush expects that any after row insert and
		 * statement level insert triggers are on the same relation.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (resultRelInfo->ri_FdwRoutine != NULL ||
			 cstate->volatile_defexprs)
	{
		/*
		 * Can't support multi-inserts to foreign tables or if there are any
		 * volatile default expressions in the table.  Similarly to the
		 * trigger case above, such expressions may query the table we're
		 * inserting into.
		 *
		 * Note: It does not matter if any partitions have any volatile
		 * default expressions as we use the defaults from the target of the
		 * COPY command.
		 */
		insertMethod = CIM_SINGLE;
	}
	else if (contain_volatile_functions(cstate->whereClause))
	{
		/*
		 * Can't support multi-inserts if there are any volatile function
		 * expressions in WHERE clause.  Similarly to the trigger case above,
		 * such expressions may query the table we're inserting into.
		 */
		insertMethod = CIM_SINGLE;
	}
	else
	{
		/*
		 * For partitioned tables, we may still be able to perform bulk
		 * inserts.  However, the possibility of this depends on which types
		 * of triggers exist on the partition.  We must disable bulk inserts
		 * if the partition is a foreign table or it has any before row insert
		 * or insert instead triggers (same as we checked above for the parent
		 * table).  Since the partition's resultRelInfos are initialized only
		 * when we actually need to insert the first tuple into them, we must
		 * have the intermediate insert method of CIM_MULTI_CONDITIONAL to
		 * flag that we must later determine if we can use bulk-inserts for
		 * the partition being inserted into.
		 */
		if (proute)
			insertMethod = CIM_MULTI_CONDITIONAL;
		else
			insertMethod = CIM_MULTI;

		CopyMultiInsertInfoInit(&multiInsertInfo, resultRelInfo, cstate,
								estate, mycid, ti_options);
	}

	/*
	 * If not using batch mode (which allocates slots as needed) set up a
	 * tuple slot too.  When inserting into a partitioned table, we also need
	 * one, even if we might batch insert, to read the tuple in the root
	 * partition's form.
	 */
	if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL)
	{
		singleslot = table_slot_create(resultRelInfo->ri_RelationDesc,
									   &estate->es_tupleTable);
		bistate = GetBulkInsertState();
	}

	has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
								  resultRelInfo->ri_TrigDesc->trig_insert_before_row);

	has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
								   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);

	/*
	 * Check BEFORE STATEMENT insertion triggers. It's debatable whether we
	 * should do this for COPY, since it's not really an "INSERT" statement as
	 * such. However, executing these triggers maintains consistency with the
	 * EACH ROW triggers that we already fire on COPY.
	 */
	ExecBSInsertTriggers(estate, resultRelInfo);

	econtext = GetPerTupleExprContext(estate);

	/* Set up callback to identify error line number */
	errcallback.callback = CopyFromErrorCallback;
	errcallback.arg = (void *) cstate;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* Main loop: one iteration per input line/tuple, exits on EOF */
	for (;;)
	{
		TupleTableSlot *myslot;
		bool		skip_tuple;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Reset the per-tuple exprcontext. We do this after every tuple, to
		 * clean-up after expression evaluations etc.
		 */
		ResetPerTupleExprContext(estate);

		/* select slot to (initially) load row into */
		if (insertMethod == CIM_SINGLE || proute)
		{
			myslot = singleslot;
			Assert(myslot != NULL);
		}
		else
		{
			Assert(resultRelInfo == target_resultRelInfo);
			Assert(insertMethod == CIM_MULTI);

			myslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
													 resultRelInfo);
		}

		/*
		 * Switch to per-tuple context before calling NextCopyFrom, which does
		 * evaluate default expressions etc. and requires per-tuple context.
		 */
		MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

		ExecClearTuple(myslot);

		/* Directly store the values/nulls array in the slot */
		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
			break;				/* EOF reached */

		ExecStoreVirtualTuple(myslot);

		/*
		 * Constraints and where clause might reference the tableoid column,
		 * so (re-)initialize tts_tableOid before evaluating them.
		 */
		myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);

		/* Triggers and stuff need to be invoked in query context. */
		MemoryContextSwitchTo(oldcontext);

		if (cstate->whereClause)
		{
			econtext->ecxt_scantuple = myslot;
			/* Skip items that don't match COPY's WHERE clause */
			if (!ExecQual(cstate->qualexpr, econtext))
			{
				/*
				 * Report that this tuple was filtered out by the WHERE
				 * clause.
				 */
				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_EXCLUDED,
											 ++excluded);
				continue;
			}
		}

		/* Determine the partition to insert the tuple into */
		if (proute)
		{
			TupleConversionMap *map;

			/*
			 * Attempt to find a partition suitable for this tuple.
			 * ExecFindPartition() will raise an error if none can be found or
			 * if the found partition is not suitable for INSERTs.
			 */
			resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
											  proute, myslot, estate);

			if (prevResultRelInfo != resultRelInfo)
			{
				/* Determine which triggers exist on this partition */
				has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
											  resultRelInfo->ri_TrigDesc->trig_insert_before_row);

				has_instead_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
											   resultRelInfo->ri_TrigDesc->trig_insert_instead_row);

				/*
				 * Disable multi-inserts when the partition has BEFORE/INSTEAD
				 * OF triggers, or if the partition is a foreign partition.
				 */
				leafpart_use_multi_insert = insertMethod == CIM_MULTI_CONDITIONAL &&
					!has_before_insert_row_trig &&
					!has_instead_insert_row_trig &&
					resultRelInfo->ri_FdwRoutine == NULL;

				/* Set the multi-insert buffer to use for this partition. */
				if (leafpart_use_multi_insert)
				{
					if (resultRelInfo->ri_CopyMultiInsertBuffer == NULL)
						CopyMultiInsertInfoSetupBuffer(&multiInsertInfo,
													   resultRelInfo);
				}
				else if (insertMethod == CIM_MULTI_CONDITIONAL &&
						 !CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
				{
					/*
					 * Flush pending inserts if this partition can't use
					 * batching, so rows are visible to triggers etc.
					 */
					CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
				}

				/* partition changed, so release the pin on the old block */
				if (bistate != NULL)
					ReleaseBulkInsertStatePin(bistate);
				prevResultRelInfo = resultRelInfo;
			}

			/*
			 * If we're capturing transition tuples, we might need to convert
			 * from the partition rowtype to root rowtype. But if there are no
			 * BEFORE triggers on the partition that could change the tuple,
			 * we can just remember the original unconverted tuple to avoid a
			 * needless round trip conversion.
			 */
			if (cstate->transition_capture != NULL)
				cstate->transition_capture->tcs_original_insert_tuple =
					!has_before_insert_row_trig ? myslot : NULL;

			/*
			 * We might need to convert from the root rowtype to the partition
			 * rowtype.
			 */
			map = resultRelInfo->ri_RootToPartitionMap;
			if (insertMethod == CIM_SINGLE || !leafpart_use_multi_insert)
			{
				/* non batch insert */
				if (map != NULL)
				{
					TupleTableSlot *new_slot;

					new_slot = resultRelInfo->ri_PartitionTupleSlot;
					myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
				}
			}
			else
			{
				/*
				 * Prepare to queue up tuple for later batch insert into
				 * current partition.
				 */
				TupleTableSlot *batchslot;

				/* no other path available for partitioned table */
				Assert(insertMethod == CIM_MULTI_CONDITIONAL);

				batchslot = CopyMultiInsertInfoNextFreeSlot(&multiInsertInfo,
															resultRelInfo);

				if (map != NULL)
					myslot = execute_attr_map_slot(map->attrMap, myslot,
												   batchslot);
				else
				{
					/*
					 * This looks more expensive than it is (Believe me, I
					 * optimized it away. Twice.). The input is in virtual
					 * form, and we'll materialize the slot below - for most
					 * slot types the copy performs the work materialization
					 * would later require anyway.
					 */
					ExecCopySlot(batchslot, myslot);
					myslot = batchslot;
				}
			}

			/* ensure that triggers etc see the right relation */
			myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
		}

		skip_tuple = false;

		/* BEFORE ROW INSERT Triggers */
		if (has_before_insert_row_trig)
		{
			if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot))
				skip_tuple = true;	/* "do nothing" */
		}

		if (!skip_tuple)
		{
			/*
			 * If there is an INSTEAD OF INSERT ROW trigger, let it handle the
			 * tuple.  Otherwise, proceed with inserting the tuple into the
			 * table or foreign table.
			 */
			if (has_instead_insert_row_trig)
			{
				ExecIRInsertTriggers(estate, resultRelInfo, myslot);
			}
			else
			{
				/* Compute stored generated columns */
				if (resultRelInfo->ri_RelationDesc->rd_att->constr &&
					resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored)
					ExecComputeStoredGenerated(resultRelInfo, estate, myslot,
											   CMD_INSERT);

				/*
				 * If the target is a plain table, check the constraints of
				 * the tuple.
				 */
				if (resultRelInfo->ri_FdwRoutine == NULL &&
					resultRelInfo->ri_RelationDesc->rd_att->constr)
					ExecConstraints(resultRelInfo, myslot, estate);

				/*
				 * Also check the tuple against the partition constraint, if
				 * there is one; except that if we got here via tuple-routing,
				 * we don't need to if there's no BR trigger defined on the
				 * partition.
				 */
				if (resultRelInfo->ri_RelationDesc->rd_rel->relispartition &&
					(proute == NULL || has_before_insert_row_trig))
					ExecPartitionCheck(resultRelInfo, myslot, estate, true);

				/* Store the slot in the multi-insert buffer, when enabled. */
				if (insertMethod == CIM_MULTI || leafpart_use_multi_insert)
				{
					/*
					 * The slot previously might point into the per-tuple
					 * context. For batching it needs to be longer lived.
					 */
					ExecMaterializeSlot(myslot);

					/* Add this tuple to the tuple buffer */
					CopyMultiInsertInfoStore(&multiInsertInfo,
											 resultRelInfo, myslot,
											 cstate->line_buf.len,
											 cstate->cur_lineno);

					/*
					 * If enough inserts have queued up, then flush all
					 * buffers out to their tables.
					 */
					if (CopyMultiInsertInfoIsFull(&multiInsertInfo))
						CopyMultiInsertInfoFlush(&multiInsertInfo, resultRelInfo);
				}
				else
				{
					List	   *recheckIndexes = NIL;

					/* OK, store the tuple */
					if (resultRelInfo->ri_FdwRoutine != NULL)
					{
						myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
																				 resultRelInfo,
																				 myslot,
																				 NULL);

						if (myslot == NULL) /* "do nothing" */
							continue;	/* next tuple please */

						/*
						 * AFTER ROW Triggers might reference the tableoid
						 * column, so (re-)initialize tts_tableOid before
						 * evaluating them.
						 */
						myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
					}
					else
					{
						/* OK, store the tuple and create index entries for it */
						table_tuple_insert(resultRelInfo->ri_RelationDesc,
										   myslot, mycid, ti_options, bistate);

						if (resultRelInfo->ri_NumIndices > 0)
							recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
																   myslot,
																   estate,
																   false,
																   false,
																   NULL,
																   NIL);
					}

					/* AFTER ROW INSERT Triggers */
					ExecARInsertTriggers(estate, resultRelInfo, myslot,
										 recheckIndexes, cstate->transition_capture);

					list_free(recheckIndexes);
				}
			}

			/*
			 * We count only tuples not suppressed by a BEFORE INSERT trigger
			 * or FDW; this is the same definition used by nodeModifyTable.c
			 * for counting tuples inserted by an INSERT command.  Update
			 * progress of the COPY command as well.
			 */
			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
										 ++processed);
		}
	}

	/* Flush any remaining buffered tuples */
	if (insertMethod != CIM_SINGLE)
	{
		if (!CopyMultiInsertInfoIsEmpty(&multiInsertInfo))
			CopyMultiInsertInfoFlush(&multiInsertInfo, NULL);
	}

	/* Done, clean up */
	error_context_stack = errcallback.previous;

	if (bistate != NULL)
		FreeBulkInsertState(bistate);

	MemoryContextSwitchTo(oldcontext);

	/* Execute AFTER STATEMENT insertion triggers */
	ExecASInsertTriggers(estate, target_resultRelInfo, cstate->transition_capture);

	/* Handle queued AFTER triggers */
	AfterTriggerEndQuery(estate);

	ExecResetTupleTable(estate->es_tupleTable, false);

	/* Allow the FDW to shut down */
	if (target_resultRelInfo->ri_FdwRoutine != NULL &&
		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
		target_resultRelInfo->ri_FdwRoutine->EndForeignInsert(estate,
															  target_resultRelInfo);

	/* Tear down the multi-insert buffer data */
	if (insertMethod != CIM_SINGLE)
		CopyMultiInsertInfoCleanup(&multiInsertInfo);

	/* Close all the partitioned tables, leaf partitions, and their indices */
	if (proute)
		ExecCleanupTupleRouting(mtstate, proute);

	/* Close the result relations, including any trigger target relations */
	ExecCloseResultRelations(estate);
	ExecCloseRangeTableRelations(estate);

	FreeExecutorState(estate);

	return processed;
}

/*
 * Setup to read tuples from a file for COPY FROM.
 *
 * 'rel': Used as a template for the tuples
 * 'whereClause': WHERE clause from the COPY FROM command
 * 'filename': Name of server-local file to read, NULL for STDIN
 * 'is_program': true if 'filename' is program to execute
 * 'data_source_cb': callback that provides the input data
 * 'attnamelist': List of char *, columns to include. NIL selects all cols.
 * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
 *
 * Returns a CopyFromState, to be passed to NextCopyFrom and related functions.
 */
CopyFromState
BeginCopyFrom(ParseState *pstate,
			  Relation rel,
			  Node *whereClause,
			  const char *filename,
			  bool is_program,
			  copy_data_source_cb data_source_cb,
			  List *attnamelist,
			  List *options)
{
	CopyFromState cstate;
	bool		pipe = (filename == NULL);
	TupleDesc	tupDesc;
	AttrNumber	num_phys_attrs,
				num_defaults;
	FmgrInfo   *in_functions;
	Oid		   *typioparams;
	int			attnum;
	Oid			in_func_oid;
	int		   *defmap;
	ExprState **defexprs;
	MemoryContext oldcontext;
	bool		volatile_defexprs;
	const int	progress_cols[] = {
		PROGRESS_COPY_COMMAND,
		PROGRESS_COPY_TYPE,
		PROGRESS_COPY_BYTES_TOTAL
	};
	int64		progress_vals[] = {
		PROGRESS_COPY_COMMAND_FROM,
		0,
		0
	};

	/* Allocate workspace and zero all fields */
	cstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));

	/*
	 * We allocate everything used by a cstate in a new memory context. This
	 * avoids memory leaks during repeated use of COPY in a query.
+ */ + cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_SIZES); + + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + + /* Extract options from the statement node tree */ + ProcessCopyOptions(pstate, &cstate->opts, true /* is_from */ , options); + + /* Process the target relation */ + cstate->rel = rel; + + tupDesc = RelationGetDescr(cstate->rel); + + /* process common options or initialization */ + + /* Generate or convert list of attributes to process */ + cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist); + + num_phys_attrs = tupDesc->natts; + + /* Convert FORCE_NOT_NULL name list to per-column flags, check validity */ + cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_notnull) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_notnull_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NULL name list to per-column flags, check validity */ + cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + 
cstate->opts.force_null_flags[attnum - 1] = true; + } + } + + /* Convert convert_selectively name list to per-column flags */ + if (cstate->opts.convert_selectively) + { + List *attnums; + ListCell *cur; + + cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.convert_select); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg_internal("selected column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->convert_select_flags[attnum - 1] = true; + } + } + + /* Use client encoding when ENCODING option is not specified. */ + if (cstate->opts.file_encoding < 0) + cstate->file_encoding = pg_get_client_encoding(); + else + cstate->file_encoding = cstate->opts.file_encoding; + + /* + * Look up encoding conversion function. 
+ */ + if (cstate->file_encoding == GetDatabaseEncoding() || + cstate->file_encoding == PG_SQL_ASCII || + GetDatabaseEncoding() == PG_SQL_ASCII) + { + cstate->need_transcoding = false; + } + else + { + cstate->need_transcoding = true; + cstate->conversion_proc = FindDefaultConversionProc(cstate->file_encoding, + GetDatabaseEncoding()); + if (!OidIsValid(cstate->conversion_proc)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist", + pg_encoding_to_char(cstate->file_encoding), + pg_encoding_to_char(GetDatabaseEncoding())))); + } + + cstate->copy_src = COPY_FILE; /* default */ + + cstate->whereClause = whereClause; + + /* Initialize state variables */ + cstate->eol_type = EOL_UNKNOWN; + cstate->cur_relname = RelationGetRelationName(cstate->rel); + cstate->cur_lineno = 0; + cstate->cur_attname = NULL; + cstate->cur_attval = NULL; + + /* + * Allocate buffers for the input pipeline. + * + * attribute_buf and raw_buf are used in both text and binary modes, but + * input_buf and line_buf only in text mode. + */ + cstate->raw_buf = palloc(RAW_BUF_SIZE + 1); + cstate->raw_buf_index = cstate->raw_buf_len = 0; + cstate->raw_reached_eof = false; + + if (!cstate->opts.binary) + { + /* + * If encoding conversion is needed, we need another buffer to hold + * the converted input data. Otherwise, we can just point input_buf + * to the same buffer as raw_buf. + */ + if (cstate->need_transcoding) + { + cstate->input_buf = (char *) palloc(INPUT_BUF_SIZE + 1); + cstate->input_buf_index = cstate->input_buf_len = 0; + } + else + cstate->input_buf = cstate->raw_buf; + cstate->input_reached_eof = false; + + initStringInfo(&cstate->line_buf); + } + + initStringInfo(&cstate->attribute_buf); + + /* Assign range table, we'll need it in CopyFrom. 
*/ + if (pstate) + cstate->range_table = pstate->p_rtable; + + tupDesc = RelationGetDescr(cstate->rel); + num_phys_attrs = tupDesc->natts; + num_defaults = 0; + volatile_defexprs = false; + + /* + * Pick up the required catalog information for each attribute in the + * relation, including the input function, the element type (to pass to + * the input function), and info about defaults and constraints. (Which + * input function we use depends on text/binary format choice.) + */ + in_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); + typioparams = (Oid *) palloc(num_phys_attrs * sizeof(Oid)); + defmap = (int *) palloc(num_phys_attrs * sizeof(int)); + defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *)); + + for (attnum = 1; attnum <= num_phys_attrs; attnum++) + { + Form_pg_attribute att = TupleDescAttr(tupDesc, attnum - 1); + + /* We don't need info for dropped attributes */ + if (att->attisdropped) + continue; + + /* Fetch the input function and typioparam info */ + if (cstate->opts.binary) + getTypeBinaryInputInfo(att->atttypid, + &in_func_oid, &typioparams[attnum - 1]); + else + getTypeInputInfo(att->atttypid, + &in_func_oid, &typioparams[attnum - 1]); + fmgr_info(in_func_oid, &in_functions[attnum - 1]); + + /* Get default info if needed */ + if (!list_member_int(cstate->attnumlist, attnum) && !att->attgenerated) + { + /* attribute is NOT to be copied from input */ + /* use default value if one exists */ + Expr *defexpr = (Expr *) build_column_default(cstate->rel, + attnum); + + if (defexpr != NULL) + { + /* Run the expression through planner */ + defexpr = expression_planner(defexpr); + + /* Initialize executable expression in copycontext */ + defexprs[num_defaults] = ExecInitExpr(defexpr, NULL); + defmap[num_defaults] = attnum - 1; + num_defaults++; + + /* + * If a default expression looks at the table being loaded, + * then it could give the wrong answer when using + * multi-insert. 
Since database access can be dynamic this is + * hard to test for exactly, so we use the much wider test of + * whether the default expression is volatile. We allow for + * the special case of when the default expression is the + * nextval() of a sequence which in this specific case is + * known to be safe for use with the multi-insert + * optimization. Hence we use this special case function + * checker rather than the standard check for + * contain_volatile_functions(). + */ + if (!volatile_defexprs) + volatile_defexprs = contain_volatile_functions_not_nextval((Node *) defexpr); + } + } + } + + + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid); + cstate->bytes_processed = 0; + + /* We keep those variables in cstate. */ + cstate->in_functions = in_functions; + cstate->typioparams = typioparams; + cstate->defmap = defmap; + cstate->defexprs = defexprs; + cstate->volatile_defexprs = volatile_defexprs; + cstate->num_defaults = num_defaults; + cstate->is_program = is_program; + + if (data_source_cb) + { + progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK; + cstate->copy_src = COPY_CALLBACK; + cstate->data_source_cb = data_source_cb; + } + else if (pipe) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; + Assert(!is_program); /* the grammar does not allow this */ + if (whereToSendOutput == DestRemote) + ReceiveCopyBegin(cstate); + else + cstate->copy_file = stdin; + } + else + { + cstate->filename = pstrdup(filename); + + if (cstate->is_program) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM; + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + struct stat st; + + progress_vals[1] = PROGRESS_COPY_TYPE_FILE; + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R); + if 
(cstate->copy_file == NULL)
			{
				/* copy errno because ereport subfunctions might change it */
				int			save_errno = errno;

				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\" for reading: %m",
								cstate->filename),
						 (save_errno == ENOENT || save_errno == EACCES) ?
						 errhint("COPY FROM instructs the PostgreSQL server process to read a file. "
								 "You may want a client-side facility such as psql's \\copy.") : 0));
			}

			if (fstat(fileno(cstate->copy_file), &st))
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m",
								cstate->filename)));

			if (S_ISDIR(st.st_mode))
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("\"%s\" is a directory", cstate->filename)));

			/* total file size, for the progress view's BYTES_TOTAL column */
			progress_vals[2] = st.st_size;
		}
	}

	pgstat_progress_update_multi_param(3, progress_cols, progress_vals);

	if (cstate->opts.binary)
	{
		/* Read and verify binary header */
		ReceiveCopyBinaryHeader(cstate);
	}

	/* create workspace for CopyReadAttributes results */
	if (!cstate->opts.binary)
	{
		AttrNumber	attr_count = list_length(cstate->attnumlist);

		cstate->max_fields = attr_count;
		cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
	}

	MemoryContextSwitchTo(oldcontext);

	return cstate;
}

/*
 * Clean up storage and release resources for COPY FROM.
 *
 * Closes the data source (program pipe or file), ends command progress
 * reporting, and frees everything allocated in the COPY memory context.
 */
void
EndCopyFrom(CopyFromState cstate)
{
	/* No COPY FROM related resources except memory. */
	if (cstate->is_program)
	{
		ClosePipeFromProgram(cstate);
	}
	else
	{
		/* filename == NULL means STDIN/callback; nothing to close then */
		if (cstate->filename != NULL && FreeFile(cstate->copy_file))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not close file \"%s\": %m",
							cstate->filename)));
	}

	pgstat_progress_end_command();

	/* all per-COPY allocations live in copycontext; one delete frees them */
	MemoryContextDelete(cstate->copycontext);
	pfree(cstate);
}

/*
 * Closes the pipe from an external program, checking the pclose() return code.
 */
static void
ClosePipeFromProgram(CopyFromState cstate)
{
	int			pclose_rc;

	Assert(cstate->is_program);

	pclose_rc = ClosePipeStream(cstate->copy_file);
	if (pclose_rc == -1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close pipe to external command: %m")));
	else if (pclose_rc != 0)
	{
		/*
		 * If we ended a COPY FROM PROGRAM before reaching EOF, then it's
		 * expectable for the called program to fail with SIGPIPE, and we
		 * should not report that as an error.  Otherwise, SIGPIPE indicates a
		 * problem.
		 */
		if (!cstate->raw_reached_eof &&
			wait_result_is_signal(pclose_rc, SIGPIPE))
			return;

		ereport(ERROR,
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
				 errmsg("program \"%s\" failed",
						cstate->filename),
				 errdetail_internal("%s", wait_result_to_str(pclose_rc))));
	}
}
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
new file mode 100644
index 0000000..097414e
--- /dev/null
+++ b/src/backend/commands/copyfromparse.c
@@ -0,0 +1,1921 @@
/*-------------------------------------------------------------------------
 *
 * copyfromparse.c
 *		Parse CSV/text/binary format for COPY FROM.
 *
 * This file contains routines to parse the text, CSV and binary input
 * formats.  The main entry point is NextCopyFrom(), which parses the
 * next input line and returns it as Datums.
 *
 * In text/CSV mode, the parsing happens in multiple stages:
 *
 * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
 *				1.			2.			  3.		   4.
 *
 * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
 * places it into 'raw_buf'.
 *
 * 2. CopyConvertBuf() calls the encoding conversion function to convert
 * the data in 'raw_buf' from client to server encoding, placing the
 * converted result in 'input_buf'.
 *
 * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
+ * It is responsible for finding the next newline marker, taking quote and + * escape characters into account according to the COPY options. The line + * is copied into 'line_buf', with quotes and escape characters still + * intact. + * + * 4. CopyReadAttributesText/CSV() function takes the input line from + * 'line_buf', and splits it into fields, unescaping the data as required. + * The fields are stored in 'attribute_buf', and 'raw_fields' array holds + * pointers to each field. + * + * If encoding conversion is not required, a shortcut is taken in step 2 to + * avoid copying the data unnecessarily. The 'input_buf' pointer is set to + * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data + * directly into 'input_buf'. CopyConvertBuf() then merely validates that + * the data is valid in the current encoding. + * + * In binary mode, the pipeline is much simpler. Input is loaded into + * 'raw_buf', and encoding conversion is done in the datatype-specific + * receive functions, if required. 'input_buf' and 'line_buf' are not used, + * but 'attribute_buf' is used as a temporary buffer to hold one attribute's + * data when it's passed the receive function. + * + * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also + * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf' + * and 'attribute_buf' are expanded on demand, to hold the longest line + * encountered so far. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyfromparse.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "commands/copy.h" +#include "commands/copyfrom_internal.h" +#include "commands/progress.h" +#include "executor/executor.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "port/pg_bswap.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) +#define OCTVALUE(c) ((c) - '0') + +/* + * These macros centralize code used to process line_buf and input_buf buffers. + * They are macros because they often do continue/break control and to avoid + * function call overhead in tight COPY loops. + * + * We must use "if (1)" because the usual "do {...} while(0)" wrapper would + * prevent the continue/break processing from working. We end the "if (1)" + * with "else ((void) 0)" to ensure the "if" does not unintentionally match + * any "else" in the calling code, and to avoid any compiler warnings about + * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros. + */ + +/* + * This keeps the character read at the top of the loop in the buffer + * even if there is more than one read-ahead. 
+ */ +#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \ +if (1) \ +{ \ + if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \ + { \ + input_buf_ptr = prev_raw_ptr; /* undo fetch */ \ + need_data = true; \ + continue; \ + } \ +} else ((void) 0) + +/* This consumes the remainder of the buffer and breaks */ +#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \ +if (1) \ +{ \ + if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \ + { \ + if (extralen) \ + input_buf_ptr = copy_buf_len; /* consume the partial character */ \ + /* backslash just before EOF, treat as data char */ \ + result = true; \ + break; \ + } \ +} else ((void) 0) + +/* + * Transfer any approved data to line_buf; must do this to be sure + * there is some room in input_buf. + */ +#define REFILL_LINEBUF \ +if (1) \ +{ \ + if (input_buf_ptr > cstate->input_buf_index) \ + { \ + appendBinaryStringInfo(&cstate->line_buf, \ + cstate->input_buf + cstate->input_buf_index, \ + input_buf_ptr - cstate->input_buf_index); \ + cstate->input_buf_index = input_buf_ptr; \ + } \ +} else ((void) 0) + +/* Undo any read-ahead and jump out of the block. 
 */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	input_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)

/* NOTE: there's a copy of this in copyto.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";


/* non-export function prototypes */
static bool CopyReadLine(CopyFromState cstate);
static bool CopyReadLineText(CopyFromState cstate);
static int	CopyReadAttributesText(CopyFromState cstate);
static int	CopyReadAttributesCSV(CopyFromState cstate);
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
									 Oid typioparam, int32 typmod,
									 bool *isnull);


/* Low-level communications functions */
static int	CopyGetData(CopyFromState cstate, void *databuf,
						int minread, int maxread);
static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
static void CopyLoadInputBuf(CopyFromState cstate);
static int	CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);

/*
 * Send a CopyInResponse ('G') message to the frontend and switch the
 * input source to COPY_FRONTEND.  The message carries the overall format
 * code (0 = text, 1 = binary) plus one per-column format code for each
 * column being copied.
 */
void
ReceiveCopyBegin(CopyFromState cstate)
{
	StringInfoData buf;
	int			natts = list_length(cstate->attnumlist);
	int16		format = (cstate->opts.binary ? 1 : 0);
	int			i;

	pq_beginmessage(&buf, 'G');
	pq_sendbyte(&buf, format);	/* overall format */
	pq_sendint16(&buf, natts);
	for (i = 0; i < natts; i++)
		pq_sendint16(&buf, format); /* per-column formats */
	pq_endmessage(&buf);
	cstate->copy_src = COPY_FRONTEND;
	cstate->fe_msgbuf = makeStringInfo();
	/* We *must* flush here to ensure FE knows it can send. */
	pq_flush();
}

/*
 * Read and validate the fixed header of a binary COPY stream: the 11-byte
 * signature, a 32-bit flags field, and a 32-bit header extension length
 * (whose payload we skip).  Errors out on any mismatch.
 */
void
ReceiveCopyBinaryHeader(CopyFromState cstate)
{
	char		readSig[11];
	int32		tmp;

	/* Signature */
	if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
		memcmp(readSig, BinarySignature, 11) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("COPY file signature not recognized")));
	/* Flags field */
	if (!CopyGetInt32(cstate, &tmp))
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (missing flags)")));
	/* bit 16 is the old WITH OIDS flag; that format is no longer accepted */
	if ((tmp & (1 << 16)) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (WITH OIDS)")));
	tmp &= ~(1 << 16);
	/* the remaining upper 16 bits are reserved critical flags; must be 0 */
	if ((tmp >> 16) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("unrecognized critical flags in COPY file header")));
	/* Header extension length */
	if (!CopyGetInt32(cstate, &tmp) ||
		tmp < 0)
		ereport(ERROR,
				(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
				 errmsg("invalid COPY file header (missing length)")));
	/* Skip extension header, if present */
	while (tmp-- > 0)
	{
		if (CopyReadBinaryData(cstate, readSig, 1) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("invalid COPY file header (wrong length)")));
	}
}

/*
 * CopyGetData reads data from the source (file or frontend)
 *
 * We attempt to read at least minread, and at most maxread, bytes from
 * the source.  The actual number of bytes read is returned; if this is
 * less than minread, EOF was detected.
 *
 * Note: when copying from the frontend, we expect a proper EOF mark per
 * protocol; if the frontend simply drops the connection, we raise error.
 * It seems unwise to allow the COPY IN to complete normally in that case.
 *
 * NB: no data conversion is applied here.
 */
static int
CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
{
	int			bytesread = 0;

	switch (cstate->copy_src)
	{
		case COPY_FILE:
			bytesread = fread(databuf, 1, maxread, cstate->copy_file);
			if (ferror(cstate->copy_file))
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not read from COPY file: %m")));
			if (bytesread == 0)
				cstate->raw_reached_eof = true;
			break;
		case COPY_FRONTEND:

			/*
			 * Keep pulling protocol messages until we have at least
			 * 'minread' bytes, the buffer is full, or the frontend ends
			 * the COPY.
			 */
			while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
			{
				int			avail;

				/* Refill fe_msgbuf once the previous message is consumed */
				while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
				{
					/* Try to receive another message */
					int			mtype;
					int			maxmsglen;

			readmessage:
					HOLD_CANCEL_INTERRUPTS();
					pq_startmsgread();
					mtype = pq_getbyte();
					if (mtype == EOF)
						ereport(ERROR,
								(errcode(ERRCODE_CONNECTION_FAILURE),
								 errmsg("unexpected EOF on client connection with an open transaction")));
					/* Validate message type and set packet size limit */
					switch (mtype)
					{
						case 'd':	/* CopyData */
							maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
							break;
						case 'c':	/* CopyDone */
						case 'f':	/* CopyFail */
						case 'H':	/* Flush */
						case 'S':	/* Sync */
							maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
							break;
						default:
							ereport(ERROR,
									(errcode(ERRCODE_PROTOCOL_VIOLATION),
									 errmsg("unexpected message type 0x%02X during COPY from stdin",
											mtype)));
							maxmsglen = 0;	/* keep compiler quiet */
							break;
					}
					/* Now collect the message body */
					if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
						ereport(ERROR,
								(errcode(ERRCODE_CONNECTION_FAILURE),
								 errmsg("unexpected EOF on client connection with an open transaction")));
					RESUME_CANCEL_INTERRUPTS();
					/* ... and process it */
					switch (mtype)
					{
						case 'd':	/* CopyData */
							break;
						case 'c':	/* CopyDone */
							/* COPY IN correctly terminated by frontend */
							cstate->raw_reached_eof = true;
							return bytesread;
						case 'f':	/* CopyFail */
							ereport(ERROR,
									(errcode(ERRCODE_QUERY_CANCELED),
									 errmsg("COPY from stdin failed: %s",
											pq_getmsgstring(cstate->fe_msgbuf))));
							break;
						case 'H':	/* Flush */
						case 'S':	/* Sync */

							/*
							 * Ignore Flush/Sync for the convenience of client
							 * libraries (such as libpq) that may send those
							 * without noticing that the command they just
							 * sent was COPY.
							 */
							goto readmessage;
						default:
							Assert(false);	/* NOT REACHED */
					}
				}
				/* Copy as much of the current message as the caller wants */
				avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
				if (avail > maxread)
					avail = maxread;
				pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
				databuf = (void *) ((char *) databuf + avail);
				maxread -= avail;
				bytesread += avail;
			}
			break;
		case COPY_CALLBACK:
			bytesread = cstate->data_source_cb(databuf, minread, maxread);
			break;
	}

	return bytesread;
}


/*
 * These functions do apply some data conversion
 */

/*
 * CopyGetInt32 reads an int32 that appears in network byte order
 *
 * Returns true if OK, false if EOF
 */
static inline bool
CopyGetInt32(CopyFromState cstate, int32 *val)
{
	uint32		buf;

	if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
	{
		*val = 0;				/* suppress compiler warning */
		return false;
	}
	*val = (int32) pg_ntoh32(buf);
	return true;
}

/*
 * CopyGetInt16 reads an int16 that appears in network byte order
 *
 * Returns true if OK, false if EOF
 */
static inline bool
CopyGetInt16(CopyFromState cstate, int16 *val)
{
	uint16		buf;

	if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
	{
		*val = 0;				/* suppress compiler warning */
		return false;
	}
	*val = (int16) pg_ntoh16(buf);
	return true;
}


/*
 * Perform encoding conversion on data in 'raw_buf', writing the converted
 * data into 'input_buf'.
 *
 * On entry, there must be some data to convert in 'raw_buf'.
 */
static void
CopyConvertBuf(CopyFromState cstate)
{
	/*
	 * If the file and server encoding are the same, no encoding conversion is
	 * required.  However, we still need to verify that the input is valid for
	 * the encoding.
	 */
	if (!cstate->need_transcoding)
	{
		/*
		 * When conversion is not required, input_buf and raw_buf are the
		 * same.  raw_buf_len is the total number of bytes in the buffer, and
		 * input_buf_len tracks how many of those bytes have already been
		 * verified.
		 */
		int			preverifiedlen = cstate->input_buf_len;
		int			unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
		int			nverified;

		if (unverifiedlen == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * Verify the new data, including any residual unverified bytes from
		 * previous round.
		 */
		nverified = pg_encoding_verifymbstr(cstate->file_encoding,
											cstate->raw_buf + preverifiedlen,
											unverifiedlen);
		if (nverified == 0)
		{
			/*
			 * Could not verify anything.
			 *
			 * If there is no more raw input data coming, it means that there
			 * was an incomplete multi-byte sequence at the end.  Also, if
			 * there's "enough" input left, we should be able to verify at
			 * least one character, and a failure to do so means that we've
			 * hit an invalid byte sequence.
			 */
			if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
				cstate->input_reached_error = true;
			return;
		}
		cstate->input_buf_len += nverified;
	}
	else
	{
		/*
		 * Encoding conversion is needed.
		 */
		int			nbytes;
		unsigned char *src;
		int			srclen;
		unsigned char *dst;
		int			dstlen;
		int			convertedlen;

		if (RAW_BUF_BYTES(cstate) == 0)
		{
			/*
			 * If no more raw data is coming, report the EOF to the caller.
			 */
			if (cstate->raw_reached_eof)
				cstate->input_reached_eof = true;
			return;
		}

		/*
		 * First, copy down any unprocessed data.
		 */
		nbytes = INPUT_BUF_BYTES(cstate);
		if (nbytes > 0 && cstate->input_buf_index > 0)
			memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
					nbytes);
		cstate->input_buf_index = 0;
		cstate->input_buf_len = nbytes;
		cstate->input_buf[nbytes] = '\0';

		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

		/*
		 * Do the conversion.  This might stop short, if there is an invalid
		 * byte sequence in the input.  We'll convert as much as we can in
		 * that case.
		 *
		 * Note: Even if we hit an invalid byte sequence, we don't report the
		 * error until all the valid bytes have been consumed.  The input
		 * might contain an end-of-input marker (\.), and we don't want to
		 * report an error if the invalid byte sequence is after the
		 * end-of-input marker.  We might unnecessarily convert some data
		 * after the end-of-input marker as long as it's valid for the
		 * encoding, but that's harmless.
		 */
		convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
													 cstate->file_encoding,
													 GetDatabaseEncoding(),
													 src, srclen,
													 dst, dstlen,
													 true);
		if (convertedlen == 0)
		{
			/*
			 * Could not convert anything.  If there is no more raw input data
			 * coming, it means that there was an incomplete multi-byte
			 * sequence at the end.  Also, if there is plenty of input left,
			 * we should be able to convert at least one character, so a
			 * failure to do so must mean that we've hit a byte sequence
			 * that's invalid.
			 */
			if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
				cstate->input_reached_error = true;
			return;
		}
		cstate->raw_buf_index += convertedlen;
		/* conversion output is NUL-terminated, so strlen() is its length */
		cstate->input_buf_len += strlen((char *) dst);
	}
}

/*
 * Report an encoding or conversion error.
 */
static void
CopyConversionError(CopyFromState cstate)
{
	Assert(cstate->raw_buf_len > 0);
	Assert(cstate->input_reached_error);

	if (!cstate->need_transcoding)
	{
		/*
		 * Everything up to input_buf_len was successfully verified, and
		 * input_buf_len points to the invalid or incomplete character.
		 */
		report_invalid_encoding(cstate->file_encoding,
								cstate->raw_buf + cstate->input_buf_len,
								cstate->raw_buf_len - cstate->input_buf_len);
	}
	else
	{
		/*
		 * raw_buf_index points to the invalid or untranslatable character. We
		 * let the conversion routine report the error, because it can provide
		 * a more specific error message than we could here.  An earlier call
		 * to the conversion routine in CopyConvertBuf() detected that there
		 * is an error, now we call the conversion routine again with
		 * noError=false, to have it throw the error.
		 */
		unsigned char *src;
		int			srclen;
		unsigned char *dst;
		int			dstlen;

		src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
		srclen = cstate->raw_buf_len - cstate->raw_buf_index;
		dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
		dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

		(void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
											 cstate->file_encoding,
											 GetDatabaseEncoding(),
											 src, srclen,
											 dst, dstlen,
											 false);

		/*
		 * The conversion routine should have reported an error, so this
		 * should not be reached.
		 */
		elog(ERROR, "encoding conversion failed without error");
	}
}

/*
 * Load more data from data source to raw_buf.
 *
 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
 * beginning of the buffer, and we load new data after that.
 */
static void
CopyLoadRawBuf(CopyFromState cstate)
{
	int			nbytes;
	int			inbytes;

	/*
	 * In text mode, if encoding conversion is not required, raw_buf and
	 * input_buf point to the same buffer.  Their len/index better agree, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->raw_buf_index == cstate->input_buf_index);
		Assert(cstate->input_buf_len <= cstate->raw_buf_len);
	}

	/*
	 * Copy down the unprocessed data if any.
	 */
	nbytes = RAW_BUF_BYTES(cstate);
	if (nbytes > 0 && cstate->raw_buf_index > 0)
		memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
				nbytes);
	cstate->raw_buf_len -= cstate->raw_buf_index;
	cstate->raw_buf_index = 0;

	/*
	 * If raw_buf and input_buf are in fact the same buffer, adjust the
	 * input_buf variables, too.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		cstate->input_buf_len -= cstate->input_buf_index;
		cstate->input_buf_index = 0;
	}

	/* Load more data */
	inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
						  1, RAW_BUF_SIZE - cstate->raw_buf_len);
	nbytes += inbytes;
	/* keep the buffer NUL-terminated for the string-scanning callers */
	cstate->raw_buf[nbytes] = '\0';
	cstate->raw_buf_len = nbytes;

	cstate->bytes_processed += inbytes;
	pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

	if (inbytes == 0)
		cstate->raw_reached_eof = true;
}

/*
 * CopyLoadInputBuf loads some more data into input_buf
 *
 * On return, at least one more input character is loaded into
 * input_buf, or input_reached_eof is set.
 *
 * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
 * of the buffer and then we load more data after that.
 */
static void
CopyLoadInputBuf(CopyFromState cstate)
{
	int			nbytes = INPUT_BUF_BYTES(cstate);

	/*
	 * The caller has updated input_buf_index to indicate how much of the
	 * input has been consumed and isn't needed anymore.  If input_buf is the
	 * same physical area as raw_buf, update raw_buf_index accordingly.
	 */
	if (cstate->raw_buf == cstate->input_buf)
	{
		Assert(!cstate->need_transcoding);
		Assert(cstate->input_buf_index >= cstate->raw_buf_index);
		cstate->raw_buf_index = cstate->input_buf_index;
	}

	for (;;)
	{
		/* If we now have some unconverted data, try to convert it */
		CopyConvertBuf(cstate);

		/* If we now have some more input bytes ready, return them */
		if (INPUT_BUF_BYTES(cstate) > nbytes)
			return;

		/*
		 * If we reached an invalid byte sequence, or we're at an incomplete
		 * multi-byte character but there is no more raw input data, report
		 * conversion error.
		 */
		if (cstate->input_reached_error)
			CopyConversionError(cstate);

		/* no more input, and everything has been converted */
		if (cstate->input_reached_eof)
			break;

		/* Try to load more raw data */
		Assert(!cstate->raw_reached_eof);
		CopyLoadRawBuf(cstate);
	}
}

/*
 * CopyReadBinaryData
 *
 * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
 * and writes them to 'dest'.  Returns the number of bytes read (which
 * would be less than 'nbytes' only if we reach EOF).
 */
static int
CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
{
	int			copied_bytes = 0;

	if (RAW_BUF_BYTES(cstate) >= nbytes)
	{
		/* Enough bytes are present in the buffer. */
		memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
		cstate->raw_buf_index += nbytes;
		copied_bytes = nbytes;
	}
	else
	{
		/*
		 * Not enough bytes in the buffer, so must read from the file.  Need
		 * to loop since 'nbytes' could be larger than the buffer size.
		 */
		do
		{
			int			copy_bytes;

			/* Load more data if buffer is empty. */
			if (RAW_BUF_BYTES(cstate) == 0)
			{
				CopyLoadRawBuf(cstate);
				if (cstate->raw_reached_eof)
					break;		/* EOF */
			}

			/* Transfer some bytes. */
			copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
			memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
			cstate->raw_buf_index += copy_bytes;
			dest += copy_bytes;
			copied_bytes += copy_bytes;
		} while (copied_bytes < nbytes);
	}

	return copied_bytes;
}

/*
 * Read raw fields in the next line for COPY FROM in text or csv mode.
 * Return false if no more lines.
 *
 * An internal temporary buffer is returned via 'fields'. It is valid until
 * the next call of the function. Since the function returns all raw fields
 * in the input file, 'nfields' could be different from the number of columns
 * in the relation.
 *
 * NOTE: force_not_null option are not applied to the returned fields.
 */
bool
NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
{
	int			fldct;
	bool		done;

	/* only available for text or csv input */
	Assert(!cstate->opts.binary);

	/* on input check that the header line is correct if needed */
	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
	{
		ListCell   *cur;
		TupleDesc	tupDesc;

		tupDesc = RelationGetDescr(cstate->rel);

		cstate->cur_lineno++;
		done = CopyReadLine(cstate);

		/* With HEADER MATCH, each header field must equal its column name */
		if (cstate->opts.header_line == COPY_HEADER_MATCH)
		{
			int			fldnum;

			if (cstate->opts.csv_mode)
				fldct = CopyReadAttributesCSV(cstate);
			else
				fldct = CopyReadAttributesText(cstate);

			if (fldct != list_length(cstate->attnumlist))
				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 errmsg("wrong number of fields in header line: got %d, expected %d",
								fldct, list_length(cstate->attnumlist))));

			fldnum = 0;
			foreach(cur, cstate->attnumlist)
			{
				int			attnum = lfirst_int(cur);
				char	   *colName;
				Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);

				Assert(fldnum < cstate->max_fields);

				colName = cstate->raw_fields[fldnum++];
				if (colName == NULL)
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
									fldnum, cstate->opts.null_print, NameStr(attr->attname))));

				if (namestrcmp(&attr->attname, colName) != 0)
				{
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
									fldnum, colName, NameStr(attr->attname))));
				}
			}
		}

		if (done)
			return false;
	}

	cstate->cur_lineno++;

	/* Actually read the line into memory here */
	done = CopyReadLine(cstate);

	/*
	 * EOF at start of line means we're done.  If we see EOF after some
	 * characters, we act as though it was newline followed by EOF, ie,
	 * process the line and then exit loop on next iteration.
	 */
	if (done && cstate->line_buf.len == 0)
		return false;

	/* Parse the line into de-escaped field values */
	if (cstate->opts.csv_mode)
		fldct = CopyReadAttributesCSV(cstate);
	else
		fldct = CopyReadAttributesText(cstate);

	*fields = cstate->raw_fields;
	*nfields = fldct;
	return true;
}

/*
 * Read next tuple from file for COPY FROM. Return false if no more tuples.
 *
 * 'econtext' is used to evaluate default expression for each column not
 * read from the file. It can be NULL when no default values are used, i.e.
 * when all columns are read from the file.
 *
 * 'values' and 'nulls' arrays must be the same length as columns of the
 * relation passed to BeginCopyFrom. This function fills the arrays.
+ */ +bool +NextCopyFrom(CopyFromState cstate, ExprContext *econtext, + Datum *values, bool *nulls) +{ + TupleDesc tupDesc; + AttrNumber num_phys_attrs, + attr_count, + num_defaults = cstate->num_defaults; + FmgrInfo *in_functions = cstate->in_functions; + Oid *typioparams = cstate->typioparams; + int i; + int *defmap = cstate->defmap; + ExprState **defexprs = cstate->defexprs; + + tupDesc = RelationGetDescr(cstate->rel); + num_phys_attrs = tupDesc->natts; + attr_count = list_length(cstate->attnumlist); + + /* Initialize all values for row to NULL */ + MemSet(values, 0, num_phys_attrs * sizeof(Datum)); + MemSet(nulls, true, num_phys_attrs * sizeof(bool)); + + if (!cstate->opts.binary) + { + char **field_strings; + ListCell *cur; + int fldct; + int fieldno; + char *string; + + /* read raw fields in the next line */ + if (!NextCopyFromRawFields(cstate, &field_strings, &fldct)) + return false; + + /* check for overflowing fields */ + if (attr_count > 0 && fldct > attr_count) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + + fieldno = 0; + + /* Loop to read the user attributes on the line. */ + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + int m = attnum - 1; + Form_pg_attribute att = TupleDescAttr(tupDesc, m); + + if (fieldno >= fldct) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("missing data for column \"%s\"", + NameStr(att->attname)))); + string = field_strings[fieldno++]; + + if (cstate->convert_select_flags && + !cstate->convert_select_flags[m]) + { + /* ignore input field, leaving column as NULL */ + continue; + } + + if (cstate->opts.csv_mode) + { + if (string == NULL && + cstate->opts.force_notnull_flags[m]) + { + /* + * FORCE_NOT_NULL option is set and column is NULL - + * convert it to the NULL string. 
+ */ + string = cstate->opts.null_print; + } + else if (string != NULL && cstate->opts.force_null_flags[m] + && strcmp(string, cstate->opts.null_print) == 0) + { + /* + * FORCE_NULL option is set and column matches the NULL + * string. It must have been quoted, or otherwise the + * string would already have been set to NULL. Convert it + * to NULL as specified. + */ + string = NULL; + } + } + + cstate->cur_attname = NameStr(att->attname); + cstate->cur_attval = string; + values[m] = InputFunctionCall(&in_functions[m], + string, + typioparams[m], + att->atttypmod); + if (string != NULL) + nulls[m] = false; + cstate->cur_attname = NULL; + cstate->cur_attval = NULL; + } + + Assert(fieldno == attr_count); + } + else + { + /* binary */ + int16 fld_count; + ListCell *cur; + + cstate->cur_lineno++; + + if (!CopyGetInt16(cstate, &fld_count)) + { + /* EOF detected (end of file, or protocol-level EOF) */ + return false; + } + + if (fld_count == -1) + { + /* + * Received EOF marker. Wait for the protocol-level EOF, and + * complain if it doesn't come immediately. In COPY FROM STDIN, + * this ensures that we correctly handle CopyFail, if client + * chooses to send that now. When copying from file, we could + * ignore the rest of the file like in text mode, but we choose to + * be consistent with the COPY FROM STDIN case. 
+ */ + char dummy; + + if (CopyReadBinaryData(cstate, &dummy, 1) > 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("received copy data after EOF marker"))); + return false; + } + + if (fld_count != attr_count) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("row field count is %d, expected %d", + (int) fld_count, attr_count))); + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + int m = attnum - 1; + Form_pg_attribute att = TupleDescAttr(tupDesc, m); + + cstate->cur_attname = NameStr(att->attname); + values[m] = CopyReadBinaryAttribute(cstate, + &in_functions[m], + typioparams[m], + att->atttypmod, + &nulls[m]); + cstate->cur_attname = NULL; + } + } + + /* + * Now compute and insert any defaults available for the columns not + * provided by the input data. Anything not processed here or above will + * remain NULL. + */ + for (i = 0; i < num_defaults; i++) + { + /* + * The caller must supply econtext and have switched into the + * per-tuple memory context in it. + */ + Assert(econtext != NULL); + Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory); + + values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext, + &nulls[defmap[i]]); + } + + return true; +} + +/* + * Read the next input line and stash it in line_buf. + * + * Result is true if read was terminated by EOF, false if terminated + * by newline. The terminating newline or EOF marker is not included + * in the final value of line_buf. + */ +static bool +CopyReadLine(CopyFromState cstate) +{ + bool result; + + resetStringInfo(&cstate->line_buf); + cstate->line_buf_valid = false; + + /* Parse data and transfer into line_buf */ + result = CopyReadLineText(cstate); + + if (result) + { + /* + * Reached EOF. In protocol version 3, we should ignore anything + * after \. up to the protocol end of copy data. (XXX maybe better + * not to treat \. as special?) 
+ */ + if (cstate->copy_src == COPY_FRONTEND) + { + int inbytes; + + do + { + inbytes = CopyGetData(cstate, cstate->input_buf, + 1, INPUT_BUF_SIZE); + } while (inbytes > 0); + cstate->input_buf_index = 0; + cstate->input_buf_len = 0; + cstate->raw_buf_index = 0; + cstate->raw_buf_len = 0; + } + } + else + { + /* + * If we didn't hit EOF, then we must have transferred the EOL marker + * to line_buf along with the data. Get rid of it. + */ + switch (cstate->eol_type) + { + case EOL_NL: + Assert(cstate->line_buf.len >= 1); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n'); + cstate->line_buf.len--; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_CR: + Assert(cstate->line_buf.len >= 1); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r'); + cstate->line_buf.len--; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_CRNL: + Assert(cstate->line_buf.len >= 2); + Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r'); + Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n'); + cstate->line_buf.len -= 2; + cstate->line_buf.data[cstate->line_buf.len] = '\0'; + break; + case EOL_UNKNOWN: + /* shouldn't get here */ + Assert(false); + break; + } + } + + /* Now it's safe to use the buffer in error messages */ + cstate->line_buf_valid = true; + + return result; +} + +/* + * CopyReadLineText - inner loop of CopyReadLine for text mode + */ +static bool +CopyReadLineText(CopyFromState cstate) +{ + char *copy_input_buf; + int input_buf_ptr; + int copy_buf_len; + bool need_data = false; + bool hit_eof = false; + bool result = false; + + /* CSV variables */ + bool first_char_in_line = true; + bool in_quote = false, + last_was_esc = false; + char quotec = '\0'; + char escapec = '\0'; + + if (cstate->opts.csv_mode) + { + quotec = cstate->opts.quote[0]; + escapec = cstate->opts.escape[0]; + /* ignore special escape processing if it's the same as quotec */ + if (quotec == escapec) + escapec = 
'\0'; + } + + /* + * The objective of this loop is to transfer the entire next input line + * into line_buf. Hence, we only care for detecting newlines (\r and/or + * \n) and the end-of-copy marker (\.). + * + * In CSV mode, \r and \n inside a quoted field are just part of the data + * value and are put in line_buf. We keep just enough state to know if we + * are currently in a quoted field or not. + * + * These four characters, and the CSV escape and quote characters, are + * assumed the same in frontend and backend encodings. + * + * The input has already been converted to the database encoding. All + * supported server encodings have the property that all bytes in a + * multi-byte sequence have the high bit set, so a multibyte character + * cannot contain any newline or escape characters embedded in the + * multibyte sequence. Therefore, we can process the input byte-by-byte, + * regardless of the encoding. + * + * For speed, we try to move data from input_buf to line_buf in chunks + * rather than one character at a time. input_buf_ptr points to the next + * character to examine; any characters from input_buf_index to + * input_buf_ptr have been determined to be part of the line, but not yet + * transferred to line_buf. + * + * For a little extra speed within the loop, we copy input_buf and + * input_buf_len into local variables. + */ + copy_input_buf = cstate->input_buf; + input_buf_ptr = cstate->input_buf_index; + copy_buf_len = cstate->input_buf_len; + + for (;;) + { + int prev_raw_ptr; + char c; + + /* + * Load more data if needed. + * + * TODO: We could just force four bytes of read-ahead and avoid the + * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was + * unsafe with the old v2 COPY protocol, but we don't support that + * anymore. 
+ */ + if (input_buf_ptr >= copy_buf_len || need_data) + { + REFILL_LINEBUF; + + CopyLoadInputBuf(cstate); + /* update our local variables */ + hit_eof = cstate->input_reached_eof; + input_buf_ptr = cstate->input_buf_index; + copy_buf_len = cstate->input_buf_len; + + /* + * If we are completely out of data, break out of the loop, + * reporting EOF. + */ + if (INPUT_BUF_BYTES(cstate) <= 0) + { + result = true; + break; + } + need_data = false; + } + + /* OK to fetch a character */ + prev_raw_ptr = input_buf_ptr; + c = copy_input_buf[input_buf_ptr++]; + + if (cstate->opts.csv_mode) + { + /* + * If character is '\\' or '\r', we may need to look ahead below. + * Force fetch of the next character if we don't already have it. + * We need to do this before changing CSV state, in case one of + * these characters is also the quote or escape character. + */ + if (c == '\\' || c == '\r') + { + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + } + + /* + * Dealing with quotes and escapes here is mildly tricky. If the + * quote char is also the escape char, there's no problem - we + * just use the char as a toggle. If they are different, we need + * to ensure that we only take account of an escape inside a + * quoted field and immediately preceding a quote char, and not + * the second in an escape-escape sequence. + */ + if (in_quote && c == escapec) + last_was_esc = !last_was_esc; + if (c == quotec && !last_was_esc) + in_quote = !in_quote; + if (c != escapec) + last_was_esc = false; + + /* + * Updating the line count for embedded CR and/or LF chars is + * necessarily a little fragile - this test is probably about the + * best we can do. (XXX it's arguable whether we should do this + * at all --- is cur_lineno a physical or logical count?) + */ + if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r')) + cstate->cur_lineno++; + } + + /* Process \r */ + if (c == '\r' && (!cstate->opts.csv_mode || !in_quote)) + { + /* Check for \r\n on first line, _and_ handle \r\n. 
*/ + if (cstate->eol_type == EOL_UNKNOWN || + cstate->eol_type == EOL_CRNL) + { + /* + * If need more data, go back to loop top to load it. + * + * Note that if we are at EOF, c will wind up as '\0' because + * of the guaranteed pad of input_buf. + */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + + /* get next char */ + c = copy_input_buf[input_buf_ptr]; + + if (c == '\n') + { + input_buf_ptr++; /* eat newline */ + cstate->eol_type = EOL_CRNL; /* in case not set yet */ + } + else + { + /* found \r, but no \n */ + if (cstate->eol_type == EOL_CRNL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal carriage return found in data") : + errmsg("unquoted carriage return found in data"), + !cstate->opts.csv_mode ? + errhint("Use \"\\r\" to represent carriage return.") : + errhint("Use quoted CSV field to represent carriage return."))); + + /* + * if we got here, it is the first line and we didn't find + * \n, so don't consume the peeked character + */ + cstate->eol_type = EOL_CR; + } + } + else if (cstate->eol_type == EOL_NL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal carriage return found in data") : + errmsg("unquoted carriage return found in data"), + !cstate->opts.csv_mode ? + errhint("Use \"\\r\" to represent carriage return.") : + errhint("Use quoted CSV field to represent carriage return."))); + /* If reach here, we have found the line terminator */ + break; + } + + /* Process \n */ + if (c == '\n' && (!cstate->opts.csv_mode || !in_quote)) + { + if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + !cstate->opts.csv_mode ? + errmsg("literal newline found in data") : + errmsg("unquoted newline found in data"), + !cstate->opts.csv_mode ? 
+ errhint("Use \"\\n\" to represent newline.") : + errhint("Use quoted CSV field to represent newline."))); + cstate->eol_type = EOL_NL; /* in case not set yet */ + /* If reach here, we have found the line terminator */ + break; + } + + /* + * In CSV mode, we only recognize \. alone on a line. This is because + * \. is a valid CSV data value. + */ + if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line)) + { + char c2; + + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + IF_NEED_REFILL_AND_EOF_BREAK(0); + + /* ----- + * get next character + * Note: we do not change c so if it isn't \., we can fall + * through and continue processing. + * ----- + */ + c2 = copy_input_buf[input_buf_ptr]; + + if (c2 == '.') + { + input_buf_ptr++; /* consume the '.' */ + + /* + * Note: if we loop back for more data here, it does not + * matter that the CSV state change checks are re-executed; we + * will come back here with no important state changed. + */ + if (cstate->eol_type == EOL_CRNL) + { + /* Get the next character */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + /* if hit_eof, c2 will become '\0' */ + c2 = copy_input_buf[input_buf_ptr++]; + + if (c2 == '\n') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker does not match previous newline style"))); + else + NO_END_OF_COPY_GOTO; + } + else if (c2 != '\r') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker corrupt"))); + else + NO_END_OF_COPY_GOTO; + } + } + + /* Get the next character */ + IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0); + /* if hit_eof, c2 will become '\0' */ + c2 = copy_input_buf[input_buf_ptr++]; + + if (c2 != '\r' && c2 != '\n') + { + if (!cstate->opts.csv_mode) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker corrupt"))); + else + NO_END_OF_COPY_GOTO; + } + + if ((cstate->eol_type == EOL_NL && c2 != '\n') || + (cstate->eol_type == 
EOL_CRNL && c2 != '\n') || + (cstate->eol_type == EOL_CR && c2 != '\r')) + { + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("end-of-copy marker does not match previous newline style"))); + } + + /* + * Transfer only the data before the \. into line_buf, then + * discard the data and the \. sequence. + */ + if (prev_raw_ptr > cstate->input_buf_index) + appendBinaryStringInfo(&cstate->line_buf, + cstate->input_buf + cstate->input_buf_index, + prev_raw_ptr - cstate->input_buf_index); + cstate->input_buf_index = input_buf_ptr; + result = true; /* report EOF */ + break; + } + else if (!cstate->opts.csv_mode) + { + /* + * If we are here, it means we found a backslash followed by + * something other than a period. In non-CSV mode, anything + * after a backslash is special, so we skip over that second + * character too. If we didn't do that \\. would be + * considered an eof-of copy, while in non-CSV mode it is a + * literal backslash followed by a period. In CSV mode, + * backslashes are not special, so we want to process the + * character after the backslash just like a normal character, + * so we don't increment in those cases. + */ + input_buf_ptr++; + } + } + + /* + * This label is for CSV cases where \. appears at the start of a + * line, but there is more text after it, meaning it was a data value. + * We are more strict for \. in CSV mode because \. could be a data + * value, while in non-CSV mode, \. cannot be a data value. + */ +not_end_of_copy: + first_char_in_line = false; + } /* end of outer loop */ + + /* + * Transfer any still-uncopied data to line_buf. + */ + REFILL_LINEBUF; + + return result; +} + +/* + * Return decimal value for a hexadecimal digit + */ +static int +GetDecimalFromHex(char hex) +{ + if (isdigit((unsigned char) hex)) + return hex - '0'; + else + return tolower((unsigned char) hex) - 'a' + 10; +} + +/* + * Parse the current line into separate attributes (fields), + * performing de-escaping as needed. 
+ * + * The input is in line_buf. We use attribute_buf to hold the result + * strings. cstate->raw_fields[k] is set to point to the k'th attribute + * string, or NULL when the input matches the null marker string. + * This array is expanded as necessary. + * + * (Note that the caller cannot check for nulls since the returned + * string would be the post-de-escaping equivalent, which may look + * the same as some valid data string.) + * + * delim is the column delimiter string (must be just one byte for now). + * null_print is the null marker string. Note that this is compared to + * the pre-de-escaped input string. + * + * The return value is the number of fields actually read. + */ +static int +CopyReadAttributesText(CopyFromState cstate) +{ + char delimc = cstate->opts.delim[0]; + int fieldno; + char *output_ptr; + char *cur_ptr; + char *line_end_ptr; + + /* + * We need a special case for zero-column tables: check that the input + * line is empty, and return. + */ + if (cstate->max_fields <= 0) + { + if (cstate->line_buf.len != 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + return 0; + } + + resetStringInfo(&cstate->attribute_buf); + + /* + * The de-escaped attributes will certainly not be longer than the input + * data line, so we can just force attribute_buf to be large enough and + * then transfer data without any checks for enough space. We need to do + * it this way because enlarging attribute_buf mid-stream would invalidate + * pointers already stored into cstate->raw_fields[]. 
+ */ + if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) + enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); + output_ptr = cstate->attribute_buf.data; + + /* set pointer variables for loop */ + cur_ptr = cstate->line_buf.data; + line_end_ptr = cstate->line_buf.data + cstate->line_buf.len; + + /* Outer loop iterates over fields */ + fieldno = 0; + for (;;) + { + bool found_delim = false; + char *start_ptr; + char *end_ptr; + int input_len; + bool saw_non_ascii = false; + + /* Make sure there is enough space for the next value */ + if (fieldno >= cstate->max_fields) + { + cstate->max_fields *= 2; + cstate->raw_fields = + repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *)); + } + + /* Remember start of field on both input and output sides */ + start_ptr = cur_ptr; + cstate->raw_fields[fieldno] = output_ptr; + + /* + * Scan data for field. + * + * Note that in this loop, we are scanning to locate the end of field + * and also speculatively performing de-escaping. Once we find the + * end-of-field, we can match the raw field contents against the null + * marker string. Only after that comparison fails do we know that + * de-escaping is actually the right thing to do; therefore we *must + * not* throw any syntax errors before we've done the null-marker + * check. 
+ */ + for (;;) + { + char c; + + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + break; + c = *cur_ptr++; + if (c == delimc) + { + found_delim = true; + break; + } + if (c == '\\') + { + if (cur_ptr >= line_end_ptr) + break; + c = *cur_ptr++; + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + /* handle \013 */ + int val; + + val = OCTVALUE(c); + if (cur_ptr < line_end_ptr) + { + c = *cur_ptr; + if (ISOCTAL(c)) + { + cur_ptr++; + val = (val << 3) + OCTVALUE(c); + if (cur_ptr < line_end_ptr) + { + c = *cur_ptr; + if (ISOCTAL(c)) + { + cur_ptr++; + val = (val << 3) + OCTVALUE(c); + } + } + } + } + c = val & 0377; + if (c == '\0' || IS_HIGHBIT_SET(c)) + saw_non_ascii = true; + } + break; + case 'x': + /* Handle \x3F */ + if (cur_ptr < line_end_ptr) + { + char hexchar = *cur_ptr; + + if (isxdigit((unsigned char) hexchar)) + { + int val = GetDecimalFromHex(hexchar); + + cur_ptr++; + if (cur_ptr < line_end_ptr) + { + hexchar = *cur_ptr; + if (isxdigit((unsigned char) hexchar)) + { + cur_ptr++; + val = (val << 4) + GetDecimalFromHex(hexchar); + } + } + c = val & 0xff; + if (c == '\0' || IS_HIGHBIT_SET(c)) + saw_non_ascii = true; + } + } + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + + /* + * in all other cases, take the char after '\' + * literally + */ + } + } + + /* Add c to output string */ + *output_ptr++ = c; + } + + /* Check whether raw input matched null marker */ + input_len = end_ptr - start_ptr; + if (input_len == cstate->opts.null_print_len && + strncmp(start_ptr, cstate->opts.null_print, input_len) == 0) + cstate->raw_fields[fieldno] = NULL; + else + { + /* + * At this point we know the field is supposed to contain data. 
+ * + * If we de-escaped any non-7-bit-ASCII chars, make sure the + * resulting string is valid data for the db encoding. + */ + if (saw_non_ascii) + { + char *fld = cstate->raw_fields[fieldno]; + + pg_verifymbstr(fld, output_ptr - fld, false); + } + } + + /* Terminate attribute value in output area */ + *output_ptr++ = '\0'; + + fieldno++; + /* Done if we hit EOL instead of a delim */ + if (!found_delim) + break; + } + + /* Clean up state of attribute_buf */ + output_ptr--; + Assert(*output_ptr == '\0'); + cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data); + + return fieldno; +} + +/* + * Parse the current line into separate attributes (fields), + * performing de-escaping as needed. This has exactly the same API as + * CopyReadAttributesText, except we parse the fields according to + * "standard" (i.e. common) CSV usage. + */ +static int +CopyReadAttributesCSV(CopyFromState cstate) +{ + char delimc = cstate->opts.delim[0]; + char quotec = cstate->opts.quote[0]; + char escapec = cstate->opts.escape[0]; + int fieldno; + char *output_ptr; + char *cur_ptr; + char *line_end_ptr; + + /* + * We need a special case for zero-column tables: check that the input + * line is empty, and return. + */ + if (cstate->max_fields <= 0) + { + if (cstate->line_buf.len != 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("extra data after last expected column"))); + return 0; + } + + resetStringInfo(&cstate->attribute_buf); + + /* + * The de-escaped attributes will certainly not be longer than the input + * data line, so we can just force attribute_buf to be large enough and + * then transfer data without any checks for enough space. We need to do + * it this way because enlarging attribute_buf mid-stream would invalidate + * pointers already stored into cstate->raw_fields[]. 
+ */ + if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) + enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); + output_ptr = cstate->attribute_buf.data; + + /* set pointer variables for loop */ + cur_ptr = cstate->line_buf.data; + line_end_ptr = cstate->line_buf.data + cstate->line_buf.len; + + /* Outer loop iterates over fields */ + fieldno = 0; + for (;;) + { + bool found_delim = false; + bool saw_quote = false; + char *start_ptr; + char *end_ptr; + int input_len; + + /* Make sure there is enough space for the next value */ + if (fieldno >= cstate->max_fields) + { + cstate->max_fields *= 2; + cstate->raw_fields = + repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *)); + } + + /* Remember start of field on both input and output sides */ + start_ptr = cur_ptr; + cstate->raw_fields[fieldno] = output_ptr; + + /* + * Scan data for field, + * + * The loop starts in "not quote" mode and then toggles between that + * and "in quote" mode. The loop exits normally if it is in "not + * quote" mode and a delimiter or line end is seen. 
+ */ + for (;;) + { + char c; + + /* Not in quote */ + for (;;) + { + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + goto endfield; + c = *cur_ptr++; + /* unquoted field delimiter */ + if (c == delimc) + { + found_delim = true; + goto endfield; + } + /* start of quoted field (or part of field) */ + if (c == quotec) + { + saw_quote = true; + break; + } + /* Add c to output string */ + *output_ptr++ = c; + } + + /* In quote */ + for (;;) + { + end_ptr = cur_ptr; + if (cur_ptr >= line_end_ptr) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unterminated CSV quoted field"))); + + c = *cur_ptr++; + + /* escape within a quoted field */ + if (c == escapec) + { + /* + * peek at the next char if available, and escape it if it + * is an escape char or a quote char + */ + if (cur_ptr < line_end_ptr) + { + char nextc = *cur_ptr; + + if (nextc == escapec || nextc == quotec) + { + *output_ptr++ = nextc; + cur_ptr++; + continue; + } + } + } + + /* + * end of quoted field. Must do this test after testing for + * escape in case quote char and escape char are the same + * (which is the common case). 
+ */ + if (c == quotec) + break; + + /* Add c to output string */ + *output_ptr++ = c; + } + } +endfield: + + /* Terminate attribute value in output area */ + *output_ptr++ = '\0'; + + /* Check whether raw input matched null marker */ + input_len = end_ptr - start_ptr; + if (!saw_quote && input_len == cstate->opts.null_print_len && + strncmp(start_ptr, cstate->opts.null_print, input_len) == 0) + cstate->raw_fields[fieldno] = NULL; + + fieldno++; + /* Done if we hit EOL instead of a delim */ + if (!found_delim) + break; + } + + /* Clean up state of attribute_buf */ + output_ptr--; + Assert(*output_ptr == '\0'); + cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data); + + return fieldno; +} + + +/* + * Read a binary attribute: a 4-byte (int32) length word followed by that + * many bytes of data. A length word of -1 denotes a NULL field, in which + * case the type's receive function is called with a NULL buffer and + * *isnull is set. Otherwise the field bytes are collected in + * attribute_buf and passed to the column type's binary input (receive) + * function; it is an error if the converter does not consume the whole + * buffer. + */ +static Datum +CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, + Oid typioparam, int32 typmod, + bool *isnull) +{ + int32 fld_size; + Datum result; + + if (!CopyGetInt32(cstate, &fld_size)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unexpected EOF in COPY data"))); + if (fld_size == -1) + { + *isnull = true; + return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod); + } + if (fld_size < 0) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("invalid field size"))); + + /* reset attribute_buf to empty, and load raw data in it */ + resetStringInfo(&cstate->attribute_buf); + + enlargeStringInfo(&cstate->attribute_buf, fld_size); + if (CopyReadBinaryData(cstate, cstate->attribute_buf.data, + fld_size) != fld_size) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("unexpected EOF in COPY data"))); + + cstate->attribute_buf.len = fld_size; + cstate->attribute_buf.data[fld_size] = '\0'; /* keep StringInfo data NUL-terminated */ + + /* Call the column type's binary input converter */ + result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf, + typioparam, typmod); + + /* Trouble if it didn't eat the whole buffer */ + if (cstate->attribute_buf.cursor != cstate->attribute_buf.len) + 
ereport(ERROR, + (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), + errmsg("incorrect binary data format"))); + + *isnull = false; + return result; +} diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c new file mode 100644 index 0000000..73e286f --- /dev/null +++ b/src/backend/commands/copyto.c @@ -0,0 +1,1310 @@ +/*------------------------------------------------------------------------- + * + * copyto.c + * COPY
TO file/program/client + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/copyto.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <ctype.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "commands/copy.h" +#include "commands/progress.h" +#include "executor/execdesc.h" +#include "executor/executor.h" +#include "executor/tuptable.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/fd.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + +/* + * Represents the different dest cases we need to worry about at + * the bottom level + */ +typedef enum CopyDest +{ + COPY_FILE, /* to file (or a piped program) */ + COPY_FRONTEND, /* to frontend */ +} CopyDest; + +/* + * This struct contains all the state variables used throughout a COPY TO + * operation. + * + * Multi-byte encodings: all supported client-side encodings encode multi-byte + * characters by having the first byte's high bit set. Subsequent bytes of the + * character can have the high bit not set. When scanning data in such an + * encoding to look for a match to a single-byte (ie ASCII) character, we must + * use the full pg_encoding_mblen() machinery to skip over multibyte + * characters, else we might find a false match to a trailing byte. 
In + * supported server encodings, there is no possibility of a false match, and + * it's faster to make useless comparisons to trailing bytes than it is to + * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true + * when we have to do it the hard way. + */ +typedef struct CopyToStateData +{ + /* low-level state data */ + CopyDest copy_dest; /* type of copy source/destination */ + FILE *copy_file; /* used if copy_dest == COPY_FILE */ + StringInfo fe_msgbuf; /* used for all dests during COPY TO */ + + int file_encoding; /* file or remote side's character encoding */ + bool need_transcoding; /* file encoding diff from server? */ + bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ + + /* parameters from the COPY command */ + Relation rel; /* relation to copy to */ + QueryDesc *queryDesc; /* executable query to copy from */ + List *attnumlist; /* integer list of attnums to copy */ + char *filename; /* filename, or NULL for STDOUT */ + bool is_program; /* is 'filename' a program to popen? 
*/ + + CopyFormatOptions opts; + Node *whereClause; /* WHERE condition (or NULL) */ + + /* + * Working state + */ + MemoryContext copycontext; /* per-copy execution context */ + + FmgrInfo *out_functions; /* lookup info for output functions */ + MemoryContext rowcontext; /* per-row evaluation context */ + uint64 bytes_processed; /* number of bytes processed so far */ +} CopyToStateData; + +/* DestReceiver for COPY (query) TO */ +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + CopyToState cstate; /* CopyToStateData for the command */ + uint64 processed; /* # of tuples processed */ +} DR_copy; + +/* NOTE: there's a copy of this in copyfromparse.c */ +static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; + + +/* non-export function prototypes */ +static void EndCopy(CopyToState cstate); +static void ClosePipeToProgram(CopyToState cstate); +static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot); +static void CopyAttributeOutText(CopyToState cstate, const char *string); +static void CopyAttributeOutCSV(CopyToState cstate, const char *string, + bool use_quote, bool single_attr); + +/* Low-level communications functions */ +static void SendCopyBegin(CopyToState cstate); +static void SendCopyEnd(CopyToState cstate); +static void CopySendData(CopyToState cstate, const void *databuf, int datasize); +static void CopySendString(CopyToState cstate, const char *str); +static void CopySendChar(CopyToState cstate, char c); +static void CopySendEndOfRow(CopyToState cstate); +static void CopySendInt32(CopyToState cstate, int32 val); +static void CopySendInt16(CopyToState cstate, int16 val); + + +/* + * Send copy start/stop messages for frontend copies. These have changed + * in past protocol redesigns. + */ +static void +SendCopyBegin(CopyToState cstate) +{ + StringInfoData buf; + int natts = list_length(cstate->attnumlist); + int16 format = (cstate->opts.binary ? 
1 : 0); + int i; + + pq_beginmessage(&buf, 'H'); + pq_sendbyte(&buf, format); /* overall format */ + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + pq_endmessage(&buf); + cstate->copy_dest = COPY_FRONTEND; +} + +static void +SendCopyEnd(CopyToState cstate) +{ + /* Shouldn't have any unsent data */ + Assert(cstate->fe_msgbuf->len == 0); + /* Send Copy Done message */ + pq_putemptymessage('c'); +} + +/*---------- + * CopySendData sends output data to the destination (file or frontend) + * CopySendString does the same for null-terminated strings + * CopySendChar does the same for single characters + * CopySendEndOfRow does the appropriate thing at end of each data row + * (data is not actually flushed except by CopySendEndOfRow) + * + * NB: no data conversion is applied by these functions + *---------- + */ +static void +CopySendData(CopyToState cstate, const void *databuf, int datasize) +{ + appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize); +} + +static void +CopySendString(CopyToState cstate, const char *str) +{ + appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str)); +} + +static void +CopySendChar(CopyToState cstate, char c) +{ + appendStringInfoCharMacro(cstate->fe_msgbuf, c); +} + +static void +CopySendEndOfRow(CopyToState cstate) +{ + StringInfo fe_msgbuf = cstate->fe_msgbuf; + + switch (cstate->copy_dest) + { + case COPY_FILE: + if (!cstate->opts.binary) + { + /* Default line termination depends on platform */ +#ifndef WIN32 + CopySendChar(cstate, '\n'); +#else + CopySendString(cstate, "\r\n"); +#endif + } + + if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1, + cstate->copy_file) != 1 || + ferror(cstate->copy_file)) + { + if (cstate->is_program) + { + if (errno == EPIPE) + { + /* + * The pipe will be closed automatically on error at + * the end of transaction, but we might get a better + * error message from the subprocess' exit code than + * just "Broken Pipe" + */ + 
ClosePipeToProgram(cstate); + + /* + * If ClosePipeToProgram() didn't throw an error, the + * program terminated normally, but closed the pipe + * first. Restore errno, and throw an error. + */ + errno = EPIPE; + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY program: %m"))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY file: %m"))); + } + break; + case COPY_FRONTEND: + /* The FE/BE protocol uses \n as newline for all platforms */ + if (!cstate->opts.binary) + CopySendChar(cstate, '\n'); + + /* Dump the accumulated row as one CopyData message */ + (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len); + break; + } + + /* Update the progress */ + cstate->bytes_processed += fe_msgbuf->len; + pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed); + + resetStringInfo(fe_msgbuf); +} + +/* + * These functions do apply some data conversion + */ + +/* + * CopySendInt32 sends an int32 in network byte order + */ +static inline void +CopySendInt32(CopyToState cstate, int32 val) +{ + uint32 buf; + + buf = pg_hton32((uint32) val); + CopySendData(cstate, &buf, sizeof(buf)); +} + +/* + * CopySendInt16 sends an int16 in network byte order + */ +static inline void +CopySendInt16(CopyToState cstate, int16 val) +{ + uint16 buf; + + buf = pg_hton16((uint16) val); + CopySendData(cstate, &buf, sizeof(buf)); +} + +/* + * Closes the pipe to an external program, checking the pclose() return code. 
+ */ +static void +ClosePipeToProgram(CopyToState cstate) +{ + int pclose_rc; + + Assert(cstate->is_program); + + pclose_rc = ClosePipeStream(cstate->copy_file); + if (pclose_rc == -1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close pipe to external command: %m"))); + else if (pclose_rc != 0) + { + ereport(ERROR, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg("program \"%s\" failed", + cstate->filename), + errdetail_internal("%s", wait_result_to_str(pclose_rc)))); + } +} + +/* + * Release resources allocated in a cstate for COPY TO/FROM. + */ +static void +EndCopy(CopyToState cstate) +{ + if (cstate->is_program) + { + ClosePipeToProgram(cstate); + } + else + { + if (cstate->filename != NULL && FreeFile(cstate->copy_file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + cstate->filename))); + } + + pgstat_progress_end_command(); + + MemoryContextDelete(cstate->copycontext); + pfree(cstate); +} + +/* + * Setup CopyToState to read tuples from a table or a query for COPY TO. + */ +CopyToState +BeginCopyTo(ParseState *pstate, + Relation rel, + RawStmt *raw_query, + Oid queryRelId, + const char *filename, + bool is_program, + List *attnamelist, + List *options) +{ + CopyToState cstate; + bool pipe = (filename == NULL); + TupleDesc tupDesc; + int num_phys_attrs; + MemoryContext oldcontext; + const int progress_cols[] = { + PROGRESS_COPY_COMMAND, + PROGRESS_COPY_TYPE + }; + int64 progress_vals[] = { + PROGRESS_COPY_COMMAND_TO, + 0 + }; + + if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION) + { + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from view \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) 
TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from materialized view \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from foreign table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else if (rel->rd_rel->relkind == RELKIND_SEQUENCE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from sequence \"%s\"", + RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from partitioned table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from non-table relation \"%s\"", + RelationGetRelationName(rel)))); + } + + + /* Allocate workspace and zero all fields */ + cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData)); + + /* + * We allocate everything used by a cstate in a new memory context. This + * avoids memory leaks during repeated use of COPY in a query. 
+ */ + cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_SIZES); + + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + + /* Extract options from the statement node tree */ + ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options); + + /* Process the source/target relation or query */ + if (rel) + { + Assert(!raw_query); + + cstate->rel = rel; + + tupDesc = RelationGetDescr(cstate->rel); + } + else + { + List *rewritten; + Query *query; + PlannedStmt *plan; + DestReceiver *dest; + + cstate->rel = NULL; + + /* + * Run parse analysis and rewrite. Note this also acquires sufficient + * locks on the source table(s). + */ + rewritten = pg_analyze_and_rewrite_fixedparams(raw_query, + pstate->p_sourcetext, NULL, 0, + NULL); + + /* check that we got back something we can work with */ + if (rewritten == NIL) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DO INSTEAD NOTHING rules are not supported for COPY"))); + } + else if (list_length(rewritten) > 1) + { + ListCell *lc; + + /* examine queries to determine which error message to issue */ + foreach(lc, rewritten) + { + Query *q = lfirst_node(Query, lc); + + if (q->querySource == QSRC_QUAL_INSTEAD_RULE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conditional DO INSTEAD rules are not supported for COPY"))); + if (q->querySource == QSRC_NON_INSTEAD_RULE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DO ALSO rules are not supported for the COPY"))); + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multi-statement DO INSTEAD rules are not supported for COPY"))); + } + + query = linitial_node(Query, rewritten); + + /* The grammar allows SELECT INTO, but we don't support that */ + if (query->utilityStmt != NULL && + IsA(query->utilityStmt, CreateTableAsStmt)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY (SELECT INTO) is not 
supported"))); + + Assert(query->utilityStmt == NULL); + + /* + * Similarly the grammar doesn't enforce the presence of a RETURNING + * clause, but this is required here. + */ + if (query->commandType != CMD_SELECT && + query->returningList == NIL) + { + Assert(query->commandType == CMD_INSERT || + query->commandType == CMD_UPDATE || + query->commandType == CMD_DELETE); + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY query must have a RETURNING clause"))); + } + + /* plan the query */ + plan = pg_plan_query(query, pstate->p_sourcetext, + CURSOR_OPT_PARALLEL_OK, NULL); + + /* + * With row-level security and a user using "COPY relation TO", we + * have to convert the "COPY relation TO" to a query-based COPY (eg: + * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to + * add in any RLS clauses. + * + * When this happens, we are passed in the relid of the originally + * found relation (which we have locked). As the planner will look up + * the relation again, we double-check here to make sure it found the + * same one that we have locked. + */ + if (queryRelId != InvalidOid) + { + /* + * Note that with RLS involved there may be multiple relations, + * and while the one we need is almost certainly first, we don't + * make any guarantees of that in the planner, so check the whole + * list and make sure we find the original relation. + */ + if (!list_member_oid(plan->relationOids, queryRelId)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("relation referenced by COPY statement has changed"))); + } + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. 
+ */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create dest receiver for COPY OUT */ + dest = CreateDestReceiver(DestCopyOut); + ((DR_copy *) dest)->cstate = cstate; + + /* Create a QueryDesc requesting no output */ + cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + GetActiveSnapshot(), + InvalidSnapshot, + dest, NULL, NULL, 0); + + /* + * Call ExecutorStart to prepare the plan for execution. + * + * ExecutorStart computes a result tupdesc for us + */ + ExecutorStart(cstate->queryDesc, 0); + + tupDesc = cstate->queryDesc->tupDesc; + } + + /* Generate or convert list of attributes to process */ + cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist); + + num_phys_attrs = tupDesc->natts; + + /* Convert FORCE_QUOTE name list to per-column flags, check validity */ + cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_quote_all) + { + int i; + + for (i = 0; i < num_phys_attrs; i++) + cstate->opts.force_quote_flags[i] = true; + } + else if (cstate->opts.force_quote) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_QUOTE column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_quote_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NOT_NULL name list to per-column flags, check validity */ + cstate->opts.force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_notnull) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_notnull); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + 
Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NOT_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_notnull_flags[attnum - 1] = true; + } + } + + /* Convert FORCE_NULL name list to per-column flags, check validity */ + cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); + if (cstate->opts.force_null) + { + List *attnums; + ListCell *cur; + + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_null); + + foreach(cur, attnums) + { + int attnum = lfirst_int(cur); + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (!list_member_int(cstate->attnumlist, attnum)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("FORCE_NULL column \"%s\" not referenced by COPY", + NameStr(attr->attname)))); + cstate->opts.force_null_flags[attnum - 1] = true; + } + } + + /* Use client encoding when ENCODING option is not specified. */ + if (cstate->opts.file_encoding < 0) + cstate->file_encoding = pg_get_client_encoding(); + else + cstate->file_encoding = cstate->opts.file_encoding; + + /* + * Set up encoding conversion info. Even if the file and server encodings + * are the same, we must apply pg_any_to_server() to validate data in + * multibyte encodings. 
+ */ + cstate->need_transcoding = + (cstate->file_encoding != GetDatabaseEncoding() || + pg_database_encoding_max_length() > 1); + /* See Multibyte encoding comment above */ + cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding); + + cstate->copy_dest = COPY_FILE; /* default */ + + if (pipe) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PIPE; + + Assert(!is_program); /* the grammar does not allow this */ + if (whereToSendOutput != DestRemote) + cstate->copy_file = stdout; + } + else + { + cstate->filename = pstrdup(filename); + cstate->is_program = is_program; + + if (is_program) + { + progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM; + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + mode_t oumask; /* Pre-existing umask value */ + struct stat st; + + progress_vals[1] = PROGRESS_COPY_TYPE_FILE; + + /* + * Prevent write to relative path ... too easy to shoot oneself in + * the foot by overwriting a database file ... + */ + if (!is_absolute_path(filename)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("relative path not allowed for COPY to file"))); + + oumask = umask(S_IWGRP | S_IWOTH); + PG_TRY(); + { + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W); + } + PG_FINALLY(); + { + umask(oumask); + } + PG_END_TRY(); + if (cstate->copy_file == NULL) + { + /* copy errno because ereport subfunctions might change it */ + int save_errno = errno; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + cstate->filename), + (save_errno == ENOENT || save_errno == EACCES) ? + errhint("COPY TO instructs the PostgreSQL server process to write a file. 
" + "You may want a client-side facility such as psql's \\copy.") : 0)); + } + + if (fstat(fileno(cstate->copy_file), &st)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + cstate->filename))); + + if (S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a directory", cstate->filename))); + } + } + + /* initialize progress */ + pgstat_progress_start_command(PROGRESS_COMMAND_COPY, + cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid); + pgstat_progress_update_multi_param(2, progress_cols, progress_vals); + + cstate->bytes_processed = 0; + + MemoryContextSwitchTo(oldcontext); + + return cstate; +} + +/* + * Clean up storage and release resources for COPY TO. + */ +void +EndCopyTo(CopyToState cstate) +{ + if (cstate->queryDesc != NULL) + { + /* Close down the query and free resources. */ + ExecutorFinish(cstate->queryDesc); + ExecutorEnd(cstate->queryDesc); + FreeQueryDesc(cstate->queryDesc); + PopActiveSnapshot(); + } + + /* Clean up storage */ + EndCopy(cstate); +} + +/* + * Copy from relation or query TO file. + */ +uint64 +DoCopyTo(CopyToState cstate) +{ + bool pipe = (cstate->filename == NULL); + bool fe_copy = (pipe && whereToSendOutput == DestRemote); + TupleDesc tupDesc; + int num_phys_attrs; + ListCell *cur; + uint64 processed; + + if (fe_copy) + SendCopyBegin(cstate); + + if (cstate->rel) + tupDesc = RelationGetDescr(cstate->rel); + else + tupDesc = cstate->queryDesc->tupDesc; + num_phys_attrs = tupDesc->natts; + cstate->opts.null_print_client = cstate->opts.null_print; /* default */ + + /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */ + cstate->fe_msgbuf = makeStringInfo(); + + /* Get info about the columns we need to process. 
*/ + cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + Oid out_func_oid; + bool isvarlena; + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + if (cstate->opts.binary) + getTypeBinaryOutputInfo(attr->atttypid, + &out_func_oid, + &isvarlena); + else + getTypeOutputInfo(attr->atttypid, + &out_func_oid, + &isvarlena); + fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]); + } + + /* + * Create a temporary memory context that we can reset once per row to + * recover palloc'd memory. This avoids any problems with leaks inside + * datatype output routines, and should be faster than retail pfree's + * anyway. (We don't need a whole econtext as CopyFrom does.) + */ + cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY TO", + ALLOCSET_DEFAULT_SIZES); + + if (cstate->opts.binary) + { + /* Generate header for a binary copy */ + int32 tmp; + + /* Signature */ + CopySendData(cstate, BinarySignature, 11); + /* Flags field */ + tmp = 0; + CopySendInt32(cstate, tmp); + /* No header extension */ + tmp = 0; + CopySendInt32(cstate, tmp); + } + else + { + /* + * For non-binary copy, we need to convert null_print to file + * encoding, because it will be sent directly with CopySendString. 
+ */ + if (cstate->need_transcoding) + cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print, + cstate->opts.null_print_len, + cstate->file_encoding); + + /* if a header has been requested send the line */ + if (cstate->opts.header_line) + { + bool hdr_delim = false; + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + char *colname; + + if (hdr_delim) + CopySendChar(cstate, cstate->opts.delim[0]); + hdr_delim = true; + + colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname); + + if (cstate->opts.csv_mode) + CopyAttributeOutCSV(cstate, colname, false, + list_length(cstate->attnumlist) == 1); + else + CopyAttributeOutText(cstate, colname); + } + + CopySendEndOfRow(cstate); + } + } + + if (cstate->rel) + { + TupleTableSlot *slot; + TableScanDesc scandesc; + + scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + slot = table_slot_create(cstate->rel, NULL); + + processed = 0; + while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) + { + CHECK_FOR_INTERRUPTS(); + + /* Deconstruct the tuple ... */ + slot_getallattrs(slot); + + /* Format and send the data */ + CopyOneRowTo(cstate, slot); + + /* + * Increment the number of processed tuples, and report the + * progress. + */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++processed); + } + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scandesc); + } + else + { + /* run the plan --- the dest receiver will send tuples */ + ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L, true); + processed = ((DR_copy *) cstate->queryDesc->dest)->processed; + } + + if (cstate->opts.binary) + { + /* Generate trailer for a binary copy */ + CopySendInt16(cstate, -1); + /* Need to flush out the trailer */ + CopySendEndOfRow(cstate); + } + + MemoryContextDelete(cstate->rowcontext); + + if (fe_copy) + SendCopyEnd(cstate); + + return processed; +} + +/* + * Emit one row during DoCopyTo(). 
+ */ +static void +CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) +{ + bool need_delim = false; + FmgrInfo *out_functions = cstate->out_functions; + MemoryContext oldcontext; + ListCell *cur; + char *string; + + MemoryContextReset(cstate->rowcontext); + oldcontext = MemoryContextSwitchTo(cstate->rowcontext); + + if (cstate->opts.binary) + { + /* Binary per-tuple header */ + CopySendInt16(cstate, list_length(cstate->attnumlist)); + } + + /* Make sure the tuple is fully deconstructed */ + slot_getallattrs(slot); + + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + Datum value = slot->tts_values[attnum - 1]; + bool isnull = slot->tts_isnull[attnum - 1]; + + if (!cstate->opts.binary) + { + if (need_delim) + CopySendChar(cstate, cstate->opts.delim[0]); + need_delim = true; + } + + if (isnull) + { + if (!cstate->opts.binary) + CopySendString(cstate, cstate->opts.null_print_client); + else + CopySendInt32(cstate, -1); + } + else + { + if (!cstate->opts.binary) + { + string = OutputFunctionCall(&out_functions[attnum - 1], + value); + if (cstate->opts.csv_mode) + CopyAttributeOutCSV(cstate, string, + cstate->opts.force_quote_flags[attnum - 1], + list_length(cstate->attnumlist) == 1); + else + CopyAttributeOutText(cstate, string); + } + else + { + bytea *outputbytes; + + outputbytes = SendFunctionCall(&out_functions[attnum - 1], + value); + CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ); + CopySendData(cstate, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } + } + } + + CopySendEndOfRow(cstate); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Send text representation of one attribute, with conversion and escaping + */ +#define DUMPSOFAR() \ + do { \ + if (ptr > start) \ + CopySendData(cstate, start, ptr - start); \ + } while (0) + +static void +CopyAttributeOutText(CopyToState cstate, const char *string) +{ + const char *ptr; + const char *start; + char c; + char delimc = cstate->opts.delim[0]; + + if 
(cstate->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + else + ptr = string; + + /* + * We have to grovel through the string searching for control characters + * and instances of the delimiter character. In most cases, though, these + * are infrequent. To avoid overhead from calling CopySendData once per + * character, we dump out all characters between escaped characters in a + * single call. The loop invariant is that the data from "start" to "ptr" + * can be sent literally, but hasn't yet been. + * + * We can skip pg_encoding_mblen() overhead when encoding is safe, because + * in valid backend encodings, extra bytes of a multibyte character never + * look like ASCII. This loop is sufficiently performance-critical that + * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out + * of the normal safe-encoding path. + */ + if (cstate->encoding_embeds_ascii) + { + start = ptr; + while ((c = *ptr) != '\0') + { + if ((unsigned char) c < (unsigned char) 0x20) + { + /* + * \r and \n must be escaped, the others are traditional. We + * prefer to dump these using the C-like notation, rather than + * a backslash and the literal character, because it makes the + * dump file a bit more proof against Microsoftish data + * mangling. 
+ */ + switch (c) + { + case '\b': + c = 'b'; + break; + case '\f': + c = 'f'; + break; + case '\n': + c = 'n'; + break; + case '\r': + c = 'r'; + break; + case '\t': + c = 't'; + break; + case '\v': + c = 'v'; + break; + default: + /* If it's the delimiter, must backslash it */ + if (c == delimc) + break; + /* All ASCII control chars are length 1 */ + ptr++; + continue; /* fall to end of loop */ + } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ + } + else if (c == '\\' || c == delimc) + { + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if (IS_HIGHBIT_SET(c)) + ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + else + ptr++; + } + } + else + { + start = ptr; + while ((c = *ptr) != '\0') + { + if ((unsigned char) c < (unsigned char) 0x20) + { + /* + * \r and \n must be escaped, the others are traditional. We + * prefer to dump these using the C-like notation, rather than + * a backslash and the literal character, because it makes the + * dump file a bit more proof against Microsoftish data + * mangling. 
+ */ + switch (c) + { + case '\b': + c = 'b'; + break; + case '\f': + c = 'f'; + break; + case '\n': + c = 'n'; + break; + case '\r': + c = 'r'; + break; + case '\t': + c = 't'; + break; + case '\v': + c = 'v'; + break; + default: + /* If it's the delimiter, must backslash it */ + if (c == delimc) + break; + /* All ASCII control chars are length 1 */ + ptr++; + continue; /* fall to end of loop */ + } + /* if we get here, we need to convert the control char */ + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + CopySendChar(cstate, c); + start = ++ptr; /* do not include char in next run */ + } + else if (c == '\\' || c == delimc) + { + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else + ptr++; + } + } + + DUMPSOFAR(); +} + +/* + * Send text representation of one attribute, with conversion and + * CSV-style escaping + */ +static void +CopyAttributeOutCSV(CopyToState cstate, const char *string, + bool use_quote, bool single_attr) +{ + const char *ptr; + const char *start; + char c; + char delimc = cstate->opts.delim[0]; + char quotec = cstate->opts.quote[0]; + char escapec = cstate->opts.escape[0]; + + /* force quoting if it matches null_print (before conversion!) */ + if (!use_quote && strcmp(string, cstate->opts.null_print) == 0) + use_quote = true; + + if (cstate->need_transcoding) + ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding); + else + ptr = string; + + /* + * Make a preliminary pass to discover if it needs quoting + */ + if (!use_quote) + { + /* + * Because '\.' can be a data value, quote it if it appears alone on a + * line so it is not interpreted as the end-of-data marker. 
+ */ + if (single_attr && strcmp(ptr, "\\.") == 0) + use_quote = true; + else + { + const char *tptr = ptr; + + while ((c = *tptr) != '\0') + { + if (c == delimc || c == quotec || c == '\n' || c == '\r') + { + use_quote = true; + break; + } + if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) + tptr += pg_encoding_mblen(cstate->file_encoding, tptr); + else + tptr++; + } + } + } + + if (use_quote) + { + CopySendChar(cstate, quotec); + + /* + * We adopt the same optimization strategy as in CopyAttributeOutText + */ + start = ptr; + while ((c = *ptr) != '\0') + { + if (c == quotec || c == escapec) + { + DUMPSOFAR(); + CopySendChar(cstate, escapec); + start = ptr; /* we include char in next run */ + } + if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) + ptr += pg_encoding_mblen(cstate->file_encoding, ptr); + else + ptr++; + } + DUMPSOFAR(); + + CopySendChar(cstate, quotec); + } + else + { + /* If it doesn't need quoting, we can just dump it as-is */ + CopySendString(cstate, ptr); + } +} + +/* + * copy_dest_startup --- executor startup + */ +static void +copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + /* no-op */ +} + +/* + * copy_dest_receive --- receive one tuple + */ +static bool +copy_dest_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_copy *myState = (DR_copy *) self; + CopyToState cstate = myState->cstate; + + /* Send the data */ + CopyOneRowTo(cstate, slot); + + /* Increment the number of processed tuples, and report the progress */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++myState->processed); + + return true; +} + +/* + * copy_dest_shutdown --- executor end + */ +static void +copy_dest_shutdown(DestReceiver *self) +{ + /* no-op */ +} + +/* + * copy_dest_destroy --- release DestReceiver object + */ +static void +copy_dest_destroy(DestReceiver *self) +{ + pfree(self); +} + +/* + * CreateCopyDestReceiver -- create a suitable DestReceiver object + */ +DestReceiver * 
+CreateCopyDestReceiver(void) +{ + DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy)); + + self->pub.receiveSlot = copy_dest_receive; + self->pub.rStartup = copy_dest_startup; + self->pub.rShutdown = copy_dest_shutdown; + self->pub.rDestroy = copy_dest_destroy; + self->pub.mydest = DestCopyOut; + + self->cstate = NULL; /* will be set later */ + self->processed = 0; + + return (DestReceiver *) self; +} diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c new file mode 100644 index 0000000..152c29b --- /dev/null +++ b/src/backend/commands/createas.c @@ -0,0 +1,637 @@ +/*------------------------------------------------------------------------- + * + * createas.c + * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. + * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, + * we implement that here, too. + * + * We implement this by diverting the query's normal output to a + * specialized DestReceiver type. + * + * Formerly, CTAS was implemented as a variant of SELECT, which led + * to assorted legacy behaviors that we still try to preserve, notably that + * we must return a tuples-processed count in the QueryCompletion. (We no + * longer do that for CTAS ... WITH NO DATA, however.) 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/createas.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "catalog/toasting.h" +#include "commands/createas.h" +#include "commands/matview.h" +#include "commands/prepare.h" +#include "commands/tablecmds.h" +#include "commands/view.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "rewrite/rewriteHandler.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/rls.h" +#include "utils/snapmgr.h" + +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + IntoClause *into; /* target relation specification */ + /* These fields are filled by intorel_startup: */ + Relation rel; /* relation to write to */ + ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_intorel; + +/* utility functions for CTAS definition creation */ +static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); +static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); + +/* DestReceiver routines for collecting data */ +static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); +static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); 
+static void intorel_shutdown(DestReceiver *self); +static void intorel_destroy(DestReceiver *self); + + +/* + * create_ctas_internal + * + * Internal utility used for the creation of the definition of a relation + * created via CREATE TABLE AS or a materialized view. Caller needs to + * provide a list of attributes (ColumnDef nodes). + */ +static ObjectAddress +create_ctas_internal(List *attrList, IntoClause *into) +{ + CreateStmt *create = makeNode(CreateStmt); + bool is_matview; + char relkind; + Datum toast_options; + static char *validnsps[] = HEAP_RELOPT_NAMESPACES; + ObjectAddress intoRelationAddr; + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; + + /* + * Create the target relation by faking up a CREATE TABLE parsetree and + * passing it to DefineRelation. + */ + create->relation = into->rel; + create->tableElts = attrList; + create->inhRelations = NIL; + create->ofTypename = NULL; + create->constraints = NIL; + create->options = into->options; + create->oncommit = into->onCommit; + create->tablespacename = into->tableSpaceName; + create->if_not_exists = false; + create->accessMethod = into->accessMethod; + + /* + * Create the relation. (This will error out if there's an existing view, + * so we don't need more code to complain if "replace" is false.) + */ + intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); + + /* + * If necessary, create a TOAST table for the target table. Note that + * NewRelationCreateToastTable ends with CommandCounterIncrement(), so + * that the TOAST table will be visible for insertion. 
+ */ + CommandCounterIncrement(); + + /* parse and validate reloptions for the toast table */ + toast_options = transformRelOptions((Datum) 0, + create->options, + "toast", + validnsps, + true, false); + + (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); + + NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); + + /* Create the "view" part of a materialized view. */ + if (is_matview) + { + /* StoreViewQuery scribbles on tree, so make a copy */ + Query *query = (Query *) copyObject(into->viewQuery); + + StoreViewQuery(intoRelationAddr.objectId, query, false); + CommandCounterIncrement(); + } + + return intoRelationAddr; +} + + +/* + * create_ctas_nodata + * + * Create CTAS or materialized view when WITH NO DATA is used, starting from + * the targetlist of the SELECT or view definition. + */ +static ObjectAddress +create_ctas_nodata(List *tlist, IntoClause *into) +{ + List *attrList; + ListCell *t, + *lc; + + /* + * Build list of ColumnDefs from non-junk elements of the tlist. If a + * column name list was specified in CREATE TABLE AS, override the column + * names in the query. (Too few column names are OK, too many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + foreach(t, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(t); + + if (!tle->resjunk) + { + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = tle->resname; + + col = makeColumnDef(colname, + exprType((Node *) tle->expr), + exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr)); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must + * check this here because DefineRelation would adopt the type's + * default collation rather than complaining.) 
+ */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* Create the relation definition using the ColumnDef list */ + return create_ctas_internal(attrList, into); +} + + +/* + * ExecCreateTableAs -- execute a CREATE TABLE AS command + */ +ObjectAddress +ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, + ParamListInfo params, QueryEnvironment *queryEnv, + QueryCompletion *qc) +{ + Query *query = castNode(Query, stmt->query); + IntoClause *into = stmt->into; + bool is_matview = (into->viewQuery != NULL); + DestReceiver *dest; + Oid save_userid = InvalidOid; + int save_sec_context = 0; + int save_nestlevel = 0; + ObjectAddress address; + List *rewritten; + PlannedStmt *plan; + QueryDesc *queryDesc; + + /* Check if the relation exists or not */ + if (CreateTableAsRelExists(stmt)) + return InvalidObjectAddress; + + /* + * Create the tuple receiver object and insert info it will need + */ + dest = CreateIntoRelDestReceiver(into); + + /* + * The contained Query could be a SELECT, or an EXECUTE utility command. + * If the latter, we just pass it off to ExecuteQuery. 
+ */ + if (query->commandType == CMD_UTILITY && + IsA(query->utilityStmt, ExecuteStmt)) + { + ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); + + Assert(!is_matview); /* excluded by syntax */ + ExecuteQuery(pstate, estmt, into, params, dest, qc); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + return address; + } + Assert(query->commandType == CMD_SELECT); + + /* + * For materialized views, lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. This is + * not necessary for security, but this keeps the behavior similar to + * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized + * view not possible to refresh. + */ + if (is_matview) + { + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(save_userid, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + + if (into->skipData) + { + /* + * If WITH NO DATA was specified, do not go through the rewriter, + * planner and executor. Just define the relation using a code path + * similar to CREATE VIEW. This avoids dump/restore problems stemming + * from running the planner before all dependencies are set up. + */ + address = create_ctas_nodata(query->targetList, into); + } + else + { + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query + * either came straight from the parser, or suitable locks were + * acquired by plancache.c. + */ + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for %s", + is_matview ? 
"CREATE MATERIALIZED VIEW" : + "CREATE TABLE AS SELECT"); + query = linitial_node(Query, rewritten); + Assert(query->commandType == CMD_SELECT); + + /* plan the query */ + plan = pg_plan_query(query, pstate->p_sourcetext, + CURSOR_OPT_PARALLEL_OK, params); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. (This could only + * matter if the planner executed an allegedly-stable function that + * changed the database contents, but let's do it anyway to be + * parallel to the EXPLAIN code path.) + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create a QueryDesc, redirecting output to our tuple receiver */ + queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, queryEnv, 0); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, GetIntoRelEFlags(into)); + + /* run the plan to completion */ + ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); + + /* save the rowcount if we're given a qc to fill */ + if (qc) + SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + /* and clean up */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + } + + if (is_matview) + { + /* Roll back any GUC changes */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + } + + return address; +} + +/* + * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS + * + * This is exported because EXPLAIN and PREPARE need it too. 
(Note: those + * callers still need to deal explicitly with the skipData flag; since they + * use different methods for suppressing execution, it doesn't seem worth + * trying to encapsulate that part.) + */ +int +GetIntoRelEFlags(IntoClause *intoClause) +{ + int flags = 0; + + if (intoClause->skipData) + flags |= EXEC_FLAG_WITH_NO_DATA; + + return flags; +} + +/* + * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt + * + * Utility wrapper checking if the relation pending for creation in this + * CreateTableAsStmt query already exists or not. Returns true if the + * relation exists, otherwise false. + */ +bool +CreateTableAsRelExists(CreateTableAsStmt *ctas) +{ + Oid nspid; + Oid oldrelid; + ObjectAddress address; + IntoClause *into = ctas->into; + + nspid = RangeVarGetCreationNamespace(into->rel); + + oldrelid = get_relname_relid(into->rel->relname, nspid); + if (OidIsValid(oldrelid)) + { + if (!ctas->if_not_exists) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + into->rel->relname))); + + /* + * The relation exists and IF NOT EXISTS has been specified. + * + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. + */ + ObjectAddressSet(address, RelationRelationId, oldrelid); + checkMembershipInCurrentExtension(&address); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists, skipping", + into->rel->relname))); + return true; + } + + /* Relation does not exist, it can be created */ + return false; +} + +/* + * CreateIntoRelDestReceiver -- create a suitable DestReceiver object + * + * intoClause will be NULL if called from CreateDestReceiver(), in which + * case it has to be provided later. However, it is convenient to allow + * self->into to be filled in immediately for other callers. 
+ */ +DestReceiver * +CreateIntoRelDestReceiver(IntoClause *intoClause) +{ + DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); + + self->pub.receiveSlot = intorel_receive; + self->pub.rStartup = intorel_startup; + self->pub.rShutdown = intorel_shutdown; + self->pub.rDestroy = intorel_destroy; + self->pub.mydest = DestIntoRel; + self->into = intoClause; + /* other private fields will be set during intorel_startup */ + + return (DestReceiver *) self; +} + +/* + * intorel_startup --- executor startup + */ +static void +intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + bool is_matview; + List *attrList; + ObjectAddress intoRelationAddr; + Relation intoRelationDesc; + ListCell *lc; + int attnum; + + Assert(into != NULL); /* else somebody forgot to set it */ + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + + /* + * Build column definitions using "pre-cooked" type and collation info. If + * a column name list was specified in CREATE TABLE AS, override the + * column names derived from the query. (Too few column names are OK, too + * many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + for (attnum = 0; attnum < typeinfo->natts; attnum++) + { + Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = NameStr(attribute->attname); + + col = makeColumnDef(colname, + attribute->atttypid, + attribute->atttypmod, + attribute->attcollation); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must check + * this here because DefineRelation would adopt the type's default + * collation rather than complaining.) 
+ */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* + * Actually create the target table + */ + intoRelationAddr = create_ctas_internal(attrList, into); + + /* + * Finally we can open the target table + */ + intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock); + + /* + * Make sure the constructed table does not have RLS enabled. + * + * check_enable_rls() will ereport(ERROR) itself if the user has requested + * something invalid, and otherwise will return RLS_ENABLED if RLS should + * be enabled here. We don't actually support that currently, so throw + * our own ereport(ERROR) if that happens. + */ + if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("policies not yet implemented for this command"))); + + /* + * Tentatively mark the target as populated, if it's a matview and we're + * going to fill it; otherwise, no change needed. + */ + if (is_matview && !into->skipData) + SetMatViewPopulatedState(intoRelationDesc, true); + + /* + * Fill private fields of myState for use by later routines + */ + myState->rel = intoRelationDesc; + myState->reladdr = intoRelationAddr; + myState->output_cid = GetCurrentCommandId(true); + myState->ti_options = TABLE_INSERT_SKIP_FSM; + + /* + * If WITH NO DATA is specified, there is no need to set up the state for + * bulk inserts as there are no tuples to insert. 
+ */ + if (!into->skipData) + myState->bistate = GetBulkInsertState(); + else + myState->bistate = NULL; + + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. + */ + Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); +} + +/* + * intorel_receive --- receive one tuple + */ +static bool +intorel_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + + /* Nothing to insert if WITH NO DATA is specified. */ + if (!myState->into->skipData) + { + /* + * Note that the input slot might not be of the type of the target + * relation. That's supported by table_tuple_insert(), but slightly + * less efficient than inserting with the right slot - but the + * alternative would be to copy into a slot of the right type, which + * would not be cheap either. This also doesn't allow accessing per-AM + * data (say a tuple's xmin), but since we don't do that here... 
+ */ + table_tuple_insert(myState->rel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + } + + /* We know this is a newly created relation, so there are no indexes */ + + return true; +} + +/* + * intorel_shutdown --- executor end + */ +static void +intorel_shutdown(DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + + if (!into->skipData) + { + FreeBulkInsertState(myState->bistate); + table_finish_bulk_insert(myState->rel, myState->ti_options); + } + + /* close rel, but keep lock until commit */ + table_close(myState->rel, NoLock); + myState->rel = NULL; +} + +/* + * intorel_destroy --- release DestReceiver object + */ +static void +intorel_destroy(DestReceiver *self) +{ + pfree(self); +} diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c new file mode 100644 index 0000000..93f0c73 --- /dev/null +++ b/src/backend/commands/dbcommands.c @@ -0,0 +1,3285 @@ +/*------------------------------------------------------------------------- + * + * dbcommands.c + * Database management commands (create/drop database). + * + * Note: database creation/destruction commands use exclusive locks on + * the database objects (as expressed by LockSharedObject()) to avoid + * stepping on each others' toes. Formerly we used table-level locks + * on pg_database, but that's too coarse-grained. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/dbcommands.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogrecovery.h" +#include "access/xlogutils.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_database.h" +#include "catalog/pg_db_role_setting.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_tablespace.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "commands/dbcommands_xlog.h" +#include "commands/defrem.h" +#include "commands/seclabel.h" +#include "commands/tablespace.h" +#include "common/file_perm.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgwriter.h" +#include "replication/slot.h" +#include "storage/copydir.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lmgr.h" +#include "storage/md.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/pg_locale.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + +/* + * Create database strategy. + * + * CREATEDB_WAL_LOG will copy the database at the block level and WAL log each + * copied block. 
+ * + * CREATEDB_FILE_COPY will simply perform a file system level copy of the + * database and log a single record for each tablespace copied. To make this + * safe, it also triggers checkpoints before and after the operation. + */ +typedef enum CreateDBStrategy +{ + CREATEDB_WAL_LOG, + CREATEDB_FILE_COPY +} CreateDBStrategy; + +typedef struct +{ + Oid src_dboid; /* source (template) DB */ + Oid dest_dboid; /* DB we are trying to create */ + CreateDBStrategy strategy; /* create db strategy */ +} createdb_failure_params; + +typedef struct +{ + Oid dest_dboid; /* DB we are trying to move */ + Oid dest_tsoid; /* tablespace we are trying to move to */ +} movedb_failure_params; + +/* + * Information about a relation to be copied when creating a database. + */ +typedef struct CreateDBRelInfo +{ + RelFileNode rnode; /* physical relation identifier */ + Oid reloid; /* relation oid */ + bool permanent; /* relation is permanent or unlogged */ +} CreateDBRelInfo; + + +/* non-export function prototypes */ +static void createdb_failure_callback(int code, Datum arg); +static void movedb(const char *dbname, const char *tblspcname); +static void movedb_failure_callback(int code, Datum arg); +static bool get_db_info(const char *name, LOCKMODE lockmode, + Oid *dbIdP, Oid *ownerIdP, + int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, + TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, + Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, + char *dbLocProvider, + char **dbCollversion); +static bool have_createdb_privilege(void); +static void remove_dbtablespaces(Oid db_id); +static bool check_db_file_conflict(Oid db_id); +static int errdetail_busy_db(int notherbackends, int npreparedxacts); +static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dboid, Oid src_tsid, + Oid dst_tsid); +static List *ScanSourceDatabasePgClass(Oid srctbid, Oid srcdbid, char *srcpath); +static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, + Oid 
dbid, char *srcpath, + List *rnodelist, Snapshot snapshot); +static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, + Oid tbid, Oid dbid, + char *srcpath); +static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, + bool isRedo); +static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid, + Oid dst_tsid); +static void recovery_create_dbdir(char *path, bool only_tblspc); + +/* + * Create a new database using the WAL_LOG strategy. + * + * Each copied block is separately written to the write-ahead log. + */ +static void +CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, + Oid src_tsid, Oid dst_tsid) +{ + char *srcpath; + char *dstpath; + List *rnodelist = NULL; + ListCell *cell; + LockRelId srcrelid; + LockRelId dstrelid; + RelFileNode srcrnode; + RelFileNode dstrnode; + CreateDBRelInfo *relinfo; + + /* Get source and destination database paths. */ + srcpath = GetDatabasePath(src_dboid, src_tsid); + dstpath = GetDatabasePath(dst_dboid, dst_tsid); + + /* Create database directory and write PG_VERSION file. */ + CreateDirAndVersionFile(dstpath, dst_dboid, dst_tsid, false); + + /* Copy relmap file from source database to the destination database. */ + RelationMapCopy(dst_dboid, dst_tsid, srcpath, dstpath); + + /* Get list of relfilenodes to copy from the source database. */ + rnodelist = ScanSourceDatabasePgClass(src_tsid, src_dboid, srcpath); + Assert(rnodelist != NIL); + + /* + * Database IDs will be the same for all relations so set them before + * entering the loop. + */ + srcrelid.dbId = src_dboid; + dstrelid.dbId = dst_dboid; + + /* Loop over our list of relfilenodes and copy each one. */ + foreach(cell, rnodelist) + { + relinfo = lfirst(cell); + srcrnode = relinfo->rnode; + + /* + * If the relation is from the source db's default tablespace then we + * need to create it in the destinations db's default tablespace. 
+ * Otherwise, we need to create in the same tablespace as it is in the + * source database. + */ + if (srcrnode.spcNode == src_tsid) + dstrnode.spcNode = dst_tsid; + else + dstrnode.spcNode = srcrnode.spcNode; + + dstrnode.dbNode = dst_dboid; + dstrnode.relNode = srcrnode.relNode; + + /* + * Acquire locks on source and target relations before copying. + * + * We typically do not read relation data into shared_buffers without + * holding a relation lock. It's unclear what could go wrong if we + * skipped it in this case, because nobody can be modifying either the + * source or destination database at this point, and we have locks on + * both databases, too, but let's take the conservative route. + */ + dstrelid.relId = srcrelid.relId = relinfo->reloid; + LockRelationId(&srcrelid, AccessShareLock); + LockRelationId(&dstrelid, AccessShareLock); + + /* Copy relation storage from source to the destination. */ + CreateAndCopyRelationData(srcrnode, dstrnode, relinfo->permanent); + + /* Release the relation locks. */ + UnlockRelationId(&srcrelid, AccessShareLock); + UnlockRelationId(&dstrelid, AccessShareLock); + } + + pfree(srcpath); + pfree(dstpath); + list_free_deep(rnodelist); +} + +/* + * Scan the pg_class table in the source database to identify the relations + * that need to be copied to the destination database. + * + * This is an exception to the usual rule that cross-database access is + * not possible. We can make it work here because we know that there are no + * connections to the source database and (since there can't be prepared + * transactions touching that database) no in-doubt tuples either. This + * means that we don't need to worry about pruning removing anything from + * under us, and we don't need to be too picky about our snapshot either. + * As long as it sees all previously-committed XIDs as committed and all + * aborted XIDs as aborted, we should be fine: nothing else is possible + * here. 
+ * + * We can't rely on the relcache for anything here, because that only knows + * about the database to which we are connected, and can't handle access to + * other databases. That also means we can't rely on the heap scan + * infrastructure, which would be a bad idea anyway since it might try + * to do things like HOT pruning which we definitely can't do safely in + * a database to which we're not even connected. + */ +static List * +ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) +{ + RelFileNode rnode; + BlockNumber nblocks; + BlockNumber blkno; + Buffer buf; + Oid relfilenode; + Page page; + List *rnodelist = NIL; + LockRelId relid; + Snapshot snapshot; + SMgrRelation smgr; + BufferAccessStrategy bstrategy; + + /* Get pg_class relfilenode. */ + relfilenode = RelationMapOidToFilenodeForDatabase(srcpath, + RelationRelationId); + + /* Don't read data into shared_buffers without holding a relation lock. */ + relid.dbId = dbid; + relid.relId = RelationRelationId; + LockRelationId(&relid, AccessShareLock); + + /* Prepare a RelFileNode for the pg_class relation. */ + rnode.spcNode = tbid; + rnode.dbNode = dbid; + rnode.relNode = relfilenode; + + smgr = smgropen(rnode, InvalidBackendId); + nblocks = smgrnblocks(smgr, MAIN_FORKNUM); + smgrclose(smgr); + + /* Use a buffer access strategy since this is a bulk read operation. */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + /* + * As explained in the function header comments, we need a snapshot that + * will see all committed transactions as committed, and our transaction + * snapshot - or the active snapshot - might not be new enough for that, + * but the return value of GetLatestSnapshot() should work fine. + */ + snapshot = GetLatestSnapshot(); + + /* Process the relation block by block. 
*/ + for (blkno = 0; blkno < nblocks; blkno++) + { + CHECK_FOR_INTERRUPTS(); + + buf = ReadBufferWithoutRelcache(rnode, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy, true); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + /* Append relevant pg_class tuples for current page to rnodelist. */ + rnodelist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid, + srcpath, rnodelist, + snapshot); + + UnlockReleaseBuffer(buf); + } + + /* Release relation lock. */ + UnlockRelationId(&relid, AccessShareLock); + + return rnodelist; +} + +/* + * Scan one page of the source database's pg_class relation and add relevant + * entries to rnodelist. The return value is the updated list. + */ +static List * +ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, + char *srcpath, List *rnodelist, + Snapshot snapshot) +{ + BlockNumber blkno = BufferGetBlockNumber(buf); + OffsetNumber offnum; + OffsetNumber maxoff; + HeapTupleData tuple; + + maxoff = PageGetMaxOffsetNumber(page); + + /* Loop over offsets. */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + + itemid = PageGetItemId(page, offnum); + + /* Nothing to do if slot is empty or already dead. */ + if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) || + ItemIdIsRedirected(itemid)) + continue; + + Assert(ItemIdIsNormal(itemid)); + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + /* Initialize a HeapTupleData structure. */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationRelationId; + + /* Skip tuples that are not visible to this snapshot. 
*/ + if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) + { + CreateDBRelInfo *relinfo; + + /* + * ScanSourceDatabasePgClassTuple is in charge of constructing a + * CreateDBRelInfo object for this tuple, but can also decide that + * this tuple isn't something we need to copy. If we do need to + * copy the relation, add it to the list. + */ + relinfo = ScanSourceDatabasePgClassTuple(&tuple, tbid, dbid, + srcpath); + if (relinfo != NULL) + rnodelist = lappend(rnodelist, relinfo); + } + } + + return rnodelist; +} + +/* + * Decide whether a certain pg_class tuple represents something that + * needs to be copied from the source database to the destination database, + * and if so, construct a CreateDBRelInfo for it. + * + * Visibility checks are handled by the caller, so our job here is just + * to assess the data stored in the tuple. + */ +CreateDBRelInfo * +ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, + char *srcpath) +{ + CreateDBRelInfo *relinfo; + Form_pg_class classForm; + Oid relfilenode = InvalidOid; + + classForm = (Form_pg_class) GETSTRUCT(tuple); + + /* + * Return NULL if this object does not need to be copied. + * + * Shared objects don't need to be copied, because they are shared. + * Objects without storage can't be copied, because there's nothing to + * copy. Temporary relations don't need to be copied either, because they + * are inaccessible outside of the session that created them, which must + * be gone already, and couldn't connect to a different database if it + * still existed. autovacuum will eventually remove the pg_class entries + * as well. + */ + if (classForm->reltablespace == GLOBALTABLESPACE_OID || + !RELKIND_HAS_STORAGE(classForm->relkind) || + classForm->relpersistence == RELPERSISTENCE_TEMP) + return NULL; + + /* + * If relfilenode is valid then directly use it. Otherwise, consult the + * relmap. 
	 */
	if (OidIsValid(classForm->relfilenode))
		relfilenode = classForm->relfilenode;
	else
		relfilenode = RelationMapOidToFilenodeForDatabase(srcpath,
														  classForm->oid);

	/* We must have a valid relfilenode oid. */
	if (!OidIsValid(relfilenode))
		elog(ERROR, "relation with OID %u does not have a valid relfilenode",
			 classForm->oid);

	/* Prepare a rel info element and add it to the list. */
	relinfo = (CreateDBRelInfo *) palloc(sizeof(CreateDBRelInfo));
	/* A zero reltablespace means the database's default tablespace. */
	if (OidIsValid(classForm->reltablespace))
		relinfo->rnode.spcNode = classForm->reltablespace;
	else
		relinfo->rnode.spcNode = tbid;

	relinfo->rnode.dbNode = dbid;
	relinfo->rnode.relNode = relfilenode;
	relinfo->reloid = classForm->oid;

	/* Temporary relations were rejected above. */
	Assert(classForm->relpersistence != RELPERSISTENCE_TEMP);
	relinfo->permanent =
		(classForm->relpersistence == RELPERSISTENCE_PERMANENT) ? true : false;

	return relinfo;
}

/*
 * Create database directory and write out the PG_VERSION file in the database
 * path. If isRedo is true, it's okay for the database directory to exist
 * already.
 */
static void
CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
{
	int			fd;
	int			nbytes;
	char		versionfile[MAXPGPATH];
	char		buf[16];

	/*
	 * Prepare version data before starting a critical section.
	 *
	 * Note that we don't have to copy this from the source database; there's
	 * only one legal value.
	 */
	sprintf(buf, "%s\n", PG_MAJORVERSION);
	/* Length of the version string plus the trailing newline. */
	nbytes = strlen(PG_MAJORVERSION) + 1;

	/* If we are not in WAL replay then write the WAL. */
	if (!isRedo)
	{
		xl_dbase_create_wal_log_rec xlrec;
		XLogRecPtr	lsn;

		START_CRIT_SECTION();

		xlrec.db_id = dbid;
		xlrec.tablespace_id = tsid;

		XLogBeginInsert();
		XLogRegisterData((char *) (&xlrec),
						 sizeof(xl_dbase_create_wal_log_rec));

		lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);

		/* As always, WAL must hit the disk before the data update does. */
		XLogFlush(lsn);
	}

	/* Create database directory. */
	if (MakePGDirectory(dbpath) < 0)
	{
		/* Failure other than already exists or not in WAL replay? */
		if (errno != EEXIST || !isRedo)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not create directory \"%s\": %m", dbpath)));
	}

	/*
	 * Create PG_VERSION file in the database path. If the file already
	 * exists and we are in WAL replay then try again to open it in write
	 * mode.
	 */
	snprintf(versionfile, sizeof(versionfile), "%s/%s", dbpath, "PG_VERSION");

	/* O_EXCL first, so a pre-existing file is detected ... */
	fd = OpenTransientFile(versionfile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
	/* ... and overwritten (O_TRUNC) only during WAL replay. */
	if (fd < 0 && errno == EEXIST && isRedo)
		fd = OpenTransientFile(versionfile, O_WRONLY | O_TRUNC | PG_BINARY);

	if (fd < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", versionfile)));

	/* Write PG_MAJORVERSION in the PG_VERSION file. */
	pgstat_report_wait_start(WAIT_EVENT_VERSION_FILE_WRITE);
	errno = 0;
	if ((int) write(fd, buf, nbytes) != nbytes)
	{
		/* If write didn't set errno, assume problem is no disk space. */
		if (errno == 0)
			errno = ENOSPC;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", versionfile)));
	}
	pgstat_report_wait_end();

	/* Close the version file. */
	CloseTransientFile(fd);

	/* Critical section done. */
	if (!isRedo)
		END_CRIT_SECTION();
}

/*
 * Create a new database using the FILE_COPY strategy.
 *
 * Copy each tablespace at the filesystem level, and log a single WAL record
 * for each tablespace copied. This requires a checkpoint before and after the
 * copy, which may be expensive, but it does greatly reduce WAL generation
 * if the copied database is large.
 */
static void
CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
							Oid dst_tsid)
{
	TableScanDesc scan;
	Relation	rel;
	HeapTuple	tuple;

	/*
	 * Force a checkpoint before starting the copy.
	 * This will force all dirty
	 * buffers, including those of unlogged tables, out to disk, to ensure
	 * source database is up-to-date on disk for the copy.
	 * FlushDatabaseBuffers() would suffice for that, but we also want to
	 * process any pending unlink requests. Otherwise, if a checkpoint
	 * happened while we're copying files, a file might be deleted just when
	 * we're about to copy it, causing the lstat() call in copydir() to fail
	 * with ENOENT.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);

	/*
	 * Iterate through all tablespaces of the template database, and copy each
	 * one to the new database.
	 */
	rel = table_open(TableSpaceRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 0, NULL);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
		Oid			srctablespace = spaceform->oid;
		Oid			dsttablespace;
		char	   *srcpath;
		char	   *dstpath;
		struct stat st;

		/* No need to copy global tablespace */
		if (srctablespace == GLOBALTABLESPACE_OID)
			continue;

		srcpath = GetDatabasePath(src_dboid, srctablespace);

		/* Missing, non-directory, or empty source paths are skipped. */
		if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
			directory_is_empty(srcpath))
		{
			/* Assume we can ignore it */
			pfree(srcpath);
			continue;
		}

		/* Map the source's default tablespace to the destination's. */
		if (srctablespace == src_tsid)
			dsttablespace = dst_tsid;
		else
			dsttablespace = srctablespace;

		dstpath = GetDatabasePath(dst_dboid, dsttablespace);

		/*
		 * Copy this subdirectory to the new location
		 *
		 * We don't need to copy subdirectories
		 */
		copydir(srcpath, dstpath, false);

		/* Record the filesystem change in XLOG */
		{
			xl_dbase_create_file_copy_rec xlrec;

			xlrec.db_id = dst_dboid;
			xlrec.tablespace_id = dsttablespace;
			xlrec.src_db_id = src_dboid;
			xlrec.src_tablespace_id = srctablespace;

			XLogBeginInsert();
			XLogRegisterData((char *) &xlrec,
							 sizeof(xl_dbase_create_file_copy_rec));

			(void) XLogInsert(RM_DBASE_ID,
							  XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE);
		}
		pfree(srcpath);
		pfree(dstpath);
	}
	table_endscan(scan);
	table_close(rel, AccessShareLock);

	/*
	 * We force a checkpoint before committing. This effectively means that
	 * committed XLOG_DBASE_CREATE_FILE_COPY operations will never need to be
	 * replayed (at least not in ordinary crash recovery; we still have to
	 * make the XLOG entry for the benefit of PITR operations). This avoids
	 * two nasty scenarios:
	 *
	 * #1: When PITR is off, we don't XLOG the contents of newly created
	 * indexes; therefore the drop-and-recreate-whole-directory behavior of
	 * DBASE_CREATE replay would lose such indexes.
	 *
	 * #2: Since we have to recopy the source database during DBASE_CREATE
	 * replay, we run the risk of copying changes in it that were committed
	 * after the original CREATE DATABASE command but before the system crash
	 * that led to the replay. This is at least unexpected and at worst could
	 * lead to inconsistencies, eg duplicate table names.
	 *
	 * (Both of these were real bugs in releases 8.0 through 8.0.3.)
	 *
	 * In PITR replay, the first of these isn't an issue, and the second is
	 * only a risk if the CREATE DATABASE and subsequent template database
	 * change both occur while a base backup is being taken. There doesn't
	 * seem to be much we can do about that except document it as a
	 * limitation.
	 *
	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
	 * strategy that avoids these problems.
	 */
	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
}

/*
 * CREATE DATABASE
 */
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
	Oid			src_dboid;
	Oid			src_owner;
	int			src_encoding = -1;
	char	   *src_collate = NULL;
	char	   *src_ctype = NULL;
	char	   *src_iculocale = NULL;
	char		src_locprovider = '\0';
	char	   *src_collversion = NULL;
	bool		src_istemplate;
	bool		src_allowconn;
	TransactionId src_frozenxid = InvalidTransactionId;
	MultiXactId src_minmxid = InvalidMultiXactId;
	Oid			src_deftablespace;
	/* volatile because read after a possible longjmp out of the PG_ENSURE block */
	volatile Oid dst_deftablespace;
	Relation	pg_database_rel;
	HeapTuple	tuple;
	Datum		new_record[Natts_pg_database];
	bool		new_record_nulls[Natts_pg_database];
	Oid			dboid = InvalidOid;
	Oid			datdba;
	ListCell   *option;
	DefElem    *dtablespacename = NULL;
	DefElem    *downer = NULL;
	DefElem    *dtemplate = NULL;
	DefElem    *dencoding = NULL;
	DefElem    *dlocale = NULL;
	DefElem    *dcollate = NULL;
	DefElem    *dctype = NULL;
	DefElem    *diculocale = NULL;
	DefElem    *dlocprovider = NULL;
	DefElem    *distemplate = NULL;
	DefElem    *dallowconnections = NULL;
	DefElem    *dconnlimit = NULL;
	DefElem    *dcollversion = NULL;
	DefElem    *dstrategy = NULL;
	char	   *dbname = stmt->dbname;
	char	   *dbowner = NULL;
	const char *dbtemplate = NULL;
	char	   *dbcollate = NULL;
	char	   *dbctype = NULL;
	char	   *dbiculocale = NULL;
	char		dblocprovider = '\0';
	char	   *canonname;
	int			encoding = -1;
	bool		dbistemplate = false;
	bool		dballowconnections = true;
	int			dbconnlimit = DATCONNLIMIT_UNLIMITED;
	char	   *dbcollversion = NULL;
	int			notherbackends;
	int			npreparedxacts;
	CreateDBStrategy dbstrategy = CREATEDB_WAL_LOG;
	createdb_failure_params fparms;

	/* Extract options from the statement node tree */
	foreach(option, stmt->options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "tablespace") == 0)
		{
			if (dtablespacename)
				errorConflictingDefElem(defel, pstate);
			dtablespacename = defel;
		}
else if (strcmp(defel->defname, "owner") == 0) + { + if (downer) + errorConflictingDefElem(defel, pstate); + downer = defel; + } + else if (strcmp(defel->defname, "template") == 0) + { + if (dtemplate) + errorConflictingDefElem(defel, pstate); + dtemplate = defel; + } + else if (strcmp(defel->defname, "encoding") == 0) + { + if (dencoding) + errorConflictingDefElem(defel, pstate); + dencoding = defel; + } + else if (strcmp(defel->defname, "locale") == 0) + { + if (dlocale) + errorConflictingDefElem(defel, pstate); + dlocale = defel; + } + else if (strcmp(defel->defname, "lc_collate") == 0) + { + if (dcollate) + errorConflictingDefElem(defel, pstate); + dcollate = defel; + } + else if (strcmp(defel->defname, "lc_ctype") == 0) + { + if (dctype) + errorConflictingDefElem(defel, pstate); + dctype = defel; + } + else if (strcmp(defel->defname, "icu_locale") == 0) + { + if (diculocale) + errorConflictingDefElem(defel, pstate); + diculocale = defel; + } + else if (strcmp(defel->defname, "locale_provider") == 0) + { + if (dlocprovider) + errorConflictingDefElem(defel, pstate); + dlocprovider = defel; + } + else if (strcmp(defel->defname, "is_template") == 0) + { + if (distemplate) + errorConflictingDefElem(defel, pstate); + distemplate = defel; + } + else if (strcmp(defel->defname, "allow_connections") == 0) + { + if (dallowconnections) + errorConflictingDefElem(defel, pstate); + dallowconnections = defel; + } + else if (strcmp(defel->defname, "connection_limit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "collation_version") == 0) + { + if (dcollversion) + errorConflictingDefElem(defel, pstate); + dcollversion = defel; + } + else if (strcmp(defel->defname, "location") == 0) + { + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOCATION is not supported anymore"), + errhint("Consider using tablespaces instead."), + parser_errposition(pstate, defel->location))); + } 
+ else if (strcmp(defel->defname, "oid") == 0) + { + dboid = defGetObjectId(defel); + + /* + * We don't normally permit new databases to be created with + * system-assigned OIDs. pg_upgrade tries to preserve database + * OIDs, so we can't allow any database to be created with an OID + * that might be in use in a freshly-initialized cluster created + * by some future version. We assume all such OIDs will be from + * the system-managed OID range. + * + * As an exception, however, we permit any OID to be assigned when + * allow_system_table_mods=on (so that initdb can assign system + * OIDs to template0 and postgres) or when performing a binary + * upgrade (so that pg_upgrade can preserve whatever OIDs it finds + * in the source cluster). + */ + if (dboid < FirstNormalObjectId && + !allowSystemTableMods && !IsBinaryUpgrade) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("OIDs less than %u are reserved for system objects", FirstNormalObjectId)); + } + else if (strcmp(defel->defname, "strategy") == 0) + { + if (dstrategy) + errorConflictingDefElem(defel, pstate); + dstrategy = defel; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", defel->defname), + parser_errposition(pstate, defel->location))); + } + + if (downer && downer->arg) + dbowner = defGetString(downer); + if (dtemplate && dtemplate->arg) + dbtemplate = defGetString(dtemplate); + if (dencoding && dencoding->arg) + { + const char *encoding_name; + + if (IsA(dencoding->arg, Integer)) + { + encoding = defGetInt32(dencoding); + encoding_name = pg_encoding_to_char(encoding); + if (strcmp(encoding_name, "") == 0 || + pg_valid_server_encoding(encoding_name) < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%d is not a valid encoding code", + encoding), + parser_errposition(pstate, dencoding->location))); + } + else + { + encoding_name = defGetString(dencoding); + encoding = pg_valid_server_encoding(encoding_name); + if 
(encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("%s is not a valid encoding name", + encoding_name), + parser_errposition(pstate, dencoding->location))); + } + } + if (dlocale && dlocale->arg) + { + dbcollate = defGetString(dlocale); + dbctype = defGetString(dlocale); + } + if (dcollate && dcollate->arg) + dbcollate = defGetString(dcollate); + if (dctype && dctype->arg) + dbctype = defGetString(dctype); + if (diculocale && diculocale->arg) + dbiculocale = defGetString(diculocale); + if (dlocprovider && dlocprovider->arg) + { + char *locproviderstr = defGetString(dlocprovider); + + if (pg_strcasecmp(locproviderstr, "icu") == 0) + dblocprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(locproviderstr, "libc") == 0) + dblocprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized locale provider: %s", + locproviderstr))); + } + if (distemplate && distemplate->arg) + dbistemplate = defGetBoolean(distemplate); + if (dallowconnections && dallowconnections->arg) + dballowconnections = defGetBoolean(dallowconnections); + if (dconnlimit && dconnlimit->arg) + { + dbconnlimit = defGetInt32(dconnlimit); + if (dbconnlimit < DATCONNLIMIT_UNLIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", dbconnlimit))); + } + if (dcollversion) + dbcollversion = defGetString(dcollversion); + + /* obtain OID of proposed owner */ + if (dbowner) + datdba = get_role_oid(dbowner, false); + else + datdba = GetUserId(); + + /* + * To create a database, must have createdb privilege and must be able to + * become the target role (this does not imply that the target role itself + * must have createdb privilege). The latter provision guards against + * "giveaway" attacks. Note that a superuser will always have both of + * these privileges a fortiori. 
	 */
	if (!have_createdb_privilege())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create database")));

	/* Errors if the current user cannot become the proposed owner. */
	check_is_member_of_role(GetUserId(), datdba);

	/*
	 * Lookup database (template) to be cloned, and obtain share lock on it.
	 * ShareLock allows two CREATE DATABASEs to work from the same template
	 * concurrently, while ensuring no one is busy dropping it in parallel
	 * (which would be Very Bad since we'd likely get an incomplete copy
	 * without knowing it). This also prevents any new connections from being
	 * made to the source until we finish copying it, so we can be sure it
	 * won't change underneath us.
	 */
	if (!dbtemplate)
		dbtemplate = "template1";	/* Default template database name */

	if (!get_db_info(dbtemplate, ShareLock,
					 &src_dboid, &src_owner, &src_encoding,
					 &src_istemplate, &src_allowconn,
					 &src_frozenxid, &src_minmxid, &src_deftablespace,
					 &src_collate, &src_ctype, &src_iculocale, &src_locprovider,
					 &src_collversion))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("template database \"%s\" does not exist",
						dbtemplate)));

	/*
	 * If the source database was in the process of being dropped, we can't
	 * use it as a template.
	 */
	if (database_is_invalid_oid(src_dboid))
		ereport(ERROR,
				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				errmsg("cannot use invalid database \"%s\" as template", dbtemplate),
				errhint("Use DROP DATABASE to drop invalid databases."));

	/*
	 * Permission check: to copy a DB that's not marked datistemplate, you
	 * must be superuser or the owner thereof.
	 */
	if (!src_istemplate)
	{
		if (!pg_database_ownercheck(src_dboid, GetUserId()))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("permission denied to copy database \"%s\"",
							dbtemplate)));
	}

	/* Validate the database creation strategy. */
	if (dstrategy && dstrategy->arg)
	{
		char	   *strategy;

		strategy = defGetString(dstrategy);
		if (strcmp(strategy, "wal_log") == 0)
			dbstrategy = CREATEDB_WAL_LOG;
		else if (strcmp(strategy, "file_copy") == 0)
			dbstrategy = CREATEDB_FILE_COPY;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid create database strategy \"%s\"", strategy),
					 errhint("Valid strategies are \"wal_log\", and \"file_copy\".")));
	}

	/* If encoding or locales are defaulted, use source's setting */
	if (encoding < 0)
		encoding = src_encoding;
	if (dbcollate == NULL)
		dbcollate = src_collate;
	if (dbctype == NULL)
		dbctype = src_ctype;
	if (dblocprovider == '\0')
		dblocprovider = src_locprovider;
	if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
		dbiculocale = src_iculocale;

	/* Some encodings are client only */
	if (!PG_VALID_BE_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid server encoding %d", encoding)));

	/* Check that the chosen locales are valid, and get canonical spellings */
	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbcollate)));
	dbcollate = canonname;
	if (!check_locale(LC_CTYPE, dbctype, &canonname))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("invalid locale name: \"%s\"", dbctype)));
	dbctype = canonname;

	check_encoding_locale_matches(encoding, dbcollate, dbctype);

	if (dblocprovider == COLLPROVIDER_ICU)
	{
		if (!(is_encoding_supported_by_icu(encoding)))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("encoding \"%s\" is not supported with ICU provider",
							pg_encoding_to_char(encoding))));

		/*
		 * This would happen if template0 uses the libc provider but the new
		 * database uses icu.
		 */
		if (!dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ICU locale must be specified")));

		check_icu_locale(dbiculocale);
	}
	else
	{
		if (dbiculocale)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("ICU locale cannot be specified unless locale provider is ICU")));
	}

	/*
	 * Check that the new encoding and locale settings match the source
	 * database. We insist on this because we simply copy the source data ---
	 * any non-ASCII data would be wrongly encoded, and any indexes sorted
	 * according to the source locale would be wrong.
	 *
	 * However, we assume that template0 doesn't contain any non-ASCII data
	 * nor any indexes that depend on collation or ctype, so template0 can be
	 * used as template for creating a database with any encoding or locale.
	 */
	if (strcmp(dbtemplate, "template0") != 0)
	{
		if (encoding != src_encoding)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
							pg_encoding_to_char(encoding),
							pg_encoding_to_char(src_encoding)),
					 errhint("Use the same encoding as in the template database, or use template0 as template.")));

		if (strcmp(dbcollate, src_collate) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
							dbcollate, src_collate),
					 errhint("Use the same collation as in the template database, or use template0 as template.")));

		if (strcmp(dbctype, src_ctype) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
							dbctype, src_ctype),
					 errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));

		if (dblocprovider != src_locprovider)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("new locale provider (%s) does not match locale provider of the template database (%s)",
							collprovider_name(dblocprovider), collprovider_name(src_locprovider)),
					 errhint("Use the same locale provider as in the template database, or use template0 as template.")));

		if (dblocprovider == COLLPROVIDER_ICU)
		{
			/* Both set by the defaulting logic above if not given explicitly. */
			Assert(dbiculocale);
			Assert(src_iculocale);
			if (strcmp(dbiculocale, src_iculocale) != 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
								dbiculocale, src_iculocale),
						 errhint("Use the same ICU locale as in the template database, or use template0 as template.")));
		}
	}

	/*
	 * If we got a collation version for the template database, check that it
	 * matches the actual OS collation version. Otherwise error; the user
	 * needs to fix the template database first. Don't complain if a
	 * collation version was specified explicitly as a statement option; that
	 * is used by pg_upgrade to reproduce the old state exactly.
	 *
	 * (If the template database has no collation version, then either the
	 * platform/provider does not support collation versioning, or it's
	 * template0, for which we stipulate that it does not contain
	 * collation-using objects.)
	 */
	if (src_collversion && !dcollversion)
	{
		char	   *actual_versionstr;

		actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);
		if (!actual_versionstr)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
							dbtemplate)));

		if (strcmp(actual_versionstr, src_collversion) != 0)
			ereport(ERROR,
					(errmsg("template database \"%s\" has a collation version mismatch",
							dbtemplate),
					 errdetail("The template database was created using collation version %s, "
							   "but the operating system provides version %s.",
							   src_collversion, actual_versionstr),
					 errhint("Rebuild all objects in the template database that use the default collation and run "
							 "ALTER DATABASE %s REFRESH COLLATION VERSION, "
							 "or build PostgreSQL with the right library version.",
							 quote_identifier(dbtemplate))));
	}

	if (dbcollversion == NULL)
		dbcollversion = src_collversion;

	/*
	 * Normally, we copy the collation version from the template database.
	 * This last resort only applies if the template database does not have a
	 * collation version, which is normally only the case for template0.
	 */
	if (dbcollversion == NULL)
		dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dbiculocale : dbcollate);

	/* Resolve default tablespace for new database */
	if (dtablespacename && dtablespacename->arg)
	{
		char	   *tablespacename;
		AclResult	aclresult;

		tablespacename = defGetString(dtablespacename);
		dst_deftablespace = get_tablespace_oid(tablespacename, false);
		/* check permissions */
		aclresult = pg_tablespace_aclcheck(dst_deftablespace, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   tablespacename);

		/* pg_global must never be the default tablespace */
		if (dst_deftablespace == GLOBALTABLESPACE_OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_global cannot be used as default tablespace")));

		/*
		 * If we are trying to change the default tablespace of the template,
		 * we require that the template not have any files in the new default
		 * tablespace. This is necessary because otherwise the copied
		 * database would contain pg_class rows that refer to its default
		 * tablespace both explicitly (by OID) and implicitly (as zero), which
		 * would cause problems. For example another CREATE DATABASE using
		 * the copied database as template, and trying to change its default
		 * tablespace again, would yield outright incorrect results (it would
		 * improperly move tables to the new default tablespace that should
		 * stay in the same tablespace).
+ */ + if (dst_deftablespace != src_deftablespace) + { + char *srcpath; + struct stat st; + + srcpath = GetDatabasePath(src_dboid, dst_deftablespace); + + if (stat(srcpath, &st) == 0 && + S_ISDIR(st.st_mode) && + !directory_is_empty(srcpath)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot assign new default tablespace \"%s\"", + tablespacename), + errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.", + dbtemplate))); + pfree(srcpath); + } + } + else + { + /* Use template database's default tablespace */ + dst_deftablespace = src_deftablespace; + /* Note there is no additional permission check in this path */ + } + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for database names are violated. But don't complain during + * initdb. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (IsUnderPostmaster && strstr(dbname, "regression") == NULL) + elog(WARNING, "databases created by regression test cases should have names including \"regression\""); +#endif + + /* + * Check for db name conflict. This is just to give a more friendly error + * message than "unique index violation". There's a race condition but + * we're willing to accept the less friendly message in that case. + */ + if (OidIsValid(get_database_oid(dbname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_DATABASE), + errmsg("database \"%s\" already exists", dbname))); + + /* + * The source DB can't have any active backends, except this one + * (exception is to allow CREATE DB while connected to template1). + * Otherwise we might copy inconsistent data. + * + * This should be last among the basic error checks, because it involves + * potential waiting; we may as well throw an error first if we're gonna + * throw one. 
+ */ + if (CountOtherDBBackends(src_dboid, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("source database \"%s\" is being accessed by other users", + dbtemplate), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Select an OID for the new database, checking that it doesn't have a + * filename conflict with anything already existing in the tablespace + * directories. + */ + pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock); + + /* + * If database OID is configured, check if the OID is already in use or + * data directory already exists. + */ + if (OidIsValid(dboid)) + { + char *existing_dbname = get_database_name(dboid); + + if (existing_dbname != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("database OID %u is already in use by database \"%s\"", + dboid, existing_dbname)); + + if (check_db_file_conflict(dboid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE)), + errmsg("data directory with the specified OID %u already exists", dboid)); + } + else + { + /* Select an OID for the new database if is not explicitly configured. */ + do + { + dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId, + Anum_pg_database_oid); + } while (check_db_file_conflict(dboid)); + } + + /* + * Insert a new tuple into pg_database. This establishes our ownership of + * the new database name (anyone else trying to insert the same name will + * block on the unique index, and fail after we commit). 
+ */ + + Assert((dblocprovider == COLLPROVIDER_ICU && dbiculocale) || + (dblocprovider != COLLPROVIDER_ICU && !dbiculocale)); + + /* Form tuple */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + + new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid); + new_record[Anum_pg_database_datname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(dbname)); + new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba); + new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding); + new_record[Anum_pg_database_datlocprovider - 1] = CharGetDatum(dblocprovider); + new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); + new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); + new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); + new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid); + new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid); + new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace); + new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate); + new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype); + if (dbiculocale) + new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale); + else + new_record_nulls[Anum_pg_database_daticulocale - 1] = true; + if (dbcollversion) + new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion); + else + new_record_nulls[Anum_pg_database_datcollversion - 1] = true; + + /* + * We deliberately set datacl to default (NULL), rather than copying it + * from the template database. Copying it would be a bad idea when the + * owner is not the same as the template's owner. 
	 */
	new_record_nulls[Anum_pg_database_datacl - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
							new_record, new_record_nulls);

	CatalogTupleInsert(pg_database_rel, tuple);

	/*
	 * Now generate additional catalog entries associated with the new DB
	 */

	/* Register owner dependency */
	recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);

	/* Create pg_shdepend entries for objects within database */
	copyTemplateDependencies(src_dboid, dboid);

	/* Post creation hook for new database */
	InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);

	/*
	 * If we're going to be reading data for the to-be-created database into
	 * shared_buffers, take a lock on it. Nobody should know that this
	 * database exists yet, but it's good to maintain the invariant that a
	 * lock an AccessExclusiveLock on the database is sufficient to drop all
	 * of its buffers without worrying about more being read later.
	 *
	 * Note that we need to do this before entering the
	 * PG_ENSURE_ERROR_CLEANUP block below, because createdb_failure_callback
	 * expects this lock to be held already.
	 */
	if (dbstrategy == CREATEDB_WAL_LOG)
		LockSharedObject(DatabaseRelationId, dboid, 0, AccessShareLock);

	/*
	 * Once we start copying subdirectories, we need to be able to clean 'em
	 * up if we fail. Use an ENSURE block to make sure this happens. (This
	 * is not a 100% solution, because of the possibility of failure during
	 * transaction commit after we leave this routine, but it should handle
	 * most scenarios.)
	 */
	fparms.src_dboid = src_dboid;
	fparms.dest_dboid = dboid;
	fparms.strategy = dbstrategy;

	PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
							PointerGetDatum(&fparms));
	{
		/*
		 * If the user has asked to create a database with WAL_LOG strategy
		 * then call CreateDatabaseUsingWalLog, which will copy the database
		 * at the block level and it will WAL log each copied block.
		 * Otherwise, call CreateDatabaseUsingFileCopy that will copy the
		 * database file by file.
		 */
		if (dbstrategy == CREATEDB_WAL_LOG)
			CreateDatabaseUsingWalLog(src_dboid, dboid, src_deftablespace,
									  dst_deftablespace);
		else
			CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
										dst_deftablespace);

		/*
		 * Close pg_database, but keep lock till commit.
		 */
		table_close(pg_database_rel, NoLock);

		/*
		 * Force synchronous commit, thus minimizing the window between
		 * creation of the database files and committal of the transaction. If
		 * we crash before committing, we'll have a DB that's taking up disk
		 * space but is not in pg_database, which is not good.
		 */
		ForceSyncCommit();
	}
	PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
								PointerGetDatum(&fparms));

	return dboid;
}

/*
 * Check whether chosen encoding matches chosen locale settings. This
 * restriction is necessary because libc's locale-specific code usually
 * fails when presented with data in an encoding it's not expecting. We
 * allow mismatch in four cases:
 *
 * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX
 * which works with any encoding.
 *
 * 2. locale encoding = -1, which means that we couldn't determine the
 * locale's encoding and have to trust the user to get it right.
 *
 * 3. selected encoding is UTF8 and platform is win32. This is because
 * UTF8 is a pseudo codepage that is supported in all locales since it's
 * converted to UTF16 before being used.
 *
 * 4. selected encoding is SQL_ASCII, but only if you're a superuser. This
 * is risky but we have historically allowed it --- notably, the
 * regression tests require it.
 *
 * Note: if you change this policy, fix initdb to match.
 */
void
check_encoding_locale_matches(int encoding, const char *collate, const char *ctype)
{
	int			ctype_encoding = pg_get_encoding_from_locale(ctype, true);
	int			collate_encoding = pg_get_encoding_from_locale(collate, true);

	/* LC_CTYPE must be compatible with the chosen encoding (see cases above). */
	if (!(ctype_encoding == encoding ||
		  ctype_encoding == PG_SQL_ASCII ||
		  ctype_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						ctype),
				 errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".",
						   pg_encoding_to_char(ctype_encoding))));

	/* Same compatibility test for LC_COLLATE. */
	if (!(collate_encoding == encoding ||
		  collate_encoding == PG_SQL_ASCII ||
		  collate_encoding == -1 ||
#ifdef WIN32
		  encoding == PG_UTF8 ||
#endif
		  (encoding == PG_SQL_ASCII && superuser())))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("encoding \"%s\" does not match locale \"%s\"",
						pg_encoding_to_char(encoding),
						collate),
				 errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".",
						   pg_encoding_to_char(collate_encoding))));
}

/* Error cleanup callback for createdb */
static void
createdb_failure_callback(int code, Datum arg)
{
	createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);

	/*
	 * If we were copying database at block levels then drop pages for the
	 * destination database that are in the shared buffer cache. And tell
	 * checkpointer to forget any pending fsync and unlink requests for files
	 * in the database. The reasoning behind doing this is same as explained
	 * in dropdb function. But unlike dropdb we don't need to call
	 * pgstat_drop_database because this database is still not created so
	 * there should not be any stat for this.
+ */ + if (fparms->strategy == CREATEDB_WAL_LOG) + { + DropDatabaseBuffers(fparms->dest_dboid); + ForgetDatabaseSyncRequests(fparms->dest_dboid); + + /* Release lock on the target database. */ + UnlockSharedObject(DatabaseRelationId, fparms->dest_dboid, 0, + AccessShareLock); + } + + /* + * Release lock on source database before doing recursive remove. This is + * not essential but it seems desirable to release the lock as soon as + * possible. + */ + UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock); + + /* Throw away any successfully copied subdirectories */ + remove_dbtablespaces(fparms->dest_dboid); +} + + +/* + * DROP DATABASE + */ +void +dropdb(const char *dbname, bool missing_ok, bool force) +{ + Oid db_id; + bool db_istemplate; + Relation pgdbrel; + HeapTuple tup; + Form_pg_database datform; + int notherbackends; + int npreparedxacts; + int nslots, + nslots_active; + int nsubscriptions; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this to ensure that no new backend starts up in the target + * database while we are deleting it (see postinit.c), and that no one is + * using it as a CREATE DATABASE template or trying to delete it for + * themselves. 
+ */ + pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, + &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + } + else + { + /* Close pg_database, release the lock, since we changed nothing */ + table_close(pgdbrel, RowExclusiveLock); + ereport(NOTICE, + (errmsg("database \"%s\" does not exist, skipping", + dbname))); + return; + } + } + + /* + * Permission checks + */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + dbname); + + /* DROP hook for the database being removed */ + InvokeObjectDropHook(DatabaseRelationId, db_id, 0); + + /* + * Disallow dropping a DB that is marked istemplate. This is just to + * prevent people from accidentally dropping template0 or template1; they + * can do so if they're really determined ... + */ + if (db_istemplate) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot drop a template database"))); + + /* Obviously can't drop my own database */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("cannot drop the currently open database"))); + + /* + * Check whether there are active logical slots that refer to the + * to-be-dropped database. The database lock we are holding prevents the + * creation of new slots using the database or existing slots becoming + * active. 
+ */ + (void) ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active); + if (nslots_active) + { + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is used by an active logical replication slot", + dbname), + errdetail_plural("There is %d active slot.", + "There are %d active slots.", + nslots_active, nslots_active))); + } + + /* + * Check if there are subscriptions defined in the target database. + * + * We can't drop them automatically because they might be holding + * resources in other databases/instances. + */ + if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being used by logical replication subscription", + dbname), + errdetail_plural("There is %d subscription.", + "There are %d subscriptions.", + nsubscriptions, nsubscriptions))); + + + /* + * Attempt to terminate all existing connections to the target database if + * the user has requested to do so. + */ + if (force) + TerminateOtherDBBackends(db_id); + + /* + * Check for other backends in the target database. (Because we hold the + * database lock, no new ones can start after this.) + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + dbname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Delete any comments or security labels associated with the database. + */ + DeleteSharedComments(db_id, DatabaseRelationId); + DeleteSharedSecurityLabel(db_id, DatabaseRelationId); + + /* + * Remove settings associated with this database + */ + DropSetting(db_id, InvalidOid); + + /* + * Remove shared dependency references for the database. + */ + dropDatabaseDependencies(db_id); + + /* + * Tell the cumulative stats system to forget it immediately, too. 
+ */ + pgstat_drop_database(db_id); + + tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + datform = (Form_pg_database) GETSTRUCT(tup); + + /* + * Except for the deletion of the catalog row, subsequent actions are not + * transactional (consider DropDatabaseBuffers() discarding modified + * buffers). But we might crash or get interrupted below. To prevent + * accesses to a database with invalid contents, mark the database as + * invalid using an in-place update. + * + * We need to flush the WAL before continuing, to guarantee the + * modification is durable before performing irreversible filesystem + * operations. + */ + datform->datconnlimit = DATCONNLIMIT_INVALID_DB; + heap_inplace_update(pgdbrel, tup); + XLogFlush(XactLastRecEnd); + + /* + * Also delete the tuple - transactionally. If this transaction commits, + * the row will be gone, but if we fail, dropdb() can be invoked again. + */ + CatalogTupleDelete(pgdbrel, &tup->t_self); + + /* + * Drop db-specific replication slots. + */ + ReplicationSlotsDropDBSlots(db_id); + + /* + * Drop pages for this database that are in the shared buffer cache. This + * is important to ensure that no remaining backend tries to write out a + * dirty buffer to the dead database later... + */ + DropDatabaseBuffers(db_id); + + /* + * Tell checkpointer to forget any pending fsync and unlink requests for + * files in the database; else the fsyncs will fail at next checkpoint, or + * worse, it will delete files that belong to a newly created database + * with the same OID. + */ + ForgetDatabaseSyncRequests(db_id); + + /* + * Force a checkpoint to make sure the checkpointer has received the + * message sent by ForgetDatabaseSyncRequests. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + + /* Close all smgr fds in all backends. 
*/ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Remove all tablespace subdirs belonging to the database. + */ + remove_dbtablespaces(db_id); + + /* + * Close pg_database, but keep lock till commit. + */ + table_close(pgdbrel, NoLock); + + /* + * Force synchronous commit, thus minimizing the window between removal of + * the database files and committal of the transaction. If we crash before + * committing, we'll have a DB that's gone on disk but still there + * according to pg_database, which is not good. + */ + ForceSyncCommit(); +} + + +/* + * Rename database + */ +ObjectAddress +RenameDatabase(const char *oldname, const char *newname) +{ + Oid db_id; + HeapTuple newtup; + Relation rel; + int notherbackends; + int npreparedxacts; + ObjectAddress address; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this for the same reasons as DROP DATABASE. + */ + rel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", oldname))); + + /* must be owner */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + oldname); + + /* must have createdb rights */ + if (!have_createdb_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to rename database"))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for database names are violated. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strstr(newname, "regression") == NULL) + elog(WARNING, "databases created by regression test cases should have names including \"regression\""); +#endif + + /* + * Make sure the new name doesn't exist. 
See notes for same error in + * CREATE DATABASE. + */ + if (OidIsValid(get_database_oid(newname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_DATABASE), + errmsg("database \"%s\" already exists", newname))); + + /* + * XXX Client applications probably store the current database somewhere, + * so renaming it could cause confusion. On the other hand, there may not + * be an actual problem besides a little confusion, so think about this + * and decide. + */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current database cannot be renamed"))); + + /* + * Make sure the database does not have active sessions. This is the same + * concern as above, but applied to other sessions. + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + oldname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* rename */ + newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id)); + if (!HeapTupleIsValid(newtup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname); + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + ObjectAddressSet(address, DatabaseRelationId, db_id); + + /* + * Close pg_database, but keep lock till commit. 
+ */ + table_close(rel, NoLock); + + return address; +} + + +/* + * ALTER DATABASE SET TABLESPACE + */ +static void +movedb(const char *dbname, const char *tblspcname) +{ + Oid db_id; + Relation pgdbrel; + int notherbackends; + int npreparedxacts; + HeapTuple oldtuple, + newtuple; + Oid src_tblspcoid, + dst_tblspcoid; + Datum new_record[Natts_pg_database]; + bool new_record_nulls[Natts_pg_database]; + bool new_record_repl[Natts_pg_database]; + ScanKeyData scankey; + SysScanDesc sysscan; + AclResult aclresult; + char *src_dbpath; + char *dst_dbpath; + DIR *dstdir; + struct dirent *xlde; + movedb_failure_params fparms; + + /* + * Look up the target database's OID, and get exclusive lock on it. We + * need this to ensure that no new backend starts up in the database while + * we are moving it, and that no one is using it as a CREATE DATABASE + * template or trying to delete it. + */ + pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); + + if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, + NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + + /* + * We actually need a session lock, so that the lock will persist across + * the commit/restart below. (We could almost get away with letting the + * lock be released at commit, except that someone could try to move + * relations of the DB back into the old directory while we rmtree() it.) 
+ */ + LockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + + /* + * Permission checks + */ + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + dbname); + + /* + * Obviously can't move the tables of my own database + */ + if (db_id == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("cannot change the tablespace of the currently open database"))); + + /* + * Get tablespace's oid + */ + dst_tblspcoid = get_tablespace_oid(tblspcname, false); + + /* + * Permission checks + */ + aclresult = pg_tablespace_aclcheck(dst_tblspcoid, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + tblspcname); + + /* + * pg_global must never be the default tablespace + */ + if (dst_tblspcoid == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_global cannot be used as default tablespace"))); + + /* + * No-op if same tablespace + */ + if (src_tblspcoid == dst_tblspcoid) + { + table_close(pgdbrel, NoLock); + UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + return; + } + + /* + * Check for other backends in the target database. (Because we hold the + * database lock, no new ones can start after this.) + * + * As in CREATE DATABASE, check this after other error conditions. + */ + if (CountOtherDBBackends(db_id, ¬herbackends, &npreparedxacts)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being accessed by other users", + dbname), + errdetail_busy_db(notherbackends, npreparedxacts))); + + /* + * Get old and new database paths + */ + src_dbpath = GetDatabasePath(db_id, src_tblspcoid); + dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid); + + /* + * Force a checkpoint before proceeding. 
This will force all dirty + * buffers, including those of unlogged tables, out to disk, to ensure + * source database is up-to-date on disk for the copy. + * FlushDatabaseBuffers() would suffice for that, but we also want to + * process any pending unlink requests. Otherwise, the check for existing + * files in the target directory might fail unnecessarily, not to mention + * that the copy might fail due to source files getting deleted under it. + * On Windows, this also ensures that background procs don't hold any open + * files, which would cause rmdir() to fail. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT + | CHECKPOINT_FLUSH_ALL); + + /* Close all smgr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Now drop all buffers holding data of the target database; they should + * no longer be dirty so DropDatabaseBuffers is safe. + * + * It might seem that we could just let these buffers age out of shared + * buffers naturally, since they should not get referenced anymore. The + * problem with that is that if the user later moves the database back to + * its original tablespace, any still-surviving buffers would appear to + * contain valid data again --- but they'd be missing any changes made in + * the database while it was in the new tablespace. In any case, freeing + * buffers that should never be used again seems worth the cycles. + * + * Note: it'd be sufficient to get rid of buffers matching db_id and + * src_tblspcoid, but bufmgr.c presently provides no API for that. + */ + DropDatabaseBuffers(db_id); + + /* + * Check for existence of files in the target directory, i.e., objects of + * this database that are already in the target tablespace. We can't + * allow the move in such a case, because we would need to change those + * relations' pg_class.reltablespace entries to zero, and we don't have + * access to the DB's pg_class to do so. 
+ */ + dstdir = AllocateDir(dst_dbpath); + if (dstdir != NULL) + { + while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL) + { + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("some relations of database \"%s\" are already in tablespace \"%s\"", + dbname, tblspcname), + errhint("You must move them back to the database's default tablespace before using this command."))); + } + + FreeDir(dstdir); + + /* + * The directory exists but is empty. We must remove it before using + * the copydir function. + */ + if (rmdir(dst_dbpath) != 0) + elog(ERROR, "could not remove directory \"%s\": %m", + dst_dbpath); + } + + /* + * Use an ENSURE block to make sure we remove the debris if the copy fails + * (eg, due to out-of-disk-space). This is not a 100% solution, because + * of the possibility of failure during transaction commit, but it should + * handle most scenarios. + */ + fparms.dest_dboid = db_id; + fparms.dest_tsoid = dst_tblspcoid; + PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback, + PointerGetDatum(&fparms)); + { + /* + * Copy files from the old tablespace to the new one + */ + copydir(src_dbpath, dst_dbpath, false); + + /* + * Record the filesystem change in XLOG + */ + { + xl_dbase_create_file_copy_rec xlrec; + + xlrec.db_id = db_id; + xlrec.tablespace_id = dst_tblspcoid; + xlrec.src_db_id = db_id; + xlrec.src_tablespace_id = src_tblspcoid; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, + sizeof(xl_dbase_create_file_copy_rec)); + + (void) XLogInsert(RM_DBASE_ID, + XLOG_DBASE_CREATE_FILE_COPY | XLR_SPECIAL_REL_UPDATE); + } + + /* + * Update the database's pg_database tuple + */ + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(dbname)); + sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + oldtuple = systable_getnext(sysscan); + if 
(!HeapTupleIsValid(oldtuple)) /* shouldn't happen... */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", dbname))); + + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid); + new_record_repl[Anum_pg_database_dattablespace - 1] = true; + + newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel), + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pgdbrel, &oldtuple->t_self, newtuple); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + systable_endscan(sysscan); + + /* + * Force another checkpoint here. As in CREATE DATABASE, this is to + * ensure that we don't have to replay a committed + * XLOG_DBASE_CREATE_FILE_COPY operation, which would cause us to lose + * any unlogged operations done in the new DB tablespace before the + * next checkpoint. + */ + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + + /* + * Force synchronous commit, thus minimizing the window between + * copying the database files and committal of the transaction. If we + * crash before committing, we'll leave an orphaned set of files on + * disk, which is not fatal but not good either. + */ + ForceSyncCommit(); + + /* + * Close pg_database, but keep lock till commit. + */ + table_close(pgdbrel, NoLock); + } + PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback, + PointerGetDatum(&fparms)); + + /* + * Commit the transaction so that the pg_database update is committed. If + * we crash while removing files, the database won't be corrupt, we'll + * just leave some orphaned files in the old directory. + * + * (This is OK because we know we aren't inside a transaction block.) + * + * XXX would it be safe/better to do this inside the ensure block? 
Not + * convinced it's a good idea; consider elog just after the transaction + * really commits. + */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* Start new transaction for the remaining work; don't need a snapshot */ + StartTransactionCommand(); + + /* + * Remove files from the old tablespace + */ + if (!rmtree(src_dbpath, true)) + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + src_dbpath))); + + /* + * Record the filesystem change in XLOG + */ + { + xl_dbase_drop_rec xlrec; + + xlrec.db_id = db_id; + xlrec.ntablespaces = 1; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); + XLogRegisterData((char *) &src_tblspcoid, sizeof(Oid)); + + (void) XLogInsert(RM_DBASE_ID, + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + } + + /* Now it's safe to release the database lock */ + UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0, + AccessExclusiveLock); + + pfree(src_dbpath); + pfree(dst_dbpath); +} + +/* Error cleanup callback for movedb */ +static void +movedb_failure_callback(int code, Datum arg) +{ + movedb_failure_params *fparms = (movedb_failure_params *) DatumGetPointer(arg); + char *dstpath; + + /* Get rid of anything we managed to copy to the target directory */ + dstpath = GetDatabasePath(fparms->dest_dboid, fparms->dest_tsoid); + + (void) rmtree(dstpath, true); + + pfree(dstpath); +} + +/* + * Process options and call dropdb function. + */ +void +DropDatabase(ParseState *pstate, DropdbStmt *stmt) +{ + bool force = false; + ListCell *lc; + + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "force") == 0) + force = true; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized DROP DATABASE option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + } + + dropdb(stmt->dbname, stmt->missing_ok, force); +} + +/* + * ALTER DATABASE name ... 
+ */ +Oid +AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel) +{ + Relation rel; + Oid dboid; + HeapTuple tuple, + newtuple; + Form_pg_database datform; + ScanKeyData scankey; + SysScanDesc scan; + ListCell *option; + bool dbistemplate = false; + bool dballowconnections = true; + int dbconnlimit = DATCONNLIMIT_UNLIMITED; + DefElem *distemplate = NULL; + DefElem *dallowconnections = NULL; + DefElem *dconnlimit = NULL; + DefElem *dtablespace = NULL; + Datum new_record[Natts_pg_database]; + bool new_record_nulls[Natts_pg_database]; + bool new_record_repl[Natts_pg_database]; + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "is_template") == 0) + { + if (distemplate) + errorConflictingDefElem(defel, pstate); + distemplate = defel; + } + else if (strcmp(defel->defname, "allow_connections") == 0) + { + if (dallowconnections) + errorConflictingDefElem(defel, pstate); + dallowconnections = defel; + } + else if (strcmp(defel->defname, "connection_limit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "tablespace") == 0) + { + if (dtablespace) + errorConflictingDefElem(defel, pstate); + dtablespace = defel; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("option \"%s\" not recognized", defel->defname), + parser_errposition(pstate, defel->location))); + } + + if (dtablespace) + { + /* + * While the SET TABLESPACE syntax doesn't allow any other options, + * somebody could write "WITH TABLESPACE ...". Forbid any other + * options from being specified in that case. 
+ */ + if (list_length(stmt->options) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("option \"%s\" cannot be specified with other options", + dtablespace->defname), + parser_errposition(pstate, dtablespace->location))); + /* this case isn't allowed within a transaction block */ + PreventInTransactionBlock(isTopLevel, "ALTER DATABASE SET TABLESPACE"); + movedb(stmt->dbname, defGetString(dtablespace)); + return InvalidOid; + } + + if (distemplate && distemplate->arg) + dbistemplate = defGetBoolean(distemplate); + if (dallowconnections && dallowconnections->arg) + dballowconnections = defGetBoolean(dallowconnections); + if (dconnlimit && dconnlimit->arg) + { + dbconnlimit = defGetInt32(dconnlimit); + if (dbconnlimit < DATCONNLIMIT_UNLIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", dbconnlimit))); + } + + /* + * Get the old tuple. We don't need a lock on the database per se, + * because we're not going to do anything that would mess up incoming + * connections. 
+ */ + rel = table_open(DatabaseRelationId, RowExclusiveLock); + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->dbname)); + scan = systable_beginscan(rel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", stmt->dbname))); + + datform = (Form_pg_database) GETSTRUCT(tuple); + dboid = datform->oid; + + if (database_is_invalid_form(datform)) + { + ereport(FATAL, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot alter invalid database \"%s\"", stmt->dbname), + errhint("Use DROP DATABASE to drop invalid databases.")); + } + + if (!pg_database_ownercheck(dboid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->dbname); + + /* + * In order to avoid getting locked out and having to go through + * standalone mode, we refuse to disallow connections to the database + * we're currently connected to. Lockout can still happen with concurrent + * sessions but the likeliness of that is not high enough to worry about. 
+ */ + if (!dballowconnections && dboid == MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot disallow connections for current database"))); + + /* + * Build an updated tuple, perusing the information just obtained + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + if (distemplate) + { + new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); + new_record_repl[Anum_pg_database_datistemplate - 1] = true; + } + if (dallowconnections) + { + new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); + new_record_repl[Anum_pg_database_datallowconn - 1] = true; + } + if (dconnlimit) + { + new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); + new_record_repl[Anum_pg_database_datconnlimit - 1] = true; + } + + newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(rel, &tuple->t_self, newtuple); + + InvokeObjectPostAlterHook(DatabaseRelationId, dboid, 0); + + systable_endscan(scan); + + /* Close pg_database, but keep lock till commit */ + table_close(rel, NoLock); + + return dboid; +} + + +/* + * ALTER DATABASE name REFRESH COLLATION VERSION + */ +ObjectAddress +AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt) +{ + Relation rel; + ScanKeyData scankey; + SysScanDesc scan; + Oid db_id; + HeapTuple tuple; + Form_pg_database datForm; + ObjectAddress address; + Datum datum; + bool isnull; + char *oldversion; + char *newversion; + + rel = table_open(DatabaseRelationId, RowExclusiveLock); + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->dbname)); + scan = systable_beginscan(rel, DatabaseNameIndexId, true, + NULL, 1, &scankey); + tuple = systable_getnext(scan); + if (!HeapTupleIsValid(tuple)) + 
ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", stmt->dbname))); + + datForm = (Form_pg_database) GETSTRUCT(tuple); + db_id = datForm->oid; + + if (!pg_database_ownercheck(db_id, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->dbname); + + datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(datum); + + datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_database"); + newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_database] = {0}; + bool replaces[Natts_pg_database] = {0}; + Datum values[Natts_pg_database] = {0}; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + values[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_database_datcollversion - 1] = true; + + tuple = heap_modify_tuple(tuple, RelationGetDescr(rel), + values, nulls, replaces); + CatalogTupleUpdate(rel, &tuple->t_self, tuple); + heap_freetuple(tuple); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0); + + ObjectAddressSet(address, DatabaseRelationId, db_id); + + systable_endscan(scan); + + table_close(rel, NoLock); + + return address; +} + + +/* + * ALTER DATABASE name SET ... 
 */
Oid
AlterDatabaseSet(AlterDatabaseSetStmt *stmt)
{
    /* Resolve the database name; missing_ok=false, so this errors if absent */
    Oid         datid = get_database_oid(stmt->dbname, false);

    /*
     * Obtain a lock on the database and make sure it didn't go away in the
     * meantime.  (shdepLockAndCheckObject takes AccessShareLock; the matching
     * unlock below uses the same level.)
     */
    shdepLockAndCheckObject(DatabaseRelationId, datid);

    /* Only the database owner (or a superuser) may ALTER ... SET */
    if (!pg_database_ownercheck(datid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
                       stmt->dbname);

    /* Apply the per-database setting (stored in pg_db_role_setting) */
    AlterSetting(datid, InvalidOid, stmt->setstmt);

    UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock);

    return datid;
}


/*
 * ALTER DATABASE name OWNER TO newowner
 *
 * Returns the ObjectAddress of the affected database.  Silently succeeds
 * (no catalog change) if the named role already owns the database.
 */
ObjectAddress
AlterDatabaseOwner(const char *dbname, Oid newOwnerId)
{
    Oid         db_id;
    HeapTuple   tuple;
    Relation    rel;
    ScanKeyData scankey;
    SysScanDesc scan;
    Form_pg_database datForm;
    ObjectAddress address;

    /*
     * Get the old tuple.  We don't need a lock on the database per se,
     * because we're not going to do anything that would mess up incoming
     * connections.
     */
    rel = table_open(DatabaseRelationId, RowExclusiveLock);
    ScanKeyInit(&scankey,
                Anum_pg_database_datname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(dbname));
    /* No syscache indexed by name, so scan pg_database_datname_index */
    scan = systable_beginscan(rel, DatabaseNameIndexId, true,
                              NULL, 1, &scankey);
    tuple = systable_getnext(scan);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_DATABASE),
                 errmsg("database \"%s\" does not exist", dbname)));

    datForm = (Form_pg_database) GETSTRUCT(tuple);
    db_id = datForm->oid;

    /*
     * If the new owner is the same as the existing owner, consider the
     * command to have succeeded.  This is to be consistent with other
     * objects.
     */
    if (datForm->datdba != newOwnerId)
    {
        Datum       repl_val[Natts_pg_database];
        bool        repl_null[Natts_pg_database];
        bool        repl_repl[Natts_pg_database];
        Acl        *newAcl;
        Datum       aclDatum;
        bool        isNull;
        HeapTuple   newtuple;

        /* Otherwise, must be owner of the existing object */
        if (!pg_database_ownercheck(db_id, GetUserId()))
            aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
                           dbname);

        /* Must be able to become new owner */
        check_is_member_of_role(GetUserId(), newOwnerId);

        /*
         * must have createdb rights
         *
         * NOTE: This is different from other alter-owner checks in that the
         * current user is checked for createdb privileges instead of the
         * destination owner.  This is consistent with the CREATE case for
         * databases.  Because superusers will always have this right, we need
         * no special case for them.
         */
        if (!have_createdb_privilege())
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     errmsg("permission denied to change owner of database")));

        memset(repl_null, false, sizeof(repl_null));
        memset(repl_repl, false, sizeof(repl_repl));

        repl_repl[Anum_pg_database_datdba - 1] = true;
        repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId);

        /*
         * Determine the modified ACL for the new owner.  This is only
         * necessary when the ACL is non-null.
         */
        aclDatum = heap_getattr(tuple,
                                Anum_pg_database_datacl,
                                RelationGetDescr(rel),
                                &isNull);
        if (!isNull)
        {
            /* Transfer ownership entries in the ACL to the new owner */
            newAcl = aclnewowner(DatumGetAclP(aclDatum),
                                 datForm->datdba, newOwnerId);
            repl_repl[Anum_pg_database_datacl - 1] = true;
            repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl);
        }

        newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
        CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

        heap_freetuple(newtuple);

        /* Update owner dependency reference */
        changeDependencyOnOwner(DatabaseRelationId, db_id, newOwnerId);
    }

    InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);

    ObjectAddressSet(address, DatabaseRelationId, db_id);

    systable_endscan(scan);

    /* Close pg_database, but keep lock till commit */
    table_close(rel, NoLock);

    return address;
}


/*
 * SQL function: report the actual (provider-reported) collation version for
 * the database-wide collation of the database with the given OID.  Returns
 * NULL if the provider does not report versions for this locale.
 */
Datum
pg_database_collation_actual_version(PG_FUNCTION_ARGS)
{
    Oid         dbid = PG_GETARG_OID(0);
    HeapTuple   tp;
    char        datlocprovider;
    Datum       datum;
    bool        isnull;
    char       *version;

    tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
    if (!HeapTupleIsValid(tp))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("database with OID %u does not exist", dbid)));

    datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider;

    /* ICU databases store the locale in daticulocale, libc ones in datcollate */
    datum = SysCacheGetAttr(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_daticulocale : Anum_pg_database_datcollate, &isnull);
    if (isnull)
        elog(ERROR, "unexpected null in pg_database");
    version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum));

    ReleaseSysCache(tp);

    if (version)
        PG_RETURN_TEXT_P(cstring_to_text(version));
    else
        PG_RETURN_NULL();
}


/*
 * Helper functions
 */

/*
 * Look up info about the database named "name".  If the database exists,
 * obtain the specified lock type on it, fill in any of the remaining
 * parameters that aren't NULL, and return true.
If no such database,
 * return false.
 *
 * Any of the output pointer parameters may be NULL if the caller doesn't
 * care about that attribute.  String outputs are palloc'd.
 */
static bool
get_db_info(const char *name, LOCKMODE lockmode,
            Oid *dbIdP, Oid *ownerIdP,
            int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
            TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
            Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
            char *dbLocProvider,
            char **dbCollversion)
{
    bool        result = false;
    Relation    relation;

    AssertArg(name);

    /* Caller may wish to grab a better lock on pg_database beforehand... */
    relation = table_open(DatabaseRelationId, AccessShareLock);

    /*
     * Loop covers the rare case where the database is renamed before we can
     * lock it.  We try again just in case we can find a new one of the same
     * name.
     */
    for (;;)
    {
        ScanKeyData scanKey;
        SysScanDesc scan;
        HeapTuple   tuple;
        Oid         dbOid;

        /*
         * there's no syscache for database-indexed-by-name, so must do it the
         * hard way
         */
        ScanKeyInit(&scanKey,
                    Anum_pg_database_datname,
                    BTEqualStrategyNumber, F_NAMEEQ,
                    CStringGetDatum(name));

        scan = systable_beginscan(relation, DatabaseNameIndexId, true,
                                  NULL, 1, &scanKey);

        tuple = systable_getnext(scan);

        if (!HeapTupleIsValid(tuple))
        {
            /* definitely no database of that name */
            systable_endscan(scan);
            break;
        }

        dbOid = ((Form_pg_database) GETSTRUCT(tuple))->oid;

        systable_endscan(scan);

        /*
         * Now that we have a database OID, we can try to lock the DB.
         */
        if (lockmode != NoLock)
            LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);

        /*
         * And now, re-fetch the tuple by OID.  If it's still there and still
         * the same name, we win; else, drop the lock and loop back to try
         * again.
         */
        tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid));
        if (HeapTupleIsValid(tuple))
        {
            Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);

            if (strcmp(name, NameStr(dbform->datname)) == 0)
            {
                Datum       datum;
                bool        isnull;

                /* oid of the database */
                if (dbIdP)
                    *dbIdP = dbOid;
                /* oid of the owner */
                if (ownerIdP)
                    *ownerIdP = dbform->datdba;
                /* character encoding */
                if (encodingP)
                    *encodingP = dbform->encoding;
                /* allowed as template? */
                if (dbIsTemplateP)
                    *dbIsTemplateP = dbform->datistemplate;
                /* allowing connections? */
                if (dbAllowConnP)
                    *dbAllowConnP = dbform->datallowconn;
                /* limit of frozen XIDs */
                if (dbFrozenXidP)
                    *dbFrozenXidP = dbform->datfrozenxid;
                /* minimum MultiXactId */
                if (dbMinMultiP)
                    *dbMinMultiP = dbform->datminmxid;
                /* default tablespace for this database */
                if (dbTablespace)
                    *dbTablespace = dbform->dattablespace;
                /* default locale settings for this database */
                if (dbLocProvider)
                    *dbLocProvider = dbform->datlocprovider;
                if (dbCollate)
                {
                    /* datcollate is NOT NULL, hence the bare Assert */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollate, &isnull);
                    Assert(!isnull);
                    *dbCollate = TextDatumGetCString(datum);
                }
                if (dbCtype)
                {
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datctype, &isnull);
                    Assert(!isnull);
                    *dbCtype = TextDatumGetCString(datum);
                }
                if (dbIculocale)
                {
                    /* daticulocale is nullable (non-ICU databases) */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticulocale, &isnull);
                    if (isnull)
                        *dbIculocale = NULL;
                    else
                        *dbIculocale = TextDatumGetCString(datum);
                }
                if (dbCollversion)
                {
                    /* datcollversion is nullable as well */
                    datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);
                    if (isnull)
                        *dbCollversion = NULL;
                    else
                        *dbCollversion = TextDatumGetCString(datum);
                }
                ReleaseSysCache(tuple);
                result = true;
                break;
            }
            /* can only get here if it was just renamed */
            ReleaseSysCache(tuple);
        }

        /* Lost the race: drop the lock on the stale OID and retry */
        if (lockmode != NoLock)
            UnlockSharedObject(DatabaseRelationId,
                               dbOid, 0, lockmode);
    }

    table_close(relation, AccessShareLock);

    return result;
}

/* Check if current user has createdb privileges */
static bool
have_createdb_privilege(void)
{
    bool        result = false;
    HeapTuple   utup;

    /* Superusers can always do everything */
    if (superuser())
        return true;

    utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
    if (HeapTupleIsValid(utup))
    {
        result = ((Form_pg_authid) GETSTRUCT(utup))->rolcreatedb;
        ReleaseSysCache(utup);
    }
    return result;
}

/*
 * Remove tablespace directories
 *
 * We don't know what tablespaces db_id is using, so iterate through all
 * tablespaces removing /db_id
 */
static void
remove_dbtablespaces(Oid db_id)
{
    Relation    rel;
    TableScanDesc scan;
    HeapTuple   tuple;
    List       *ltblspc = NIL;
    ListCell   *cell;
    int         ntblspc;
    int         i;
    Oid        *tablespace_ids;

    rel = table_open(TableSpaceRelationId, AccessShareLock);
    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
        char       *dstpath;
        struct stat st;

        /* Don't mess with the global tablespace */
        if (dsttablespace == GLOBALTABLESPACE_OID)
            continue;

        dstpath = GetDatabasePath(db_id, dsttablespace);

        if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode))
        {
            /* Assume we can ignore it */
            pfree(dstpath);
            continue;
        }

        if (!rmtree(dstpath, true))
            ereport(WARNING,
                    (errmsg("some useless files may be left behind in old database directory \"%s\"",
                            dstpath)));

        /* Remember which tablespaces we actually touched, for the WAL record */
        ltblspc = lappend_oid(ltblspc, dsttablespace);
        pfree(dstpath);
    }

    ntblspc = list_length(ltblspc);
    if (ntblspc == 0)
    {
        /* Nothing removed, so nothing to log */
        table_endscan(scan);
        table_close(rel, AccessShareLock);
        return;
    }

    tablespace_ids = (Oid *) palloc(ntblspc * sizeof(Oid));
    i = 0;
    foreach(cell, ltblspc)
        tablespace_ids[i++] = lfirst_oid(cell);

    /* Record the filesystem change in XLOG */
    {
        xl_dbase_drop_rec xlrec;

        xlrec.db_id = db_id;
        xlrec.ntablespaces = ntblspc;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, MinSizeOfDbaseDropRec);
        /* variable-length tablespace OID array follows the fixed header */
        XLogRegisterData((char *) tablespace_ids, ntblspc * sizeof(Oid));

        (void) XLogInsert(RM_DBASE_ID,
                          XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
    }

    list_free(ltblspc);
    pfree(tablespace_ids);

    table_endscan(scan);
    table_close(rel, AccessShareLock);
}

/*
 * Check for existing files that conflict with a proposed new DB OID;
 * return true if there are any
 *
 * If there were a subdirectory in any tablespace matching the proposed new
 * OID, we'd get a create failure due to the duplicate name ... and then we'd
 * try to remove that already-existing subdirectory during the cleanup in
 * remove_dbtablespaces.  Nuking existing files seems like a bad idea, so
 * instead we make this extra check before settling on the OID of the new
 * database.  This exactly parallels what GetNewRelFileNode() does for table
 * relfilenode values.
 */
static bool
check_db_file_conflict(Oid db_id)
{
    bool        result = false;
    Relation    rel;
    TableScanDesc scan;
    HeapTuple   tuple;

    rel = table_open(TableSpaceRelationId, AccessShareLock);
    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
        char       *dstpath;
        struct stat st;

        /* Don't mess with the global tablespace */
        if (dsttablespace == GLOBALTABLESPACE_OID)
            continue;

        dstpath = GetDatabasePath(db_id, dsttablespace);

        /* lstat: any existing entry (file, dir, symlink) is a conflict */
        if (lstat(dstpath, &st) == 0)
        {
            /* Found a conflicting file (or directory, whatever) */
            pfree(dstpath);
            result = true;
            break;
        }

        pfree(dstpath);
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);

    return result;
}

/*
 * Issue a suitable errdetail message for a busy database
 */
static int
errdetail_busy_db(int notherbackends, int npreparedxacts)
{
    if (notherbackends > 0 && npreparedxacts > 0)

        /*
         * We don't deal with singular versus plural here, since gettext
         * doesn't support multiple plurals in one string.
         */
        errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.",
                  notherbackends, npreparedxacts);
    else if (notherbackends > 0)
        errdetail_plural("There is %d other session using the database.",
                         "There are %d other sessions using the database.",
                         notherbackends,
                         notherbackends);
    else
        errdetail_plural("There is %d prepared transaction using the database.",
                         "There are %d prepared transactions using the database.",
                         npreparedxacts,
                         npreparedxacts);
    return 0;                   /* just to keep ereport macro happy */
}

/*
 * get_database_oid - given a database name, look up the OID
 *
 * If missing_ok is false, throw an error if database name not found.  If
 * true, just return InvalidOid.
 */
Oid
get_database_oid(const char *dbname, bool missing_ok)
{
    Relation    pg_database;
    ScanKeyData entry[1];
    SysScanDesc scan;
    HeapTuple   dbtuple;
    Oid         oid;

    /*
     * There's no syscache for pg_database indexed by name, so we must look
     * the hard way.
     */
    pg_database = table_open(DatabaseRelationId, AccessShareLock);
    ScanKeyInit(&entry[0],
                Anum_pg_database_datname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(dbname));
    scan = systable_beginscan(pg_database, DatabaseNameIndexId, true,
                              NULL, 1, entry);

    dbtuple = systable_getnext(scan);

    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(dbtuple))
        oid = ((Form_pg_database) GETSTRUCT(dbtuple))->oid;
    else
        oid = InvalidOid;

    systable_endscan(scan);
    table_close(pg_database, AccessShareLock);

    if (!OidIsValid(oid) && !missing_ok)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_DATABASE),
                 errmsg("database \"%s\" does not exist",
                        dbname)));

    return oid;
}


/*
 * get_database_name - given a database OID, look up the name
 *
 * Returns a palloc'd string, or NULL if no such database.
 */
char *
get_database_name(Oid dbid)
{
    HeapTuple   dbtuple;
    char       *result;

    dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
    if (HeapTupleIsValid(dbtuple))
    {
        result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname));
        ReleaseSysCache(dbtuple);
    }
    else
        result = NULL;

    return result;
}


/*
 * While dropping a database the pg_database row is marked invalid, but the
 * catalog contents still exist.  Connections to such a database are not
 * allowed.
+ */ +bool +database_is_invalid_form(Form_pg_database datform) +{ + return datform->datconnlimit == DATCONNLIMIT_INVALID_DB; +} + + +/* + * Convenience wrapper around database_is_invalid_form() + */ +bool +database_is_invalid_oid(Oid dboid) +{ + HeapTuple dbtup; + Form_pg_database dbform; + bool invalid; + + dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dboid)); + if (!HeapTupleIsValid(dbtup)) + elog(ERROR, "cache lookup failed for database %u", dboid); + dbform = (Form_pg_database) GETSTRUCT(dbtup); + + invalid = database_is_invalid_form(dbform); + + ReleaseSysCache(dbtup); + + return invalid; +} + + +/* + * recovery_create_dbdir() + * + * During recovery, there's a case where we validly need to recover a missing + * tablespace directory so that recovery can continue. This happens when + * recovery wants to create a database but the holding tablespace has been + * removed before the server stopped. Since we expect that the directory will + * be gone before reaching recovery consistency, and we have no knowledge about + * the tablespace other than its OID here, we create a real directory under + * pg_tblspc here instead of restoring the symlink. + * + * If only_tblspc is true, then the requested directory must be in pg_tblspc/ + */ +static void +recovery_create_dbdir(char *path, bool only_tblspc) +{ + struct stat st; + + Assert(RecoveryInProgress()); + + if (stat(path, &st) == 0) + return; + + if (only_tblspc && strstr(path, "pg_tblspc/") == NULL) + elog(PANIC, "requested to created invalid directory: %s", path); + + if (reachedConsistency && !allow_in_place_tablespaces) + ereport(PANIC, + errmsg("missing directory \"%s\"", path)); + + elog(reachedConsistency ? 
WARNING : DEBUG1, + "creating missing directory: %s", path); + + if (pg_mkdir_p(path, pg_dir_create_mode) != 0) + ereport(PANIC, + errmsg("could not create missing directory \"%s\": %m", path)); +} + + +/* + * DATABASE resource manager's routines + */ +void +dbase_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + /* Backup blocks are not used in dbase records */ + Assert(!XLogRecHasAnyBlockRefs(record)); + + if (info == XLOG_DBASE_CREATE_FILE_COPY) + { + xl_dbase_create_file_copy_rec *xlrec = + (xl_dbase_create_file_copy_rec *) XLogRecGetData(record); + char *src_path; + char *dst_path; + char *parent_path; + struct stat st; + + src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id); + dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id); + + /* + * Our theory for replaying a CREATE is to forcibly drop the target + * subdirectory if present, then re-copy the source data. This may be + * more work than needed, but it is simple to implement. + */ + if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode)) + { + if (!rmtree(dst_path, true)) + /* If this failed, copydir() below is going to error. */ + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dst_path))); + } + + /* + * If the parent of the target path doesn't exist, create it now. This + * enables us to create the target underneath later. Note that if + * the database dir is not in a tablespace, the parent will always + * exist, so this never runs in that case. + */ + parent_path = pstrdup(dst_path); + get_parent_directory(parent_path); + if (stat(parent_path, &st) < 0) + { + if (errno != ENOENT) + ereport(FATAL, + errmsg("could not stat directory \"%s\": %m", + dst_path)); + + recovery_create_dbdir(parent_path, true); + } + pfree(parent_path); + + /* + * There's a case where the copy source directory is missing for the + * same reason above. 
Create the emtpy source directory so that + * copydir below doesn't fail. The directory will be dropped soon by + * recovery. + */ + if (stat(src_path, &st) < 0 && errno == ENOENT) + recovery_create_dbdir(src_path, false); + + /* + * Force dirty buffers out to disk, to ensure source database is + * up-to-date for the copy. + */ + FlushDatabaseBuffers(xlrec->src_db_id); + + /* Close all sgmr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + /* + * Copy this subdirectory to the new location + * + * We don't need to copy subdirectories + */ + copydir(src_path, dst_path, false); + + pfree(src_path); + pfree(dst_path); + } + else if (info == XLOG_DBASE_CREATE_WAL_LOG) + { + xl_dbase_create_wal_log_rec *xlrec = + (xl_dbase_create_wal_log_rec *) XLogRecGetData(record); + char *dbpath; + char *parent_path; + + dbpath = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id); + + /* create the parent directory if needed and valid */ + parent_path = pstrdup(dbpath); + get_parent_directory(parent_path); + recovery_create_dbdir(parent_path, true); + + /* Create the database directory with the version file. */ + CreateDirAndVersionFile(dbpath, xlrec->db_id, xlrec->tablespace_id, + true); + pfree(dbpath); + } + else if (info == XLOG_DBASE_DROP) + { + xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record); + char *dst_path; + int i; + + if (InHotStandby) + { + /* + * Lock database while we resolve conflicts to ensure that + * InitPostgres() cannot fully re-execute concurrently. This + * avoids backends re-connecting automatically to same database, + * which can happen in some cases. + * + * This will lock out walsenders trying to connect to db-specific + * slots for logical decoding too, so it's safe for us to drop + * slots. 
+ */ + LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock); + ResolveRecoveryConflictWithDatabase(xlrec->db_id); + } + + /* Drop any database-specific replication slots */ + ReplicationSlotsDropDBSlots(xlrec->db_id); + + /* Drop pages for this database that are in the shared buffer cache */ + DropDatabaseBuffers(xlrec->db_id); + + /* Also, clean out any fsync requests that might be pending in md.c */ + ForgetDatabaseSyncRequests(xlrec->db_id); + + /* Clean out the xlog relcache too */ + XLogDropDatabase(xlrec->db_id); + + /* Close all sgmr fds in all backends. */ + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE)); + + for (i = 0; i < xlrec->ntablespaces; i++) + { + dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]); + + /* And remove the physical files */ + if (!rmtree(dst_path, true)) + ereport(WARNING, + (errmsg("some useless files may be left behind in old database directory \"%s\"", + dst_path))); + pfree(dst_path); + } + + if (InHotStandby) + { + /* + * Release locks prior to commit. XXX There is a race condition + * here that may allow backends to reconnect, but the window for + * this is small because the gap between here and commit is mostly + * fairly small and it is unlikely that people will be dropping + * databases that we are trying to connect to anyway. + */ + UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock); + } + } + else + elog(PANIC, "dbase_redo: unknown op code %u", info); +} diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c new file mode 100644 index 0000000..1e07fa9 --- /dev/null +++ b/src/backend/commands/define.c @@ -0,0 +1,391 @@ +/*------------------------------------------------------------------------- + * + * define.c + * Support routines for various kinds of object creation. 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/define.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. + * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send procedures + * "create type": + * type + * "create operator": + * operators + * + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "nodes/makefuncs.h" +#include "parser/parse_type.h" +#include "parser/scansup.h" +#include "utils/builtins.h" + +/* + * Extract a string value (otherwise uninterpreted) from a DefElem. + */ +char * +defGetString(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return psprintf("%ld", (long) intVal(def->arg)); + case T_Float: + return castNode(Float, def->arg)->fval; + case T_Boolean: + return boolVal(def->arg) ? "true" : "false"; + case T_String: + return strVal(def->arg); + case T_TypeName: + return TypeNameToString((TypeName *) def->arg); + case T_List: + return NameListToString((List *) def->arg); + case T_A_Star: + return pstrdup("*"); + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg)); + } + return NULL; /* keep compiler quiet */ +} + +/* + * Extract a numeric value (actually double) from a DefElem. 
+ */ +double +defGetNumeric(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (double) intVal(def->arg); + case T_Float: + return floatVal(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract a boolean value from a DefElem. + */ +bool +defGetBoolean(DefElem *def) +{ + /* + * If no parameter given, assume "true" is meant. + */ + if (def->arg == NULL) + return true; + + /* + * Allow 0, 1, "true", "false", "on", "off" + */ + switch (nodeTag(def->arg)) + { + case T_Integer: + switch (intVal(def->arg)) + { + case 0: + return false; + case 1: + return true; + default: + /* otherwise, error out below */ + break; + } + break; + default: + { + char *sval = defGetString(def); + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(sval, "true") == 0) + return true; + if (pg_strcasecmp(sval, "false") == 0) + return false; + if (pg_strcasecmp(sval, "on") == 0) + return true; + if (pg_strcasecmp(sval, "off") == 0) + return false; + } + break; + } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value", + def->defname))); + return false; /* keep compiler quiet */ +} + +/* + * Extract an int32 value from a DefElem. 
+ */ +int32 +defGetInt32(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires an integer value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (int32) intVal(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires an integer value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract an int64 value from a DefElem. + */ +int64 +defGetInt64(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (int64) intVal(def->arg); + case T_Float: + + /* + * Values too large for int4 will be represented as Float + * constants by the lexer. Accept these if they are valid int8 + * strings. + */ + return DatumGetInt64(DirectFunctionCall1(int8in, + CStringGetDatum(castNode(Float, def->arg)->fval))); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract an OID value from a DefElem. + */ +Oid +defGetObjectId(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_Integer: + return (Oid) intVal(def->arg); + case T_Float: + + /* + * Values too large for int4 will be represented as Float + * constants by the lexer. Accept these if they are valid OID + * strings. 
+ */ + return DatumGetObjectId(DirectFunctionCall1(oidin, + CStringGetDatum(castNode(Float, def->arg)->fval))); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a numeric value", + def->defname))); + } + return 0; /* keep compiler quiet */ +} + +/* + * Extract a possibly-qualified name (as a List of Strings) from a DefElem. + */ +List * +defGetQualifiedName(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_TypeName: + return ((TypeName *) def->arg)->names; + case T_List: + return (List *) def->arg; + case T_String: + /* Allow quoted name for backwards compatibility */ + return list_make1(def->arg); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("argument of %s must be a name", + def->defname))); + } + return NIL; /* keep compiler quiet */ +} + +/* + * Extract a TypeName from a DefElem. + * + * Note: we do not accept a List arg here, because the parser will only + * return a bare List when the name looks like an operator name. + */ +TypeName * +defGetTypeName(DefElem *def) +{ + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + switch (nodeTag(def->arg)) + { + case T_TypeName: + return (TypeName *) def->arg; + case T_String: + /* Allow quoted typename for backwards compatibility */ + return makeTypeNameFromNameList(list_make1(def->arg)); + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("argument of %s must be a type name", + def->defname))); + } + return NULL; /* keep compiler quiet */ +} + +/* + * Extract a type length indicator (either absolute bytes, or + * -1 for "variable") from a DefElem. 
 */
int
defGetTypeLength(DefElem *def)
{
    if (def->arg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("%s requires a parameter",
                        def->defname)));
    /*
     * Note the control flow: several cases deliberately break out of the
     * switch (rather than returning or erroring) so that unrecognized
     * values reach the shared "invalid argument" error at the bottom.
     */
    switch (nodeTag(def->arg))
    {
        case T_Integer:
            return intVal(def->arg);
        case T_Float:
            /* fractional lengths make no sense; demand an integer */
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("%s requires an integer value",
                            def->defname)));
            break;
        case T_String:
            if (pg_strcasecmp(strVal(def->arg), "variable") == 0)
                return -1;      /* variable length */
            break;
        case T_TypeName:
            /* cope if grammar chooses to believe "variable" is a typename */
            if (pg_strcasecmp(TypeNameToString((TypeName *) def->arg),
                              "variable") == 0)
                return -1;      /* variable length */
            break;
        case T_List:
            /* must be an operator name */
            break;
        default:
            elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
    }
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("invalid argument for %s: \"%s\"",
                    def->defname, defGetString(def))));
    return 0;                   /* keep compiler quiet */
}

/*
 * Extract a list of string values (otherwise uninterpreted) from a DefElem.
 *
 * The argument must be a List whose members are all String nodes; the List
 * itself is returned (not copied).
 */
List *
defGetStringList(DefElem *def)
{
    ListCell   *cell;

    if (def->arg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("%s requires a parameter",
                        def->defname)));
    if (nodeTag(def->arg) != T_List)
        elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));

    /* Verify every member is a String before handing the list back */
    foreach(cell, (List *) def->arg)
    {
        Node       *str = (Node *) lfirst(cell);

        if (!IsA(str, String))
            elog(ERROR, "unexpected node type in name list: %d",
                 (int) nodeTag(str));
    }

    return (List *) def->arg;
}

/*
 * Raise an error about a conflicting DefElem.
 */
void
errorConflictingDefElem(DefElem *defel, ParseState *pstate)
{
	ereport(ERROR,
			errcode(ERRCODE_SYNTAX_ERROR),
			errmsg("conflicting or redundant options"),
			parser_errposition(pstate, defel->location));
}
diff --git a/src/backend/commands/discard.c b/src/backend/commands/discard.c
new file mode 100644
index 0000000..c583539
--- /dev/null
+++ b/src/backend/commands/discard.c
@@ -0,0 +1,78 @@
/*-------------------------------------------------------------------------
 *
 * discard.c
 *	  The implementation of the DISCARD command
 *
 * Copyright (c) 1996-2022, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/discard.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/xact.h"
#include "catalog/namespace.h"
#include "commands/async.h"
#include "commands/discard.h"
#include "commands/prepare.h"
#include "commands/sequence.h"
#include "utils/guc.h"
#include "utils/portal.h"

static void DiscardAll(bool isTopLevel);

/*
 * DISCARD { ALL | SEQUENCES | TEMP | PLANS }
 *
 * Dispatch to the appropriate reset action for each DISCARD variant.
 */
void
DiscardCommand(DiscardStmt *stmt, bool isTopLevel)
{
	switch (stmt->target)
	{
		case DISCARD_ALL:
			DiscardAll(isTopLevel);
			break;

		case DISCARD_PLANS:
			ResetPlanCache();
			break;

		case DISCARD_SEQUENCES:
			ResetSequenceCaches();
			break;

		case DISCARD_TEMP:
			ResetTempTableNamespace();
			break;

		default:
			elog(ERROR, "unrecognized DISCARD target: %d", stmt->target);
	}
}

/*
 * DISCARD ALL: reset the session to its initial state, performing the
 * union of the individual DISCARD actions plus GUC/auth/portal resets.
 */
static void
DiscardAll(bool isTopLevel)
{
	/*
	 * Disallow DISCARD ALL in a transaction block. This is arguably
	 * inconsistent (we don't make a similar check in the command sequence
	 * that DISCARD ALL is equivalent to), but the idea is to catch mistakes:
	 * DISCARD ALL inside a transaction block would leave the transaction
	 * still uncommitted.
	 */
	PreventInTransactionBlock(isTopLevel, "DISCARD ALL");

	/* Closing portals might run user-defined code, so do that first. */
	PortalHashTableDeleteAll();
	SetPGVariable("session_authorization", NIL, false);
	ResetAllOptions();
	DropAllPreparedStatements();
	Async_UnlistenAll();
	LockReleaseAll(USER_LOCKMETHOD, true);
	ResetPlanCache();
	ResetTempTableNamespace();
	ResetSequenceCaches();
}
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
new file mode 100644
index 0000000..c9b5732
--- /dev/null
+++ b/src/backend/commands/dropcmds.c
@@ -0,0 +1,493 @@
/*-------------------------------------------------------------------------
 *
 * dropcmds.c
 *	  handle various "DROP" operations
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/dropcmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/objectaddress.h"
#include "catalog/pg_class.h"
#include "catalog/pg_proc.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_type.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"


static void does_not_exist_skipping(ObjectType objtype,
									Node *object);
static bool owningrel_does_not_exist_skipping(List *object,
											  const char **msg, char **name);
static bool schema_does_not_exist_skipping(List *object,
										   const char **msg, char **name);
static bool type_in_list_does_not_exist_skipping(List *typenames,
												 const char **msg, char **name);


/*
 * Drop one or more objects.
 *
 * We don't currently handle all object types here.
 Relations, for example,
 * require special handling, because (for example) indexes have additional
 * locking requirements.
 *
 * We look up all the objects first, and then delete them in a single
 * performMultipleDeletions() call.  This avoids unnecessary DROP RESTRICT
 * errors if there are dependencies between them.
 */
void
RemoveObjects(DropStmt *stmt)
{
	ObjectAddresses *objects;
	ListCell   *cell1;

	objects = new_object_addresses();

	foreach(cell1, stmt->objects)
	{
		ObjectAddress address;
		Node	   *object = lfirst(cell1);
		Relation	relation = NULL;
		Oid			namespaceId;

		/*
		 * Get an ObjectAddress for the object.  AccessExclusiveLock is
		 * requested because the object is about to be dropped.
		 */
		address = get_object_address(stmt->removeType,
									 object,
									 &relation,
									 AccessExclusiveLock,
									 stmt->missing_ok);

		/*
		 * Issue NOTICE if supplied object was not found.  Note this is only
		 * relevant in the missing_ok case, because otherwise
		 * get_object_address would have thrown an error.
		 */
		if (!OidIsValid(address.objectId))
		{
			Assert(stmt->missing_ok);
			does_not_exist_skipping(stmt->removeType, object);
			continue;
		}

		/*
		 * Although COMMENT ON FUNCTION, SECURITY LABEL ON FUNCTION, etc. are
		 * happy to operate on an aggregate as on any other function, we have
		 * historically not allowed this for DROP FUNCTION.
		 */
		if (stmt->removeType == OBJECT_FUNCTION)
		{
			if (get_func_prokind(address.objectId) == PROKIND_AGGREGATE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("\"%s\" is an aggregate function",
								NameListToString(castNode(ObjectWithArgs, object)->objname)),
						 errhint("Use DROP AGGREGATE to drop aggregate functions.")));
		}

		/* Check permissions. */
		namespaceId = get_object_namespace(&address);
		if (!OidIsValid(namespaceId) ||
			!pg_namespace_ownercheck(namespaceId, GetUserId()))
			check_object_ownership(GetUserId(), stmt->removeType, address,
								   object, relation);

		/*
		 * Make note if a temporary namespace has been accessed in this
		 * transaction.
		 */
		if (OidIsValid(namespaceId) && isTempNamespace(namespaceId))
			MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;

		/* Release any relcache reference count, but keep lock until commit. */
		if (relation)
			table_close(relation, NoLock);

		add_exact_object_address(&address, objects);
	}

	/* Here we really delete them. */
	performMultipleDeletions(objects, stmt->behavior, 0);

	free_object_addresses(objects);
}

/*
 * owningrel_does_not_exist_skipping
 *		Subroutine for RemoveObjects
 *
 * After determining that a specification for a rule or trigger returns that
 * the specified object does not exist, test whether its owning relation, and
 * its schema, exist or not; if they do, return false --- the trigger or rule
 * itself is missing instead.  If the owning relation or its schema do not
 * exist, fill the error message format string and name, and return true.
 */
static bool
owningrel_does_not_exist_skipping(List *object, const char **msg, char **name)
{
	List	   *parent_object;
	RangeVar   *parent_rel;

	/* Strip the final name component, leaving the owning relation's name. */
	parent_object = list_truncate(list_copy(object),
								  list_length(object) - 1);

	if (schema_does_not_exist_skipping(parent_object, msg, name))
		return true;

	parent_rel = makeRangeVarFromNameList(parent_object);

	if (!OidIsValid(RangeVarGetRelid(parent_rel, NoLock, true)))
	{
		*msg = gettext_noop("relation \"%s\" does not exist, skipping");
		*name = NameListToString(parent_object);

		return true;
	}

	return false;
}

/*
 * schema_does_not_exist_skipping
 *		Subroutine for RemoveObjects
 *
 * After determining that a specification for a schema-qualifiable object
 * refers to an object that does not exist, test whether the specified schema
 * exists or not.  If no schema was specified, or if the schema does exist,
 * return false -- the object itself is missing instead.  If the specified
 * schema does not exist, fill the error message format string and the
 * specified schema name, and return true.
+ */ +static bool +schema_does_not_exist_skipping(List *object, const char **msg, char **name) +{ + RangeVar *rel; + + rel = makeRangeVarFromNameList(object); + + if (rel->schemaname != NULL && + !OidIsValid(LookupNamespaceNoError(rel->schemaname))) + { + *msg = gettext_noop("schema \"%s\" does not exist, skipping"); + *name = rel->schemaname; + + return true; + } + + return false; +} + +/* + * type_in_list_does_not_exist_skipping + * Subroutine for RemoveObjects + * + * After determining that a specification for a function, cast, aggregate or + * operator returns that the specified object does not exist, test whether the + * involved datatypes, and their schemas, exist or not; if they do, return + * false --- the original object itself is missing instead. If the datatypes + * or schemas do not exist, fill the error message format string and the + * missing name, and return true. + * + * First parameter is a list of TypeNames. + */ +static bool +type_in_list_does_not_exist_skipping(List *typenames, const char **msg, + char **name) +{ + ListCell *l; + + foreach(l, typenames) + { + TypeName *typeName = lfirst_node(TypeName, l); + + if (typeName != NULL) + { + if (!OidIsValid(LookupTypeNameOid(NULL, typeName, true))) + { + /* type doesn't exist, try to find why */ + if (schema_does_not_exist_skipping(typeName->names, msg, name)) + return true; + + *msg = gettext_noop("type \"%s\" does not exist, skipping"); + *name = TypeNameToString(typeName); + + return true; + } + } + } + + return false; +} + +/* + * does_not_exist_skipping + * Subroutine for RemoveObjects + * + * Generate a NOTICE stating that the named object was not found, and is + * being skipped. This is only relevant when "IF EXISTS" is used; otherwise, + * get_object_address() in RemoveObjects would have thrown an ERROR. 
 */
static void
does_not_exist_skipping(ObjectType objtype, Node *object)
{
	const char *msg = NULL;
	char	   *name = NULL;
	char	   *args = NULL;	/* if set, supplies the second %s value */

	switch (objtype)
	{
		case OBJECT_ACCESS_METHOD:
			msg = gettext_noop("access method \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_TYPE:
		case OBJECT_DOMAIN:
			{
				TypeName   *typ = castNode(TypeName, object);

				if (!schema_does_not_exist_skipping(typ->names, &msg, &name))
				{
					msg = gettext_noop("type \"%s\" does not exist, skipping");
					name = TypeNameToString(typ);
				}
			}
			break;
		case OBJECT_COLLATION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("collation \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_CONVERSION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("conversion \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_SCHEMA:
			msg = gettext_noop("schema \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_STATISTIC_EXT:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("statistics object \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSPARSER:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search parser \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSDICTIONARY:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search dictionary \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSTEMPLATE:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search template \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_TSCONFIGURATION:
			if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("text search configuration \"%s\" does not exist, skipping");
				name = NameListToString(castNode(List, object));
			}
			break;
		case OBJECT_EXTENSION:
			msg = gettext_noop("extension \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_FUNCTION:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("function %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_PROCEDURE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("procedure %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_ROUTINE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("routine %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_AGGREGATE:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("aggregate %s(%s) does not exist, skipping");
					name = NameListToString(owa->objname);
					args = TypeNameListToString(owa->objargs);
				}
				break;
			}
		case OBJECT_OPERATOR:
			{
				ObjectWithArgs *owa = castNode(ObjectWithArgs, object);

				if (!schema_does_not_exist_skipping(owa->objname, &msg, &name) &&
					!type_in_list_does_not_exist_skipping(owa->objargs, &msg, &name))
				{
					msg = gettext_noop("operator %s does not exist, skipping");
					name = NameListToString(owa->objname);
				}
				break;
			}
		case OBJECT_LANGUAGE:
			msg = gettext_noop("language \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_CAST:
			{
				if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name) &&
					!type_in_list_does_not_exist_skipping(list_make1(lsecond(castNode(List, object))), &msg, &name))
				{
					/* XXX quote or no quote? */
					msg = gettext_noop("cast from type %s to type %s does not exist, skipping");
					name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
					args = TypeNameToString(lsecond_node(TypeName, castNode(List, object)));
				}
			}
			break;
		case OBJECT_TRANSFORM:
			if (!type_in_list_does_not_exist_skipping(list_make1(linitial(castNode(List, object))), &msg, &name))
			{
				msg = gettext_noop("transform for type %s language \"%s\" does not exist, skipping");
				name = TypeNameToString(linitial_node(TypeName, castNode(List, object)));
				args = strVal(lsecond(castNode(List, object)));
			}
			break;
		case OBJECT_TRIGGER:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("trigger \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_POLICY:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("policy \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_EVENT_TRIGGER:
			msg = gettext_noop("event trigger \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_RULE:
			if (!owningrel_does_not_exist_skipping(castNode(List, object), &msg, &name))
			{
				msg = gettext_noop("rule \"%s\" for relation \"%s\" does not exist, skipping");
				name = strVal(llast(castNode(List, object)));
				args = NameListToString(list_truncate(list_copy(castNode(List, object)),
													  list_length(castNode(List, object)) - 1));
			}
			break;
		case OBJECT_FDW:
			msg = gettext_noop("foreign-data wrapper \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_FOREIGN_SERVER:
			msg = gettext_noop("server \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		case OBJECT_OPCLASS:
			{
				/* first list element is the access method name */
				List	   *opcname = list_copy_tail(castNode(List, object), 1);

				if (!schema_does_not_exist_skipping(opcname, &msg, &name))
				{
					msg = gettext_noop("operator class \"%s\" does not exist for access method \"%s\", skipping");
					name = NameListToString(opcname);
					args = strVal(linitial(castNode(List, object)));
				}
			}
			break;
		case OBJECT_OPFAMILY:
			{
				/* first list element is the access method name */
				List	   *opfname = list_copy_tail(castNode(List, object), 1);

				if (!schema_does_not_exist_skipping(opfname, &msg, &name))
				{
					msg = gettext_noop("operator family \"%s\" does not exist for access method \"%s\", skipping");
					name = NameListToString(opfname);
					args = strVal(linitial(castNode(List, object)));
				}
			}
			break;
		case OBJECT_PUBLICATION:
			msg = gettext_noop("publication \"%s\" does not exist, skipping");
			name = strVal(object);
			break;
		default:
			elog(ERROR, "unrecognized object type: %d", (int) objtype);
			break;
	}

	if (!args)
		ereport(NOTICE, (errmsg(msg, name)));
	else
		ereport(NOTICE, (errmsg(msg, name, args)));
}
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
new file mode 100644
index 0000000..356aac4
--- /dev/null
+++ b/src/backend/commands/event_trigger.c
@@ -0,0 +1,2182 @@
/*-------------------------------------------------------------------------
 *
 * event_trigger.c
 *	  PostgreSQL EVENT TRIGGER support code.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/commands/event_trigger.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_event_trigger.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "commands/event_trigger.h"
#include "commands/extension.h"
#include "commands/trigger.h"
#include "funcapi.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "parser/parse_func.h"
#include "pgstat.h"
#include "tcop/deparse_utility.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/evtcache.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"

/* Per-query state tracked while event triggers are active */
typedef struct EventTriggerQueryState
{
	/* memory context for this state's objects */
	MemoryContext cxt;

	/* sql_drop */
	slist_head	SQLDropList;
	bool		in_sql_drop;

	/* table_rewrite */
	Oid			table_rewrite_oid;
								/* InvalidOid, or set for table_rewrite
								 * event */
	int			table_rewrite_reason;	/* AT_REWRITE reason */

	/* Support for command collection */
	bool		commandCollectionInhibited;
	CollectedCommand *currentCommand;
	List	   *commandList;	/* list of CollectedCommand; see
								 * deparse_utility.h */
	struct EventTriggerQueryState *previous;
} EventTriggerQueryState;

static EventTriggerQueryState *currentEventTriggerState = NULL;

/* Support for dropped objects */
typedef struct SQLDropObject
{
	ObjectAddress address;
	const char *schemaname;
	const char *objname;
	const char *objidentity;
	const char *objecttype;
	List	   *addrnames;
	List	   *addrargs;
	bool		original;
	bool		normal;
	bool		istemp;
	slist_node	next;
} SQLDropObject;

static void AlterEventTriggerOwner_internal(Relation rel,
											HeapTuple tup,
											Oid newOwnerId);
static void error_duplicate_filter_variable(const char *defname);
static Datum filter_list_to_array(List *filterlist);
static Oid	insert_event_trigger_tuple(const char *trigname, const char *eventname,
									   Oid evtOwner, Oid funcoid, List *tags);
static void validate_ddl_tags(const char *filtervar, List *taglist);
static void validate_table_rewrite_tags(const char *filtervar, List *taglist);
static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata);
static const char *stringify_grant_objtype(ObjectType objtype);
static const char *stringify_adefprivs_objtype(ObjectType objtype);

/*
 * Create an event trigger.
 */
Oid
CreateEventTrigger(CreateEventTrigStmt *stmt)
{
	HeapTuple	tuple;
	Oid			funcoid;
	Oid			funcrettype;
	Oid			evtowner = GetUserId();
	ListCell   *lc;
	List	   *tags = NULL;	/* filter tags from the WHEN clause, if any */

	/*
	 * It would be nice to allow database owners or even regular users to do
	 * this, but there are obvious privilege escalation risks which would have
	 * to somehow be plugged first.
	 */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create event trigger \"%s\"",
						stmt->trigname),
				 errhint("Must be superuser to create an event trigger.")));

	/* Validate event name. */
	if (strcmp(stmt->eventname, "ddl_command_start") != 0 &&
		strcmp(stmt->eventname, "ddl_command_end") != 0 &&
		strcmp(stmt->eventname, "sql_drop") != 0 &&
		strcmp(stmt->eventname, "table_rewrite") != 0)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("unrecognized event name \"%s\"",
						stmt->eventname)));

	/* Validate filter conditions; only "tag" is recognized. */
	foreach(lc, stmt->whenclause)
	{
		DefElem    *def = (DefElem *) lfirst(lc);

		if (strcmp(def->defname, "tag") == 0)
		{
			if (tags != NULL)
				error_duplicate_filter_variable(def->defname);
			tags = (List *) def->arg;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized filter variable \"%s\"", def->defname)));
	}

	/* Validate tag list, if any. */
	if ((strcmp(stmt->eventname, "ddl_command_start") == 0 ||
		 strcmp(stmt->eventname, "ddl_command_end") == 0 ||
		 strcmp(stmt->eventname, "sql_drop") == 0)
		&& tags != NULL)
		validate_ddl_tags("tag", tags);
	else if (strcmp(stmt->eventname, "table_rewrite") == 0
			 && tags != NULL)
		validate_table_rewrite_tags("tag", tags);

	/*
	 * Give user a nice error message if an event trigger of the same name
	 * already exists.
	 */
	tuple = SearchSysCache1(EVENTTRIGGERNAME, CStringGetDatum(stmt->trigname));
	if (HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("event trigger \"%s\" already exists",
						stmt->trigname)));

	/* Find and validate the trigger function. */
	funcoid = LookupFuncName(stmt->funcname, 0, NULL, false);
	funcrettype = get_func_rettype(funcoid);
	if (funcrettype != EVENT_TRIGGEROID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("function %s must return type %s",
						NameListToString(stmt->funcname), "event_trigger")));

	/* Insert catalog entries. */
	return insert_event_trigger_tuple(stmt->trigname, stmt->eventname,
									  evtowner, funcoid, tags);
}

/*
 * Validate DDL command tags.
 */
static void
validate_ddl_tags(const char *filtervar, List *taglist)
{
	ListCell   *lc;

	foreach(lc, taglist)
	{
		const char *tagstr = strVal(lfirst(lc));
		CommandTag	commandTag = GetCommandTagEnum(tagstr);

		if (commandTag == CMDTAG_UNKNOWN)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("filter value \"%s\" not recognized for filter variable \"%s\"",
							tagstr, filtervar)));
		if (!command_tag_event_trigger_ok(commandTag))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			/* translator: %s represents an SQL statement name */
					 errmsg("event triggers are not supported for %s",
							tagstr)));
	}
}

/*
 * Validate DDL command tags for event table_rewrite.
 */
static void
validate_table_rewrite_tags(const char *filtervar, List *taglist)
{
	ListCell   *lc;

	foreach(lc, taglist)
	{
		const char *tagstr = strVal(lfirst(lc));
		CommandTag	commandTag = GetCommandTagEnum(tagstr);

		if (!command_tag_table_rewrite_ok(commandTag))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			/* translator: %s represents an SQL statement name */
					 errmsg("event triggers are not supported for %s",
							tagstr)));
	}
}

/*
 * Complain about a duplicate filter variable.
 */
static void
error_duplicate_filter_variable(const char *defname)
{
	ereport(ERROR,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 errmsg("filter variable \"%s\" specified more than once",
					defname)));
}

/*
 * Insert the new pg_event_trigger row and record dependencies.
+ */ +static Oid +insert_event_trigger_tuple(const char *trigname, const char *eventname, Oid evtOwner, + Oid funcoid, List *taglist) +{ + Relation tgrel; + Oid trigoid; + HeapTuple tuple; + Datum values[Natts_pg_trigger]; + bool nulls[Natts_pg_trigger]; + NameData evtnamedata, + evteventdata; + ObjectAddress myself, + referenced; + + /* Open pg_event_trigger. */ + tgrel = table_open(EventTriggerRelationId, RowExclusiveLock); + + /* Build the new pg_trigger tuple. */ + trigoid = GetNewOidWithIndex(tgrel, EventTriggerOidIndexId, + Anum_pg_event_trigger_oid); + values[Anum_pg_event_trigger_oid - 1] = ObjectIdGetDatum(trigoid); + memset(nulls, false, sizeof(nulls)); + namestrcpy(&evtnamedata, trigname); + values[Anum_pg_event_trigger_evtname - 1] = NameGetDatum(&evtnamedata); + namestrcpy(&evteventdata, eventname); + values[Anum_pg_event_trigger_evtevent - 1] = NameGetDatum(&evteventdata); + values[Anum_pg_event_trigger_evtowner - 1] = ObjectIdGetDatum(evtOwner); + values[Anum_pg_event_trigger_evtfoid - 1] = ObjectIdGetDatum(funcoid); + values[Anum_pg_event_trigger_evtenabled - 1] = + CharGetDatum(TRIGGER_FIRES_ON_ORIGIN); + if (taglist == NIL) + nulls[Anum_pg_event_trigger_evttags - 1] = true; + else + values[Anum_pg_event_trigger_evttags - 1] = + filter_list_to_array(taglist); + + /* Insert heap tuple. */ + tuple = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleInsert(tgrel, tuple); + heap_freetuple(tuple); + + /* Depend on owner. */ + recordDependencyOnOwner(EventTriggerRelationId, trigoid, evtOwner); + + /* Depend on event trigger function. */ + myself.classId = EventTriggerRelationId; + myself.objectId = trigoid; + myself.objectSubId = 0; + referenced.classId = ProcedureRelationId; + referenced.objectId = funcoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* Depend on extension, if any. 
*/ + recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new event trigger */ + InvokeObjectPostCreateHook(EventTriggerRelationId, trigoid, 0); + + /* Close pg_event_trigger. */ + table_close(tgrel, RowExclusiveLock); + + return trigoid; +} + +/* + * In the parser, a clause like WHEN tag IN ('cmd1', 'cmd2') is represented + * by a DefElem whose value is a List of String nodes; in the catalog, we + * store the list of strings as a text array. This function transforms the + * former representation into the latter one. + * + * For cleanliness, we store command tags in the catalog as text. It's + * possible (although not currently anticipated) that we might have + * a case-sensitive filter variable in the future, in which case this would + * need some further adjustment. + */ +static Datum +filter_list_to_array(List *filterlist) +{ + ListCell *lc; + Datum *data; + int i = 0, + l = list_length(filterlist); + + data = (Datum *) palloc(l * sizeof(Datum)); + + foreach(lc, filterlist) + { + const char *value = strVal(lfirst(lc)); + char *result, + *p; + + result = pstrdup(value); + for (p = result; *p; p++) + *p = pg_ascii_toupper((unsigned char) *p); + data[i++] = PointerGetDatum(cstring_to_text(result)); + pfree(result); + } + + return PointerGetDatum(construct_array(data, l, TEXTOID, + -1, false, TYPALIGN_INT)); +} + +/* + * ALTER EVENT TRIGGER foo ENABLE|DISABLE|ENABLE ALWAYS|REPLICA + */ +Oid +AlterEventTrigger(AlterEventTrigStmt *stmt) +{ + Relation tgrel; + HeapTuple tup; + Oid trigoid; + Form_pg_event_trigger evtForm; + char tgenabled = stmt->tgenabled; + + tgrel = table_open(EventTriggerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(EVENTTRIGGERNAME, + CStringGetDatum(stmt->trigname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("event trigger \"%s\" does not exist", + stmt->trigname))); + + evtForm = (Form_pg_event_trigger) GETSTRUCT(tup); + trigoid = evtForm->oid; + + if 
(!pg_event_trigger_ownercheck(trigoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
					   stmt->trigname);

	/* tuple is a copy, so we can modify it below */
	evtForm->evtenabled = tgenabled;

	CatalogTupleUpdate(tgrel, &tup->t_self, tup);

	InvokeObjectPostAlterHook(EventTriggerRelationId,
							  trigoid, 0);

	/* clean up */
	heap_freetuple(tup);
	table_close(tgrel, RowExclusiveLock);

	return trigoid;
}

/*
 * Change event trigger's owner -- by name
 */
ObjectAddress
AlterEventTriggerOwner(const char *name, Oid newOwnerId)
{
	Oid			evtOid;
	HeapTuple	tup;
	Form_pg_event_trigger evtForm;
	Relation	rel;
	ObjectAddress address;

	rel = table_open(EventTriggerRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(EVENTTRIGGERNAME, CStringGetDatum(name));

	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger \"%s\" does not exist", name)));

	evtForm = (Form_pg_event_trigger) GETSTRUCT(tup);
	evtOid = evtForm->oid;

	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);

	ObjectAddressSet(address, EventTriggerRelationId, evtOid);

	heap_freetuple(tup);

	table_close(rel, RowExclusiveLock);

	return address;
}

/*
 * Change event trigger owner, by OID
 */
void
AlterEventTriggerOwner_oid(Oid trigOid, Oid newOwnerId)
{
	HeapTuple	tup;
	Relation	rel;

	rel = table_open(EventTriggerRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(EVENTTRIGGEROID, ObjectIdGetDatum(trigOid));

	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger with OID %u does not exist", trigOid)));

	AlterEventTriggerOwner_internal(rel, tup, newOwnerId);

	heap_freetuple(tup);

	table_close(rel, RowExclusiveLock);
}

/*
 * Internal workhorse for changing an event trigger's owner
 */
static void
AlterEventTriggerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
{
	Form_pg_event_trigger form;

	form = (Form_pg_event_trigger) GETSTRUCT(tup);

	/* Fast exit if no change is needed. */
	if (form->evtowner == newOwnerId)
		return;

	if (!pg_event_trigger_ownercheck(form->oid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EVENT_TRIGGER,
					   NameStr(form->evtname));

	/* New owner must be a superuser */
	if (!superuser_arg(newOwnerId))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to change owner of event trigger \"%s\"",
						NameStr(form->evtname)),
				 errhint("The owner of an event trigger must be a superuser.")));

	form->evtowner = newOwnerId;
	CatalogTupleUpdate(rel, &tup->t_self, tup);

	/* Update owner dependency reference */
	changeDependencyOnOwner(EventTriggerRelationId,
							form->oid,
							newOwnerId);

	InvokeObjectPostAlterHook(EventTriggerRelationId,
							  form->oid, 0);
}

/*
 * get_event_trigger_oid - Look up an event trigger by name to find its OID.
 *
 * If missing_ok is false, throw an error if trigger not found.  If
 * true, just return InvalidOid.
 */
Oid
get_event_trigger_oid(const char *trigname, bool missing_ok)
{
	Oid			oid;

	oid = GetSysCacheOid1(EVENTTRIGGERNAME, Anum_pg_event_trigger_oid,
						  CStringGetDatum(trigname));
	if (!OidIsValid(oid) && !missing_ok)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("event trigger \"%s\" does not exist", trigname)));
	return oid;
}

/*
 * Return true when we want to fire given Event Trigger and false otherwise,
 * filtering on the session replication role and the event trigger registered
 * tags matching.
 */
static bool
filter_event_trigger(CommandTag tag, EventTriggerCacheItem *item)
{
	/*
	 * Filter by session replication role, knowing that we never see disabled
	 * items down here.
	 */
	if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
	{
		/* in replica mode, origin-only triggers are skipped */
		if (item->enabled == TRIGGER_FIRES_ON_ORIGIN)
			return false;
	}
	else
	{
		/* in origin mode, replica-only triggers are skipped */
		if (item->enabled == TRIGGER_FIRES_ON_REPLICA)
			return false;
	}

	/* Filter by tags, if any were specified. */
	if (!bms_is_empty(item->tagset) && !bms_is_member(tag, item->tagset))
		return false;

	/* if we reach that point, we're not filtering out this item */
	return true;
}

/*
 * Setup for running triggers for the given event. Return value is an OID list
 * of functions to run; if there are any, trigdata is filled with an
 * appropriate EventTriggerData for them to receive.
 */
static List *
EventTriggerCommonSetup(Node *parsetree,
						EventTriggerEvent event, const char *eventstr,
						EventTriggerData *trigdata)
{
	CommandTag	tag;
	List	   *cachelist;
	ListCell   *lc;
	List	   *runlist = NIL;

	/*
	 * We want the list of command tags for which this procedure is actually
	 * invoked to match up exactly with the list that CREATE EVENT TRIGGER
	 * accepts.  This debugging cross-check will throw an error if this
	 * function is invoked for a command tag that CREATE EVENT TRIGGER won't
	 * accept.  (Unfortunately, there doesn't seem to be any simple, automated
	 * way to verify that CREATE EVENT TRIGGER doesn't accept extra stuff that
	 * never reaches this control point.)
	 *
	 * If this cross-check fails for you, you probably need to either adjust
	 * standard_ProcessUtility() not to invoke event triggers for the command
	 * type in question, or you need to adjust event_trigger_ok to accept the
	 * relevant command tag.
	 */
#ifdef USE_ASSERT_CHECKING
	{
		CommandTag	dbgtag;

		dbgtag = CreateCommandTag(parsetree);
		if (event == EVT_DDLCommandStart ||
			event == EVT_DDLCommandEnd ||
			event == EVT_SQLDrop)
		{
			if (!command_tag_event_trigger_ok(dbgtag))
				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
		}
		else if (event == EVT_TableRewrite)
		{
			if (!command_tag_table_rewrite_ok(dbgtag))
				elog(ERROR, "unexpected command tag \"%s\"", GetCommandTagName(dbgtag));
		}
	}
#endif

	/* Use cache to find triggers for this event; fast exit if none. */
	cachelist = EventCacheLookup(event);
	if (cachelist == NIL)
		return NIL;

	/* Get the command tag. */
	tag = CreateCommandTag(parsetree);

	/*
	 * Filter list of event triggers by command tag, and copy them into our
	 * memory context.  Once we start running the command triggers, or indeed
	 * once we do anything at all that touches the catalogs, an invalidation
	 * might leave cachelist pointing at garbage, so we must do this before we
	 * can do much else.
	 */
	foreach(lc, cachelist)
	{
		EventTriggerCacheItem *item = lfirst(lc);

		if (filter_event_trigger(tag, item))
		{
			/* We must plan to fire this trigger. */
			runlist = lappend_oid(runlist, item->fnoid);
		}
	}

	/* don't spend any more time on this if no functions to run */
	if (runlist == NIL)
		return NIL;

	trigdata->type = T_EventTriggerData;
	trigdata->event = eventstr;
	trigdata->parsetree = parsetree;
	trigdata->tag = tag;

	return runlist;
}

/*
 * Fire ddl_command_start triggers.
 */
void
EventTriggerDDLCommandStart(Node *parsetree)
{
	List	   *runlist;
	EventTriggerData trigdata;

	/*
	 * Event Triggers are completely disabled in standalone mode.  There are
	 * (at least) two reasons for this:
	 *
	 * 1. A sufficiently broken event trigger might not only render the
	 * database unusable, but prevent disabling itself to fix the situation.
+ * In this scenario, restarting in standalone mode provides an escape + * hatch. + * + * 2. BuildEventTriggerCache relies on systable_beginscan_ordered, and + * therefore will malfunction if pg_event_trigger's indexes are damaged. + * To allow recovery from a damaged index, we need some operating mode + * wherein event triggers are disabled. (Or we could implement + * heapscan-and-sort logic for that case, but having disaster recovery + * scenarios depend on code that's otherwise untested isn't appetizing.) + */ + if (!IsUnderPostmaster) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_DDLCommandStart, + "ddl_command_start", + &trigdata); + if (runlist == NIL) + return; + + /* Run the triggers. */ + EventTriggerInvoke(runlist, &trigdata); + + /* Cleanup. */ + list_free(runlist); + + /* + * Make sure anything the event triggers did will be visible to the main + * command. + */ + CommandCounterIncrement(); +} + +/* + * Fire ddl_command_end triggers. + */ +void +EventTriggerDDLCommandEnd(Node *parsetree) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Also do nothing if our state isn't set up, which it won't be if there + * weren't any relevant event triggers at the start of the current DDL + * command. This test might therefore seem optional, but it's important + * because EventTriggerCommonSetup might find triggers that didn't exist + * at the time the command started. Although this function itself + * wouldn't crash, the event trigger functions would presumably call + * pg_event_trigger_ddl_commands which would fail. Better to do nothing + * until the next command. 
+ */ + if (!currentEventTriggerState) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_DDLCommandEnd, "ddl_command_end", + &trigdata); + if (runlist == NIL) + return; + + /* + * Make sure anything the main command did will be visible to the event + * triggers. + */ + CommandCounterIncrement(); + + /* Run the triggers. */ + EventTriggerInvoke(runlist, &trigdata); + + /* Cleanup. */ + list_free(runlist); +} + +/* + * Fire sql_drop triggers. + */ +void +EventTriggerSQLDrop(Node *parsetree) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Use current state to determine whether this event fires at all. If + * there are no triggers for the sql_drop event, then we don't have + * anything to do here. Note that dropped object collection is disabled + * if this is the case, so even if we were to try to run, the list would + * be empty. + */ + if (!currentEventTriggerState || + slist_is_empty(&currentEventTriggerState->SQLDropList)) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_SQLDrop, "sql_drop", + &trigdata); + + /* + * Nothing to do if run list is empty. Note this typically can't happen, + * because if there are no sql_drop events, then objects-to-drop wouldn't + * have been collected in the first place and we would have quit above. + * But it could occur if event triggers were dropped partway through. + */ + if (runlist == NIL) + return; + + /* + * Make sure anything the main command did will be visible to the event + * triggers. + */ + CommandCounterIncrement(); + + /* + * Make sure pg_event_trigger_dropped_objects only works when running + * these triggers. Use PG_TRY to ensure in_sql_drop is reset even when + * one trigger fails.
(This is perhaps not necessary, as the currentState + * variable will be removed shortly by our caller, but it seems better to + * play safe.) + */ + currentEventTriggerState->in_sql_drop = true; + + /* Run the triggers. */ + PG_TRY(); + { + EventTriggerInvoke(runlist, &trigdata); + } + PG_FINALLY(); + { + currentEventTriggerState->in_sql_drop = false; + } + PG_END_TRY(); + + /* Cleanup. */ + list_free(runlist); +} + + +/* + * Fire table_rewrite triggers. + */ +void +EventTriggerTableRewrite(Node *parsetree, Oid tableOid, int reason) +{ + List *runlist; + EventTriggerData trigdata; + + /* + * See EventTriggerDDLCommandStart for a discussion about why event + * triggers are disabled in single user mode. + */ + if (!IsUnderPostmaster) + return; + + /* + * Also do nothing if our state isn't set up, which it won't be if there + * weren't any relevant event triggers at the start of the current DDL + * command. This test might therefore seem optional, but it's + * *necessary*, because EventTriggerCommonSetup might find triggers that + * didn't exist at the time the command started. + */ + if (!currentEventTriggerState) + return; + + runlist = EventTriggerCommonSetup(parsetree, + EVT_TableRewrite, + "table_rewrite", + &trigdata); + if (runlist == NIL) + return; + + /* + * Make sure pg_event_trigger_table_rewrite_oid only works when running + * these triggers. Use PG_TRY to ensure table_rewrite_oid is reset even + * when one trigger fails. (This is perhaps not necessary, as the + * currentState variable will be removed shortly by our caller, but it + * seems better to play safe.) + */ + currentEventTriggerState->table_rewrite_oid = tableOid; + currentEventTriggerState->table_rewrite_reason = reason; + + /* Run the triggers. */ + PG_TRY(); + { + EventTriggerInvoke(runlist, &trigdata); + } + PG_FINALLY(); + { + currentEventTriggerState->table_rewrite_oid = InvalidOid; + currentEventTriggerState->table_rewrite_reason = 0; + } + PG_END_TRY(); + + /* Cleanup. 
*/ + list_free(runlist); + + /* + * Make sure anything the event triggers did will be visible to the main + * command. + */ + CommandCounterIncrement(); +} + +/* + * Invoke each event trigger in a list of event triggers. + */ +static void +EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata) +{ + MemoryContext context; + MemoryContext oldcontext; + ListCell *lc; + bool first = true; + + /* Guard against stack overflow due to recursive event trigger */ + check_stack_depth(); + + /* + * Let's evaluate event triggers in their own memory context, so that any + * leaks get cleaned up promptly. + */ + context = AllocSetContextCreate(CurrentMemoryContext, + "event trigger context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(context); + + /* Call each event trigger. */ + foreach(lc, fn_oid_list) + { + LOCAL_FCINFO(fcinfo, 0); + Oid fnoid = lfirst_oid(lc); + FmgrInfo flinfo; + PgStat_FunctionCallUsage fcusage; + + elog(DEBUG1, "EventTriggerInvoke %u", fnoid); + + /* + * We want each event trigger to be able to see the results of the + * previous event trigger's action. Caller is responsible for any + * command-counter increment that is needed between the event trigger + * and anything else in the transaction. + */ + if (first) + first = false; + else + CommandCounterIncrement(); + + /* Look up the function */ + fmgr_info(fnoid, &flinfo); + + /* Call the function, passing no arguments but setting a context. */ + InitFunctionCallInfoData(*fcinfo, &flinfo, 0, + InvalidOid, (Node *) trigdata, NULL); + pgstat_init_function_usage(fcinfo, &fcusage); + FunctionCallInvoke(fcinfo); + pgstat_end_function_usage(&fcusage, true); + + /* Reclaim memory. */ + MemoryContextReset(context); + } + + /* Restore old memory context and delete the temporary one. */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(context); +} + +/* + * Do event triggers support this object type? 
 */
bool
EventTriggerSupportsObjectType(ObjectType obtype)
{
	/*
	 * Only database-local object types are supported; global objects
	 * (databases, tablespaces, roles, parameter ACLs) and event triggers
	 * themselves are explicitly excluded below.
	 */
	switch (obtype)
	{
		case OBJECT_DATABASE:
		case OBJECT_TABLESPACE:
		case OBJECT_ROLE:
		case OBJECT_PARAMETER_ACL:
			/* no support for global objects */
			return false;
		case OBJECT_EVENT_TRIGGER:
			/* no support for event triggers on event triggers */
			return false;
		case OBJECT_ACCESS_METHOD:
		case OBJECT_AGGREGATE:
		case OBJECT_AMOP:
		case OBJECT_AMPROC:
		case OBJECT_ATTRIBUTE:
		case OBJECT_CAST:
		case OBJECT_COLUMN:
		case OBJECT_COLLATION:
		case OBJECT_CONVERSION:
		case OBJECT_DEFACL:
		case OBJECT_DEFAULT:
		case OBJECT_DOMAIN:
		case OBJECT_DOMCONSTRAINT:
		case OBJECT_EXTENSION:
		case OBJECT_FDW:
		case OBJECT_FOREIGN_SERVER:
		case OBJECT_FOREIGN_TABLE:
		case OBJECT_FUNCTION:
		case OBJECT_INDEX:
		case OBJECT_LANGUAGE:
		case OBJECT_LARGEOBJECT:
		case OBJECT_MATVIEW:
		case OBJECT_OPCLASS:
		case OBJECT_OPERATOR:
		case OBJECT_OPFAMILY:
		case OBJECT_POLICY:
		case OBJECT_PROCEDURE:
		case OBJECT_PUBLICATION:
		case OBJECT_PUBLICATION_NAMESPACE:
		case OBJECT_PUBLICATION_REL:
		case OBJECT_ROUTINE:
		case OBJECT_RULE:
		case OBJECT_SCHEMA:
		case OBJECT_SEQUENCE:
		case OBJECT_SUBSCRIPTION:
		case OBJECT_STATISTIC_EXT:
		case OBJECT_TABCONSTRAINT:
		case OBJECT_TABLE:
		case OBJECT_TRANSFORM:
		case OBJECT_TRIGGER:
		case OBJECT_TSCONFIGURATION:
		case OBJECT_TSDICTIONARY:
		case OBJECT_TSPARSER:
		case OBJECT_TSTEMPLATE:
		case OBJECT_TYPE:
		case OBJECT_USER_MAPPING:
		case OBJECT_VIEW:
			return true;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new ObjectType hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * Do event triggers support this object class?
 */
bool
EventTriggerSupportsObjectClass(ObjectClass objclass)
{
	/*
	 * Companion to EventTriggerSupportsObjectType, keyed by dependency-level
	 * object class instead of parse-level object type; the two must agree on
	 * which objects are excluded (global objects and event triggers).
	 */
	switch (objclass)
	{
		case OCLASS_DATABASE:
		case OCLASS_TBLSPACE:
		case OCLASS_ROLE:
		case OCLASS_PARAMETER_ACL:
			/* no support for global objects */
			return false;
		case OCLASS_EVENT_TRIGGER:
			/* no support for event triggers on event triggers */
			return false;
		case OCLASS_CLASS:
		case OCLASS_PROC:
		case OCLASS_TYPE:
		case OCLASS_CAST:
		case OCLASS_COLLATION:
		case OCLASS_CONSTRAINT:
		case OCLASS_CONVERSION:
		case OCLASS_DEFAULT:
		case OCLASS_LANGUAGE:
		case OCLASS_LARGEOBJECT:
		case OCLASS_OPERATOR:
		case OCLASS_OPCLASS:
		case OCLASS_OPFAMILY:
		case OCLASS_AM:
		case OCLASS_AMOP:
		case OCLASS_AMPROC:
		case OCLASS_REWRITE:
		case OCLASS_TRIGGER:
		case OCLASS_SCHEMA:
		case OCLASS_STATISTIC_EXT:
		case OCLASS_TSPARSER:
		case OCLASS_TSDICT:
		case OCLASS_TSTEMPLATE:
		case OCLASS_TSCONFIG:
		case OCLASS_FDW:
		case OCLASS_FOREIGN_SERVER:
		case OCLASS_USER_MAPPING:
		case OCLASS_DEFACL:
		case OCLASS_EXTENSION:
		case OCLASS_POLICY:
		case OCLASS_PUBLICATION:
		case OCLASS_PUBLICATION_NAMESPACE:
		case OCLASS_PUBLICATION_REL:
		case OCLASS_SUBSCRIPTION:
		case OCLASS_TRANSFORM:
			return true;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new OCLASS hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * Prepare event trigger state for a new complete query to run, if necessary;
 * returns whether this was done. If it was, EventTriggerEndCompleteQuery must
 * be called when the query is done, regardless of whether it succeeds or fails
 * -- so use of a PG_TRY block is mandatory.
+ */ +bool +EventTriggerBeginCompleteQuery(void) +{ + EventTriggerQueryState *state; + MemoryContext cxt; + + /* + * Currently, sql_drop, table_rewrite, ddl_command_end events are the only + * reason to have event trigger state at all; so if there are none, don't + * install one. + */ + if (!trackDroppedObjectsNeeded()) + return false; + + cxt = AllocSetContextCreate(TopMemoryContext, + "event trigger state", + ALLOCSET_DEFAULT_SIZES); + state = MemoryContextAlloc(cxt, sizeof(EventTriggerQueryState)); + state->cxt = cxt; + slist_init(&(state->SQLDropList)); + state->in_sql_drop = false; + state->table_rewrite_oid = InvalidOid; + + state->commandCollectionInhibited = currentEventTriggerState ? + currentEventTriggerState->commandCollectionInhibited : false; + state->currentCommand = NULL; + state->commandList = NIL; + state->previous = currentEventTriggerState; + currentEventTriggerState = state; + + return true; +} + +/* + * Query completed (or errored out) -- clean up local state, return to previous + * one. + * + * Note: it's an error to call this routine if EventTriggerBeginCompleteQuery + * returned false previously. + * + * Note: this might be called in the PG_CATCH block of a failing transaction, + * so be wary of running anything unnecessary. (In particular, it's probably + * unwise to try to allocate memory.) + */ +void +EventTriggerEndCompleteQuery(void) +{ + EventTriggerQueryState *prevstate; + + prevstate = currentEventTriggerState->previous; + + /* this avoids the need for retail pfree of SQLDropList items: */ + MemoryContextDelete(currentEventTriggerState->cxt); + + currentEventTriggerState = prevstate; +} + +/* + * Do we need to keep close track of objects being dropped? + * + * This is useful because there is a cost to running with them enabled. 
 */
bool
trackDroppedObjectsNeeded(void)
{
	/*
	 * true if any sql_drop, table_rewrite, ddl_command_end event trigger
	 * exists
	 */
	return list_length(EventCacheLookup(EVT_SQLDrop)) > 0 ||
		list_length(EventCacheLookup(EVT_TableRewrite)) > 0 ||
		list_length(EventCacheLookup(EVT_DDLCommandEnd)) > 0;
}

/*
 * Support for dropped objects information on event trigger functions.
 *
 * We keep the list of objects dropped by the current command in current
 * state's SQLDropList (comprising SQLDropObject items).  Each time a new
 * command is to start, a clean EventTriggerQueryState is created; commands
 * that drop objects do the dependency.c dance to drop objects, which
 * populates the current state's SQLDropList; when the event triggers are
 * invoked they can consume the list via pg_event_trigger_dropped_objects().
 * When the command finishes, the EventTriggerQueryState is cleared, and
 * the one from the previous command is restored (when no command is in
 * execution, the current state is NULL).
 *
 * All this lets us support the case that an event trigger function drops
 * objects "reentrantly".
 */

/*
 * Register one object as being dropped by the current command.
 *
 * original/normal indicate how the object came to be dropped (directly
 * targeted vs. reached via dependency); they are stored verbatim and later
 * exposed by pg_event_trigger_dropped_objects().
 */
void
EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool normal)
{
	SQLDropObject *obj;
	MemoryContext oldcxt;

	/* Collection only happens while event trigger state is active. */
	if (!currentEventTriggerState)
		return;

	Assert(EventTriggerSupportsObjectClass(getObjectClass(object)));

	/* don't report temp schemas except my own */
	if (object->classId == NamespaceRelationId &&
		(isAnyTempNamespace(object->objectId) &&
		 !isTempNamespace(object->objectId)))
		return;

	/* Allocate in the event-trigger context so the entry outlives this call. */
	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	obj = palloc0(sizeof(SQLDropObject));
	obj->address = *object;
	obj->original = original;
	obj->normal = normal;

	/*
	 * Obtain schema names from the object's catalog tuple, if one exists;
	 * this lets us skip objects in temp schemas.  We trust that
	 * ObjectProperty contains all object classes that can be
	 * schema-qualified.
	 */
	if (is_objectclass_supported(object->classId))
	{
		Relation	catalog;
		HeapTuple	tuple;

		catalog = table_open(obj->address.classId, AccessShareLock);
		tuple = get_catalog_object_by_oid(catalog,
										  get_object_attnum_oid(object->classId),
										  obj->address.objectId);

		if (tuple)
		{
			AttrNumber	attnum;
			Datum		datum;
			bool		isnull;

			attnum = get_object_attnum_namespace(obj->address.classId);
			if (attnum != InvalidAttrNumber)
			{
				datum = heap_getattr(tuple, attnum,
									 RelationGetDescr(catalog), &isnull);
				if (!isnull)
				{
					Oid			namespaceId;

					namespaceId = DatumGetObjectId(datum);
					/* temp objects are only reported if they are my own */
					if (isTempNamespace(namespaceId))
					{
						obj->schemaname = "pg_temp";
						obj->istemp = true;
					}
					else if (isAnyTempNamespace(namespaceId))
					{
						/* another session's temp object: drop the entry entirely */
						pfree(obj);
						table_close(catalog, AccessShareLock);
						MemoryContextSwitchTo(oldcxt);
						return;
					}
					else
					{
						obj->schemaname = get_namespace_name(namespaceId);
						obj->istemp = false;
					}
				}
			}

			/* record the object name only when (schema, name) uniquely identifies it */
			if (get_object_namensp_unique(obj->address.classId) &&
				obj->address.objectSubId == 0)
			{
				attnum = get_object_attnum_name(obj->address.classId);
				if (attnum != InvalidAttrNumber)
				{
					datum = heap_getattr(tuple, attnum,
										 RelationGetDescr(catalog), &isnull);
					if (!isnull)
						obj->objname = pstrdup(NameStr(*DatumGetName(datum)));
				}
			}
		}

		table_close(catalog, AccessShareLock);
	}
	else
	{
		if (object->classId == NamespaceRelationId &&
			isTempNamespace(object->objectId))
			obj->istemp = true;
	}

	/* object identity, objname and objargs */
	obj->objidentity =
		getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs,
							   false);

	/* object type */
	obj->objecttype = getObjectTypeDescription(&obj->address, false);

	slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next);

	MemoryContextSwitchTo(oldcxt);
}

/*
 *
pg_event_trigger_dropped_objects
 *
 * Make the list of dropped objects available to the user function run by the
 * Event Trigger.
 */
Datum
pg_event_trigger_dropped_objects(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	slist_iter	iter;

	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		!currentEventTriggerState->in_sql_drop)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a sql_drop event trigger function",
						"pg_event_trigger_dropped_objects()")));

	/* Build tuplestore to hold the result rows */
	InitMaterializedSRF(fcinfo, 0);

	slist_foreach(iter, &(currentEventTriggerState->SQLDropList))
	{
		SQLDropObject *obj;
		int			i = 0;
		/*
		 * NOTE(review): the values[]/nulls[] fill order below defines the
		 * output columns; presumably it must match the function's declared
		 * result row type — verify against the catalog declaration.
		 */
		Datum		values[12];
		bool		nulls[12];

		obj = slist_container(SQLDropObject, next, iter.cur);

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, 0, sizeof(nulls));

		/* classid */
		values[i++] = ObjectIdGetDatum(obj->address.classId);

		/* objid */
		values[i++] = ObjectIdGetDatum(obj->address.objectId);

		/* objsubid */
		values[i++] = Int32GetDatum(obj->address.objectSubId);

		/* original */
		values[i++] = BoolGetDatum(obj->original);

		/* normal */
		values[i++] = BoolGetDatum(obj->normal);

		/* is_temporary */
		values[i++] = BoolGetDatum(obj->istemp);

		/* object_type */
		values[i++] = CStringGetTextDatum(obj->objecttype);

		/* schema_name */
		if (obj->schemaname)
			values[i++] = CStringGetTextDatum(obj->schemaname);
		else
			nulls[i++] = true;

		/* object_name */
		if (obj->objname)
			values[i++] = CStringGetTextDatum(obj->objname);
		else
			nulls[i++] = true;

		/* object_identity */
		if (obj->objidentity)
			values[i++] = CStringGetTextDatum(obj->objidentity);
		else
			nulls[i++] = true;

		/* address_names and address_args */
		if (obj->addrnames)
		{
			values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrnames));

			if (obj->addrargs)
				values[i++] = PointerGetDatum(strlist_to_textarray(obj->addrargs));
			else
				values[i++] = PointerGetDatum(construct_empty_array(TEXTOID));
		}
		else
		{
			nulls[i++] = true;
			nulls[i++] = true;
		}

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	return (Datum) 0;
}

/*
 * pg_event_trigger_table_rewrite_oid
 *
 * Make the Oid of the table going to be rewritten available to the user
 * function run by the Event Trigger.
 */
Datum
pg_event_trigger_table_rewrite_oid(PG_FUNCTION_ARGS)
{
	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		currentEventTriggerState->table_rewrite_oid == InvalidOid)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a table_rewrite event trigger function",
						"pg_event_trigger_table_rewrite_oid()")));

	PG_RETURN_OID(currentEventTriggerState->table_rewrite_oid);
}

/*
 * pg_event_trigger_table_rewrite_reason
 *
 * Make the rewrite reason available to the user.
 */
Datum
pg_event_trigger_table_rewrite_reason(PG_FUNCTION_ARGS)
{
	/*
	 * Protect this function from being called out of context
	 */
	if (!currentEventTriggerState ||
		currentEventTriggerState->table_rewrite_reason == 0)
		ereport(ERROR,
				(errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED),
				 errmsg("%s can only be called in a table_rewrite event trigger function",
						"pg_event_trigger_table_rewrite_reason()")));

	PG_RETURN_INT32(currentEventTriggerState->table_rewrite_reason);
}

/*-------------------------------------------------------------------------
 * Support for DDL command deparsing
 *
 * The routines below enable an event trigger function to obtain a list of
 * DDL commands as they are executed.
There are three main pieces to this + * feature: + * + * 1) Within ProcessUtilitySlow, or some sub-routine thereof, each DDL command + * adds a struct CollectedCommand representation of itself to the command list, + * using the routines below. + * + * 2) Some time after that, ddl_command_end fires and the command list is made + * available to the event trigger function via pg_event_trigger_ddl_commands(); + * the complete command details are exposed as a column of type pg_ddl_command. + * + * 3) An extension can install a function capable of taking a value of type + * pg_ddl_command and transform it into some external, user-visible and/or + * -modifiable representation. + *------------------------------------------------------------------------- + */ + +/* + * Inhibit DDL command collection. + */ +void +EventTriggerInhibitCommandCollection(void) +{ + if (!currentEventTriggerState) + return; + + currentEventTriggerState->commandCollectionInhibited = true; +} + +/* + * Re-establish DDL command collection. + */ +void +EventTriggerUndoInhibitCommandCollection(void) +{ + if (!currentEventTriggerState) + return; + + currentEventTriggerState->commandCollectionInhibited = false; +} + +/* + * EventTriggerCollectSimpleCommand + * Save data about a simple DDL command that was just executed + * + * address identifies the object being operated on. secondaryObject is an + * object address that was related in some way to the executed command; its + * meaning is command-specific. + * + * For instance, for an ALTER obj SET SCHEMA command, objtype is the type of + * object being moved, objectId is its OID, and secondaryOid is the OID of the + * old schema. (The destination schema OID can be obtained by catalog lookup + * of the object.) 
 */
void
EventTriggerCollectSimpleCommand(ObjectAddress address,
								 ObjectAddress secondaryObject,
								 Node *parsetree)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	/* allocate in the event-trigger context so the entry survives this call */
	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));

	command->type = SCT_Simple;
	command->in_extension = creating_extension;

	command->d.simple.address = address;
	command->d.simple.secondaryObject = secondaryObject;
	/* copy the parse tree: the original belongs to a shorter-lived context */
	command->parsetree = copyObject(parsetree);

	currentEventTriggerState->commandList = lappend(currentEventTriggerState->commandList,
													command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerAlterTableStart
 *		Prepare to receive data on an ALTER TABLE command about to be executed
 *
 * Note we don't collect the command immediately; instead we keep it in
 * currentCommand, and only when we're done processing the subcommands we will
 * add it to the command list.
 */
void
EventTriggerAlterTableStart(Node *parsetree)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));

	command->type = SCT_AlterTable;
	command->in_extension = creating_extension;

	command->d.alterTable.classId = RelationRelationId;
	/* the relation OID is not known yet; EventTriggerAlterTableRelid fills it in */
	command->d.alterTable.objectId = InvalidOid;
	command->d.alterTable.subcmds = NIL;
	command->parsetree = copyObject(parsetree);

	/* push onto the stack of in-progress commands (ALTER TABLE can nest) */
	command->parent = currentEventTriggerState->currentCommand;
	currentEventTriggerState->currentCommand = command;

	MemoryContextSwitchTo(oldcxt);
}

/*
 * Remember the OID of the object being affected by an ALTER TABLE.
 *
 * This is needed because in some cases we don't know the OID until later.
 */
void
EventTriggerAlterTableRelid(Oid objectId)
{
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	currentEventTriggerState->currentCommand->d.alterTable.objectId = objectId;
}

/*
 * EventTriggerCollectAlterTableSubcmd
 *		Save data about a single part of an ALTER TABLE.
 *
 * Several different commands go through this path, but apart from ALTER TABLE
 * itself, they are all concerned with AlterTableCmd nodes that are generated
 * internally, so that's all that this code needs to handle at the moment.
 */
void
EventTriggerCollectAlterTableSubcmd(Node *subcmd, ObjectAddress address)
{
	MemoryContext oldcxt;
	CollectedATSubcmd *newsub;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	/* EventTriggerAlterTableStart/Relid must have run before this point */
	Assert(IsA(subcmd, AlterTableCmd));
	Assert(currentEventTriggerState->currentCommand != NULL);
	Assert(OidIsValid(currentEventTriggerState->currentCommand->d.alterTable.objectId));

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	newsub = palloc(sizeof(CollectedATSubcmd));
	newsub->address = address;
	newsub->parsetree = copyObject(subcmd);

	currentEventTriggerState->currentCommand->d.alterTable.subcmds =
		lappend(currentEventTriggerState->currentCommand->d.alterTable.subcmds, newsub);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerAlterTableEnd
 *		Finish up saving an ALTER TABLE command, and add it to command list.
 *
 * FIXME this API isn't considering the possibility that an xact/subxact is
 * aborted partway through.  Probably it's best to add an
 * AtEOSubXact_EventTriggers() to fix this.
 */
void
EventTriggerAlterTableEnd(void)
{
	CollectedCommand *parent;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	parent = currentEventTriggerState->currentCommand->parent;

	/* If no subcommands, don't collect */
	if (list_length(currentEventTriggerState->currentCommand->d.alterTable.subcmds) != 0)
	{
		MemoryContext oldcxt;

		oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

		currentEventTriggerState->commandList =
			lappend(currentEventTriggerState->commandList,
					currentEventTriggerState->currentCommand);

		MemoryContextSwitchTo(oldcxt);
	}
	else
		pfree(currentEventTriggerState->currentCommand);

	/* pop the stack pushed by EventTriggerAlterTableStart */
	currentEventTriggerState->currentCommand = parent;
}

/*
 * EventTriggerCollectGrant
 *		Save data about a GRANT/REVOKE command being executed
 *
 * This function creates a copy of the InternalGrant, as the original might
 * not have the right lifetime.
 */
void
EventTriggerCollectGrant(InternalGrant *istmt)
{
	MemoryContext oldcxt;
	CollectedCommand *command;
	InternalGrant *icopy;
	ListCell   *cell;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	/*
	 * This is tedious, but necessary.
	 * (Deep-copy the InternalGrant: the struct itself, then each list member;
	 * col_privs elements are nodes and need copyObject.)
	 */
	icopy = palloc(sizeof(InternalGrant));
	memcpy(icopy, istmt, sizeof(InternalGrant));
	icopy->objects = list_copy(istmt->objects);
	icopy->grantees = list_copy(istmt->grantees);
	icopy->col_privs = NIL;
	foreach(cell, istmt->col_privs)
		icopy->col_privs = lappend(icopy->col_privs, copyObject(lfirst(cell)));

	/* Now collect it, using the copied InternalGrant */
	command = palloc(sizeof(CollectedCommand));
	command->type = SCT_Grant;
	command->in_extension = creating_extension;
	command->d.grant.istmt = icopy;
	command->parsetree = NULL;

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterOpFam
 *		Save data about an ALTER OPERATOR FAMILY ADD/DROP command being
 *		executed
 */
void
EventTriggerCollectAlterOpFam(AlterOpFamilyStmt *stmt, Oid opfamoid,
							  List *operators, List *procedures)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc(sizeof(CollectedCommand));
	command->type = SCT_AlterOpFamily;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.opfam.address,
					 OperatorFamilyRelationId, opfamoid);
	command->d.opfam.operators = operators;
	command->d.opfam.procedures = procedures;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectCreateOpClass
 *		Save data about a CREATE OPERATOR CLASS command being executed
 */
void
EventTriggerCollectCreateOpClass(CreateOpClassStmt *stmt, Oid opcoid,
								 List *operators, List *procedures)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc0(sizeof(CollectedCommand));
	command->type = SCT_CreateOpClass;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.createopc.address,
					 OperatorClassRelationId, opcoid);
	command->d.createopc.operators = operators;
	command->d.createopc.procedures = procedures;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterTSConfig
 *		Save data about an ALTER TEXT SEARCH CONFIGURATION command being
 *		executed
 */
void
EventTriggerCollectAlterTSConfig(AlterTSConfigurationStmt *stmt, Oid cfgId,
								 Oid *dictIds, int ndicts)
{
	MemoryContext oldcxt;
	CollectedCommand *command;

	/* ignore if event trigger context not set, or collection disabled */
	if (!currentEventTriggerState ||
		currentEventTriggerState->commandCollectionInhibited)
		return;

	oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt);

	command = palloc0(sizeof(CollectedCommand));
	command->type = SCT_AlterTSConfig;
	command->in_extension = creating_extension;
	ObjectAddressSet(command->d.atscfg.address,
					 TSConfigRelationId, cfgId);
	/* copy the dictionary OID array; the caller's array may be transient */
	command->d.atscfg.dictIds = palloc(sizeof(Oid) * ndicts);
	memcpy(command->d.atscfg.dictIds, dictIds, sizeof(Oid) * ndicts);
	command->d.atscfg.ndicts = ndicts;
	command->parsetree = (Node *) copyObject(stmt);

	currentEventTriggerState->commandList =
		lappend(currentEventTriggerState->commandList, command);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * EventTriggerCollectAlterDefPrivs
 *		Save data about an ALTER DEFAULT PRIVILEGES command being
 *		executed
 */
+void +EventTriggerCollectAlterDefPrivs(AlterDefaultPrivilegesStmt *stmt) +{ + MemoryContext oldcxt; + CollectedCommand *command; + + /* ignore if event trigger context not set, or collection disabled */ + if (!currentEventTriggerState || + currentEventTriggerState->commandCollectionInhibited) + return; + + oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt); + + command = palloc0(sizeof(CollectedCommand)); + command->type = SCT_AlterDefaultPrivileges; + command->d.defprivs.objtype = stmt->action->objtype; + command->in_extension = creating_extension; + command->parsetree = (Node *) copyObject(stmt); + + currentEventTriggerState->commandList = + lappend(currentEventTriggerState->commandList, command); + MemoryContextSwitchTo(oldcxt); +} + +/* + * In a ddl_command_end event trigger, this function reports the DDL commands + * being run. + */ +Datum +pg_event_trigger_ddl_commands(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + ListCell *lc; + + /* + * Protect this function from being called out of context + */ + if (!currentEventTriggerState) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_EVENT_TRIGGER_PROTOCOL_VIOLATED), + errmsg("%s can only be called in an event trigger function", + "pg_event_trigger_ddl_commands()"))); + + /* Build tuplestore to hold the result rows */ + InitMaterializedSRF(fcinfo, 0); + + foreach(lc, currentEventTriggerState->commandList) + { + CollectedCommand *cmd = lfirst(lc); + Datum values[9]; + bool nulls[9]; + ObjectAddress addr; + int i = 0; + + /* + * For IF NOT EXISTS commands that attempt to create an existing + * object, the returned OID is Invalid. Don't return anything. + * + * One might think that a viable alternative would be to look up the + * Oid of the existing object and run the deparse with that. But + * since the parse tree might be different from the one that created + * the object in the first place, we might not end up in a consistent + * state anyway. 
+ */ + if (cmd->type == SCT_Simple && + !OidIsValid(cmd->d.simple.address.objectId)) + continue; + + MemSet(nulls, 0, sizeof(nulls)); + + switch (cmd->type) + { + case SCT_Simple: + case SCT_AlterTable: + case SCT_AlterOpFamily: + case SCT_CreateOpClass: + case SCT_AlterTSConfig: + { + char *identity; + char *type; + char *schema = NULL; + + if (cmd->type == SCT_Simple) + addr = cmd->d.simple.address; + else if (cmd->type == SCT_AlterTable) + ObjectAddressSet(addr, + cmd->d.alterTable.classId, + cmd->d.alterTable.objectId); + else if (cmd->type == SCT_AlterOpFamily) + addr = cmd->d.opfam.address; + else if (cmd->type == SCT_CreateOpClass) + addr = cmd->d.createopc.address; + else if (cmd->type == SCT_AlterTSConfig) + addr = cmd->d.atscfg.address; + + /* + * If an object was dropped in the same command we may end + * up in a situation where we generated a message but can + * no longer look for the object information, so skip it + * rather than failing. This can happen for example with + * some subcommand combinations of ALTER TABLE. + */ + identity = getObjectIdentity(&addr, true); + if (identity == NULL) + continue; + + /* The type can never be NULL. */ + type = getObjectTypeDescription(&addr, true); + + /* + * Obtain schema name, if any ("pg_temp" if a temp + * object). If the object class is not in the supported + * list here, we assume it's a schema-less object type, + * and thus "schema" remains set to NULL. 
+ */ + if (is_objectclass_supported(addr.classId)) + { + AttrNumber nspAttnum; + + nspAttnum = get_object_attnum_namespace(addr.classId); + if (nspAttnum != InvalidAttrNumber) + { + Relation catalog; + HeapTuple objtup; + Oid schema_oid; + bool isnull; + + catalog = table_open(addr.classId, AccessShareLock); + objtup = get_catalog_object_by_oid(catalog, + get_object_attnum_oid(addr.classId), + addr.objectId); + if (!HeapTupleIsValid(objtup)) + elog(ERROR, "cache lookup failed for object %u/%u", + addr.classId, addr.objectId); + schema_oid = + heap_getattr(objtup, nspAttnum, + RelationGetDescr(catalog), &isnull); + if (isnull) + elog(ERROR, + "invalid null namespace in object %u/%u/%d", + addr.classId, addr.objectId, addr.objectSubId); + schema = get_namespace_name_or_temp(schema_oid); + + table_close(catalog, AccessShareLock); + } + } + + /* classid */ + values[i++] = ObjectIdGetDatum(addr.classId); + /* objid */ + values[i++] = ObjectIdGetDatum(addr.objectId); + /* objsubid */ + values[i++] = Int32GetDatum(addr.objectSubId); + /* command tag */ + values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree)); + /* object_type */ + values[i++] = CStringGetTextDatum(type); + /* schema */ + if (schema == NULL) + nulls[i++] = true; + else + values[i++] = CStringGetTextDatum(schema); + /* identity */ + values[i++] = CStringGetTextDatum(identity); + /* in_extension */ + values[i++] = BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + } + break; + + case SCT_AlterDefaultPrivileges: + /* classid */ + nulls[i++] = true; + /* objid */ + nulls[i++] = true; + /* objsubid */ + nulls[i++] = true; + /* command tag */ + values[i++] = CStringGetTextDatum(CreateCommandName(cmd->parsetree)); + /* object_type */ + values[i++] = CStringGetTextDatum(stringify_adefprivs_objtype(cmd->d.defprivs.objtype)); + /* schema */ + nulls[i++] = true; + /* identity */ + nulls[i++] = true; + /* in_extension */ + values[i++] = 
BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + break; + + case SCT_Grant: + /* classid */ + nulls[i++] = true; + /* objid */ + nulls[i++] = true; + /* objsubid */ + nulls[i++] = true; + /* command tag */ + values[i++] = CStringGetTextDatum(cmd->d.grant.istmt->is_grant ? + "GRANT" : "REVOKE"); + /* object_type */ + values[i++] = CStringGetTextDatum(stringify_grant_objtype(cmd->d.grant.istmt->objtype)); + /* schema */ + nulls[i++] = true; + /* identity */ + nulls[i++] = true; + /* in_extension */ + values[i++] = BoolGetDatum(cmd->in_extension); + /* command */ + values[i++] = PointerGetDatum(cmd); + break; + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + PG_RETURN_VOID(); +} + +/* + * Return the ObjectType as a string, as it would appear in GRANT and + * REVOKE commands. + */ +static const char * +stringify_grant_objtype(ObjectType objtype) +{ + switch (objtype) + { + case OBJECT_COLUMN: + return "COLUMN"; + case OBJECT_TABLE: + return "TABLE"; + case OBJECT_SEQUENCE: + return "SEQUENCE"; + case OBJECT_DATABASE: + return "DATABASE"; + case OBJECT_DOMAIN: + return "DOMAIN"; + case OBJECT_FDW: + return "FOREIGN DATA WRAPPER"; + case OBJECT_FOREIGN_SERVER: + return "FOREIGN SERVER"; + case OBJECT_FUNCTION: + return "FUNCTION"; + case OBJECT_LANGUAGE: + return "LANGUAGE"; + case OBJECT_LARGEOBJECT: + return "LARGE OBJECT"; + case OBJECT_SCHEMA: + return "SCHEMA"; + case OBJECT_PARAMETER_ACL: + return "PARAMETER"; + case OBJECT_PROCEDURE: + return "PROCEDURE"; + case OBJECT_ROUTINE: + return "ROUTINE"; + case OBJECT_TABLESPACE: + return "TABLESPACE"; + case OBJECT_TYPE: + return "TYPE"; + /* these currently aren't used */ + case OBJECT_ACCESS_METHOD: + case OBJECT_AGGREGATE: + case OBJECT_AMOP: + case OBJECT_AMPROC: + case OBJECT_ATTRIBUTE: + case OBJECT_CAST: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_DEFAULT: + case OBJECT_DEFACL: + case OBJECT_DOMCONSTRAINT: + case 
OBJECT_EVENT_TRIGGER: + case OBJECT_EXTENSION: + case OBJECT_FOREIGN_TABLE: + case OBJECT_INDEX: + case OBJECT_MATVIEW: + case OBJECT_OPCLASS: + case OBJECT_OPERATOR: + case OBJECT_OPFAMILY: + case OBJECT_POLICY: + case OBJECT_PUBLICATION: + case OBJECT_PUBLICATION_NAMESPACE: + case OBJECT_PUBLICATION_REL: + case OBJECT_ROLE: + case OBJECT_RULE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABCONSTRAINT: + case OBJECT_TRANSFORM: + case OBJECT_TRIGGER: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + case OBJECT_USER_MAPPING: + case OBJECT_VIEW: + elog(ERROR, "unsupported object type: %d", (int) objtype); + } + + return "???"; /* keep compiler quiet */ +} + +/* + * Return the ObjectType as a string; as above, but use the spelling + * in ALTER DEFAULT PRIVILEGES commands instead. Generally this is just + * the plural. + */ +static const char * +stringify_adefprivs_objtype(ObjectType objtype) +{ + switch (objtype) + { + case OBJECT_COLUMN: + return "COLUMNS"; + case OBJECT_TABLE: + return "TABLES"; + case OBJECT_SEQUENCE: + return "SEQUENCES"; + case OBJECT_DATABASE: + return "DATABASES"; + case OBJECT_DOMAIN: + return "DOMAINS"; + case OBJECT_FDW: + return "FOREIGN DATA WRAPPERS"; + case OBJECT_FOREIGN_SERVER: + return "FOREIGN SERVERS"; + case OBJECT_FUNCTION: + return "FUNCTIONS"; + case OBJECT_LANGUAGE: + return "LANGUAGES"; + case OBJECT_LARGEOBJECT: + return "LARGE OBJECTS"; + case OBJECT_SCHEMA: + return "SCHEMAS"; + case OBJECT_PROCEDURE: + return "PROCEDURES"; + case OBJECT_ROUTINE: + return "ROUTINES"; + case OBJECT_TABLESPACE: + return "TABLESPACES"; + case OBJECT_TYPE: + return "TYPES"; + /* these currently aren't used */ + case OBJECT_ACCESS_METHOD: + case OBJECT_AGGREGATE: + case OBJECT_AMOP: + case OBJECT_AMPROC: + case OBJECT_ATTRIBUTE: + case OBJECT_CAST: + case OBJECT_COLLATION: + case OBJECT_CONVERSION: + case OBJECT_DEFAULT: + case OBJECT_DEFACL: + case 
OBJECT_DOMCONSTRAINT: + case OBJECT_EVENT_TRIGGER: + case OBJECT_EXTENSION: + case OBJECT_FOREIGN_TABLE: + case OBJECT_INDEX: + case OBJECT_MATVIEW: + case OBJECT_OPCLASS: + case OBJECT_OPERATOR: + case OBJECT_OPFAMILY: + case OBJECT_PARAMETER_ACL: + case OBJECT_POLICY: + case OBJECT_PUBLICATION: + case OBJECT_PUBLICATION_NAMESPACE: + case OBJECT_PUBLICATION_REL: + case OBJECT_ROLE: + case OBJECT_RULE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABCONSTRAINT: + case OBJECT_TRANSFORM: + case OBJECT_TRIGGER: + case OBJECT_TSCONFIGURATION: + case OBJECT_TSDICTIONARY: + case OBJECT_TSPARSER: + case OBJECT_TSTEMPLATE: + case OBJECT_USER_MAPPING: + case OBJECT_VIEW: + elog(ERROR, "unsupported object type: %d", (int) objtype); + } + + return "???"; /* keep compiler quiet */ +} diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c new file mode 100644 index 0000000..060c618 --- /dev/null +++ b/src/backend/commands/explain.c @@ -0,0 +1,5022 @@ +/*------------------------------------------------------------------------- + * + * explain.c + * Explain query execution plans + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/explain.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "commands/createas.h" +#include "commands/defrem.h" +#include "commands/prepare.h" +#include "executor/nodeHash.h" +#include "foreign/fdwapi.h" +#include "jit/jit.h" +#include "nodes/extensible.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteHandler.h" +#include "storage/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/guc_tables.h" +#include 
"utils/json.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/tuplesort.h" +#include "utils/typcache.h" +#include "utils/xml.h" + + +/* Hook for plugins to get control in ExplainOneQuery() */ +ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; + +/* Hook for plugins to get control in explain_get_index_name() */ +explain_get_index_name_hook_type explain_get_index_name_hook = NULL; + + +/* OR-able flags for ExplainXMLTag() */ +#define X_OPENING 0 +#define X_CLOSING 1 +#define X_CLOSE_IMMEDIATE 2 +#define X_NOWHITESPACE 4 + +static void ExplainOneQuery(Query *query, int cursorOptions, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv); +static void ExplainPrintJIT(ExplainState *es, int jit_flags, + JitInstrumentation *ji); +static void report_triggers(ResultRelInfo *rInfo, bool show_relname, + ExplainState *es); +static double elapsed_time(instr_time *starttime); +static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used); +static void ExplainNode(PlanState *planstate, List *ancestors, + const char *relationship, const char *plan_name, + ExplainState *es); +static void show_plan_tlist(PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_expression(Node *node, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); +static void show_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + bool useprefix, ExplainState *es); +static void show_scan_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_upper_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es); +static void show_sort_keys(SortState *sortstate, List *ancestors, + ExplainState *es); +static void show_incremental_sort_keys(IncrementalSortState 
*incrsortstate, + List *ancestors, ExplainState *es); +static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es); +static void show_agg_keys(AggState *astate, List *ancestors, + ExplainState *es); +static void show_grouping_sets(PlanState *planstate, Agg *agg, + List *ancestors, ExplainState *es); +static void show_grouping_set_keys(PlanState *planstate, + Agg *aggnode, Sort *sortnode, + List *context, bool useprefix, + List *ancestors, ExplainState *es); +static void show_group_keys(GroupState *gstate, List *ancestors, + ExplainState *es); +static void show_sort_group_keys(PlanState *planstate, const char *qlabel, + int nkeys, int nPresortedKeys, AttrNumber *keycols, + Oid *sortOperators, Oid *collations, bool *nullsFirst, + List *ancestors, ExplainState *es); +static void show_sortorder_options(StringInfo buf, Node *sortexpr, + Oid sortOperator, Oid collation, bool nullsFirst); +static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, + List *ancestors, ExplainState *es); +static void show_sort_info(SortState *sortstate, ExplainState *es); +static void show_incremental_sort_info(IncrementalSortState *incrsortstate, + ExplainState *es); +static void show_hash_info(HashState *hashstate, ExplainState *es); +static void show_memoize_info(MemoizeState *mstate, List *ancestors, + ExplainState *es); +static void show_hashagg_info(AggState *hashstate, ExplainState *es); +static void show_tidbitmap_info(BitmapHeapScanState *planstate, + ExplainState *es); +static void show_instrumentation_count(const char *qlabel, int which, + PlanState *planstate, ExplainState *es); +static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); +static void show_eval_params(Bitmapset *bms_params, ExplainState *es); +static const char *explain_get_index_name(Oid indexId); +static void show_buffer_usage(ExplainState *es, const BufferUsage *usage, + bool planning); +static void show_wal_usage(ExplainState *es, const 
WalUsage *usage); +static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es); +static void ExplainScanTarget(Scan *plan, ExplainState *es); +static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); +static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); +static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors, + ExplainState *es); +static void ExplainMemberNodes(PlanState **planstates, int nplans, + List *ancestors, ExplainState *es); +static void ExplainMissingMembers(int nplans, int nchildren, ExplainState *es); +static void ExplainSubPlans(List *plans, List *ancestors, + const char *relationship, ExplainState *es); +static void ExplainCustomChildren(CustomScanState *css, + List *ancestors, ExplainState *es); +static ExplainWorkersState *ExplainCreateWorkersState(int num_workers); +static void ExplainOpenWorker(int n, ExplainState *es); +static void ExplainCloseWorker(int n, ExplainState *es); +static void ExplainFlushWorkersState(ExplainState *es); +static void ExplainProperty(const char *qlabel, const char *unit, + const char *value, bool numeric, ExplainState *es); +static void ExplainOpenSetAsideGroup(const char *objtype, const char *labelname, + bool labeled, int depth, ExplainState *es); +static void ExplainSaveGroup(ExplainState *es, int depth, int *state_save); +static void ExplainRestoreGroup(ExplainState *es, int depth, int *state_save); +static void ExplainDummyGroup(const char *objtype, const char *labelname, + ExplainState *es); +static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es); +static void ExplainIndentText(ExplainState *es); +static void ExplainJSONLineEnding(ExplainState *es); +static void ExplainYAMLLineStarting(ExplainState *es); +static void escape_yaml(StringInfo buf, const char *str); + + + +/* + * ExplainQuery - + * execute an EXPLAIN command + */ +void +ExplainQuery(ParseState *pstate, ExplainStmt *stmt, + ParamListInfo 
params, DestReceiver *dest) +{ + ExplainState *es = NewExplainState(); + TupOutputState *tstate; + JumbleState *jstate = NULL; + Query *query; + List *rewritten; + ListCell *lc; + bool timing_set = false; + bool summary_set = false; + + /* Parse options list. */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "analyze") == 0) + es->analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "verbose") == 0) + es->verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "costs") == 0) + es->costs = defGetBoolean(opt); + else if (strcmp(opt->defname, "buffers") == 0) + es->buffers = defGetBoolean(opt); + else if (strcmp(opt->defname, "wal") == 0) + es->wal = defGetBoolean(opt); + else if (strcmp(opt->defname, "settings") == 0) + es->settings = defGetBoolean(opt); + else if (strcmp(opt->defname, "timing") == 0) + { + timing_set = true; + es->timing = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "summary") == 0) + { + summary_set = true; + es->summary = defGetBoolean(opt); + } + else if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "text") == 0) + es->format = EXPLAIN_FORMAT_TEXT; + else if (strcmp(p, "xml") == 0) + es->format = EXPLAIN_FORMAT_XML; + else if (strcmp(p, "json") == 0) + es->format = EXPLAIN_FORMAT_JSON; + else if (strcmp(p, "yaml") == 0) + es->format = EXPLAIN_FORMAT_YAML; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"", + opt->defname, p), + parser_errposition(pstate, opt->location))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized EXPLAIN option \"%s\"", + opt->defname), + parser_errposition(pstate, opt->location))); + } + + if (es->wal && !es->analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option WAL requires ANALYZE"))); + + /* if the timing was not set explicitly, set 
default value */ + es->timing = (timing_set) ? es->timing : es->analyze; + + /* check that timing is used with EXPLAIN ANALYZE */ + if (es->timing && !es->analyze) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("EXPLAIN option TIMING requires ANALYZE"))); + + /* if the summary was not set explicitly, set default value */ + es->summary = (summary_set) ? es->summary : es->analyze; + + query = castNode(Query, stmt->query); + if (IsQueryIdEnabled()) + jstate = JumbleQuery(query, pstate->p_sourcetext); + + if (post_parse_analyze_hook) + (*post_parse_analyze_hook) (pstate, query, jstate); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + */ + rewritten = QueryRewrite(castNode(Query, stmt->query)); + + /* emit opening boilerplate */ + ExplainBeginOutput(es); + + if (rewritten == NIL) + { + /* + * In the case of an INSTEAD NOTHING, tell at least that. But in + * non-text format, the output is delimited, so this isn't necessary. 
+ */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "Query rewrites to nothing\n"); + } + else + { + ListCell *l; + + /* Explain every plan */ + foreach(l, rewritten) + { + ExplainOneQuery(lfirst_node(Query, l), + CURSOR_OPT_PARALLEL_OK, NULL, es, + pstate->p_sourcetext, params, pstate->p_queryEnv); + + /* Separate plans with an appropriate separator */ + if (lnext(rewritten, l) != NULL) + ExplainSeparatePlans(es); + } + } + + /* emit closing boilerplate */ + ExplainEndOutput(es); + Assert(es->indent == 0); + + /* output tuples */ + tstate = begin_tup_output_tupdesc(dest, ExplainResultDesc(stmt), + &TTSOpsVirtual); + if (es->format == EXPLAIN_FORMAT_TEXT) + do_text_output_multiline(tstate, es->str->data); + else + do_text_output_oneline(tstate, es->str->data); + end_tup_output(tstate); + + pfree(es->str->data); +} + +/* + * Create a new ExplainState struct initialized with default options. + */ +ExplainState * +NewExplainState(void) +{ + ExplainState *es = (ExplainState *) palloc0(sizeof(ExplainState)); + + /* Set default options (most fields can be left as zeroes). */ + es->costs = true; + /* Prepare output buffer. 
*/ + es->str = makeStringInfo(); + + return es; +} + +/* + * ExplainResultDesc - + * construct the result tupledesc for an EXPLAIN + */ +TupleDesc +ExplainResultDesc(ExplainStmt *stmt) +{ + TupleDesc tupdesc; + ListCell *lc; + Oid result_type = TEXTOID; + + /* Check for XML format option */ + foreach(lc, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (strcmp(opt->defname, "format") == 0) + { + char *p = defGetString(opt); + + if (strcmp(p, "xml") == 0) + result_type = XMLOID; + else if (strcmp(p, "json") == 0) + result_type = JSONOID; + else + result_type = TEXTOID; + /* don't "break", as ExplainQuery will use the last value */ + } + } + + /* Need a tuple descriptor representing a single TEXT or XML column */ + tupdesc = CreateTemplateTupleDesc(1); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN", + result_type, -1, 0); + return tupdesc; +} + +/* + * ExplainOneQuery - + * print out the execution plan for one Query + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt. 
+ */ +static void +ExplainOneQuery(Query *query, int cursorOptions, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + /* planner will not cope with utility statements */ + if (query->commandType == CMD_UTILITY) + { + ExplainOneUtility(query->utilityStmt, into, es, queryString, params, + queryEnv); + return; + } + + /* if an advisor plugin is present, let it manage things */ + if (ExplainOneQuery_hook) + (*ExplainOneQuery_hook) (query, cursorOptions, into, es, + queryString, params, queryEnv); + else + { + PlannedStmt *plan; + instr_time planstart, + planduration; + BufferUsage bufusage_start, + bufusage; + + if (es->buffers) + bufusage_start = pgBufferUsage; + INSTR_TIME_SET_CURRENT(planstart); + + /* plan the query */ + plan = pg_plan_query(query, queryString, cursorOptions, params); + + INSTR_TIME_SET_CURRENT(planduration); + INSTR_TIME_SUBTRACT(planduration, planstart); + + /* calc differences of buffer counters. */ + if (es->buffers) + { + memset(&bufusage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); + } + + /* run it (if needed) and produce output */ + ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + &planduration, (es->buffers ? &bufusage : NULL)); + } +} + +/* + * ExplainOneUtility - + * print out the execution plan for one utility statement + * (In general, utility statements don't have plans, but there are some + * we treat as special cases) + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt. + * + * This is exported because it's called back from prepare.c in the + * EXPLAIN EXECUTE case. In that case, we'll be dealing with a statement + * that's in the plan cache, so we have to ensure we don't modify it. 
+ */ +void +ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + if (utilityStmt == NULL) + return; + + if (IsA(utilityStmt, CreateTableAsStmt)) + { + /* + * We have to rewrite the contained SELECT and then pass it back to + * ExplainOneQuery. Copy to be safe in the EXPLAIN EXECUTE case. + */ + CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; + List *rewritten; + + /* + * Check if the relation exists or not. This is done at this stage to + * avoid query planning or execution. + */ + if (CreateTableAsRelExists(ctas)) + { + if (ctas->objtype == OBJECT_TABLE) + ExplainDummyGroup("CREATE TABLE AS", NULL, es); + else if (ctas->objtype == OBJECT_MATVIEW) + ExplainDummyGroup("CREATE MATERIALIZED VIEW", NULL, es); + else + elog(ERROR, "unexpected object type: %d", + (int) ctas->objtype); + return; + } + + rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query))); + Assert(list_length(rewritten) == 1); + ExplainOneQuery(linitial_node(Query, rewritten), + CURSOR_OPT_PARALLEL_OK, ctas->into, es, + queryString, params, queryEnv); + } + else if (IsA(utilityStmt, DeclareCursorStmt)) + { + /* + * Likewise for DECLARE CURSOR. + * + * Notice that if you say EXPLAIN ANALYZE DECLARE CURSOR then we'll + * actually run the query. This is different from pre-8.3 behavior + * but seems more useful than not running the query. No cursor will + * be created, however. 
+ */ + DeclareCursorStmt *dcs = (DeclareCursorStmt *) utilityStmt; + List *rewritten; + + rewritten = QueryRewrite(castNode(Query, copyObject(dcs->query))); + Assert(list_length(rewritten) == 1); + ExplainOneQuery(linitial_node(Query, rewritten), + dcs->options, NULL, es, + queryString, params, queryEnv); + } + else if (IsA(utilityStmt, ExecuteStmt)) + ExplainExecuteQuery((ExecuteStmt *) utilityStmt, into, es, + queryString, params, queryEnv); + else if (IsA(utilityStmt, NotifyStmt)) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, "NOTIFY\n"); + else + ExplainDummyGroup("Notify", NULL, es); + } + else + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, + "Utility statements have no plan structure\n"); + else + ExplainDummyGroup("Utility Statement", NULL, es); + } +} + +/* + * ExplainOnePlan - + * given a planned query, execute it if needed, and then print + * EXPLAIN output + * + * "into" is NULL unless we are explaining the contents of a CreateTableAsStmt, + * in which case executing the query should result in creating that table. + * + * This is exported because it's called back from prepare.c in the + * EXPLAIN EXECUTE case, and because an index advisor plugin would need + * to call it. 
+ */ +void +ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv, const instr_time *planduration, + const BufferUsage *bufusage) +{ + DestReceiver *dest; + QueryDesc *queryDesc; + instr_time starttime; + double totaltime = 0; + int eflags; + int instrument_option = 0; + + Assert(plannedstmt->commandType != CMD_UTILITY); + + if (es->analyze && es->timing) + instrument_option |= INSTRUMENT_TIMER; + else if (es->analyze) + instrument_option |= INSTRUMENT_ROWS; + + if (es->buffers) + instrument_option |= INSTRUMENT_BUFFERS; + if (es->wal) + instrument_option |= INSTRUMENT_WAL; + + /* + * We always collect timing for the entire statement, even when node-level + * timing is off, so we don't look at es->timing here. (We could skip + * this if !es->summary, but it's hardly worth the complication.) + */ + INSTR_TIME_SET_CURRENT(starttime); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* + * Normally we discard the query's output, but if explaining CREATE TABLE + * AS, we'd better use the appropriate tuple receiver. 
+ */ + if (into) + dest = CreateIntoRelDestReceiver(into); + else + dest = None_Receiver; + + /* Create a QueryDesc for the query */ + queryDesc = CreateQueryDesc(plannedstmt, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, queryEnv, instrument_option); + + /* Select execution options */ + if (es->analyze) + eflags = 0; /* default run-to-completion flags */ + else + eflags = EXEC_FLAG_EXPLAIN_ONLY; + if (into) + eflags |= GetIntoRelEFlags(into); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, eflags); + + /* Execute the plan for statistics if asked for */ + if (es->analyze) + { + ScanDirection dir; + + /* EXPLAIN ANALYZE CREATE TABLE AS WITH NO DATA is weird */ + if (into && into->skipData) + dir = NoMovementScanDirection; + else + dir = ForwardScanDirection; + + /* run the plan */ + ExecutorRun(queryDesc, dir, 0L, true); + + /* run cleanup too */ + ExecutorFinish(queryDesc); + + /* We can't run ExecutorEnd 'till we're done printing the stats... */ + totaltime += elapsed_time(&starttime); + } + + ExplainOpenGroup("Query", NULL, true, es); + + /* Create textual dump of plan tree */ + ExplainPrintPlan(es, queryDesc); + + /* + * COMPUTE_QUERY_ID_REGRESS means COMPUTE_QUERY_ID_AUTO, but we don't show + * the queryid in any of the EXPLAIN plans to keep stable the results + * generated by regression test suites. + */ + if (es->verbose && plannedstmt->queryId != UINT64CONST(0) && + compute_query_id != COMPUTE_QUERY_ID_REGRESS) + { + /* + * Output the queryid as an int64 rather than a uint64 so we match + * what would be seen in the BIGINT pg_stat_statements.queryid column. 
+ */ + ExplainPropertyInteger("Query Identifier", NULL, (int64) + plannedstmt->queryId, es); + } + + /* Show buffer usage in planning */ + if (bufusage) + { + ExplainOpenGroup("Planning", "Planning", true, es); + show_buffer_usage(es, bufusage, true); + ExplainCloseGroup("Planning", "Planning", true, es); + } + + if (es->summary && planduration) + { + double plantime = INSTR_TIME_GET_DOUBLE(*planduration); + + ExplainPropertyFloat("Planning Time", "ms", 1000.0 * plantime, 3, es); + } + + /* Print info about runtime of triggers */ + if (es->analyze) + ExplainPrintTriggers(es, queryDesc); + + /* + * Print info about JITing. Tied to es->costs because we don't want to + * display this in regression tests, as it'd cause output differences + * depending on build options. Might want to separate that out from COSTS + * at a later stage. + */ + if (es->costs) + ExplainPrintJITSummary(es, queryDesc); + + /* + * Close down the query and free resources. Include time for this in the + * total execution time (although it should be pretty minimal). + */ + INSTR_TIME_SET_CURRENT(starttime); + + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + /* We need a CCI just in case query expanded to multiple plans */ + if (es->analyze) + CommandCounterIncrement(); + + totaltime += elapsed_time(&starttime); + + /* + * We only report execution time if we actually ran the query (that is, + * the user specified ANALYZE), and if summary reporting is enabled (the + * user can set SUMMARY OFF to not have the timing information included in + * the output). By default, ANALYZE sets SUMMARY to true. + */ + if (es->summary && es->analyze) + ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, + es); + + ExplainCloseGroup("Query", NULL, true, es); +} + +/* + * ExplainPrintSettings - + * Print summary of modified settings affecting query planning. 
+ */ +static void +ExplainPrintSettings(ExplainState *es) +{ + int num; + struct config_generic **gucs; + + /* bail out if information about settings not requested */ + if (!es->settings) + return; + + /* request an array of relevant settings */ + gucs = get_explain_guc_options(&num); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainOpenGroup("Settings", "Settings", true, es); + + for (int i = 0; i < num; i++) + { + char *setting; + struct config_generic *conf = gucs[i]; + + setting = GetConfigOptionByName(conf->name, NULL, true); + + ExplainPropertyText(conf->name, setting, es); + } + + ExplainCloseGroup("Settings", "Settings", true, es); + } + else + { + StringInfoData str; + + /* In TEXT mode, print nothing if there are no options */ + if (num <= 0) + return; + + initStringInfo(&str); + + for (int i = 0; i < num; i++) + { + char *setting; + struct config_generic *conf = gucs[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + setting = GetConfigOptionByName(conf->name, NULL, true); + + if (setting) + appendStringInfo(&str, "%s = '%s'", conf->name, setting); + else + appendStringInfo(&str, "%s = NULL", conf->name); + } + + ExplainPropertyText("Settings", str.data, es); + } +} + +/* + * ExplainPrintPlan - + * convert a QueryDesc's plan tree to text and append it to es->str + * + * The caller should have set up the options fields of *es, as well as + * initializing the output buffer es->str. Also, output formatting state + * such as the indent level is assumed valid. Plan-tree-specific fields + * in *es are initialized here. 
 *
 * NB: will not work on utility statements
 */
void
ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
{
	Bitmapset  *rels_used = NULL;
	PlanState  *ps;

	/* Set up ExplainState fields associated with this plan tree */
	Assert(queryDesc->plannedstmt != NULL);
	es->pstmt = queryDesc->plannedstmt;
	es->rtable = queryDesc->plannedstmt->rtable;
	/* Find which RTEs the plan actually references, for alias assignment */
	ExplainPreScanNode(queryDesc->planstate, &rels_used);
	es->rtable_names = select_rtable_names_for_explain(es->rtable, rels_used);
	/* Deparse context must be built after rtable_names is available */
	es->deparse_cxt = deparse_context_for_plan_tree(queryDesc->plannedstmt,
													es->rtable_names);
	es->printed_subplans = NULL;

	/*
	 * Sometimes we mark a Gather node as "invisible", which means that it's
	 * not to be displayed in EXPLAIN output.  The purpose of this is to allow
	 * running regression tests with force_parallel_mode=regress to get the
	 * same results as running the same tests with force_parallel_mode=off.
	 * Such marking is currently only supported on a Gather at the top of the
	 * plan.  We skip that node, and we must also hide per-worker detail data
	 * further down in the plan tree.
	 */
	ps = queryDesc->planstate;
	if (IsA(ps, GatherState) && ((Gather *) ps->plan)->invisible)
	{
		ps = outerPlanState(ps);
		es->hide_workers = true;	/* suppress per-worker detail too */
	}
	/* Recursively dump the plan tree starting at (possibly adjusted) root */
	ExplainNode(ps, NIL, NULL, NULL, es);

	/*
	 * If requested, include information about GUC parameters with values that
	 * don't match the built-in defaults.
	 */
	ExplainPrintSettings(es);
}

/*
 * ExplainPrintTriggers -
 *	  convert a QueryDesc's trigger statistics to text and append it to
 *	  es->str
 *
 * The caller should have set up the options fields of *es, as well as
 * initializing the output buffer es->str.  Other fields in *es are
 * initialized here.
+ */ +void +ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc) +{ + ResultRelInfo *rInfo; + bool show_relname; + List *resultrels; + List *routerels; + List *targrels; + ListCell *l; + + resultrels = queryDesc->estate->es_opened_result_relations; + routerels = queryDesc->estate->es_tuple_routing_result_relations; + targrels = queryDesc->estate->es_trig_target_relations; + + ExplainOpenGroup("Triggers", "Triggers", false, es); + + show_relname = (list_length(resultrels) > 1 || + routerels != NIL || targrels != NIL); + foreach(l, resultrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + foreach(l, routerels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + foreach(l, targrels) + { + rInfo = (ResultRelInfo *) lfirst(l); + report_triggers(rInfo, show_relname, es); + } + + ExplainCloseGroup("Triggers", "Triggers", false, es); +} + +/* + * ExplainPrintJITSummary - + * Print summarized JIT instrumentation from leader and workers + */ +void +ExplainPrintJITSummary(ExplainState *es, QueryDesc *queryDesc) +{ + JitInstrumentation ji = {0}; + + if (!(queryDesc->estate->es_jit_flags & PGJIT_PERFORM)) + return; + + /* + * Work with a copy instead of modifying the leader state, since this + * function may be called twice + */ + if (queryDesc->estate->es_jit) + InstrJitAgg(&ji, &queryDesc->estate->es_jit->instr); + + /* If this process has done JIT in parallel workers, merge stats */ + if (queryDesc->estate->es_jit_worker_instr) + InstrJitAgg(&ji, queryDesc->estate->es_jit_worker_instr); + + ExplainPrintJIT(es, queryDesc->estate->es_jit_flags, &ji); +} + +/* + * ExplainPrintJIT - + * Append information about JITing to es->str. 
 */
static void
ExplainPrintJIT(ExplainState *es, int jit_flags, JitInstrumentation *ji)
{
	instr_time	total_time;

	/* don't print information if no JITing happened */
	if (!ji || ji->created_functions == 0)
		return;

	/* calculate total time across all four JIT phases */
	INSTR_TIME_SET_ZERO(total_time);
	INSTR_TIME_ADD(total_time, ji->generation_counter);
	INSTR_TIME_ADD(total_time, ji->inlining_counter);
	INSTR_TIME_ADD(total_time, ji->optimization_counter);
	INSTR_TIME_ADD(total_time, ji->emission_counter);

	ExplainOpenGroup("JIT", "JIT", true, es);

	/* for higher density, open code the text output format */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		ExplainIndentText(es);
		appendStringInfoString(es->str, "JIT:\n");
		es->indent++;

		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		/* Options line reflects which JIT features were enabled via flags */
		ExplainIndentText(es);
		appendStringInfo(es->str, "Options: %s %s, %s %s, %s %s, %s %s\n",
						 "Inlining", jit_flags & PGJIT_INLINE ? "true" : "false",
						 "Optimization", jit_flags & PGJIT_OPT3 ? "true" : "false",
						 "Expressions", jit_flags & PGJIT_EXPR ? "true" : "false",
						 "Deforming", jit_flags & PGJIT_DEFORM ? "true" : "false");

		/* Timing detail only with EXPLAIN ANALYZE and TIMING enabled */
		if (es->analyze && es->timing)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Timing: %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms, %s %.3f ms\n",
							 "Generation", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
							 "Inlining", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
							 "Optimization", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
							 "Emission", 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
							 "Total", 1000.0 * INSTR_TIME_GET_DOUBLE(total_time));
		}

		es->indent--;
	}
	else
	{
		/* Structured formats: one property/group per item instead */
		ExplainPropertyInteger("Functions", NULL, ji->created_functions, es);

		ExplainOpenGroup("Options", "Options", true, es);
		ExplainPropertyBool("Inlining", jit_flags & PGJIT_INLINE, es);
		ExplainPropertyBool("Optimization", jit_flags & PGJIT_OPT3, es);
		ExplainPropertyBool("Expressions", jit_flags & PGJIT_EXPR, es);
		ExplainPropertyBool("Deforming", jit_flags & PGJIT_DEFORM, es);
		ExplainCloseGroup("Options", "Options", true, es);

		if (es->analyze && es->timing)
		{
			ExplainOpenGroup("Timing", "Timing", true, es);

			ExplainPropertyFloat("Generation", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->generation_counter),
								 3, es);
			ExplainPropertyFloat("Inlining", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->inlining_counter),
								 3, es);
			ExplainPropertyFloat("Optimization", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->optimization_counter),
								 3, es);
			ExplainPropertyFloat("Emission", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(ji->emission_counter),
								 3, es);
			ExplainPropertyFloat("Total", "ms",
								 1000.0 * INSTR_TIME_GET_DOUBLE(total_time),
								 3, es);

			ExplainCloseGroup("Timing", "Timing", true, es);
		}
	}

	ExplainCloseGroup("JIT", "JIT", true, es);
}

/*
 * ExplainQueryText -
 *	  add a "Query Text" node that contains the actual text of the query
 *
 * The caller should have set up the options fields of *es, as well as
 * initializing the output buffer es->str.
 *
 */
void
ExplainQueryText(ExplainState *es, QueryDesc *queryDesc)
{
	/* Skip silently if no source text is available */
	if (queryDesc->sourceText)
		ExplainPropertyText("Query Text", queryDesc->sourceText, es);
}

/*
 * report_triggers -
 *		report execution stats for a single relation's triggers
 */
static void
report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es)
{
	int			nt;

	/* Nothing to report without a trigger descriptor and instrumentation */
	if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument)
		return;
	for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++)
	{
		Trigger    *trig = rInfo->ri_TrigDesc->triggers + nt;
		Instrumentation *instr = rInfo->ri_TrigInstrument + nt;
		char	   *relname;
		char	   *conname = NULL;

		/* Must clean up instrumentation state */
		InstrEndLoop(instr);

		/*
		 * We ignore triggers that were never invoked; they likely aren't
		 * relevant to the current query type.
		 */
		if (instr->ntuples == 0)
			continue;

		ExplainOpenGroup("Trigger", NULL, true, es);

		relname = RelationGetRelationName(rInfo->ri_RelationDesc);
		if (OidIsValid(trig->tgconstraint))
			conname = get_constraint_name(trig->tgconstraint);

		/*
		 * In text format, we avoid printing both the trigger name and the
		 * constraint name unless VERBOSE is specified.  In non-text formats
		 * we just print everything.
		 */
		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			if (es->verbose || conname == NULL)
				appendStringInfo(es->str, "Trigger %s", trig->tgname);
			else
				appendStringInfoString(es->str, "Trigger");
			if (conname)
				appendStringInfo(es->str, " for constraint %s", conname);
			if (show_relname)
				appendStringInfo(es->str, " on %s", relname);
			/* times are converted to ms for display */
			if (es->timing)
				appendStringInfo(es->str, ": time=%.3f calls=%.0f\n",
								 1000.0 * instr->total, instr->ntuples);
			else
				appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples);
		}
		else
		{
			ExplainPropertyText("Trigger Name", trig->tgname, es);
			if (conname)
				ExplainPropertyText("Constraint Name", conname, es);
			ExplainPropertyText("Relation", relname, es);
			if (es->timing)
				ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3,
									 es);
			ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es);
		}

		/* get_constraint_name() result was palloc'd; release it */
		if (conname)
			pfree(conname);

		ExplainCloseGroup("Trigger", NULL, true, es);
	}
}

/* Compute elapsed time in seconds since given timestamp */
static double
elapsed_time(instr_time *starttime)
{
	instr_time	endtime;

	INSTR_TIME_SET_CURRENT(endtime);
	INSTR_TIME_SUBTRACT(endtime, *starttime);
	return INSTR_TIME_GET_DOUBLE(endtime);
}

/*
 * ExplainPreScanNode -
 *	  Prescan the planstate tree to identify which RTEs are referenced
 *
 * Adds the relid of each referenced RTE to *rels_used.  The result controls
 * which RTEs are assigned aliases by select_rtable_names_for_explain.
 * This ensures that we don't confusingly assign un-suffixed aliases to RTEs
 * that never appear in the EXPLAIN output (such as inheritance parents).
 */
static bool
ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
{
	Plan	   *plan = planstate->plan;

	switch (nodeTag(plan))
	{
			/* Simple scan nodes reference exactly one RTE, via scanrelid */
		case T_SeqScan:
		case T_SampleScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_TidRangeScan:
		case T_SubqueryScan:
		case T_FunctionScan:
		case T_TableFuncScan:
		case T_ValuesScan:
		case T_CteScan:
		case T_NamedTuplestoreScan:
		case T_WorkTableScan:
			*rels_used = bms_add_member(*rels_used,
										((Scan *) plan)->scanrelid);
			break;
			/* Foreign/custom scans may reference several RTEs (joins) */
		case T_ForeignScan:
			*rels_used = bms_add_members(*rels_used,
										 ((ForeignScan *) plan)->fs_relids);
			break;
		case T_CustomScan:
			*rels_used = bms_add_members(*rels_used,
										 ((CustomScan *) plan)->custom_relids);
			break;
		case T_ModifyTable:
			/* Count the nominal target rel, plus the EXCLUDED pseudo-rel */
			*rels_used = bms_add_member(*rels_used,
										((ModifyTable *) plan)->nominalRelation);
			if (((ModifyTable *) plan)->exclRelRTI)
				*rels_used = bms_add_member(*rels_used,
											((ModifyTable *) plan)->exclRelRTI);
			break;
		case T_Append:
			*rels_used = bms_add_members(*rels_used,
										 ((Append *) plan)->apprelids);
			break;
		case T_MergeAppend:
			*rels_used = bms_add_members(*rels_used,
										 ((MergeAppend *) plan)->apprelids);
			break;
		default:
			/* Other node types reference no RTEs directly */
			break;
	}

	/* Recurse to children; walker returns true only on abnormal exit */
	return planstate_tree_walker(planstate, ExplainPreScanNode, rels_used);
}

/*
 * ExplainNode -
 *	  Appends a description of a plan tree to es->str
 *
 * planstate points to the executor state node for the current plan node.
 * We need to work from a PlanState node, not just a Plan node, in order to
 * get at the instrumentation data (if any) as well as the list of subplans.
 *
 * ancestors is a list of parent Plan and SubPlan nodes, most-closely-nested
 * first.  These are needed in order to interpret PARAM_EXEC Params.
 *
 * relationship describes the relationship of this plan node to its parent
 * (eg, "Outer", "Inner"); it can be null at top level.  plan_name is an
 * optional name to be attached to the node.
+ * + * In text format, es->indent is controlled in this function since we only + * want it to change at plan-node boundaries (but a few subroutines will + * transiently increment it). In non-text formats, es->indent corresponds + * to the nesting depth of logical output groups, and therefore is controlled + * by ExplainOpenGroup/ExplainCloseGroup. + */ +static void +ExplainNode(PlanState *planstate, List *ancestors, + const char *relationship, const char *plan_name, + ExplainState *es) +{ + Plan *plan = planstate->plan; + const char *pname; /* node type name for text output */ + const char *sname; /* node type name for non-text output */ + const char *strategy = NULL; + const char *partialmode = NULL; + const char *operation = NULL; + const char *custom_name = NULL; + ExplainWorkersState *save_workers_state = es->workers_state; + int save_indent = es->indent; + bool haschildren; + + /* + * Prepare per-worker output buffers, if needed. We'll append the data in + * these to the main output string further down. 
+ */ + if (planstate->worker_instrument && es->analyze && !es->hide_workers) + es->workers_state = ExplainCreateWorkersState(planstate->worker_instrument->num_workers); + else + es->workers_state = NULL; + + /* Identify plan node type, and print generic details */ + switch (nodeTag(plan)) + { + case T_Result: + pname = sname = "Result"; + break; + case T_ProjectSet: + pname = sname = "ProjectSet"; + break; + case T_ModifyTable: + sname = "ModifyTable"; + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + pname = operation = "Insert"; + break; + case CMD_UPDATE: + pname = operation = "Update"; + break; + case CMD_DELETE: + pname = operation = "Delete"; + break; + case CMD_MERGE: + pname = operation = "Merge"; + break; + default: + pname = "???"; + break; + } + break; + case T_Append: + pname = sname = "Append"; + break; + case T_MergeAppend: + pname = sname = "Merge Append"; + break; + case T_RecursiveUnion: + pname = sname = "Recursive Union"; + break; + case T_BitmapAnd: + pname = sname = "BitmapAnd"; + break; + case T_BitmapOr: + pname = sname = "BitmapOr"; + break; + case T_NestLoop: + pname = sname = "Nested Loop"; + break; + case T_MergeJoin: + pname = "Merge"; /* "Join" gets added by jointype switch */ + sname = "Merge Join"; + break; + case T_HashJoin: + pname = "Hash"; /* "Join" gets added by jointype switch */ + sname = "Hash Join"; + break; + case T_SeqScan: + pname = sname = "Seq Scan"; + break; + case T_SampleScan: + pname = sname = "Sample Scan"; + break; + case T_Gather: + pname = sname = "Gather"; + break; + case T_GatherMerge: + pname = sname = "Gather Merge"; + break; + case T_IndexScan: + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; + break; + case T_BitmapIndexScan: + pname = sname = "Bitmap Index Scan"; + break; + case T_BitmapHeapScan: + pname = sname = "Bitmap Heap Scan"; + break; + case T_TidScan: + pname = sname = "Tid Scan"; + break; + case T_TidRangeScan: + pname = 
sname = "Tid Range Scan"; + break; + case T_SubqueryScan: + pname = sname = "Subquery Scan"; + break; + case T_FunctionScan: + pname = sname = "Function Scan"; + break; + case T_TableFuncScan: + pname = sname = "Table Function Scan"; + break; + case T_ValuesScan: + pname = sname = "Values Scan"; + break; + case T_CteScan: + pname = sname = "CTE Scan"; + break; + case T_NamedTuplestoreScan: + pname = sname = "Named Tuplestore Scan"; + break; + case T_WorkTableScan: + pname = sname = "WorkTable Scan"; + break; + case T_ForeignScan: + sname = "Foreign Scan"; + switch (((ForeignScan *) plan)->operation) + { + case CMD_SELECT: + pname = "Foreign Scan"; + operation = "Select"; + break; + case CMD_INSERT: + pname = "Foreign Insert"; + operation = "Insert"; + break; + case CMD_UPDATE: + pname = "Foreign Update"; + operation = "Update"; + break; + case CMD_DELETE: + pname = "Foreign Delete"; + operation = "Delete"; + break; + default: + pname = "???"; + break; + } + break; + case T_CustomScan: + sname = "Custom Scan"; + custom_name = ((CustomScan *) plan)->methods->CustomName; + if (custom_name) + pname = psprintf("Custom Scan (%s)", custom_name); + else + pname = sname; + break; + case T_Material: + pname = sname = "Materialize"; + break; + case T_Memoize: + pname = sname = "Memoize"; + break; + case T_Sort: + pname = sname = "Sort"; + break; + case T_IncrementalSort: + pname = sname = "Incremental Sort"; + break; + case T_Group: + pname = sname = "Group"; + break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + sname = "Aggregate"; + switch (agg->aggstrategy) + { + case AGG_PLAIN: + pname = "Aggregate"; + strategy = "Plain"; + break; + case AGG_SORTED: + pname = "GroupAggregate"; + strategy = "Sorted"; + break; + case AGG_HASHED: + pname = "HashAggregate"; + strategy = "Hashed"; + break; + case AGG_MIXED: + pname = "MixedAggregate"; + strategy = "Mixed"; + break; + default: + pname = "Aggregate ???"; + strategy = "???"; + break; + } + + if 
(DO_AGGSPLIT_SKIPFINAL(agg->aggsplit)) + { + partialmode = "Partial"; + pname = psprintf("%s %s", partialmode, pname); + } + else if (DO_AGGSPLIT_COMBINE(agg->aggsplit)) + { + partialmode = "Finalize"; + pname = psprintf("%s %s", partialmode, pname); + } + else + partialmode = "Simple"; + } + break; + case T_WindowAgg: + pname = sname = "WindowAgg"; + break; + case T_Unique: + pname = sname = "Unique"; + break; + case T_SetOp: + sname = "SetOp"; + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + pname = "SetOp"; + strategy = "Sorted"; + break; + case SETOP_HASHED: + pname = "HashSetOp"; + strategy = "Hashed"; + break; + default: + pname = "SetOp ???"; + strategy = "???"; + break; + } + break; + case T_LockRows: + pname = sname = "LockRows"; + break; + case T_Limit: + pname = sname = "Limit"; + break; + case T_Hash: + pname = sname = "Hash"; + break; + default: + pname = sname = "???"; + break; + } + + ExplainOpenGroup("Plan", + relationship ? NULL : "Plan", + true, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (plan_name) + { + ExplainIndentText(es); + appendStringInfo(es->str, "%s\n", plan_name); + es->indent++; + } + if (es->indent) + { + ExplainIndentText(es); + appendStringInfoString(es->str, "-> "); + es->indent += 2; + } + if (plan->parallel_aware) + appendStringInfoString(es->str, "Parallel "); + if (plan->async_capable) + appendStringInfoString(es->str, "Async "); + appendStringInfoString(es->str, pname); + es->indent++; + } + else + { + ExplainPropertyText("Node Type", sname, es); + if (strategy) + ExplainPropertyText("Strategy", strategy, es); + if (partialmode) + ExplainPropertyText("Partial Mode", partialmode, es); + if (operation) + ExplainPropertyText("Operation", operation, es); + if (relationship) + ExplainPropertyText("Parent Relationship", relationship, es); + if (plan_name) + ExplainPropertyText("Subplan Name", plan_name, es); + if (custom_name) + ExplainPropertyText("Custom Plan Provider", custom_name, es); + 
ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es); + ExplainPropertyBool("Async Capable", plan->async_capable, es); + } + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + ExplainScanTarget((Scan *) plan, es); + break; + case T_ForeignScan: + case T_CustomScan: + if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget((Scan *) plan, es); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = + explain_get_index_name(bitmapindexscan->indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " on %s", + quote_identifier(indexname)); + else + ExplainPropertyText("Index Name", indexname, es); + } + break; + case T_ModifyTable: + ExplainModifyTarget((ModifyTable *) plan, es); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + default: + jointype = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + { + /* + * For historical 
reasons, the join type is interpolated + * into the node type name... + */ + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(es->str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfoString(es->str, " Join"); + } + else + ExplainPropertyText("Join Type", jointype, es); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfo(es->str, " %s", setopcmd); + else + ExplainPropertyText("Command", setopcmd, es); + } + break; + default: + break; + } + + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + appendStringInfo(es->str, " (cost=%.2f..%.2f rows=%.0f width=%d)", + plan->startup_cost, plan->total_cost, + plan->plan_rows, plan->plan_width); + } + else + { + ExplainPropertyFloat("Startup Cost", NULL, plan->startup_cost, + 2, es); + ExplainPropertyFloat("Total Cost", NULL, plan->total_cost, + 2, es); + ExplainPropertyFloat("Plan Rows", NULL, plan->plan_rows, + 0, es); + ExplainPropertyInteger("Plan Width", NULL, plan->plan_width, + es); + } + } + + /* + * We have to forcibly clean up the instrumentation state because we + * haven't done ExecutorEnd yet. This is pretty grotty ... + * + * Note: contrib/auto_explain could cause instrumentation to be set up + * even though we didn't ask for it here. Be careful not to print any + * instrumentation results the user didn't ask for. But we do the + * InstrEndLoop call anyway, if possible, to reduce the number of cases + * auto_explain has to contend with. 
+ */ + if (planstate->instrument) + InstrEndLoop(planstate->instrument); + + if (es->analyze && + planstate->instrument && planstate->instrument->nloops > 0) + { + double nloops = planstate->instrument->nloops; + double startup_ms = 1000.0 * planstate->instrument->startup / nloops; + double total_ms = 1000.0 * planstate->instrument->total / nloops; + double rows = planstate->instrument->ntuples / nloops; + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (es->timing) + appendStringInfo(es->str, + " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)", + startup_ms, total_ms, rows, nloops); + else + appendStringInfo(es->str, + " (actual rows=%.0f loops=%.0f)", + rows, nloops); + } + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", startup_ms, + 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", total_ms, + 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + } + else if (es->analyze) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoString(es->str, " (never executed)"); + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", 0.0, 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, 0.0, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, 0.0, 0, es); + } + } + + /* in text format, first line ends here */ + if (es->format == EXPLAIN_FORMAT_TEXT) + appendStringInfoChar(es->str, '\n'); + + /* prepare per-worker general execution details */ + if (es->workers_state && es->verbose) + { + WorkerInstrumentation *w = planstate->worker_instrument; + + for (int n = 0; n < w->num_workers; n++) + { + Instrumentation *instrument = &w->instrument[n]; + double nloops = instrument->nloops; + double startup_ms; + double total_ms; + double rows; + + if (nloops <= 0) + continue; + startup_ms = 1000.0 * instrument->startup / nloops; + total_ms = 1000.0 * 
instrument->total / nloops; + rows = instrument->ntuples / nloops; + + ExplainOpenWorker(n, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + if (es->timing) + appendStringInfo(es->str, + "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n", + startup_ms, total_ms, rows, nloops); + else + appendStringInfo(es->str, + "actual rows=%.0f loops=%.0f\n", + rows, nloops); + } + else + { + if (es->timing) + { + ExplainPropertyFloat("Actual Startup Time", "ms", + startup_ms, 3, es); + ExplainPropertyFloat("Actual Total Time", "ms", + total_ms, 3, es); + } + ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es); + ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + } + + ExplainCloseWorker(n, es); + } + } + + /* target list */ + if (es->verbose) + show_plan_tlist(planstate, ancestors, es); + + /* unique join */ + switch (nodeTag(plan)) + { + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + /* try not to be too chatty about this in text mode */ + if (es->format != EXPLAIN_FORMAT_TEXT || + (es->verbose && ((Join *) plan)->inner_unique)) + ExplainPropertyBool("Inner Unique", + ((Join *) plan)->inner_unique, + es); + break; + default: + break; + } + + /* quals, sort keys, etc */ + switch (nodeTag(plan)) + { + case T_IndexScan: + show_scan_qual(((IndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->recheckqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + 
planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + ExplainPropertyFloat("Heap Fetches", NULL, + planstate->instrument->ntuples2, 0, es); + break; + case T_BitmapIndexScan: + show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + break; + case T_BitmapHeapScan: + show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, + "Recheck Cond", planstate, ancestors, es); + if (((BitmapHeapScan *) plan)->bitmapqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + show_tidbitmap_info((BitmapHeapScanState *) planstate, es); + break; + case T_SampleScan: + show_tablesample(((SampleScan *) plan)->tablesample, + planstate, ancestors, es); + /* fall through to print additional fields the same as SeqScan */ + /* FALLTHROUGH */ + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_SubqueryScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Gather: + { + Gather *gather = (Gather *) plan; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + ExplainPropertyInteger("Workers Planned", NULL, + gather->num_workers, es); + + /* Show params evaluated at gather node */ + if (gather->initParam) + show_eval_params(gather->initParam, es); + + if (es->analyze) + { + int nworkers; + + 
nworkers = ((GatherState *) planstate)->nworkers_launched; + ExplainPropertyInteger("Workers Launched", NULL, + nworkers, es); + } + + if (gather->single_copy || es->format != EXPLAIN_FORMAT_TEXT) + ExplainPropertyBool("Single Copy", gather->single_copy, es); + } + break; + case T_GatherMerge: + { + GatherMerge *gm = (GatherMerge *) plan; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + ExplainPropertyInteger("Workers Planned", NULL, + gm->num_workers, es); + + /* Show params evaluated at gather-merge node */ + if (gm->initParam) + show_eval_params(gm->initParam, es); + + if (es->analyze) + { + int nworkers; + + nworkers = ((GatherMergeState *) planstate)->nworkers_launched; + ExplainPropertyInteger("Workers Launched", NULL, + nworkers, es); + } + } + break; + case T_FunctionScan: + if (es->verbose) + { + List *fexprs = NIL; + ListCell *lc; + + foreach(lc, ((FunctionScan *) plan)->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + fexprs = lappend(fexprs, rtfunc->funcexpr); + } + /* We rely on show_expression to insert commas as needed */ + show_expression((Node *) fexprs, + "Function Call", planstate, ancestors, + es->verbose, es); + } + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TableFuncScan: + if (es->verbose) + { + TableFunc *tablefunc = ((TableFuncScan *) plan)->tablefunc; + + show_expression((Node *) tablefunc, + "Table Function Call", planstate, ancestors, + es->verbose, es); + } + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_TidScan: + { + /* + * The tidquals list has OR semantics, so be sure to show it + * as an OR condition. 
+ */ + List *tidquals = ((TidScan *) plan)->tidquals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_orclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_TidRangeScan: + { + /* + * The tidrangequals list has AND semantics, so be sure to + * show it as an AND condition. + */ + List *tidquals = ((TidRangeScan *) plan)->tidrangequals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_andclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; + case T_ForeignScan: + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_foreignscan_info((ForeignScanState *) planstate, es); + break; + case T_CustomScan: + { + CustomScanState *css = (CustomScanState *) planstate; + + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (css->methods->ExplainCustomScan) + css->methods->ExplainCustomScan(css, ancestors, es); + } + break; + case T_NestLoop: + show_upper_qual(((NestLoop *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((NestLoop *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_MergeJoin: + show_upper_qual(((MergeJoin *) plan)->mergeclauses, + "Merge Cond", 
planstate, ancestors, es); + show_upper_qual(((MergeJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((MergeJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_HashJoin: + show_upper_qual(((HashJoin *) plan)->hashclauses, + "Hash Cond", planstate, ancestors, es); + show_upper_qual(((HashJoin *) plan)->join.joinqual, + "Join Filter", planstate, ancestors, es); + if (((HashJoin *) plan)->join.joinqual) + show_instrumentation_count("Rows Removed by Join Filter", 1, + planstate, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 2, + planstate, es); + break; + case T_Agg: + show_agg_keys(castNode(AggState, planstate), ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + show_hashagg_info((AggState *) planstate, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_WindowAgg: + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + show_upper_qual(((WindowAgg *) plan)->runConditionOrig, + "Run Condition", planstate, ancestors, es); + break; + case T_Group: + show_group_keys(castNode(GroupState, planstate), ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_Sort: + show_sort_keys(castNode(SortState, planstate), ancestors, es); + show_sort_info(castNode(SortState, planstate), es); + break; + case T_IncrementalSort: + show_incremental_sort_keys(castNode(IncrementalSortState, 
planstate), + ancestors, es); + show_incremental_sort_info(castNode(IncrementalSortState, planstate), + es); + break; + case T_MergeAppend: + show_merge_append_keys(castNode(MergeAppendState, planstate), + ancestors, es); + break; + case T_Result: + show_upper_qual((List *) ((Result *) plan)->resconstantqual, + "One-Time Filter", planstate, ancestors, es); + show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; + case T_ModifyTable: + show_modifytable_info(castNode(ModifyTableState, planstate), ancestors, + es); + break; + case T_Hash: + show_hash_info(castNode(HashState, planstate), es); + break; + case T_Memoize: + show_memoize_info(castNode(MemoizeState, planstate), ancestors, + es); + break; + default: + break; + } + + /* + * Prepare per-worker JIT instrumentation. As with the overall JIT + * summary, this is printed only if printing costs is enabled. + */ + if (es->workers_state && es->costs && es->verbose) + { + SharedJitInstrumentation *w = planstate->worker_jit_instrument; + + if (w) + { + for (int n = 0; n < w->num_workers; n++) + { + ExplainOpenWorker(n, es); + ExplainPrintJIT(es, planstate->state->es_jit_flags, + &w->jit_instr[n]); + ExplainCloseWorker(n, es); + } + } + } + + /* Show buffer/WAL usage */ + if (es->buffers && planstate->instrument) + show_buffer_usage(es, &planstate->instrument->bufusage, false); + if (es->wal && planstate->instrument) + show_wal_usage(es, &planstate->instrument->walusage); + + /* Prepare per-worker buffer/WAL usage */ + if (es->workers_state && (es->buffers || es->wal) && es->verbose) + { + WorkerInstrumentation *w = planstate->worker_instrument; + + for (int n = 0; n < w->num_workers; n++) + { + Instrumentation *instrument = &w->instrument[n]; + double nloops = instrument->nloops; + + if (nloops <= 0) + continue; + + ExplainOpenWorker(n, es); + if (es->buffers) + show_buffer_usage(es, &instrument->bufusage, 
false); + if (es->wal) + show_wal_usage(es, &instrument->walusage); + ExplainCloseWorker(n, es); + } + } + + /* Show per-worker details for this plan node, then pop that stack */ + if (es->workers_state) + ExplainFlushWorkersState(es); + es->workers_state = save_workers_state; + + /* + * If partition pruning was done during executor initialization, the + * number of child plans we'll display below will be less than the number + * of subplans that was specified in the plan. To make this a bit less + * mysterious, emit an indication that this happened. Note that this + * field is emitted now because we want it to be a property of the parent + * node; it *cannot* be emitted within the Plans sub-node we'll open next. + */ + switch (nodeTag(plan)) + { + case T_Append: + ExplainMissingMembers(((AppendState *) planstate)->as_nplans, + list_length(((Append *) plan)->appendplans), + es); + break; + case T_MergeAppend: + ExplainMissingMembers(((MergeAppendState *) planstate)->ms_nplans, + list_length(((MergeAppend *) plan)->mergeplans), + es); + break; + default: + break; + } + + /* Get ready to display the child plans */ + haschildren = planstate->initPlan || + outerPlanState(planstate) || + innerPlanState(planstate) || + IsA(plan, Append) || + IsA(plan, MergeAppend) || + IsA(plan, BitmapAnd) || + IsA(plan, BitmapOr) || + IsA(plan, SubqueryScan) || + (IsA(planstate, CustomScanState) && + ((CustomScanState *) planstate)->custom_ps != NIL) || + planstate->subPlan; + if (haschildren) + { + ExplainOpenGroup("Plans", "Plans", false, es); + /* Pass current Plan as head of ancestors list for children */ + ancestors = lcons(plan, ancestors); + } + + /* initPlan-s */ + if (planstate->initPlan) + ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es); + + /* lefttree */ + if (outerPlanState(planstate)) + ExplainNode(outerPlanState(planstate), ancestors, + "Outer", NULL, es); + + /* righttree */ + if (innerPlanState(planstate)) + ExplainNode(innerPlanState(planstate), 
ancestors, + "Inner", NULL, es); + + /* special child plans */ + switch (nodeTag(plan)) + { + case T_Append: + ExplainMemberNodes(((AppendState *) planstate)->appendplans, + ((AppendState *) planstate)->as_nplans, + ancestors, es); + break; + case T_MergeAppend: + ExplainMemberNodes(((MergeAppendState *) planstate)->mergeplans, + ((MergeAppendState *) planstate)->ms_nplans, + ancestors, es); + break; + case T_BitmapAnd: + ExplainMemberNodes(((BitmapAndState *) planstate)->bitmapplans, + ((BitmapAndState *) planstate)->nplans, + ancestors, es); + break; + case T_BitmapOr: + ExplainMemberNodes(((BitmapOrState *) planstate)->bitmapplans, + ((BitmapOrState *) planstate)->nplans, + ancestors, es); + break; + case T_SubqueryScan: + ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors, + "Subquery", NULL, es); + break; + case T_CustomScan: + ExplainCustomChildren((CustomScanState *) planstate, + ancestors, es); + break; + default: + break; + } + + /* subPlan-s */ + if (planstate->subPlan) + ExplainSubPlans(planstate->subPlan, ancestors, "SubPlan", es); + + /* end of child plans */ + if (haschildren) + { + ancestors = list_delete_first(ancestors); + ExplainCloseGroup("Plans", "Plans", false, es); + } + + /* in text format, undo whatever indentation we added */ + if (es->format == EXPLAIN_FORMAT_TEXT) + es->indent = save_indent; + + ExplainCloseGroup("Plan", + relationship ? 
					  NULL : "Plan",
					  true, es);
}

/*
 * Show the targetlist of a plan node
 *
 * Emits the node's output columns as an "Output" property, deparsing each
 * targetlist expression (resjunk columns included).
 */
static void
show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	List	   *context;
	List	   *result = NIL;
	bool		useprefix;
	ListCell   *lc;

	/* No work if empty tlist (this occurs eg in bitmap indexscans) */
	if (plan->targetlist == NIL)
		return;
	/* The tlist of an Append isn't real helpful, so suppress it */
	if (IsA(plan, Append))
		return;
	/* Likewise for MergeAppend and RecursiveUnion */
	if (IsA(plan, MergeAppend))
		return;
	if (IsA(plan, RecursiveUnion))
		return;

	/*
	 * Likewise for ForeignScan that executes a direct INSERT/UPDATE/DELETE
	 *
	 * Note: the tlist for a ForeignScan that executes a direct INSERT/UPDATE
	 * might contain subplan output expressions that are confusing in this
	 * context.  The tlist for a ForeignScan that executes a direct UPDATE/
	 * DELETE always contains "junk" target columns to identify the exact row
	 * to update or delete, which would be confusing in this context.  So, we
	 * suppress it in all the cases.
	 */
	if (IsA(plan, ForeignScan) &&
		((ForeignScan *) plan)->operation != CMD_SELECT)
		return;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);
	/* qualify Vars with relation names only when more than one RTE exists */
	useprefix = list_length(es->rtable) > 1;

	/* Deparse each result column (we now include resjunk ones) */
	foreach(lc, plan->targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		result = lappend(result,
						 deparse_expression((Node *) tle->expr, context,
											useprefix, false));
	}

	/* Print results */
	ExplainPropertyList("Output", result, es);
}

/*
 * Show a generic expression
 *
 * Deparses "node" in the deparse context of the given plan node and emits it
 * as a text property labeled "qlabel".  "useprefix" forces relation-name
 * qualification of Vars in the deparsed text.
 */
static void
show_expression(Node *node, const char *qlabel,
				PlanState *planstate, List *ancestors,
				bool useprefix, ExplainState *es)
{
	List	   *context;
	char	   *exprstr;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);

	/* Deparse the expression */
	exprstr = deparse_expression(node, context, useprefix, false);

	/* And add to es->str */
	ExplainPropertyText(qlabel, exprstr, es);
}

/*
 * Show a qualifier expression (which is a List with implicit AND semantics)
 */
static void
show_qual(List *qual, const char *qlabel,
		  PlanState *planstate, List *ancestors,
		  bool useprefix, ExplainState *es)
{
	Node	   *node;

	/* No work if empty qual */
	if (qual == NIL)
		return;

	/* Convert AND list to explicit AND */
	node = (Node *) make_ands_explicit(qual);

	/* And show it */
	show_expression(node, qlabel, planstate, ancestors, useprefix, es);
}

/*
 * Show a qualifier expression for a scan plan node
 */
static void
show_scan_qual(List *qual, const char *qlabel,
			   PlanState *planstate, List *ancestors,
			   ExplainState *es)
{
	bool		useprefix;

	/* SubqueryScan quals may reference both inner and outer names */
	useprefix = (IsA(planstate->plan, SubqueryScan) || es->verbose);
	show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
}

/*
 * Show a qualifier expression for an upper-level plan node
 */
static
void +show_upper_qual(List *qual, const char *qlabel, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + bool useprefix; + + useprefix = (list_length(es->rtable) > 1 || es->verbose); + show_qual(qual, qlabel, planstate, ancestors, useprefix, es); +} + +/* + * Show the sort keys for a Sort node. + */ +static void +show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) +{ + Sort *plan = (Sort *) sortstate->ss.ps.plan; + + show_sort_group_keys((PlanState *) sortstate, "Sort Key", + plan->numCols, 0, plan->sortColIdx, + plan->sortOperators, plan->collations, + plan->nullsFirst, + ancestors, es); +} + +/* + * Show the sort keys for a IncrementalSort node. + */ +static void +show_incremental_sort_keys(IncrementalSortState *incrsortstate, + List *ancestors, ExplainState *es) +{ + IncrementalSort *plan = (IncrementalSort *) incrsortstate->ss.ps.plan; + + show_sort_group_keys((PlanState *) incrsortstate, "Sort Key", + plan->sort.numCols, plan->nPresortedCols, + plan->sort.sortColIdx, + plan->sort.sortOperators, plan->sort.collations, + plan->sort.nullsFirst, + ancestors, es); +} + +/* + * Likewise, for a MergeAppend node. + */ +static void +show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es) +{ + MergeAppend *plan = (MergeAppend *) mstate->ps.plan; + + show_sort_group_keys((PlanState *) mstate, "Sort Key", + plan->numCols, 0, plan->sortColIdx, + plan->sortOperators, plan->collations, + plan->nullsFirst, + ancestors, es); +} + +/* + * Show the grouping keys for an Agg node. 
 */
static void
show_agg_keys(AggState *astate, List *ancestors,
			  ExplainState *es)
{
	Agg		   *plan = (Agg *) astate->ss.ps.plan;

	if (plan->numCols > 0 || plan->groupingSets)
	{
		/* The key columns refer to the tlist of the child plan */
		ancestors = lcons(plan, ancestors);

		if (plan->groupingSets)
			show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
		else
			show_sort_group_keys(outerPlanState(astate), "Group Key",
								 plan->numCols, 0, plan->grpColIdx,
								 NULL, NULL, NULL,
								 ancestors, es);

		ancestors = list_delete_first(ancestors);
	}
}

/*
 * Show the grouping sets of an Agg node that implements GROUPING SETS:
 * first the keys of the Agg node itself, then those of each additional
 * aggregation phase in agg->chain (each possibly fed by its own Sort).
 */
static void
show_grouping_sets(PlanState *planstate, Agg *agg,
				   List *ancestors, ExplainState *es)
{
	List	   *context;
	bool		useprefix;
	ListCell   *lc;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);
	useprefix = (list_length(es->rtable) > 1 || es->verbose);

	ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);

	/* keys of the Agg node itself; it is not fed by a chained Sort */
	show_grouping_set_keys(planstate, agg, NULL,
						   context, useprefix, ancestors, es);

	foreach(lc, agg->chain)
	{
		Agg		   *aggnode = lfirst(lc);
		Sort	   *sortnode = (Sort *) aggnode->plan.lefttree;

		show_grouping_set_keys(planstate, aggnode, sortnode,
							   context, useprefix, ancestors, es);
	}

	ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es);
}

/*
 * Show the keys of one grouping-set phase.  "sortnode", if not NULL, is the
 * Sort node feeding this phase; its sort keys are printed before the
 * phase's grouping keys.
 */
static void
show_grouping_set_keys(PlanState *planstate,
					   Agg *aggnode, Sort *sortnode,
					   List *context, bool useprefix,
					   List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	char	   *exprstr;
	ListCell   *lc;
	List	   *gsets = aggnode->groupingSets;
	AttrNumber *keycols = aggnode->grpColIdx;
	const char *keyname;
	const char *keysetname;

	/* hashed phases are labeled differently from sorted/plain ones */
	if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
	{
		keyname = "Hash Key";
		keysetname = "Hash Keys";
	}
	else
	{
		keyname = "Group Key";
		keysetname = "Group Keys";
	}

	ExplainOpenGroup("Grouping Set", NULL, true, es);

	if (sortnode)
	{
		show_sort_group_keys(planstate, "Sort Key",
							 sortnode->numCols, 0, sortnode->sortColIdx,
							 sortnode->sortOperators, sortnode->collations,
							 sortnode->nullsFirst,
							 ancestors, es);
		/* in text mode, indent the keys under the "Sort Key" line */
		if (es->format == EXPLAIN_FORMAT_TEXT)
			es->indent++;
	}

	ExplainOpenGroup(keysetname, keysetname, false, es);

	foreach(lc, gsets)
	{
		List	   *result = NIL;
		ListCell   *lc2;

		foreach(lc2, (List *) lfirst(lc))
		{
			Index		i = lfirst_int(lc2);
			AttrNumber	keyresno = keycols[i];
			TargetEntry *target = get_tle_by_resno(plan->targetlist,
												   keyresno);

			if (!target)
				elog(ERROR, "no tlist entry for key %d", keyresno);
			/* Deparse the expression, showing any top-level cast */
			exprstr = deparse_expression((Node *) target->expr, context,
										 useprefix, true);

			result = lappend(result, exprstr);
		}

		/* an empty grouping set is rendered as "()" in text format */
		if (!result && es->format == EXPLAIN_FORMAT_TEXT)
			ExplainPropertyText(keyname, "()", es);
		else
			ExplainPropertyListNested(keyname, result, es);
	}

	ExplainCloseGroup(keysetname, keysetname, false, es);

	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
		es->indent--;

	ExplainCloseGroup("Grouping Set", NULL, true, es);
}

/*
 * Show the grouping keys for a Group node.
 */
static void
show_group_keys(GroupState *gstate, List *ancestors,
				ExplainState *es)
{
	Group	   *plan = (Group *) gstate->ss.ps.plan;

	/* The key columns refer to the tlist of the child plan */
	ancestors = lcons(plan, ancestors);
	show_sort_group_keys(outerPlanState(gstate), "Group Key",
						 plan->numCols, 0, plan->grpColIdx,
						 NULL, NULL, NULL,
						 ancestors, es);
	ancestors = list_delete_first(ancestors);
}

/*
 * Common code to show sort/group keys, which are represented in plan nodes
 * as arrays of targetlist indexes.  If it's a sort key rather than a group
 * key, also pass sort operators/collations/nullsFirst arrays.
 */
static void
show_sort_group_keys(PlanState *planstate, const char *qlabel,
					 int nkeys, int nPresortedKeys, AttrNumber *keycols,
					 Oid *sortOperators, Oid *collations, bool *nullsFirst,
					 List *ancestors, ExplainState *es)
{
	Plan	   *plan = planstate->plan;
	List	   *context;
	List	   *result = NIL;
	List	   *resultPresorted = NIL;
	StringInfoData sortkeybuf;
	bool		useprefix;
	int			keyno;

	/* nothing to print for an empty key list */
	if (nkeys <= 0)
		return;

	initStringInfo(&sortkeybuf);

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);
	useprefix = (list_length(es->rtable) > 1 || es->verbose);

	for (keyno = 0; keyno < nkeys; keyno++)
	{
		/* find key expression in tlist */
		AttrNumber	keyresno = keycols[keyno];
		TargetEntry *target = get_tle_by_resno(plan->targetlist,
											   keyresno);
		char	   *exprstr;

		if (!target)
			elog(ERROR, "no tlist entry for key %d", keyresno);
		/* Deparse the expression, showing any top-level cast */
		exprstr = deparse_expression((Node *) target->expr, context,
									 useprefix, true);
		resetStringInfo(&sortkeybuf);
		appendStringInfoString(&sortkeybuf, exprstr);
		/* Append sort order information, if relevant (NULL for group keys) */
		if (sortOperators != NULL)
			show_sortorder_options(&sortkeybuf,
								   (Node *) target->expr,
								   sortOperators[keyno],
								   collations[keyno],
								   nullsFirst[keyno]);
		/* Emit one property-list item per sort key */
		result = lappend(result, pstrdup(sortkeybuf.data));
		/* the first nPresortedKeys keys also go in the "Presorted Key" list */
		if (keyno < nPresortedKeys)
			resultPresorted = lappend(resultPresorted, exprstr);
	}

	ExplainPropertyList(qlabel, result, es);
	if (nPresortedKeys > 0)
		ExplainPropertyList("Presorted Key", resultPresorted, es);
}

/*
 * Append nondefault characteristics of the sort ordering of a column to buf
 * (collation, direction, NULLS FIRST/LAST)
 */
static void
show_sortorder_options(StringInfo buf, Node *sortexpr,
					   Oid sortOperator, Oid collation, bool nullsFirst)
{
	Oid			sortcoltype = exprType(sortexpr);
	bool		reverse = false;

	TypeCacheEntry *typentry;

	/* need the type's default "<" and ">" operators to classify direction */
	typentry = lookup_type_cache(sortcoltype,
								 TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);

	/*
	 * Print COLLATE if it's not default for the column's type.  There are
	 * some cases where this is redundant, eg if expression is a column whose
	 * declared collation is that collation, but it's hard to distinguish that
	 * here (and arguably, printing COLLATE explicitly is a good idea anyway
	 * in such cases).
	 */
	if (OidIsValid(collation) && collation != get_typcollation(sortcoltype))
	{
		char	   *collname = get_collation_name(collation);

		if (collname == NULL)
			elog(ERROR, "cache lookup failed for collation %u", collation);
		appendStringInfo(buf, " COLLATE %s", quote_identifier(collname));
	}

	/* Print direction if not ASC, or USING if non-default sort operator */
	if (sortOperator == typentry->gt_opr)
	{
		appendStringInfoString(buf, " DESC");
		reverse = true;
	}
	else if (sortOperator != typentry->lt_opr)
	{
		char	   *opname = get_opname(sortOperator);

		if (opname == NULL)
			elog(ERROR, "cache lookup failed for operator %u", sortOperator);
		appendStringInfo(buf, " USING %s", opname);
		/* Determine whether operator would be considered ASC or DESC */
		(void) get_equality_op_for_ordering_op(sortOperator, &reverse);
	}

	/* Add NULLS FIRST/LAST only if it wouldn't be default */
	if (nullsFirst && !reverse)
	{
		appendStringInfoString(buf, " NULLS FIRST");
	}
	else if (!nullsFirst && reverse)
	{
		appendStringInfoString(buf, " NULLS LAST");
	}
}

/*
 * Show TABLESAMPLE properties
 */
static void
show_tablesample(TableSampleClause *tsc, PlanState *planstate,
				 List *ancestors, ExplainState *es)
{
	List	   *context;
	bool		useprefix;
	char	   *method_name;
	List	   *params = NIL;
	char	   *repeatable;
	ListCell   *lc;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   planstate->plan,
									   ancestors);
	useprefix = list_length(es->rtable) > 1;

	/* Get the tablesample method name
 */
	method_name = get_func_name(tsc->tsmhandler);

	/* Deparse parameter expressions */
	foreach(lc, tsc->args)
	{
		Node	   *arg = (Node *) lfirst(lc);

		params = lappend(params,
						 deparse_expression(arg, context,
											useprefix, false));
	}
	/* REPEATABLE seed expression is optional */
	if (tsc->repeatable)
		repeatable = deparse_expression((Node *) tsc->repeatable, context,
										useprefix, false);
	else
		repeatable = NULL;

	/* Print results */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		bool		first = true;

		ExplainIndentText(es);
		appendStringInfo(es->str, "Sampling: %s (", method_name);
		foreach(lc, params)
		{
			if (!first)
				appendStringInfoString(es->str, ", ");
			appendStringInfoString(es->str, (const char *) lfirst(lc));
			first = false;
		}
		appendStringInfoChar(es->str, ')');
		if (repeatable)
			appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
		appendStringInfoChar(es->str, '\n');
	}
	else
	{
		ExplainPropertyText("Sampling Method", method_name, es);
		ExplainPropertyList("Sampling Parameters", params, es);
		if (repeatable)
			ExplainPropertyText("Repeatable Seed", repeatable, es);
	}
}

/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
 */
static void
show_sort_info(SortState *sortstate, ExplainState *es)
{
	if (!es->analyze)
		return;

	/* leader's own sort, if it ran to completion */
	if (sortstate->sort_Done && sortstate->tuplesortstate != NULL)
	{
		Tuplesortstate *state = (Tuplesortstate *) sortstate->tuplesortstate;
		TuplesortInstrumentation stats;
		const char *sortMethod;
		const char *spaceType;
		int64		spaceUsed;

		tuplesort_get_stats(state, &stats);
		sortMethod = tuplesort_method_name(stats.sortMethod);
		spaceType = tuplesort_space_type_name(stats.spaceType);
		spaceUsed = stats.spaceUsed;

		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Sort Method: %s %s: " INT64_FORMAT "kB\n",
							 sortMethod, spaceType, spaceUsed);
		}
		else
		{
			ExplainPropertyText("Sort Method", sortMethod, es);
			ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
			ExplainPropertyText("Sort Space Type", spaceType, es);
		}
	}

	/*
	 * You might think we should just skip this stanza entirely when
	 * es->hide_workers is true, but then we'd get no sort-method output at
	 * all.  We have to make it look like worker 0's data is top-level data.
	 * This is easily done by just skipping the OpenWorker/CloseWorker calls.
	 * Currently, we don't worry about the possibility that there are multiple
	 * workers in such a case; if there are, duplicate output fields will be
	 * emitted.
	 */
	if (sortstate->shared_info != NULL)
	{
		int			n;

		for (n = 0; n < sortstate->shared_info->num_workers; n++)
		{
			TuplesortInstrumentation *sinstrument;
			const char *sortMethod;
			const char *spaceType;
			int64		spaceUsed;

			sinstrument = &sortstate->shared_info->sinstrument[n];
			if (sinstrument->sortMethod == SORT_TYPE_STILL_IN_PROGRESS)
				continue;		/* ignore any unfilled slots */
			sortMethod = tuplesort_method_name(sinstrument->sortMethod);
			spaceType = tuplesort_space_type_name(sinstrument->spaceType);
			spaceUsed = sinstrument->spaceUsed;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);
				appendStringInfo(es->str,
								 "Sort Method: %s %s: " INT64_FORMAT "kB\n",
								 sortMethod, spaceType, spaceUsed);
			}
			else
			{
				ExplainPropertyText("Sort Method", sortMethod, es);
				ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
				ExplainPropertyText("Sort Space Type", spaceType, es);
			}

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * Incremental sort nodes sort in (a potentially very large number of) batches,
 * so EXPLAIN ANALYZE needs to roll up the tuplesort stats from each batch into
 * an intelligible summary.
 *
 * This function is used for both a non-parallel node and each worker in a
 * parallel incremental sort node.
 */
static void
show_incremental_sort_group_info(IncrementalSortGroupInfo *groupInfo,
								 const char *groupLabel, bool indent, ExplainState *es)
{
	ListCell   *methodCell;
	List	   *methodNames = NIL;

	/* Generate a list of sort methods used across all groups. */
	for (int bit = 0; bit < NUM_TUPLESORTMETHODS; bit++)
	{
		/* sortMethods is a bitmask; test each method bit in turn */
		TuplesortMethod sortMethod = (1 << bit);

		if (groupInfo->sortMethods & sortMethod)
		{
			const char *methodName = tuplesort_method_name(sortMethod);

			methodNames = lappend(methodNames, unconstify(char *, methodName));
		}
	}

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (indent)
			appendStringInfoSpaces(es->str, es->indent * 2);
		appendStringInfo(es->str, "%s Groups: " INT64_FORMAT " Sort Method", groupLabel,
						 groupInfo->groupCount);
		/* plural/singular based on methodNames size */
		if (list_length(methodNames) > 1)
			appendStringInfoString(es->str, "s: ");
		else
			appendStringInfoString(es->str, ": ");
		foreach(methodCell, methodNames)
		{
			appendStringInfoString(es->str, (char *) methodCell->ptr_value);
			/* comma-separate all but the last method name */
			if (foreach_current_index(methodCell) < list_length(methodNames) - 1)
				appendStringInfoString(es->str, ", ");
		}

		if (groupInfo->maxMemorySpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
			appendStringInfo(es->str, " Average %s: " INT64_FORMAT "kB Peak %s: " INT64_FORMAT "kB",
							 spaceTypeName, avgSpace,
							 spaceTypeName, groupInfo->maxMemorySpaceUsed);
		}

		if (groupInfo->maxDiskSpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;

			const char *spaceTypeName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
			appendStringInfo(es->str, " Average %s: " INT64_FORMAT "kB Peak %s: " INT64_FORMAT "kB",
							 spaceTypeName, avgSpace,
							 spaceTypeName, groupInfo->maxDiskSpaceUsed);
		}
	}
	else
	{
		StringInfoData
					groupName;

		initStringInfo(&groupName);
		appendStringInfo(&groupName, "%s Groups", groupLabel);
		ExplainOpenGroup("Incremental Sort Groups", groupName.data, true, es);
		ExplainPropertyInteger("Group Count", NULL, groupInfo->groupCount, es);

		ExplainPropertyList("Sort Methods Used", methodNames, es);

		if (groupInfo->maxMemorySpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalMemorySpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;
			StringInfoData memoryName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_MEMORY);
			initStringInfo(&memoryName);
			appendStringInfo(&memoryName, "Sort Space %s", spaceTypeName);
			ExplainOpenGroup("Sort Space", memoryName.data, true, es);

			ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
			ExplainPropertyInteger("Peak Sort Space Used", "kB",
								   groupInfo->maxMemorySpaceUsed, es);

			ExplainCloseGroup("Sort Space", memoryName.data, true, es);
		}
		if (groupInfo->maxDiskSpaceUsed > 0)
		{
			int64		avgSpace = groupInfo->totalDiskSpaceUsed / groupInfo->groupCount;
			const char *spaceTypeName;
			StringInfoData diskName;

			spaceTypeName = tuplesort_space_type_name(SORT_SPACE_TYPE_DISK);
			initStringInfo(&diskName);
			appendStringInfo(&diskName, "Sort Space %s", spaceTypeName);
			ExplainOpenGroup("Sort Space", diskName.data, true, es);

			ExplainPropertyInteger("Average Sort Space Used", "kB", avgSpace, es);
			ExplainPropertyInteger("Peak Sort Space Used", "kB",
								   groupInfo->maxDiskSpaceUsed, es);

			ExplainCloseGroup("Sort Space", diskName.data, true, es);
		}

		ExplainCloseGroup("Incremental Sort Groups", groupName.data, true, es);
	}
}

/*
 * If it's EXPLAIN ANALYZE, show tuplesort stats for an incremental sort node
 */
static void
show_incremental_sort_info(IncrementalSortState *incrsortstate,
						   ExplainState *es)
{
	IncrementalSortGroupInfo *fullsortGroupInfo;
	IncrementalSortGroupInfo *prefixsortGroupInfo;

	fullsortGroupInfo =
		&incrsortstate->incsort_info.fullsortGroupInfo;

	if (!es->analyze)
		return;

	/*
	 * Since we never have any prefix groups unless we've first sorted a full
	 * groups and transitioned modes (copying the tuples into a prefix group),
	 * we don't need to do anything if there were 0 full groups.
	 *
	 * We still have to continue after this block if there are no full groups,
	 * though, since it's possible that we have workers that did real work
	 * even if the leader didn't participate.
	 */
	if (fullsortGroupInfo->groupCount > 0)
	{
		show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort", true, es);
		prefixsortGroupInfo = &incrsortstate->incsort_info.prefixsortGroupInfo;
		if (prefixsortGroupInfo->groupCount > 0)
		{
			if (es->format == EXPLAIN_FORMAT_TEXT)
				appendStringInfoChar(es->str, '\n');
			show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
		}
		if (es->format == EXPLAIN_FORMAT_TEXT)
			appendStringInfoChar(es->str, '\n');
	}

	/* per-worker stats, if this was a parallel incremental sort */
	if (incrsortstate->shared_info != NULL)
	{
		int			n;
		bool		indent_first_line;

		for (n = 0; n < incrsortstate->shared_info->num_workers; n++)
		{
			IncrementalSortInfo *incsort_info =
			&incrsortstate->shared_info->sinfo[n];

			/*
			 * If a worker hasn't processed any sort groups at all, then
			 * exclude it from output since it either didn't launch or didn't
			 * contribute anything meaningful.
			 */
			fullsortGroupInfo = &incsort_info->fullsortGroupInfo;

			/*
			 * Since we never have any prefix groups unless we've first sorted
			 * a full groups and transitioned modes (copying the tuples into a
			 * prefix group), we don't need to do anything if there were 0
			 * full groups.
			 */
			if (fullsortGroupInfo->groupCount == 0)
				continue;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			/* worker 0's data may be shown as top-level when workers are hidden */
			indent_first_line = es->workers_state == NULL || es->verbose;
			show_incremental_sort_group_info(fullsortGroupInfo, "Full-sort",
											 indent_first_line, es);
			prefixsortGroupInfo = &incsort_info->prefixsortGroupInfo;
			if (prefixsortGroupInfo->groupCount > 0)
			{
				if (es->format == EXPLAIN_FORMAT_TEXT)
					appendStringInfoChar(es->str, '\n');
				show_incremental_sort_group_info(prefixsortGroupInfo, "Pre-sorted", true, es);
			}
			if (es->format == EXPLAIN_FORMAT_TEXT)
				appendStringInfoChar(es->str, '\n');

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * Show information on hash buckets/batches.
 */
static void
show_hash_info(HashState *hashstate, ExplainState *es)
{
	HashInstrumentation hinstrument = {0};

	/*
	 * Collect stats from the local process, even when it's a parallel query.
	 * In a parallel query, the leader process may or may not have run the
	 * hash join, and even if it did it may not have built a hash table due to
	 * timing (if it started late it might have seen no tuples in the outer
	 * relation and skipped building the hash table).  Therefore we have to be
	 * prepared to get instrumentation data from all participants.
	 */
	if (hashstate->hinstrument)
		memcpy(&hinstrument, hashstate->hinstrument,
			   sizeof(HashInstrumentation));

	/*
	 * Merge results from workers.  In the parallel-oblivious case, the
	 * results from all participants should be identical, except where
	 * participants didn't run the join at all so have no data.  In the
	 * parallel-aware case, we need to consider all the results.  Each worker
	 * may have seen a different subset of batches and we want to report the
	 * highest memory usage across all batches.  We take the maxima of other
	 * values too, for the same reasons as in ExecHashAccumInstrumentation.
	 */
	if (hashstate->shared_info)
	{
		SharedHashInfo *shared_info = hashstate->shared_info;
		int			i;

		for (i = 0; i < shared_info->num_workers; ++i)
		{
			HashInstrumentation *worker_hi = &shared_info->hinstrument[i];

			hinstrument.nbuckets = Max(hinstrument.nbuckets,
									   worker_hi->nbuckets);
			hinstrument.nbuckets_original = Max(hinstrument.nbuckets_original,
												worker_hi->nbuckets_original);
			hinstrument.nbatch = Max(hinstrument.nbatch,
									 worker_hi->nbatch);
			hinstrument.nbatch_original = Max(hinstrument.nbatch_original,
											  worker_hi->nbatch_original);
			hinstrument.space_peak = Max(hinstrument.space_peak,
										 worker_hi->space_peak);
		}
	}

	/* nbatch > 0 means some participant actually built a hash table */
	if (hinstrument.nbatch > 0)
	{
		/* round bytes up to whole kilobytes */
		long		spacePeakKb = (hinstrument.space_peak + 1023) / 1024;

		if (es->format != EXPLAIN_FORMAT_TEXT)
		{
			ExplainPropertyInteger("Hash Buckets", NULL,
								   hinstrument.nbuckets, es);
			ExplainPropertyInteger("Original Hash Buckets", NULL,
								   hinstrument.nbuckets_original, es);
			ExplainPropertyInteger("Hash Batches", NULL,
								   hinstrument.nbatch, es);
			ExplainPropertyInteger("Original Hash Batches", NULL,
								   hinstrument.nbatch_original, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB",
								   spacePeakKb, es);
		}
		else if (hinstrument.nbatch_original != hinstrument.nbatch ||
				 hinstrument.nbuckets_original != hinstrument.nbuckets)
		{
			/* text format: mention original values only when they changed */
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Buckets: %d (originally %d) Batches: %d (originally %d) Memory Usage: %ldkB\n",
							 hinstrument.nbuckets,
							 hinstrument.nbuckets_original,
							 hinstrument.nbatch,
							 hinstrument.nbatch_original,
							 spacePeakKb);
		}
		else
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Buckets: %d Batches: %d Memory Usage: %ldkB\n",
							 hinstrument.nbuckets, hinstrument.nbatch,
							 spacePeakKb);
		}
	}
}

/*
 * Show information on memoize hits/misses/evictions and memory usage.
 */
static void
show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
{
	Plan	   *plan = ((PlanState *) mstate)->plan;
	ListCell   *lc;
	List	   *context;
	StringInfoData keystr;
	char	   *separator = "";
	bool		useprefix;
	int64		memPeakKb;		/* kB, rounded up */

	initStringInfo(&keystr);

	/*
	 * It's hard to imagine having a memoize node with fewer than 2 RTEs, but
	 * let's just keep the same useprefix logic as elsewhere in this file.
	 */
	useprefix = list_length(es->rtable) > 1 || es->verbose;

	/* Set up deparsing context */
	context = set_deparse_context_plan(es->deparse_cxt,
									   plan,
									   ancestors);

	/* Build a comma-separated deparsed list of the cache key expressions */
	foreach(lc, ((Memoize *) plan)->param_exprs)
	{
		Node	   *expr = (Node *) lfirst(lc);

		appendStringInfoString(&keystr, separator);

		appendStringInfoString(&keystr, deparse_expression(expr, context,
														   useprefix, false));
		separator = ", ";
	}

	if (es->format != EXPLAIN_FORMAT_TEXT)
	{
		ExplainPropertyText("Cache Key", keystr.data, es);
		ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
	}
	else
	{
		ExplainIndentText(es);
		appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
		ExplainIndentText(es);
		appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
	}

	pfree(keystr.data);

	/* Runtime statistics only exist under EXPLAIN ANALYZE */
	if (!es->analyze)
		return;

	if (mstate->stats.cache_misses > 0)
	{
		/*
		 * mem_peak is only set when we freed memory, so we must use mem_used
		 * when mem_peak is 0.
		 */
		if (mstate->stats.mem_peak > 0)
			memPeakKb = (mstate->stats.mem_peak + 1023) / 1024;
		else
			memPeakKb = (mstate->mem_used + 1023) / 1024;

		if (es->format != EXPLAIN_FORMAT_TEXT)
		{
			ExplainPropertyInteger("Cache Hits", NULL, mstate->stats.cache_hits, es);
			ExplainPropertyInteger("Cache Misses", NULL, mstate->stats.cache_misses, es);
			ExplainPropertyInteger("Cache Evictions", NULL, mstate->stats.cache_evictions, es);
			ExplainPropertyInteger("Cache Overflows", NULL, mstate->stats.cache_overflows, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
		}
		else
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Hits: " UINT64_FORMAT " Misses: " UINT64_FORMAT " Evictions: " UINT64_FORMAT " Overflows: " UINT64_FORMAT " Memory Usage: " INT64_FORMAT "kB\n",
							 mstate->stats.cache_hits,
							 mstate->stats.cache_misses,
							 mstate->stats.cache_evictions,
							 mstate->stats.cache_overflows,
							 memPeakKb);
		}
	}

	if (mstate->shared_info == NULL)
		return;

	/* Show details from parallel workers */
	for (int n = 0; n < mstate->shared_info->num_workers; n++)
	{
		MemoizeInstrumentation *si;

		si = &mstate->shared_info->sinstrument[n];

		/*
		 * Skip workers that didn't do any work.  We needn't bother checking
		 * for cache hits as a miss will always occur before a cache hit.
		 */
		if (si->cache_misses == 0)
			continue;

		if (es->workers_state)
			ExplainOpenWorker(n, es);

		/*
		 * Since the worker's MemoizeState.mem_used field is unavailable to
		 * us, ExecEndMemoize will have set the
		 * MemoizeInstrumentation.mem_peak field for us.  No need to do the
		 * zero checks like we did for the serial case above.
		 */
		memPeakKb = (si->mem_peak + 1023) / 1024;

		if (es->format == EXPLAIN_FORMAT_TEXT)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str,
							 "Hits: " UINT64_FORMAT " Misses: " UINT64_FORMAT " Evictions: " UINT64_FORMAT " Overflows: " UINT64_FORMAT " Memory Usage: " INT64_FORMAT "kB\n",
							 si->cache_hits, si->cache_misses,
							 si->cache_evictions, si->cache_overflows,
							 memPeakKb);
		}
		else
		{
			ExplainPropertyInteger("Cache Hits", NULL,
								   si->cache_hits, es);
			ExplainPropertyInteger("Cache Misses", NULL,
								   si->cache_misses, es);
			ExplainPropertyInteger("Cache Evictions", NULL,
								   si->cache_evictions, es);
			ExplainPropertyInteger("Cache Overflows", NULL,
								   si->cache_overflows, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
								   es);
		}

		if (es->workers_state)
			ExplainCloseWorker(n, es);
	}
}

/*
 * Show information on hash aggregate memory usage and batches.
 */
static void
show_hashagg_info(AggState *aggstate, ExplainState *es)
{
	Agg		   *agg = (Agg *) aggstate->ss.ps.plan;
	int64		memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;

	/* Only hash-based (or mixed) aggregation has anything to report */
	if (agg->aggstrategy != AGG_HASHED &&
		agg->aggstrategy != AGG_MIXED)
		return;

	if (es->format != EXPLAIN_FORMAT_TEXT)
	{

		if (es->costs)
			ExplainPropertyInteger("Planned Partitions", NULL,
								   aggstate->hash_planned_partitions, es);

		/*
		 * During parallel query the leader may have not helped out.  We
		 * detect this by checking how much memory it used.  If we find it
		 * didn't do any work then we don't show its properties.
		 */
		if (es->analyze && aggstate->hash_mem_peak > 0)
		{
			ExplainPropertyInteger("HashAgg Batches", NULL,
								   aggstate->hash_batches_used, es);
			ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
			ExplainPropertyInteger("Disk Usage", "kB",
								   aggstate->hash_disk_used, es);
		}
	}
	else
	{
		bool		gotone = false;

		if (es->costs && aggstate->hash_planned_partitions > 0)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Planned Partitions: %d",
							 aggstate->hash_planned_partitions);
			gotone = true;
		}

		/*
		 * During parallel query the leader may have not helped out.  We
		 * detect this by checking how much memory it used.  If we find it
		 * didn't do any work then we don't show its properties.
		 */
		if (es->analyze && aggstate->hash_mem_peak > 0)
		{
			if (!gotone)
				ExplainIndentText(es);
			else
				appendStringInfoString(es->str, " ");

			appendStringInfo(es->str, "Batches: %d Memory Usage: " INT64_FORMAT "kB",
							 aggstate->hash_batches_used, memPeakKb);
			gotone = true;

			/* Only display disk usage if we spilled to disk */
			if (aggstate->hash_batches_used > 1)
			{
				appendStringInfo(es->str, " Disk Usage: " UINT64_FORMAT "kB",
								 aggstate->hash_disk_used);
			}
		}

		if (gotone)
			appendStringInfoChar(es->str, '\n');
	}

	/* Display stats for each parallel worker */
	if (es->analyze && aggstate->shared_info != NULL)
	{
		for (int n = 0; n < aggstate->shared_info->num_workers; n++)
		{
			AggregateInstrumentation *sinstrument;
			uint64		hash_disk_used;
			int			hash_batches_used;

			sinstrument = &aggstate->shared_info->sinstrument[n];
			/* Skip workers that didn't do anything */
			if (sinstrument->hash_mem_peak == 0)
				continue;
			hash_disk_used = sinstrument->hash_disk_used;
			hash_batches_used = sinstrument->hash_batches_used;
			memPeakKb = (sinstrument->hash_mem_peak + 1023) / 1024;

			if (es->workers_state)
				ExplainOpenWorker(n, es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);

				appendStringInfo(es->str, "Batches: %d Memory Usage: " INT64_FORMAT "kB",
								 hash_batches_used, memPeakKb);

				/* Only display disk usage if we spilled to disk */
				if (hash_batches_used > 1)
					appendStringInfo(es->str, " Disk Usage: " UINT64_FORMAT "kB",
									 hash_disk_used);
				appendStringInfoChar(es->str, '\n');
			}
			else
			{
				ExplainPropertyInteger("HashAgg Batches", NULL,
									   hash_batches_used, es);
				ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb,
									   es);
				ExplainPropertyInteger("Disk Usage", "kB", hash_disk_used, es);
			}

			if (es->workers_state)
				ExplainCloseWorker(n, es);
		}
	}
}

/*
 * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
 */
static void
show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
	if (es->format != EXPLAIN_FORMAT_TEXT)
	{
		ExplainPropertyInteger("Exact Heap Blocks", NULL,
							   planstate->exact_pages, es);
		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
							   planstate->lossy_pages, es);
	}
	else
	{
		/* Text mode: omit the line entirely when both counters are zero */
		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Heap Blocks:");
			if (planstate->exact_pages > 0)
				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
			if (planstate->lossy_pages > 0)
				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
			appendStringInfoChar(es->str, '\n');
		}
	}
}

/*
 * If it's EXPLAIN ANALYZE, show instrumentation information for a plan node
 *
 * "which" identifies which instrumentation counter to print: 2 selects
 * nfiltered2, anything else selects nfiltered1.
 */
static void
show_instrumentation_count(const char *qlabel, int which,
						   PlanState *planstate, ExplainState *es)
{
	double		nfiltered;
	double		nloops;

	if (!es->analyze || !planstate->instrument)
		return;

	if (which == 2)
		nfiltered = planstate->instrument->nfiltered2;
	else
		nfiltered = planstate->instrument->nfiltered1;
	nloops = planstate->instrument->nloops;

	/* In text mode, suppress zero counts; they're not interesting enough */
	if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT)
	{
		/* Report the average per loop, guarding against nloops == 0 */
		if (nloops > 0)
			ExplainPropertyFloat(qlabel, NULL, nfiltered / nloops, 0, es);
		else
			ExplainPropertyFloat(qlabel, NULL, 0.0, 0, es);
	}
}

/*
 * Show extra information for a ForeignScan node.
 */
static void
show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es)
{
	FdwRoutine *fdwroutine = fsstate->fdwroutine;

	/* Let the FDW emit whatever fields it wants */
	if (((ForeignScan *) fsstate->ss.ps.plan)->operation != CMD_SELECT)
	{
		if (fdwroutine->ExplainDirectModify != NULL)
			fdwroutine->ExplainDirectModify(fsstate, es);
	}
	else
	{
		if (fdwroutine->ExplainForeignScan != NULL)
			fdwroutine->ExplainForeignScan(fsstate, es);
	}
}

/*
 * Show initplan params evaluated at Gather or Gather Merge node.
 */
static void
show_eval_params(Bitmapset *bms_params, ExplainState *es)
{
	int			paramid = -1;
	List	   *params = NIL;

	Assert(bms_params);

	/* Render each member of the bitmapset as "$<paramid>" */
	while ((paramid = bms_next_member(bms_params, paramid)) >= 0)
	{
		char		param[32];

		snprintf(param, sizeof(param), "$%d", paramid);
		params = lappend(params, pstrdup(param));
	}

	if (params)
		ExplainPropertyList("Params Evaluated", params, es);
}

/*
 * Fetch the name of an index in an EXPLAIN
 *
 * We allow plugins to get control here so that plans involving hypothetical
 * indexes can be explained.
 *
 * Note: names returned by this function should be "raw"; the caller will
 * apply quoting if needed.  Formerly the convention was to do quoting here,
 * but we don't want that in non-text output formats.
 */
static const char *
explain_get_index_name(Oid indexId)
{
	const char *result;

	/* Give the hook first crack; it may recognize hypothetical indexes */
	if (explain_get_index_name_hook)
		result = (*explain_get_index_name_hook) (indexId);
	else
		result = NULL;
	if (result == NULL)
	{
		/* default behavior: look it up in the catalogs */
		result = get_rel_name(indexId);
		if (result == NULL)
			elog(ERROR, "cache lookup failed for index %u", indexId);
	}
	return result;
}

/*
 * Show buffer usage details.
 *
 * "planning" is true when reporting buffers consumed during planning, in
 * which case text output is wrapped in a "Planning:" group.
 */
static void
show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
{
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		bool		has_shared = (usage->shared_blks_hit > 0 ||
								  usage->shared_blks_read > 0 ||
								  usage->shared_blks_dirtied > 0 ||
								  usage->shared_blks_written > 0);
		bool		has_local = (usage->local_blks_hit > 0 ||
								 usage->local_blks_read > 0 ||
								 usage->local_blks_dirtied > 0 ||
								 usage->local_blks_written > 0);
		bool		has_temp = (usage->temp_blks_read > 0 ||
								usage->temp_blks_written > 0);
		bool		has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) ||
								  !INSTR_TIME_IS_ZERO(usage->blk_write_time));
		bool		has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) ||
									   !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time));
		bool		show_planning = (planning && (has_shared ||
												  has_local || has_temp || has_timing ||
												  has_temp_timing));

		if (show_planning)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Planning:\n");
			es->indent++;
		}

		/* Show only positive counter values. */
		if (has_shared || has_local || has_temp)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "Buffers:");

			if (has_shared)
			{
				appendStringInfoString(es->str, " shared");
				if (usage->shared_blks_hit > 0)
					appendStringInfo(es->str, " hit=%lld",
									 (long long) usage->shared_blks_hit);
				if (usage->shared_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->shared_blks_read);
				if (usage->shared_blks_dirtied > 0)
					appendStringInfo(es->str, " dirtied=%lld",
									 (long long) usage->shared_blks_dirtied);
				if (usage->shared_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->shared_blks_written);
				if (has_local || has_temp)
					appendStringInfoChar(es->str, ',');
			}
			if (has_local)
			{
				appendStringInfoString(es->str, " local");
				if (usage->local_blks_hit > 0)
					appendStringInfo(es->str, " hit=%lld",
									 (long long) usage->local_blks_hit);
				if (usage->local_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->local_blks_read);
				if (usage->local_blks_dirtied > 0)
					appendStringInfo(es->str, " dirtied=%lld",
									 (long long) usage->local_blks_dirtied);
				if (usage->local_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->local_blks_written);
				if (has_temp)
					appendStringInfoChar(es->str, ',');
			}
			if (has_temp)
			{
				appendStringInfoString(es->str, " temp");
				if (usage->temp_blks_read > 0)
					appendStringInfo(es->str, " read=%lld",
									 (long long) usage->temp_blks_read);
				if (usage->temp_blks_written > 0)
					appendStringInfo(es->str, " written=%lld",
									 (long long) usage->temp_blks_written);
			}
			appendStringInfoChar(es->str, '\n');
		}

		/* As above, show only positive counter values. */
		if (has_timing || has_temp_timing)
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "I/O Timings:");

			if (has_timing)
			{
				appendStringInfoString(es->str, " shared/local");
				if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
					appendStringInfo(es->str, " read=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
				if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
					appendStringInfo(es->str, " write=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
				if (has_temp_timing)
					appendStringInfoChar(es->str, ',');
			}
			if (has_temp_timing)
			{
				appendStringInfoString(es->str, " temp");
				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time))
					appendStringInfo(es->str, " read=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time));
				if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time))
					appendStringInfo(es->str, " write=%0.3f",
									 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time));
			}
			appendStringInfoChar(es->str, '\n');
		}

		if (show_planning)
			es->indent--;
	}
	else
	{
		/* Structured formats emit every field unconditionally */
		ExplainPropertyInteger("Shared Hit Blocks", NULL,
							   usage->shared_blks_hit, es);
		ExplainPropertyInteger("Shared Read Blocks", NULL,
							   usage->shared_blks_read, es);
		ExplainPropertyInteger("Shared Dirtied Blocks", NULL,
							   usage->shared_blks_dirtied, es);
		ExplainPropertyInteger("Shared Written Blocks", NULL,
							   usage->shared_blks_written, es);
		ExplainPropertyInteger("Local Hit Blocks", NULL,
							   usage->local_blks_hit, es);
		ExplainPropertyInteger("Local Read Blocks", NULL,
							   usage->local_blks_read, es);
		ExplainPropertyInteger("Local Dirtied Blocks", NULL,
							   usage->local_blks_dirtied, es);
		ExplainPropertyInteger("Local Written Blocks", NULL,
							   usage->local_blks_written, es);
		ExplainPropertyInteger("Temp Read Blocks", NULL,
							   usage->temp_blks_read, es);
		ExplainPropertyInteger("Temp Written Blocks", NULL,
							   usage->temp_blks_written, es);
		if (track_io_timing)
		{
			ExplainPropertyFloat("I/O Read Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->blk_read_time),
								 3, es);
			ExplainPropertyFloat("I/O Write Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->blk_write_time),
								 3, es);
			ExplainPropertyFloat("Temp I/O Read Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time),
								 3, es);
			ExplainPropertyFloat("Temp I/O Write Time", "ms",
								 INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time),
								 3, es);
		}
	}
}

/*
 * Show WAL usage details.
 */
static void
show_wal_usage(ExplainState *es, const WalUsage *usage)
{
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		/* Show only positive counter values. */
		if ((usage->wal_records > 0) || (usage->wal_fpi > 0) ||
			(usage->wal_bytes > 0))
		{
			ExplainIndentText(es);
			appendStringInfoString(es->str, "WAL:");

			if (usage->wal_records > 0)
				appendStringInfo(es->str, " records=%lld",
								 (long long) usage->wal_records);
			if (usage->wal_fpi > 0)
				appendStringInfo(es->str, " fpi=%lld",
								 (long long) usage->wal_fpi);
			if (usage->wal_bytes > 0)
				appendStringInfo(es->str, " bytes=" UINT64_FORMAT,
								 usage->wal_bytes);
			appendStringInfoChar(es->str, '\n');
		}
	}
	else
	{
		ExplainPropertyInteger("WAL Records", NULL,
							   usage->wal_records, es);
		ExplainPropertyInteger("WAL FPI", NULL,
							   usage->wal_fpi, es);
		ExplainPropertyUInteger("WAL Bytes", NULL,
								usage->wal_bytes, es);
	}
}

/*
 * Add some additional details about an IndexScan or IndexOnlyScan
 */
static void
ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
						ExplainState *es)
{
	const char *indexname = explain_get_index_name(indexid);

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (ScanDirectionIsBackward(indexorderdir))
			appendStringInfoString(es->str, " Backward");
		appendStringInfo(es->str, " using %s", quote_identifier(indexname));
	}
	else
	{
		const char *scandir;

		switch (indexorderdir)
		{
			case BackwardScanDirection:
				scandir = "Backward";
				break;
			case NoMovementScanDirection:
				scandir = "NoMovement";
				break;
			case ForwardScanDirection:
				scandir = "Forward";
				break;
			default:
				scandir = "???";
				break;
		}
		ExplainPropertyText("Scan Direction", scandir, es);
		ExplainPropertyText("Index Name", indexname, es);
	}
}

/*
 * Show the target of a Scan node
 */
static void
ExplainScanTarget(Scan *plan, ExplainState *es)
{
	ExplainTargetRel((Plan *) plan, plan->scanrelid, es);
}

/*
 * Show the target of a ModifyTable node
 *
 * Here we show the nominal target (ie, the relation that was named in the
 * original query).  If the actual target(s) is/are different, we'll show them
 * in show_modifytable_info().
 */
static void
ExplainModifyTarget(ModifyTable *plan, ExplainState *es)
{
	ExplainTargetRel((Plan *) plan, plan->nominalRelation, es);
}

/*
 * Show the target relation of a scan or modify node
 */
static void
ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
{
	char	   *objectname = NULL;
	char	   *namespace = NULL;
	const char *objecttag = NULL;
	RangeTblEntry *rte;
	char	   *refname;

	rte = rt_fetch(rti, es->rtable);
	refname = (char *) list_nth(es->rtable_names, rti - 1);
	if (refname == NULL)
		refname = rte->eref->aliasname;

	switch (nodeTag(plan))
	{
		case T_SeqScan:
		case T_SampleScan:
		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_TidRangeScan:
		case T_ForeignScan:
		case T_CustomScan:
		case T_ModifyTable:
			/* Assert it's on a real relation */
			Assert(rte->rtekind == RTE_RELATION);
			objectname = get_rel_name(rte->relid);
			if (es->verbose)
				namespace = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
			objecttag = "Relation Name";
			break;
		case T_FunctionScan:
			{
				FunctionScan *fscan = (FunctionScan *) plan;

				/* Assert it's on a RangeFunction */
				Assert(rte->rtekind == RTE_FUNCTION);

				/*
				 * If the expression is still a function call of a single
				 * function, we can get the real name of the function.
				 * Otherwise, punt.  (Even if it was a single function call
				 * originally, the optimizer could have simplified it away.)
				 */
				if (list_length(fscan->functions) == 1)
				{
					RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(fscan->functions);

					if (IsA(rtfunc->funcexpr, FuncExpr))
					{
						FuncExpr   *funcexpr = (FuncExpr *) rtfunc->funcexpr;
						Oid			funcid = funcexpr->funcid;

						objectname = get_func_name(funcid);
						if (es->verbose)
							namespace = get_namespace_name_or_temp(get_func_namespace(funcid));
					}
				}
				objecttag = "Function Name";
			}
			break;
		case T_TableFuncScan:
			Assert(rte->rtekind == RTE_TABLEFUNC);
			objectname = "xmltable";
			objecttag = "Table Function Name";
			break;
		case T_ValuesScan:
			Assert(rte->rtekind == RTE_VALUES);
			break;
		case T_CteScan:
			/* Assert it's on a non-self-reference CTE */
			Assert(rte->rtekind == RTE_CTE);
			Assert(!rte->self_reference);
			objectname = rte->ctename;
			objecttag = "CTE Name";
			break;
		case T_NamedTuplestoreScan:
			Assert(rte->rtekind == RTE_NAMEDTUPLESTORE);
			objectname = rte->enrname;
			objecttag = "Tuplestore Name";
			break;
		case T_WorkTableScan:
			/* Assert it's on a self-reference CTE */
			Assert(rte->rtekind == RTE_CTE);
			Assert(rte->self_reference);
			objectname = rte->ctename;
			objecttag = "CTE Name";
			break;
		default:
			break;
	}

	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		appendStringInfoString(es->str, " on");
		if (namespace != NULL)
			appendStringInfo(es->str, " %s.%s", quote_identifier(namespace),
							 quote_identifier(objectname));
		else if (objectname != NULL)
			appendStringInfo(es->str, " %s", quote_identifier(objectname));
		/* Add the alias when it differs from (or there is no) object name */
		if (objectname == NULL || strcmp(refname, objectname) != 0)
			appendStringInfo(es->str, " %s", quote_identifier(refname));
	}
	else
	{
		if (objecttag != NULL && objectname != NULL)
			ExplainPropertyText(objecttag, objectname, es);
		if (namespace != NULL)
			ExplainPropertyText("Schema", namespace, es);
		ExplainPropertyText("Alias", refname, es);
	}
}

/*
 *
 Show extra information for a ModifyTable node
 *
 * We have three objectives here.  First, if there's more than one target
 * table or it's different from the nominal target, identify the actual
 * target(s).  Second, give FDWs a chance to display extra info about foreign
 * targets.  Third, show information about ON CONFLICT.
 */
static void
show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
					  ExplainState *es)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	const char *operation;
	const char *foperation;
	bool		labeltargets;
	int			j;
	List	   *idxNames = NIL;
	ListCell   *lst;

	switch (node->operation)
	{
		case CMD_INSERT:
			operation = "Insert";
			foperation = "Foreign Insert";
			break;
		case CMD_UPDATE:
			operation = "Update";
			foperation = "Foreign Update";
			break;
		case CMD_DELETE:
			operation = "Delete";
			foperation = "Foreign Delete";
			break;
		case CMD_MERGE:
			operation = "Merge";
			/* XXX unsupported for now, but avoid compiler noise */
			foperation = "Foreign Merge";
			break;
		default:
			operation = "???";
			foperation = "Foreign ???";
			break;
	}

	/* Should we explicitly label target relations? */
	labeltargets = (mtstate->mt_nrels > 1 ||
					(mtstate->mt_nrels == 1 &&
					 mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation));

	if (labeltargets)
		ExplainOpenGroup("Target Tables", "Target Tables", false, es);

	for (j = 0; j < mtstate->mt_nrels; j++)
	{
		ResultRelInfo *resultRelInfo = mtstate->resultRelInfo + j;
		FdwRoutine *fdwroutine = resultRelInfo->ri_FdwRoutine;

		if (labeltargets)
		{
			/* Open a group for this target */
			ExplainOpenGroup("Target Table", NULL, true, es);

			/*
			 * In text mode, decorate each target with operation type, so that
			 * ExplainTargetRel's output of " on foo" will read nicely.
			 */
			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				ExplainIndentText(es);
				appendStringInfoString(es->str,
									   fdwroutine ? foperation : operation);
			}

			/* Identify target */
			ExplainTargetRel((Plan *) node,
							 resultRelInfo->ri_RangeTableIndex,
							 es);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				appendStringInfoChar(es->str, '\n');
				es->indent++;
			}
		}

		/* Give FDW a chance if needed */
		if (!resultRelInfo->ri_usesFdwDirectModify &&
			fdwroutine != NULL &&
			fdwroutine->ExplainForeignModify != NULL)
		{
			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, j);

			fdwroutine->ExplainForeignModify(mtstate,
											 resultRelInfo,
											 fdw_private,
											 j,
											 es);
		}

		if (labeltargets)
		{
			/* Undo the indentation we added in text format */
			if (es->format == EXPLAIN_FORMAT_TEXT)
				es->indent--;

			/* Close the group */
			ExplainCloseGroup("Target Table", NULL, true, es);
		}
	}

	/* Gather names of ON CONFLICT arbiter indexes */
	foreach(lst, node->arbiterIndexes)
	{
		char	   *indexname = get_rel_name(lfirst_oid(lst));

		idxNames = lappend(idxNames, indexname);
	}

	if (node->onConflictAction != ONCONFLICT_NONE)
	{
		ExplainPropertyText("Conflict Resolution",
							node->onConflictAction == ONCONFLICT_NOTHING ?
							"NOTHING" : "UPDATE",
							es);

		/*
		 * Don't display arbiter indexes at all when DO NOTHING variant
		 * implicitly ignores all conflicts
		 */
		if (idxNames)
			ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);

		/* ON CONFLICT DO UPDATE WHERE qual is specially displayed */
		if (node->onConflictWhere)
		{
			show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
							&mtstate->ps, ancestors, es);
			show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es);
		}

		/* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */
		if (es->analyze && mtstate->ps.instrument)
		{
			double		total;
			double		insert_path;
			double		other_path;

			InstrEndLoop(outerPlanState(mtstate)->instrument);

			/* count the number of source rows */
			total = outerPlanState(mtstate)->instrument->ntuples;
			other_path = mtstate->ps.instrument->ntuples2;
			/* inserted rows = source rows minus conflicting rows */
			insert_path = total - other_path;

			ExplainPropertyFloat("Tuples Inserted", NULL,
								 insert_path, 0, es);
			ExplainPropertyFloat("Conflicting Tuples", NULL,
								 other_path, 0, es);
		}
	}
	else if (node->operation == CMD_MERGE)
	{
		/* EXPLAIN ANALYZE display of tuples processed */
		if (es->analyze && mtstate->ps.instrument)
		{
			double		total;
			double		insert_path;
			double		update_path;
			double		delete_path;
			double		skipped_path;

			InstrEndLoop(outerPlanState(mtstate)->instrument);

			/* count the number of source rows */
			total = outerPlanState(mtstate)->instrument->ntuples;
			insert_path = mtstate->mt_merge_inserted;
			update_path = mtstate->mt_merge_updated;
			delete_path = mtstate->mt_merge_deleted;
			/* skipped = rows matched by no acted-upon WHEN clause */
			skipped_path = total - insert_path - update_path - delete_path;
			Assert(skipped_path >= 0);

			if (es->format == EXPLAIN_FORMAT_TEXT)
			{
				if (total > 0)
				{
					ExplainIndentText(es);
					appendStringInfoString(es->str, "Tuples:");
					if (insert_path > 0)
						appendStringInfo(es->str, " inserted=%.0f", insert_path);
					if (update_path > 0)
						appendStringInfo(es->str, " updated=%.0f", update_path);
					if (delete_path > 0)
						appendStringInfo(es->str, " deleted=%.0f", delete_path);
					if (skipped_path > 0)
						appendStringInfo(es->str, " skipped=%.0f", skipped_path);
					appendStringInfoChar(es->str, '\n');
				}
			}
			else
			{
				ExplainPropertyFloat("Tuples Inserted", NULL, insert_path, 0, es);
				ExplainPropertyFloat("Tuples Updated", NULL, update_path, 0, es);
				ExplainPropertyFloat("Tuples Deleted", NULL, delete_path, 0, es);
				ExplainPropertyFloat("Tuples Skipped", NULL, skipped_path, 0, es);
			}
		}
	}

	if (labeltargets)
		ExplainCloseGroup("Target Tables", "Target Tables", false, es);
}

/*
 * Explain the constituent plans of an Append, MergeAppend,
 * BitmapAnd, or BitmapOr node.
 *
 * The ancestors list should already contain the immediate parent of these
 * plans.
 */
static void
ExplainMemberNodes(PlanState **planstates, int nplans,
				   List *ancestors, ExplainState *es)
{
	int			j;

	for (j = 0; j < nplans; j++)
		ExplainNode(planstates[j], ancestors,
					"Member", NULL, es);
}

/*
 * Report about any pruned subnodes of an Append or MergeAppend node.
 *
 * nplans indicates the number of live subplans.
 * nchildren indicates the original number of subnodes in the Plan;
 * some of these may have been pruned by the run-time pruning code.
 */
static void
ExplainMissingMembers(int nplans, int nchildren, ExplainState *es)
{
	/* Structured formats always emit the field, even when zero */
	if (nplans < nchildren || es->format != EXPLAIN_FORMAT_TEXT)
		ExplainPropertyInteger("Subplans Removed", NULL,
							   nchildren - nplans, es);
}

/*
 * Explain a list of SubPlans (or initPlans, which also use SubPlan nodes).
 *
 * The ancestors list should already contain the immediate parent of these
 * SubPlans.
+ */ +static void +ExplainSubPlans(List *plans, List *ancestors, + const char *relationship, ExplainState *es) +{ + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + SubPlan *sp = sps->subplan; + + /* + * There can be multiple SubPlan nodes referencing the same physical + * subplan (same plan_id, which is its index in PlannedStmt.subplans). + * We should print a subplan only once, so track which ones we already + * printed. This state must be global across the plan tree, since the + * duplicate nodes could be in different plan nodes, eg both a bitmap + * indexscan's indexqual and its parent heapscan's recheck qual. (We + * do not worry too much about which plan node we show the subplan as + * attached to in such cases.) + */ + if (bms_is_member(sp->plan_id, es->printed_subplans)) + continue; + es->printed_subplans = bms_add_member(es->printed_subplans, + sp->plan_id); + + /* + * Treat the SubPlan node as an ancestor of the plan node(s) within + * it, so that ruleutils.c can find the referents of subplan + * parameters. + */ + ancestors = lcons(sp, ancestors); + + ExplainNode(sps->planstate, ancestors, + relationship, sp->plan_name, es); + + ancestors = list_delete_first(ancestors); + } +} + +/* + * Explain a list of children of a CustomScan. + */ +static void +ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es) +{ + ListCell *cell; + const char *label = + (list_length(css->custom_ps) != 1 ? "children" : "child"); + + foreach(cell, css->custom_ps) + ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es); +} + +/* + * Create a per-plan-node workspace for collecting per-worker data. + * + * Output related to each worker will be temporarily "set aside" into a + * separate buffer, which we'll merge into the main output stream once + * we've processed all data for the plan node. 
 This makes it feasible to
 * generate a coherent sub-group of fields for each worker, even though the
 * code that produces the fields is in several different places in this file.
 * Formatting of such a set-aside field group is managed by
 * ExplainOpenSetAsideGroup and ExplainSaveGroup/ExplainRestoreGroup.
 */
static ExplainWorkersState *
ExplainCreateWorkersState(int num_workers)
{
	ExplainWorkersState *wstate;

	wstate = (ExplainWorkersState *) palloc(sizeof(ExplainWorkersState));
	wstate->num_workers = num_workers;
	/* per-worker "have we started output?" flags, all false initially */
	wstate->worker_inited = (bool *) palloc0(num_workers * sizeof(bool));
	wstate->worker_str = (StringInfoData *)
		palloc0(num_workers * sizeof(StringInfoData));
	wstate->worker_state_save = (int *) palloc(num_workers * sizeof(int));
	return wstate;
}

/*
 * Begin or resume output into the set-aside group for worker N.
 */
static void
ExplainOpenWorker(int n, ExplainState *es)
{
	ExplainWorkersState *wstate = es->workers_state;

	Assert(wstate);
	Assert(n >= 0 && n < wstate->num_workers);

	/* Save prior output buffer pointer */
	wstate->prev_str = es->str;

	if (!wstate->worker_inited[n])
	{
		/* First time through, so create the buffer for this worker */
		initStringInfo(&wstate->worker_str[n]);
		es->str = &wstate->worker_str[n];

		/*
		 * Push suitable initial formatting state for this worker's field
		 * group.  We allow one extra logical nesting level, since this group
		 * will eventually be wrapped in an outer "Workers" group.
		 */
		ExplainOpenSetAsideGroup("Worker", NULL, true, 2, es);

		/*
		 * In non-TEXT formats we always emit a "Worker Number" field, even if
		 * there's no other data for this worker.
		 */
		if (es->format != EXPLAIN_FORMAT_TEXT)
			ExplainPropertyInteger("Worker Number", NULL, n, es);

		wstate->worker_inited[n] = true;
	}
	else
	{
		/* Resuming output for a worker we've already emitted some data for */
		es->str = &wstate->worker_str[n];

		/* Restore formatting state saved by last ExplainCloseWorker() */
		ExplainRestoreGroup(es, 2, &wstate->worker_state_save[n]);
	}

	/*
	 * In TEXT format, prefix the first output line for this worker with
	 * "Worker N:".  Then, any additional lines should be indented one more
	 * stop than the "Worker N" line is.
	 */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		if (es->str->len == 0)
		{
			ExplainIndentText(es);
			appendStringInfo(es->str, "Worker %d: ", n);
		}

		es->indent++;
	}
}

/*
 * End output for worker N --- must pair with previous ExplainOpenWorker call
 */
static void
ExplainCloseWorker(int n, ExplainState *es)
{
	ExplainWorkersState *wstate = es->workers_state;

	Assert(wstate);
	Assert(n >= 0 && n < wstate->num_workers);
	Assert(wstate->worker_inited[n]);

	/*
	 * Save formatting state in case we do another ExplainOpenWorker(), then
	 * pop the formatting stack.
	 */
	ExplainSaveGroup(es, 2, &wstate->worker_state_save[n]);

	/*
	 * In TEXT format, if we didn't actually produce any output line(s) then
	 * truncate off the partial line emitted by ExplainOpenWorker.  (This is
	 * to avoid bogus output if, say, show_buffer_usage chooses not to print
	 * anything for the worker.)  Also fix up the indent level.
	 */
	if (es->format == EXPLAIN_FORMAT_TEXT)
	{
		/* strip back to (and including) the last newline, if any */
		while (es->str->len > 0 && es->str->data[es->str->len - 1] != '\n')
			es->str->data[--(es->str->len)] = '\0';

		es->indent--;
	}

	/* Restore prior output buffer pointer */
	es->str = wstate->prev_str;
}

/*
 * Print per-worker info for current node, then free the ExplainWorkersState.
+ */ +static void +ExplainFlushWorkersState(ExplainState *es) +{ + ExplainWorkersState *wstate = es->workers_state; + + ExplainOpenGroup("Workers", "Workers", false, es); + for (int i = 0; i < wstate->num_workers; i++) + { + if (wstate->worker_inited[i]) + { + /* This must match previous ExplainOpenSetAsideGroup call */ + ExplainOpenGroup("Worker", NULL, true, es); + appendStringInfoString(es->str, wstate->worker_str[i].data); + ExplainCloseGroup("Worker", NULL, true, es); + + pfree(wstate->worker_str[i].data); + } + } + ExplainCloseGroup("Workers", "Workers", false, es); + + pfree(wstate->worker_inited); + pfree(wstate->worker_str); + pfree(wstate->worker_state_save); + pfree(wstate); +} + +/* + * Explain a property, such as sort keys or targets, that takes the form of + * a list of unlabeled items. "data" is a list of C strings. + */ +void +ExplainPropertyList(const char *qlabel, List *data, ExplainState *es) +{ + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + ExplainIndentText(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + appendStringInfoString(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(qlabel, X_OPENING, es); + foreach(lc, data) + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, ""); + str = escape_xml((const char *) lfirst(lc)); + appendStringInfoString(es->str, str); + pfree(str); + appendStringInfoString(es->str, "\n"); + } + ExplainXMLTag(qlabel, X_CLOSING, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) 
lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + foreach(lc, data) + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2 + 2); + appendStringInfoString(es->str, "- "); + escape_yaml(es->str, (const char *) lfirst(lc)); + } + break; + } +} + +/* + * Explain a property that takes the form of a list of unlabeled items within + * another list. "data" is a list of C strings. + */ +void +ExplainPropertyListNested(const char *qlabel, List *data, ExplainState *es) +{ + ListCell *lc; + bool first = true; + + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + case EXPLAIN_FORMAT_XML: + ExplainPropertyList(qlabel, data, es); + return; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoChar(es->str, '['); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_json(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfoString(es->str, "- ["); + foreach(lc, data) + { + if (!first) + appendStringInfoString(es->str, ", "); + escape_yaml(es->str, (const char *) lfirst(lc)); + first = false; + } + appendStringInfoChar(es->str, ']'); + break; + } +} + +/* + * Explain a simple property. + * + * If "numeric" is true, the value is a number (or other value that + * doesn't need quoting in JSON). + * + * If unit is non-NULL the text format will display it after the value. + * + * This usually should not be invoked directly, but via one of the datatype + * specific routines ExplainPropertyText, ExplainPropertyInteger, etc. 
+ */ +static void +ExplainProperty(const char *qlabel, const char *unit, const char *value, + bool numeric, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + ExplainIndentText(es); + if (unit) + appendStringInfo(es->str, "%s: %s %s\n", qlabel, value, unit); + else + appendStringInfo(es->str, "%s: %s\n", qlabel, value); + break; + + case EXPLAIN_FORMAT_XML: + { + char *str; + + appendStringInfoSpaces(es->str, es->indent * 2); + ExplainXMLTag(qlabel, X_OPENING | X_NOWHITESPACE, es); + str = escape_xml(value); + appendStringInfoString(es->str, str); + pfree(str); + ExplainXMLTag(qlabel, X_CLOSING | X_NOWHITESPACE, es); + appendStringInfoChar(es->str, '\n'); + } + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, es->indent * 2); + escape_json(es->str, qlabel); + appendStringInfoString(es->str, ": "); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_json(es->str, value); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + appendStringInfo(es->str, "%s: ", qlabel); + if (numeric) + appendStringInfoString(es->str, value); + else + escape_yaml(es->str, value); + break; + } +} + +/* + * Explain a string-valued property. + */ +void +ExplainPropertyText(const char *qlabel, const char *value, ExplainState *es) +{ + ExplainProperty(qlabel, NULL, value, false, es); +} + +/* + * Explain an integer-valued property. + */ +void +ExplainPropertyInteger(const char *qlabel, const char *unit, int64 value, + ExplainState *es) +{ + char buf[32]; + + snprintf(buf, sizeof(buf), INT64_FORMAT, value); + ExplainProperty(qlabel, unit, buf, true, es); +} + +/* + * Explain an unsigned integer-valued property. 
 */
void
ExplainPropertyUInteger(const char *qlabel, const char *unit, uint64 value,
						ExplainState *es)
{
	char		buf[32];

	snprintf(buf, sizeof(buf), UINT64_FORMAT, value);
	ExplainProperty(qlabel, unit, buf, true, es);
}

/*
 * Explain a float-valued property, using the specified number of
 * fractional digits.
 */
void
ExplainPropertyFloat(const char *qlabel, const char *unit, double value,
					 int ndigits, ExplainState *es)
{
	char	   *buf;

	buf = psprintf("%.*f", ndigits, value);
	ExplainProperty(qlabel, unit, buf, true, es);
	pfree(buf);
}

/*
 * Explain a bool-valued property.
 */
void
ExplainPropertyBool(const char *qlabel, bool value, ExplainState *es)
{
	/* "true"/"false" need no quoting in JSON, hence numeric = true */
	ExplainProperty(qlabel, NULL, value ? "true" : "false", true, es);
}

/*
 * Open a group of related objects.
 *
 * objtype is the type of the group object, labelname is its label within
 * a containing object (if any).
 *
 * If labeled is true, the group members will be labeled properties,
 * while if it's false, they'll be unlabeled objects.
 */
void
ExplainOpenGroup(const char *objtype, const char *labelname,
				 bool labeled, ExplainState *es)
{
	switch (es->format)
	{
		case EXPLAIN_FORMAT_TEXT:
			/* nothing to do */
			break;

		case EXPLAIN_FORMAT_XML:
			ExplainXMLTag(objtype, X_OPENING, es);
			es->indent++;
			break;

		case EXPLAIN_FORMAT_JSON:
			ExplainJSONLineEnding(es);
			appendStringInfoSpaces(es->str, 2 * es->indent);
			if (labelname)
			{
				escape_json(es->str, labelname);
				appendStringInfoString(es->str, ": ");
			}
			/* labeled groups are JSON objects, unlabeled ones are arrays */
			appendStringInfoChar(es->str, labeled ? '{' : '[');

			/*
			 * In JSON format, the grouping_stack is an integer list.  0 means
			 * we've emitted nothing at this grouping level, 1 means we've
			 * emitted something (and so the next item needs a comma).  See
			 * ExplainJSONLineEnding().
			 */
			es->grouping_stack = lcons_int(0, es->grouping_stack);
			es->indent++;
			break;

		case EXPLAIN_FORMAT_YAML:

			/*
			 * In YAML format, the grouping stack is an integer list.  0 means
			 * we've emitted nothing at this grouping level AND this grouping
			 * level is unlabeled and must be marked with "- ".  See
			 * ExplainYAMLLineStarting().
			 */
			ExplainYAMLLineStarting(es);
			if (labelname)
			{
				appendStringInfo(es->str, "%s: ", labelname);
				es->grouping_stack = lcons_int(1, es->grouping_stack);
			}
			else
			{
				appendStringInfoString(es->str, "- ");
				es->grouping_stack = lcons_int(0, es->grouping_stack);
			}
			es->indent++;
			break;
	}
}

/*
 * Close a group of related objects.
 * Parameters must match the corresponding ExplainOpenGroup call.
 */
void
ExplainCloseGroup(const char *objtype, const char *labelname,
				  bool labeled, ExplainState *es)
{
	switch (es->format)
	{
		case EXPLAIN_FORMAT_TEXT:
			/* nothing to do */
			break;

		case EXPLAIN_FORMAT_XML:
			es->indent--;
			ExplainXMLTag(objtype, X_CLOSING, es);
			break;

		case EXPLAIN_FORMAT_JSON:
			es->indent--;
			appendStringInfoChar(es->str, '\n');
			appendStringInfoSpaces(es->str, 2 * es->indent);
			appendStringInfoChar(es->str, labeled ? '}' : ']');
			es->grouping_stack = list_delete_first(es->grouping_stack);
			break;

		case EXPLAIN_FORMAT_YAML:
			es->indent--;
			es->grouping_stack = list_delete_first(es->grouping_stack);
			break;
	}
}

/*
 * Open a group of related objects, without emitting actual data.
 *
 * Prepare the formatting state as though we were beginning a group with
 * the identified properties, but don't actually emit anything.  Output
 * subsequent to this call can be redirected into a separate output buffer,
 * and then eventually appended to the main output buffer after doing a
 * regular ExplainOpenGroup call (with the same parameters).
 *
 * The extra "depth" parameter is the new group's depth compared to current.
+ * It could be more than one, in case the eventual output will be enclosed + * in additional nesting group levels. We assume we don't need to track + * formatting state for those levels while preparing this group's output. + * + * There is no ExplainCloseSetAsideGroup --- in current usage, we always + * pop this state with ExplainSaveGroup. + */ +static void +ExplainOpenSetAsideGroup(const char *objtype, const char *labelname, + bool labeled, int depth, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent += depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent += depth; + break; + + case EXPLAIN_FORMAT_YAML: + if (labelname) + es->grouping_stack = lcons_int(1, es->grouping_stack); + else + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent += depth; + break; + } +} + +/* + * Pop one level of grouping state, allowing for a re-push later. + * + * This is typically used after ExplainOpenSetAsideGroup; pass the + * same "depth" used for that. + * + * This should not emit any output. If state needs to be saved, + * save it at *state_save. Currently, an integer save area is sufficient + * for all formats, but we might need to revisit that someday. 
+ */ +static void +ExplainSaveGroup(ExplainState *es, int depth, int *state_save) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent -= depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->indent -= depth; + *state_save = linitial_int(es->grouping_stack); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->indent -= depth; + *state_save = linitial_int(es->grouping_stack); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } +} + +/* + * Re-push one level of grouping state, undoing the effects of ExplainSaveGroup. + */ +static void +ExplainRestoreGroup(ExplainState *es, int depth, int *state_save) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent += depth; + break; + + case EXPLAIN_FORMAT_JSON: + es->grouping_stack = lcons_int(*state_save, es->grouping_stack); + es->indent += depth; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(*state_save, es->grouping_stack); + es->indent += depth; + break; + } +} + +/* + * Emit a "dummy" group that never has any members. + * + * objtype is the type of the group object, labelname is its label within + * a containing object (if any). 
+ */ +static void +ExplainDummyGroup(const char *objtype, const char *labelname, ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + ExplainXMLTag(objtype, X_CLOSE_IMMEDIATE, es); + break; + + case EXPLAIN_FORMAT_JSON: + ExplainJSONLineEnding(es); + appendStringInfoSpaces(es->str, 2 * es->indent); + if (labelname) + { + escape_json(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + escape_json(es->str, objtype); + break; + + case EXPLAIN_FORMAT_YAML: + ExplainYAMLLineStarting(es); + if (labelname) + { + escape_yaml(es->str, labelname); + appendStringInfoString(es->str, ": "); + } + else + { + appendStringInfoString(es->str, "- "); + } + escape_yaml(es->str, objtype); + break; + } +} + +/* + * Emit the start-of-output boilerplate. + * + * This is just enough different from processing a subgroup that we need + * a separate pair of subroutines. + */ +void +ExplainBeginOutput(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + appendStringInfoString(es->str, + "\n"); + es->indent++; + break; + + case EXPLAIN_FORMAT_JSON: + /* top-level structure is an array of plans */ + appendStringInfoChar(es->str, '['); + es->grouping_stack = lcons_int(0, es->grouping_stack); + es->indent++; + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = lcons_int(0, es->grouping_stack); + break; + } +} + +/* + * Emit the end-of-output boilerplate. 
+ */ +void +ExplainEndOutput(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* nothing to do */ + break; + + case EXPLAIN_FORMAT_XML: + es->indent--; + appendStringInfoString(es->str, ""); + break; + + case EXPLAIN_FORMAT_JSON: + es->indent--; + appendStringInfoString(es->str, "\n]"); + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + + case EXPLAIN_FORMAT_YAML: + es->grouping_stack = list_delete_first(es->grouping_stack); + break; + } +} + +/* + * Put an appropriate separator between multiple plans + */ +void +ExplainSeparatePlans(ExplainState *es) +{ + switch (es->format) + { + case EXPLAIN_FORMAT_TEXT: + /* add a blank line */ + appendStringInfoChar(es->str, '\n'); + break; + + case EXPLAIN_FORMAT_XML: + case EXPLAIN_FORMAT_JSON: + case EXPLAIN_FORMAT_YAML: + /* nothing to do */ + break; + } +} + +/* + * Emit opening or closing XML tag. + * + * "flags" must contain X_OPENING, X_CLOSING, or X_CLOSE_IMMEDIATE. + * Optionally, OR in X_NOWHITESPACE to suppress the whitespace we'd normally + * add. + * + * XML restricts tag names more than our other output formats, eg they can't + * contain white space or slashes. Replace invalid characters with dashes, + * so that for example "I/O Read Time" becomes "I-O-Read-Time". + */ +static void +ExplainXMLTag(const char *tagname, int flags, ExplainState *es) +{ + const char *s; + const char *valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_."; + + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoSpaces(es->str, 2 * es->indent); + appendStringInfoCharMacro(es->str, '<'); + if ((flags & X_CLOSING) != 0) + appendStringInfoCharMacro(es->str, '/'); + for (s = tagname; *s; s++) + appendStringInfoChar(es->str, strchr(valid, *s) ? 
*s : '-'); + if ((flags & X_CLOSE_IMMEDIATE) != 0) + appendStringInfoString(es->str, " /"); + appendStringInfoCharMacro(es->str, '>'); + if ((flags & X_NOWHITESPACE) == 0) + appendStringInfoCharMacro(es->str, '\n'); +} + +/* + * Indent a text-format line. + * + * We indent by two spaces per indentation level. However, when emitting + * data for a parallel worker there might already be data on the current line + * (cf. ExplainOpenWorker); in that case, don't indent any more. + */ +static void +ExplainIndentText(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_TEXT); + if (es->str->len == 0 || es->str->data[es->str->len - 1] == '\n') + appendStringInfoSpaces(es->str, es->indent * 2); +} + +/* + * Emit a JSON line ending. + * + * JSON requires a comma after each property but the last. To facilitate this, + * in JSON format, the text emitted for each property begins just prior to the + * preceding line-break (and comma, if applicable). + */ +static void +ExplainJSONLineEnding(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_JSON); + if (linitial_int(es->grouping_stack) != 0) + appendStringInfoChar(es->str, ','); + else + linitial_int(es->grouping_stack) = 1; + appendStringInfoChar(es->str, '\n'); +} + +/* + * Indent a YAML line. + * + * YAML lines are ordinarily indented by two spaces per indentation level. + * The text emitted for each property begins just prior to the preceding + * line-break, except for the first property in an unlabeled group, for which + * it begins immediately after the "- " that introduces the group. The first + * property of the group appears on the same line as the opening "- ". 
+ */ +static void +ExplainYAMLLineStarting(ExplainState *es) +{ + Assert(es->format == EXPLAIN_FORMAT_YAML); + if (linitial_int(es->grouping_stack) == 0) + { + linitial_int(es->grouping_stack) = 1; + } + else + { + appendStringInfoChar(es->str, '\n'); + appendStringInfoSpaces(es->str, es->indent * 2); + } +} + +/* + * YAML is a superset of JSON; unfortunately, the YAML quoting rules are + * ridiculously complicated -- as documented in sections 5.3 and 7.3.3 of + * http://yaml.org/spec/1.2/spec.html -- so we chose to just quote everything. + * Empty strings, strings with leading or trailing whitespace, and strings + * containing a variety of special characters must certainly be quoted or the + * output is invalid; and other seemingly harmless strings like "0xa" or + * "true" must be quoted, lest they be interpreted as a hexadecimal or Boolean + * constant rather than a string. + */ +static void +escape_yaml(StringInfo buf, const char *str) +{ + escape_json(buf, str); +} diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c new file mode 100644 index 0000000..df6f021 --- /dev/null +++ b/src/backend/commands/extension.c @@ -0,0 +1,3417 @@ +/*------------------------------------------------------------------------- + * + * extension.c + * Commands to manipulate extensions + * + * Extensions in PostgreSQL allow management of collections of SQL objects. + * + * All we need internally to manage an extension is an OID so that the + * dependent objects can be associated with it. An extension is created by + * populating the pg_extension catalog from a "control" file. + * The extension control file is parsed with the same parser we use for + * postgresql.conf. An extension also has an installation script file, + * containing SQL commands to create the extension's objects. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/extension.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include +#include + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_extension.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "commands/extension.h" +#include "commands/schemacmds.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "storage/fd.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/varlena.h" + + +/* Globally visible state variables */ +bool creating_extension = false; +Oid CurrentExtensionObject = InvalidOid; + +/* + * Internal data structure to hold the results of parsing a control file + */ +typedef struct ExtensionControlFile +{ + char *name; /* name of the extension */ + char *directory; /* directory for script files */ + char *default_version; /* default install target version, if any */ + char *module_pathname; /* string to substitute for + * MODULE_PATHNAME */ + char *comment; /* comment, if any */ + char *schema; /* target schema 
(allowed if !relocatable) */ + bool relocatable; /* is ALTER EXTENSION SET SCHEMA supported? */ + bool superuser; /* must be superuser to install? */ + bool trusted; /* allow becoming superuser on the fly? */ + int encoding; /* encoding of the script file, or -1 */ + List *requires; /* names of prerequisite extensions */ +} ExtensionControlFile; + +/* + * Internal data structure for update path information + */ +typedef struct ExtensionVersionInfo +{ + char *name; /* name of the starting version */ + List *reachable; /* List of ExtensionVersionInfo's */ + bool installable; /* does this version have an install script? */ + /* working state for Dijkstra's algorithm: */ + bool distance_known; /* is distance from start known yet? */ + int distance; /* current worst-case distance estimate */ + struct ExtensionVersionInfo *previous; /* current best predecessor */ +} ExtensionVersionInfo; + +/* Local functions */ +static List *find_update_path(List *evi_list, + ExtensionVersionInfo *evi_start, + ExtensionVersionInfo *evi_target, + bool reject_indirect, + bool reinitialize); +static Oid get_required_extension(char *reqExtensionName, + char *extensionName, + char *origSchemaName, + bool cascade, + List *parents, + bool is_create); +static void get_available_versions_for_extension(ExtensionControlFile *pcontrol, + Tuplestorestate *tupstore, + TupleDesc tupdesc); +static Datum convert_requires_to_datum(List *requires); +static void ApplyExtensionUpdates(Oid extensionOid, + ExtensionControlFile *pcontrol, + const char *initialVersion, + List *updateVersions, + char *origSchemaName, + bool cascade, + bool is_create); +static char *read_whole_file(const char *filename, int *length); + + +/* + * get_extension_oid - given an extension name, look up the OID + * + * If missing_ok is false, throw an error if extension name not found. If + * true, just return InvalidOid. 
+ */ +Oid +get_extension_oid(const char *extname, bool missing_ok) +{ + Oid result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + rel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_extension_extname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(extname)); + + scandesc = systable_beginscan(rel, ExtensionNameIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_extension) GETSTRUCT(tuple))->oid; + else + result = InvalidOid; + + systable_endscan(scandesc); + + table_close(rel, AccessShareLock); + + if (!OidIsValid(result) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("extension \"%s\" does not exist", + extname))); + + return result; +} + +/* + * get_extension_name - given an extension OID, look up the name + * + * Returns a palloc'd string, or NULL if no such extension. + */ +char * +get_extension_name(Oid ext_oid) +{ + char *result; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + rel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_extension_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ext_oid)); + + scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = pstrdup(NameStr(((Form_pg_extension) GETSTRUCT(tuple))->extname)); + else + result = NULL; + + systable_endscan(scandesc); + + table_close(rel, AccessShareLock); + + return result; +} + +/* + * get_extension_schema - given an extension OID, fetch its extnamespace + * + * Returns InvalidOid if no such extension. 
 */
static Oid
get_extension_schema(Oid ext_oid)
{
	Oid			result;
	Relation	rel;
	SysScanDesc scandesc;
	HeapTuple	tuple;
	ScanKeyData entry[1];

	rel = table_open(ExtensionRelationId, AccessShareLock);

	ScanKeyInit(&entry[0],
				Anum_pg_extension_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(ext_oid));

	scandesc = systable_beginscan(rel, ExtensionOidIndexId, true,
								  NULL, 1, entry);

	tuple = systable_getnext(scandesc);

	/* We assume that there can be at most one matching tuple */
	if (HeapTupleIsValid(tuple))
		result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace;
	else
		result = InvalidOid;

	systable_endscan(scandesc);

	table_close(rel, AccessShareLock);

	return result;
}

/*
 * Utility functions to check validity of extension and version names
 *
 * Both checks exist mainly to keep script filenames unambiguous and to
 * prevent path-traversal attacks via crafted names; errors are reported
 * with ereport so they reach the user as regular SQL errors.
 */
static void
check_valid_extension_name(const char *extensionname)
{
	int			namelen = strlen(extensionname);

	/*
	 * Disallow empty names (the parser rejects empty identifiers anyway, but
	 * let's check).
	 */
	if (namelen == 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not be empty.")));

	/*
	 * No double dashes, since that would make script filenames ambiguous.
	 */
	if (strstr(extensionname, "--"))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not contain \"--\".")));

	/*
	 * No leading or trailing dash either.  (We could probably allow this, but
	 * it would require much care in filename parsing and would make filenames
	 * visually if not formally ambiguous.  Since there's no real-world use
	 * case, let's just forbid it.)
	 */
	if (extensionname[0] == '-' || extensionname[namelen - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not begin or end with \"-\".")));

	/*
	 * No directory separators either (this is sufficient to prevent ".."
	 * style attacks).
	 */
	if (first_dir_separator(extensionname) != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension name: \"%s\"", extensionname),
				 errdetail("Extension names must not contain directory separator characters.")));
}

/* Same rules as above, applied to a version string */
static void
check_valid_version_name(const char *versionname)
{
	int			namelen = strlen(versionname);

	/*
	 * Disallow empty names (we could possibly allow this, but there seems
	 * little point).
	 */
	if (namelen == 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not be empty.")));

	/*
	 * No double dashes, since that would make script filenames ambiguous.
	 */
	if (strstr(versionname, "--"))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not contain \"--\".")));

	/*
	 * No leading or trailing dash either.
	 */
	if (versionname[0] == '-' || versionname[namelen - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not begin or end with \"-\".")));

	/*
	 * No directory separators either (this is sufficient to prevent ".."
	 * style attacks).
	 */
	if (first_dir_separator(versionname) != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid extension version name: \"%s\"", versionname),
				 errdetail("Version names must not contain directory separator characters.")));
}

/*
 * Utility functions to handle extension-related path names
 */
static bool
is_extension_control_filename(const char *filename)
{
	/* "extension" here is the filename suffix, not an extension name */
	const char *extension = strrchr(filename, '.');

	return (extension != NULL) && (strcmp(extension, ".control") == 0);
}

static bool
is_extension_script_filename(const char *filename)
{
	/* "extension" here is the filename suffix, not an extension name */
	const char *extension = strrchr(filename, '.');

	return (extension != NULL) && (strcmp(extension, ".sql") == 0);
}

/* Returns palloc'd path of the directory holding control files */
static char *
get_extension_control_directory(void)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/extension", sharepath);

	return result;
}

/* Returns palloc'd path of the primary control file for "extname" */
static char *
get_extension_control_filename(const char *extname)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/extension/%s.control",
			 sharepath, extname);

	return result;
}

/* Returns palloc'd path of the directory holding this extension's scripts */
static char *
get_extension_script_directory(ExtensionControlFile *control)
{
	char		sharepath[MAXPGPATH];
	char	   *result;

	/*
	 * The directory parameter can be omitted, absolute, or relative to the
	 * installation's share directory.
	 */
	if (!control->directory)
		return get_extension_control_directory();

	if (is_absolute_path(control->directory))
		return pstrdup(control->directory);

	get_share_path(my_exec_path, sharepath);
	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/%s", sharepath, control->directory);

	return result;
}

/*
 * Returns palloc'd path of the version-specific auxiliary control file
 * (extname--version.control); the file itself need not exist.
 */
static char *
get_extension_aux_control_filename(ExtensionControlFile *control,
								   const char *version)
{
	char	   *result;
	char	   *scriptdir;

	scriptdir = get_extension_script_directory(control);

	result = (char *) palloc(MAXPGPATH);
	snprintf(result, MAXPGPATH, "%s/%s--%s.control",
			 scriptdir, control->name, version);

	pfree(scriptdir);

	return result;
}

/*
 * Returns palloc'd path of an install script (extname--version.sql) or,
 * when from_version is given, an update script
 * (extname--from_version--version.sql).
 */
static char *
get_extension_script_filename(ExtensionControlFile *control,
							  const char *from_version, const char *version)
{
	char	   *result;
	char	   *scriptdir;

	scriptdir = get_extension_script_directory(control);

	result = (char *) palloc(MAXPGPATH);
	if (from_version)
		snprintf(result, MAXPGPATH, "%s/%s--%s--%s.sql",
				 scriptdir, control->name, from_version, version);
	else
		snprintf(result, MAXPGPATH, "%s/%s--%s.sql",
				 scriptdir, control->name, version);

	pfree(scriptdir);

	return result;
}


/*
 * Parse contents of primary or auxiliary control file, and fill in
 * fields of *control.  We parse primary file if version == NULL,
 * else the optional auxiliary file for that version.
 *
 * Control files are supposed to be very short, half a dozen lines,
 * so we don't worry about memory allocation risks here.  Also we don't
 * worry about what encoding it's in; all values are expected to be ASCII.
 */
static void
parse_extension_control_file(ExtensionControlFile *control,
							 const char *version)
{
	char	   *filename;
	FILE	   *file;
	ConfigVariable *item,
			   *head = NULL,
			   *tail = NULL;

	/*
	 * Locate the file to read.  Auxiliary files are optional.
+ */ + if (version) + filename = get_extension_aux_control_filename(control, version); + else + filename = get_extension_control_filename(control->name); + + if ((file = AllocateFile(filename, "r")) == NULL) + { + if (errno == ENOENT) + { + /* no complaint for missing auxiliary file */ + if (version) + { + pfree(filename); + return; + } + + /* missing control file indicates extension is not installed */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" is not available", control->name), + errdetail("Could not open extension control file \"%s\": %m.", + filename), + errhint("The extension must first be installed on the system where PostgreSQL is running."))); + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open extension control file \"%s\": %m", + filename))); + } + + /* + * Parse the file content, using GUC's file parsing code. We need not + * check the return value since any errors will be thrown at ERROR level. + */ + (void) ParseConfigFp(file, filename, 0, ERROR, &head, &tail); + + FreeFile(file); + + /* + * Convert the ConfigVariable list into ExtensionControlFile entries. 
+ */ + for (item = head; item != NULL; item = item->next) + { + if (strcmp(item->name, "directory") == 0) + { + if (version) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" cannot be set in a secondary extension control file", + item->name))); + + control->directory = pstrdup(item->value); + } + else if (strcmp(item->name, "default_version") == 0) + { + if (version) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"%s\" cannot be set in a secondary extension control file", + item->name))); + + control->default_version = pstrdup(item->value); + } + else if (strcmp(item->name, "module_pathname") == 0) + { + control->module_pathname = pstrdup(item->value); + } + else if (strcmp(item->name, "comment") == 0) + { + control->comment = pstrdup(item->value); + } + else if (strcmp(item->name, "schema") == 0) + { + control->schema = pstrdup(item->value); + } + else if (strcmp(item->name, "relocatable") == 0) + { + if (!parse_bool(item->value, &control->relocatable)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "superuser") == 0) + { + if (!parse_bool(item->value, &control->superuser)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "trusted") == 0) + { + if (!parse_bool(item->value, &control->trusted)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a Boolean value", + item->name))); + } + else if (strcmp(item->name, "encoding") == 0) + { + control->encoding = pg_valid_server_encoding(item->value); + if (control->encoding < 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("\"%s\" is not a valid encoding name", + item->value))); + } + else if (strcmp(item->name, "requires") == 0) + { + /* Need a modifiable copy of string 
*/ + char *rawnames = pstrdup(item->value); + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawnames, ',', &control->requires)) + { + /* syntax error in name list */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" must be a list of extension names", + item->name))); + } + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized parameter \"%s\" in file \"%s\"", + item->name, filename))); + } + + FreeConfigVariables(head); + + if (control->relocatable && control->schema != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parameter \"schema\" cannot be specified when \"relocatable\" is true"))); + + pfree(filename); +} + +/* + * Read the primary control file for the specified extension. + */ +static ExtensionControlFile * +read_extension_control_file(const char *extname) +{ + ExtensionControlFile *control; + + /* + * Set up default values. Pointer fields are initially null. + */ + control = (ExtensionControlFile *) palloc0(sizeof(ExtensionControlFile)); + control->name = pstrdup(extname); + control->relocatable = false; + control->superuser = true; + control->trusted = false; + control->encoding = -1; + + /* + * Parse the primary control file. + */ + parse_extension_control_file(control, NULL); + + return control; +} + +/* + * Read the auxiliary control file for the specified extension and version. + * + * Returns a new modified ExtensionControlFile struct; the original struct + * (reflecting just the primary control file) is not modified. + */ +static ExtensionControlFile * +read_extension_aux_control_file(const ExtensionControlFile *pcontrol, + const char *version) +{ + ExtensionControlFile *acontrol; + + /* + * Flat-copy the struct. Pointer fields share values with original. 
+ */ + acontrol = (ExtensionControlFile *) palloc(sizeof(ExtensionControlFile)); + memcpy(acontrol, pcontrol, sizeof(ExtensionControlFile)); + + /* + * Parse the auxiliary control file, overwriting struct fields + */ + parse_extension_control_file(acontrol, version); + + return acontrol; +} + +/* + * Read an SQL script file into a string, and convert to database encoding + */ +static char * +read_extension_script_file(const ExtensionControlFile *control, + const char *filename) +{ + int src_encoding; + char *src_str; + char *dest_str; + int len; + + src_str = read_whole_file(filename, &len); + + /* use database encoding if not given */ + if (control->encoding < 0) + src_encoding = GetDatabaseEncoding(); + else + src_encoding = control->encoding; + + /* make sure that source string is valid in the expected encoding */ + (void) pg_verify_mbstr(src_encoding, src_str, len, false); + + /* + * Convert the encoding to the database encoding. read_whole_file + * null-terminated the string, so if no conversion happens the string is + * valid as is. + */ + dest_str = pg_any_to_server(src_str, len, src_encoding); + + return dest_str; +} + +/* + * Execute given SQL string. + * + * Note: it's tempting to just use SPI to execute the string, but that does + * not work very well. The really serious problem is that SPI will parse, + * analyze, and plan the whole string before executing any of it; of course + * this fails if there are any plannable statements referring to objects + * created earlier in the script. A lesser annoyance is that SPI insists + * on printing the whole string as errcontext in case of any error, and that + * could be very long. + */ +static void +execute_sql_string(const char *sql) +{ + List *raw_parsetree_list; + DestReceiver *dest; + ListCell *lc1; + + /* + * Parse the SQL string into a list of raw parse trees. 
+ */ + raw_parsetree_list = pg_parse_query(sql); + + /* All output from SELECTs goes to the bit bucket */ + dest = CreateDestReceiver(DestNone); + + /* + * Do parse analysis, rule rewrite, planning, and execution for each raw + * parsetree. We must fully execute each query before beginning parse + * analysis on the next one, since there may be interdependencies. + */ + foreach(lc1, raw_parsetree_list) + { + RawStmt *parsetree = lfirst_node(RawStmt, lc1); + MemoryContext per_parsetree_context, + oldcontext; + List *stmt_list; + ListCell *lc2; + + /* + * We do the work for each parsetree in a short-lived context, to + * limit the memory used when there are many commands in the string. + */ + per_parsetree_context = + AllocSetContextCreate(CurrentMemoryContext, + "execute_sql_string per-statement context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(per_parsetree_context); + + /* Be sure parser can see any DDL done so far */ + CommandCounterIncrement(); + + stmt_list = pg_analyze_and_rewrite_fixedparams(parsetree, + sql, + NULL, + 0, + NULL); + stmt_list = pg_plan_queries(stmt_list, sql, CURSOR_OPT_PARALLEL_OK, NULL); + + foreach(lc2, stmt_list) + { + PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2); + + CommandCounterIncrement(); + + PushActiveSnapshot(GetTransactionSnapshot()); + + if (stmt->utilityStmt == NULL) + { + QueryDesc *qdesc; + + qdesc = CreateQueryDesc(stmt, + sql, + GetActiveSnapshot(), NULL, + dest, NULL, NULL, 0); + + ExecutorStart(qdesc, 0); + ExecutorRun(qdesc, ForwardScanDirection, 0, true); + ExecutorFinish(qdesc); + ExecutorEnd(qdesc); + + FreeQueryDesc(qdesc); + } + else + { + if (IsA(stmt->utilityStmt, TransactionStmt)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transaction control statements are not allowed within an extension script"))); + + ProcessUtility(stmt, + sql, + false, + PROCESS_UTILITY_QUERY, + NULL, + NULL, + dest, + NULL); + } + + PopActiveSnapshot(); + } + + /* Clean up per-parsetree 
context. */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(per_parsetree_context); + } + + /* Be sure to advance the command counter after the last script command */ + CommandCounterIncrement(); +} + +/* + * Policy function: is the given extension trusted for installation by a + * non-superuser? + * + * (Update the errhint logic below if you change this.) + */ +static bool +extension_is_trusted(ExtensionControlFile *control) +{ + AclResult aclresult; + + /* Never trust unless extension's control file says it's okay */ + if (!control->trusted) + return false; + /* Allow if user has CREATE privilege on current database */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + if (aclresult == ACLCHECK_OK) + return true; + return false; +} + +/* + * Execute the appropriate script file for installing or updating the extension + * + * If from_version isn't NULL, it's an update + */ +static void +execute_extension_script(Oid extensionOid, ExtensionControlFile *control, + const char *from_version, + const char *version, + List *requiredSchemas, + const char *schemaName, Oid schemaOid) +{ + bool switch_to_superuser = false; + char *filename; + Oid save_userid = 0; + int save_sec_context = 0; + int save_nestlevel; + StringInfoData pathbuf; + ListCell *lc; + + /* + * Enforce superuser-ness if appropriate. We postpone these checks until + * here so that the control flags are correctly associated with the right + * script(s) if they happen to be set in secondary control files. + */ + if (control->superuser && !superuser()) + { + if (extension_is_trusted(control)) + switch_to_superuser = true; + else if (from_version == NULL) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create extension \"%s\"", + control->name), + control->trusted + ? 
errhint("Must have CREATE privilege on current database to create this extension.") + : errhint("Must be superuser to create this extension."))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to update extension \"%s\"", + control->name), + control->trusted + ? errhint("Must have CREATE privilege on current database to update this extension.") + : errhint("Must be superuser to update this extension."))); + } + + filename = get_extension_script_filename(control, from_version, version); + + /* + * If installing a trusted extension on behalf of a non-superuser, become + * the bootstrap superuser. (This switch will be cleaned up automatically + * if the transaction aborts, as will the GUC changes below.) + */ + if (switch_to_superuser) + { + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(BOOTSTRAP_SUPERUSERID, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + } + + /* + * Force client_min_messages and log_min_messages to be at least WARNING, + * so that we won't spam the user with useless NOTICE messages from common + * script actions like creating shell types. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the script execution. guc.c also + * takes care of undoing the setting on error. + * + * log_min_messages can't be set by ordinary users, so for that one we + * pretend to be superuser. + */ + save_nestlevel = NewGUCNestLevel(); + + if (client_min_messages < WARNING) + (void) set_config_option("client_min_messages", "warning", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + if (log_min_messages < WARNING) + (void) set_config_option_ext("log_min_messages", "warning", + PGC_SUSET, PGC_S_SESSION, + BOOTSTRAP_SUPERUSERID, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Similarly disable check_function_bodies, to ensure that SQL functions + * won't be parsed during creation. 
+ */ + if (check_function_bodies) + (void) set_config_option("check_function_bodies", "off", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Set up the search path to have the target schema first, making it be + * the default creation target namespace. Then add the schemas of any + * prerequisite extensions, unless they are in pg_catalog which would be + * searched anyway. (Listing pg_catalog explicitly in a non-first + * position would be bad for security.) Finally add pg_temp to ensure + * that temp objects can't take precedence over others. + * + * Note: it might look tempting to use PushOverrideSearchPath for this, + * but we cannot do that. We have to actually set the search_path GUC in + * case the extension script examines or changes it. In any case, the + * GUC_ACTION_SAVE method is just as convenient. + */ + initStringInfo(&pathbuf); + appendStringInfoString(&pathbuf, quote_identifier(schemaName)); + foreach(lc, requiredSchemas) + { + Oid reqschema = lfirst_oid(lc); + char *reqname = get_namespace_name(reqschema); + + if (reqname && strcmp(reqname, "pg_catalog") != 0) + appendStringInfo(&pathbuf, ", %s", quote_identifier(reqname)); + } + appendStringInfoString(&pathbuf, ", pg_temp"); + + (void) set_config_option("search_path", pathbuf.data, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Set creating_extension and related variables so that + * recordDependencyOnCurrentExtension and other functions do the right + * things. On failure, ensure we reset these variables. + */ + creating_extension = true; + CurrentExtensionObject = extensionOid; + PG_TRY(); + { + char *c_sql = read_extension_script_file(control, filename); + Datum t_sql; + + /* + * We filter each substitution through quote_identifier(). 
When the + * arg contains one of the following characters, no one collection of + * quoting can work inside $$dollar-quoted string literals$$, + * 'single-quoted string literals', and outside of any literal. To + * avoid a security snare for extension authors, error on substitution + * for arguments containing these. + */ + const char *quoting_relevant_chars = "\"$'\\"; + + /* We use various functions that want to operate on text datums */ + t_sql = CStringGetTextDatum(c_sql); + + /* + * Reduce any lines beginning with "\echo" to empty. This allows + * scripts to contain messages telling people not to run them via + * psql, which has been found to be necessary due to old habits. + */ + t_sql = DirectFunctionCall4Coll(textregexreplace, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("^\\\\echo.*$"), + CStringGetTextDatum(""), + CStringGetTextDatum("ng")); + + /* + * If the script uses @extowner@, substitute the calling username. + */ + if (strstr(c_sql, "@extowner@")) + { + Oid uid = switch_to_superuser ? save_userid : GetUserId(); + const char *userName = GetUserNameFromId(uid, false); + const char *qUserName = quote_identifier(userName); + + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("@extowner@"), + CStringGetTextDatum(qUserName)); + if (strpbrk(userName, quoting_relevant_chars)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid character in extension owner: must not contain any of \"%s\"", + quoting_relevant_chars))); + } + + /* + * If it's not relocatable, substitute the target schema name for + * occurrences of @extschema@. + * + * For a relocatable extension, we needn't do this. There cannot be + * any need for @extschema@, else it wouldn't be relocatable. 
+ */ + if (!control->relocatable) + { + Datum old = t_sql; + const char *qSchemaName = quote_identifier(schemaName); + + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("@extschema@"), + CStringGetTextDatum(qSchemaName)); + if (t_sql != old && strpbrk(schemaName, quoting_relevant_chars)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid character in extension \"%s\" schema: must not contain any of \"%s\"", + control->name, quoting_relevant_chars))); + } + + /* + * If module_pathname was set in the control file, substitute its + * value for occurrences of MODULE_PATHNAME. + */ + if (control->module_pathname) + { + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, + t_sql, + CStringGetTextDatum("MODULE_PATHNAME"), + CStringGetTextDatum(control->module_pathname)); + } + + /* And now back to C string */ + c_sql = text_to_cstring(DatumGetTextPP(t_sql)); + + execute_sql_string(c_sql); + } + PG_FINALLY(); + { + creating_extension = false; + CurrentExtensionObject = InvalidOid; + } + PG_END_TRY(); + + /* + * Restore the GUC variables we set above. + */ + AtEOXact_GUC(true, save_nestlevel); + + /* + * Restore authentication state if needed. + */ + if (switch_to_superuser) + SetUserIdAndSecContext(save_userid, save_sec_context); +} + +/* + * Find or create an ExtensionVersionInfo for the specified version name + * + * Currently, we just use a List of the ExtensionVersionInfo's. Searching + * for them therefore uses about O(N^2) time when there are N versions of + * the extension. We could change the data structure to a hash table if + * this ever becomes a bottleneck. 
+ */ +static ExtensionVersionInfo * +get_ext_ver_info(const char *versionname, List **evi_list) +{ + ExtensionVersionInfo *evi; + ListCell *lc; + + foreach(lc, *evi_list) + { + evi = (ExtensionVersionInfo *) lfirst(lc); + if (strcmp(evi->name, versionname) == 0) + return evi; + } + + evi = (ExtensionVersionInfo *) palloc(sizeof(ExtensionVersionInfo)); + evi->name = pstrdup(versionname); + evi->reachable = NIL; + evi->installable = false; + /* initialize for later application of Dijkstra's algorithm */ + evi->distance_known = false; + evi->distance = INT_MAX; + evi->previous = NULL; + + *evi_list = lappend(*evi_list, evi); + + return evi; +} + +/* + * Locate the nearest unprocessed ExtensionVersionInfo + * + * This part of the algorithm is also about O(N^2). A priority queue would + * make it much faster, but for now there's no need. + */ +static ExtensionVersionInfo * +get_nearest_unprocessed_vertex(List *evi_list) +{ + ExtensionVersionInfo *evi = NULL; + ListCell *lc; + + foreach(lc, evi_list) + { + ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc); + + /* only vertices whose distance is still uncertain are candidates */ + if (evi2->distance_known) + continue; + /* remember the closest such vertex */ + if (evi == NULL || + evi->distance > evi2->distance) + evi = evi2; + } + + return evi; +} + +/* + * Obtain information about the set of update scripts available for the + * specified extension. The result is a List of ExtensionVersionInfo + * structs, each with a subsidiary list of the ExtensionVersionInfos for + * the versions that can be reached in one step from that version. 
+ */ +static List * +get_ext_ver_list(ExtensionControlFile *control) +{ + List *evi_list = NIL; + int extnamelen = strlen(control->name); + char *location; + DIR *dir; + struct dirent *de; + + location = get_extension_script_directory(control); + dir = AllocateDir(location); + while ((de = ReadDir(dir, location)) != NULL) + { + char *vername; + char *vername2; + ExtensionVersionInfo *evi; + ExtensionVersionInfo *evi2; + + /* must be a .sql file ... */ + if (!is_extension_script_filename(de->d_name)) + continue; + + /* ... matching extension name followed by separator */ + if (strncmp(de->d_name, control->name, extnamelen) != 0 || + de->d_name[extnamelen] != '-' || + de->d_name[extnamelen + 1] != '-') + continue; + + /* extract version name(s) from 'extname--something.sql' filename */ + vername = pstrdup(de->d_name + extnamelen + 2); + *strrchr(vername, '.') = '\0'; + vername2 = strstr(vername, "--"); + if (!vername2) + { + /* It's an install, not update, script; record its version name */ + evi = get_ext_ver_info(vername, &evi_list); + evi->installable = true; + continue; + } + *vername2 = '\0'; /* terminate first version */ + vername2 += 2; /* and point to second */ + + /* if there's a third --, it's bogus, ignore it */ + if (strstr(vername2, "--")) + continue; + + /* Create ExtensionVersionInfos and link them together */ + evi = get_ext_ver_info(vername, &evi_list); + evi2 = get_ext_ver_info(vername2, &evi_list); + evi->reachable = lappend(evi->reachable, evi2); + } + FreeDir(dir); + + return evi_list; +} + +/* + * Given an initial and final version name, identify the sequence of update + * scripts that have to be applied to perform that update. + * + * Result is a List of names of versions to transition through (the initial + * version is *not* included). 
+ */ +static List * +identify_update_path(ExtensionControlFile *control, + const char *oldVersion, const char *newVersion) +{ + List *result; + List *evi_list; + ExtensionVersionInfo *evi_start; + ExtensionVersionInfo *evi_target; + + /* Extract the version update graph from the script directory */ + evi_list = get_ext_ver_list(control); + + /* Initialize start and end vertices */ + evi_start = get_ext_ver_info(oldVersion, &evi_list); + evi_target = get_ext_ver_info(newVersion, &evi_list); + + /* Find shortest path */ + result = find_update_path(evi_list, evi_start, evi_target, false, false); + + if (result == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extension \"%s\" has no update path from version \"%s\" to version \"%s\"", + control->name, oldVersion, newVersion))); + + return result; +} + +/* + * Apply Dijkstra's algorithm to find the shortest path from evi_start to + * evi_target. + * + * If reject_indirect is true, ignore paths that go through installable + * versions. This saves work when the caller will consider starting from + * all installable versions anyway. + * + * If reinitialize is false, assume the ExtensionVersionInfo list has not + * been used for this before, and the initialization done by get_ext_ver_info + * is still good. Otherwise, reinitialize all transient fields used here. + * + * Result is a List of names of versions to transition through (the initial + * version is *not* included). Returns NIL if no such path. 
+ */ +static List * +find_update_path(List *evi_list, + ExtensionVersionInfo *evi_start, + ExtensionVersionInfo *evi_target, + bool reject_indirect, + bool reinitialize) +{ + List *result; + ExtensionVersionInfo *evi; + ListCell *lc; + + /* Caller error if start == target */ + Assert(evi_start != evi_target); + /* Caller error if reject_indirect and target is installable */ + Assert(!(reject_indirect && evi_target->installable)); + + if (reinitialize) + { + foreach(lc, evi_list) + { + evi = (ExtensionVersionInfo *) lfirst(lc); + evi->distance_known = false; + evi->distance = INT_MAX; + evi->previous = NULL; + } + } + + evi_start->distance = 0; + + while ((evi = get_nearest_unprocessed_vertex(evi_list)) != NULL) + { + if (evi->distance == INT_MAX) + break; /* all remaining vertices are unreachable */ + evi->distance_known = true; + if (evi == evi_target) + break; /* found shortest path to target */ + foreach(lc, evi->reachable) + { + ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc); + int newdist; + + /* if reject_indirect, treat installable versions as unreachable */ + if (reject_indirect && evi2->installable) + continue; + newdist = evi->distance + 1; + if (newdist < evi2->distance) + { + evi2->distance = newdist; + evi2->previous = evi; + } + else if (newdist == evi2->distance && + evi2->previous != NULL && + strcmp(evi->name, evi2->previous->name) < 0) + { + /* + * Break ties in favor of the version name that comes first + * according to strcmp(). This behavior is undocumented and + * users shouldn't rely on it. We do it just to ensure that + * if there is a tie, the update path that is chosen does not + * depend on random factors like the order in which directory + * entries get visited. 
+ */ + evi2->previous = evi; + } + } + } + + /* Return NIL if target is not reachable from start */ + if (!evi_target->distance_known) + return NIL; + + /* Build and return list of version names representing the update path */ + result = NIL; + for (evi = evi_target; evi != evi_start; evi = evi->previous) + result = lcons(evi->name, result); + + return result; +} + +/* + * Given a target version that is not directly installable, find the + * best installation sequence starting from a directly-installable version. + * + * evi_list: previously-collected version update graph + * evi_target: member of that list that we want to reach + * + * Returns the best starting-point version, or NULL if there is none. + * On success, *best_path is set to the path from the start point. + * + * If there's more than one possible start point, prefer shorter update paths, + * and break any ties arbitrarily on the basis of strcmp'ing the starting + * versions' names. + */ +static ExtensionVersionInfo * +find_install_path(List *evi_list, ExtensionVersionInfo *evi_target, + List **best_path) +{ + ExtensionVersionInfo *evi_start = NULL; + ListCell *lc; + + *best_path = NIL; + + /* + * We don't expect to be called for an installable target, but if we are, + * the answer is easy: just start from there, with an empty update path. + */ + if (evi_target->installable) + return evi_target; + + /* Consider all installable versions as start points */ + foreach(lc, evi_list) + { + ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc); + List *path; + + if (!evi1->installable) + continue; + + /* + * Find shortest path from evi1 to evi_target; but no need to consider + * paths going through other installable versions. 
+ */ + path = find_update_path(evi_list, evi1, evi_target, true, true); + if (path == NIL) + continue; + + /* Remember best path */ + if (evi_start == NULL || + list_length(path) < list_length(*best_path) || + (list_length(path) == list_length(*best_path) && + strcmp(evi_start->name, evi1->name) < 0)) + { + evi_start = evi1; + *best_path = path; + } + } + + return evi_start; +} + +/* + * CREATE EXTENSION worker + * + * When CASCADE is specified, CreateExtensionInternal() recurses if required + * extensions need to be installed. To sanely handle cyclic dependencies, + * the "parents" list contains a list of names of extensions already being + * installed, allowing us to error out if we recurse to one of those. + */ +static ObjectAddress +CreateExtensionInternal(char *extensionName, + char *schemaName, + const char *versionName, + bool cascade, + List *parents, + bool is_create) +{ + char *origSchemaName = schemaName; + Oid schemaOid = InvalidOid; + Oid extowner = GetUserId(); + ExtensionControlFile *pcontrol; + ExtensionControlFile *control; + char *filename; + struct stat fst; + List *updateVersions; + List *requiredExtensions; + List *requiredSchemas; + Oid extensionOid; + ObjectAddress address; + ListCell *lc; + + /* + * Read the primary control file. Note we assume that it does not contain + * any non-ASCII data, so there is no need to worry about encoding at this + * point. + */ + pcontrol = read_extension_control_file(extensionName); + + /* + * Determine the version to install + */ + if (versionName == NULL) + { + if (pcontrol->default_version) + versionName = pcontrol->default_version; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("version to install must be specified"))); + } + check_valid_version_name(versionName); + + /* + * Figure out which script(s) we need to run to install the desired + * version of the extension. 
If we do not have a script that directly + * does what is needed, we try to find a sequence of update scripts that + * will get us there. + */ + filename = get_extension_script_filename(pcontrol, NULL, versionName); + if (stat(filename, &fst) == 0) + { + /* Easy, no extra scripts */ + updateVersions = NIL; + } + else + { + /* Look for best way to install this version */ + List *evi_list; + ExtensionVersionInfo *evi_start; + ExtensionVersionInfo *evi_target; + + /* Extract the version update graph from the script directory */ + evi_list = get_ext_ver_list(pcontrol); + + /* Identify the target version */ + evi_target = get_ext_ver_info(versionName, &evi_list); + + /* Identify best path to reach target */ + evi_start = find_install_path(evi_list, evi_target, + &updateVersions); + + /* Fail if no path ... */ + if (evi_start == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extension \"%s\" has no installation script nor update path for version \"%s\"", + pcontrol->name, versionName))); + + /* Otherwise, install best starting point and then upgrade */ + versionName = evi_start->name; + } + + /* + * Fetch control parameters for installation target version + */ + control = read_extension_aux_control_file(pcontrol, versionName); + + /* + * Determine the target schema to install the extension into + */ + if (schemaName) + { + /* If the user is giving us the schema name, it must exist already. */ + schemaOid = get_namespace_oid(schemaName, false); + } + + if (control->schema != NULL) + { + /* + * The extension is not relocatable and the author gave us a schema + * for it. + * + * Unless CASCADE parameter was given, it's an error to give a schema + * different from control->schema if control->schema is specified. 
+ */ + if (schemaName && strcmp(control->schema, schemaName) != 0 && + !cascade) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("extension \"%s\" must be installed in schema \"%s\"", + control->name, + control->schema))); + + /* Always use the schema from control file for current extension. */ + schemaName = control->schema; + + /* Find or create the schema in case it does not exist. */ + schemaOid = get_namespace_oid(schemaName, true); + + if (!OidIsValid(schemaOid)) + { + CreateSchemaStmt *csstmt = makeNode(CreateSchemaStmt); + + csstmt->schemaname = schemaName; + csstmt->authrole = NULL; /* will be created by current user */ + csstmt->schemaElts = NIL; + csstmt->if_not_exists = false; + CreateSchemaCommand(csstmt, "(generated CREATE SCHEMA command)", + -1, -1); + + /* + * CreateSchemaCommand includes CommandCounterIncrement, so new + * schema is now visible. + */ + schemaOid = get_namespace_oid(schemaName, false); + } + } + else if (!OidIsValid(schemaOid)) + { + /* + * Neither user nor author of the extension specified schema; use the + * current default creation namespace, which is the first explicit + * entry in the search_path. + */ + List *search_path = fetch_search_path(false); + + if (search_path == NIL) /* nothing valid in search_path? */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("no schema has been selected to create in"))); + schemaOid = linitial_oid(search_path); + schemaName = get_namespace_name(schemaOid); + if (schemaName == NULL) /* recently-deleted namespace? */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("no schema has been selected to create in"))); + + list_free(search_path); + } + + /* + * Make note if a temporary namespace has been accessed in this + * transaction. + */ + if (isTempNamespace(schemaOid)) + MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE; + + /* + * We don't check creation rights on the target namespace here. 
If the + * extension script actually creates any objects there, it will fail if + * the user doesn't have such permissions. But there are cases such as + * procedural languages where it's convenient to set schema = pg_catalog + * yet we don't want to restrict the command to users with ACL_CREATE for + * pg_catalog. + */ + + /* + * Look up the prerequisite extensions, install them if necessary, and + * build lists of their OIDs and the OIDs of their target schemas. + */ + requiredExtensions = NIL; + requiredSchemas = NIL; + foreach(lc, control->requires) + { + char *curreq = (char *) lfirst(lc); + Oid reqext; + Oid reqschema; + + reqext = get_required_extension(curreq, + extensionName, + origSchemaName, + cascade, + parents, + is_create); + reqschema = get_extension_schema(reqext); + requiredExtensions = lappend_oid(requiredExtensions, reqext); + requiredSchemas = lappend_oid(requiredSchemas, reqschema); + } + + /* + * Insert new tuple into pg_extension, and create dependency entries. + */ + address = InsertExtensionTuple(control->name, extowner, + schemaOid, control->relocatable, + versionName, + PointerGetDatum(NULL), + PointerGetDatum(NULL), + requiredExtensions); + extensionOid = address.objectId; + + /* + * Apply any control-file comment on extension + */ + if (control->comment != NULL) + CreateComments(extensionOid, ExtensionRelationId, 0, control->comment); + + /* + * Execute the installation script file + */ + execute_extension_script(extensionOid, control, + NULL, versionName, + requiredSchemas, + schemaName, schemaOid); + + /* + * If additional update scripts have to be executed, apply the updates as + * though a series of ALTER EXTENSION UPDATE commands were given + */ + ApplyExtensionUpdates(extensionOid, pcontrol, + versionName, updateVersions, + origSchemaName, cascade, is_create); + + return address; +} + +/* + * Get the OID of an extension listed in "requires", possibly creating it. 
 */
static Oid
get_required_extension(char *reqExtensionName,
                       char *extensionName,
                       char *origSchemaName,
                       bool cascade,
                       List *parents,
                       bool is_create)
{
    Oid         reqExtensionOid;

    /* missing_ok = true: a missing extension is handled by the branch below */
    reqExtensionOid = get_extension_oid(reqExtensionName, true);
    if (!OidIsValid(reqExtensionOid))
    {
        if (cascade)
        {
            /* Must install it. */
            ObjectAddress addr;
            List       *cascade_parents;
            ListCell   *lc;

            /* Check extension name validity before trying to cascade. */
            check_valid_extension_name(reqExtensionName);

            /* Check for cyclic dependency between extensions. */
            foreach(lc, parents)
            {
                char       *pname = (char *) lfirst(lc);

                if (strcmp(pname, reqExtensionName) == 0)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_RECURSION),
                             errmsg("cyclic dependency detected between extensions \"%s\" and \"%s\"",
                                    reqExtensionName, extensionName)));
            }

            ereport(NOTICE,
                    (errmsg("installing required extension \"%s\"",
                            reqExtensionName)));

            /* Add current extension to list of parents to pass down. */
            cascade_parents = lappend(list_copy(parents), extensionName);

            /*
             * Create the required extension.  We propagate the SCHEMA option
             * if any, and CASCADE, but no other options.
             */
            addr = CreateExtensionInternal(reqExtensionName,
                                           origSchemaName,
                                           NULL,
                                           cascade,
                                           cascade_parents,
                                           is_create);

            /* Get its newly-assigned OID. */
            reqExtensionOid = addr.objectId;
        }
        else
            /* No CASCADE: report the missing prerequisite to the user */
            ereport(ERROR,
                    (errcode(ERRCODE_UNDEFINED_OBJECT),
                     errmsg("required extension \"%s\" is not installed",
                            reqExtensionName),
                     is_create ?
                     errhint("Use CREATE EXTENSION ... CASCADE to install required extensions too.") : 0));
    }

    return reqExtensionOid;
}

/*
 * CREATE EXTENSION
 */
ObjectAddress
CreateExtension(ParseState *pstate, CreateExtensionStmt *stmt)
{
    DefElem    *d_schema = NULL;
    DefElem    *d_new_version = NULL;
    DefElem    *d_cascade = NULL;
    char       *schemaName = NULL;
    char       *versionName = NULL;
    bool        cascade = false;
    ListCell   *lc;

    /* Check extension name validity before any filesystem access */
    check_valid_extension_name(stmt->extname);

    /*
     * Check for duplicate extension name.  The unique index on
     * pg_extension.extname would catch this anyway, and serves as a backstop
     * in case of race conditions; but this is a friendlier error message, and
     * besides we need a check to support IF NOT EXISTS.
     */
    if (get_extension_oid(stmt->extname, true) != InvalidOid)
    {
        if (stmt->if_not_exists)
        {
            ereport(NOTICE,
                    (errcode(ERRCODE_DUPLICATE_OBJECT),
                     errmsg("extension \"%s\" already exists, skipping",
                            stmt->extname)));
            return InvalidObjectAddress;
        }
        else
            ereport(ERROR,
                    (errcode(ERRCODE_DUPLICATE_OBJECT),
                     errmsg("extension \"%s\" already exists",
                            stmt->extname)));
    }

    /*
     * We use global variables to track the extension being created, so we can
     * create only one extension at the same time.
     */
    if (creating_extension)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("nested CREATE EXTENSION is not supported")));

    /* Deconstruct the statement option list */
    foreach(lc, stmt->options)
    {
        DefElem    *defel = (DefElem *) lfirst(lc);

        if (strcmp(defel->defname, "schema") == 0)
        {
            /* Reject the same option being given twice */
            if (d_schema)
                errorConflictingDefElem(defel, pstate);
            d_schema = defel;
            schemaName = defGetString(d_schema);
        }
        else if (strcmp(defel->defname, "new_version") == 0)
        {
            if (d_new_version)
                errorConflictingDefElem(defel, pstate);
            d_new_version = defel;
            versionName = defGetString(d_new_version);
        }
        else if (strcmp(defel->defname, "cascade") == 0)
        {
            if (d_cascade)
                errorConflictingDefElem(defel, pstate);
            d_cascade = defel;
            cascade = defGetBoolean(d_cascade);
        }
        else
            elog(ERROR, "unrecognized option: %s", defel->defname);
    }

    /* Call CreateExtensionInternal to do the real work. */
    return CreateExtensionInternal(stmt->extname,
                                   schemaName,
                                   versionName,
                                   cascade,
                                   NIL,
                                   true);
}

/*
 * InsertExtensionTuple
 *
 * Insert the new pg_extension row, and create extension's dependency entries.
 * Return the OID assigned to the new row.
 *
 * This is exported for the benefit of pg_upgrade, which has to create a
 * pg_extension entry (and the extension-level dependencies) without
 * actually running the extension's script.
 *
 * extConfig and extCondition should be arrays or PointerGetDatum(NULL).
 * We declare them as plain Datum to avoid needing array.h in extension.h.
 */
ObjectAddress
InsertExtensionTuple(const char *extName, Oid extOwner,
                     Oid schemaOid, bool relocatable, const char *extVersion,
                     Datum extConfig, Datum extCondition,
                     List *requiredExtensions)
{
    Oid         extensionOid;
    Relation    rel;
    Datum       values[Natts_pg_extension];
    bool        nulls[Natts_pg_extension];
    HeapTuple   tuple;
    ObjectAddress myself;
    ObjectAddress nsp;
    ObjectAddresses *refobjs;
    ListCell   *lc;

    /*
     * Build and insert the pg_extension tuple
     */
    rel = table_open(ExtensionRelationId, RowExclusiveLock);

    memset(values, 0, sizeof(values));
    memset(nulls, 0, sizeof(nulls));

    /* Assign a new OID for the extension before forming the tuple */
    extensionOid = GetNewOidWithIndex(rel, ExtensionOidIndexId,
                                      Anum_pg_extension_oid);
    values[Anum_pg_extension_oid - 1] = ObjectIdGetDatum(extensionOid);
    values[Anum_pg_extension_extname - 1] =
        DirectFunctionCall1(namein, CStringGetDatum(extName));
    values[Anum_pg_extension_extowner - 1] = ObjectIdGetDatum(extOwner);
    values[Anum_pg_extension_extnamespace - 1] = ObjectIdGetDatum(schemaOid);
    values[Anum_pg_extension_extrelocatable - 1] = BoolGetDatum(relocatable);
    values[Anum_pg_extension_extversion - 1] = CStringGetTextDatum(extVersion);

    /* Absent config/condition arrays are stored as SQL NULLs */
    if (extConfig == PointerGetDatum(NULL))
        nulls[Anum_pg_extension_extconfig - 1] = true;
    else
        values[Anum_pg_extension_extconfig - 1] = extConfig;

    if (extCondition == PointerGetDatum(NULL))
        nulls[Anum_pg_extension_extcondition - 1] = true;
    else
        values[Anum_pg_extension_extcondition - 1] = extCondition;

    tuple = heap_form_tuple(rel->rd_att, values, nulls);

    CatalogTupleInsert(rel, tuple);

    heap_freetuple(tuple);
    table_close(rel, RowExclusiveLock);

    /*
     * Record dependencies on owner, schema, and prerequisite extensions
     */
    recordDependencyOnOwner(ExtensionRelationId, extensionOid, extOwner);

    refobjs = new_object_addresses();

    ObjectAddressSet(myself, ExtensionRelationId, extensionOid);

    ObjectAddressSet(nsp, NamespaceRelationId, schemaOid);
    add_exact_object_address(&nsp, refobjs);

    foreach(lc, requiredExtensions)
    {
        Oid         reqext = lfirst_oid(lc);
        ObjectAddress otherext;

        ObjectAddressSet(otherext, ExtensionRelationId, reqext);
        add_exact_object_address(&otherext, refobjs);
    }

    /* Record all of them (this includes duplicate elimination) */
    record_object_address_dependencies(&myself, refobjs, DEPENDENCY_NORMAL);
    free_object_addresses(refobjs);

    /* Post creation hook for new extension */
    InvokeObjectPostCreateHook(ExtensionRelationId, extensionOid, 0);

    return myself;
}

/*
 * Guts of extension deletion.
 *
 * All we need do here is remove the pg_extension tuple itself.  Everything
 * else is taken care of by the dependency infrastructure.
 */
void
RemoveExtensionById(Oid extId)
{
    Relation    rel;
    SysScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];

    /*
     * Disallow deletion of any extension that's currently open for insertion;
     * else subsequent executions of recordDependencyOnCurrentExtension()
     * could create dangling pg_depend records that refer to a no-longer-valid
     * pg_extension OID.  This is needed not so much because we think people
     * might write "DROP EXTENSION foo" in foo's own script files, as because
     * errors in dependency management in extension script files could give
     * rise to cases where an extension is dropped as a result of recursing
     * from some contained object.  Because of that, we must test for the case
     * here, not at some higher level of the DROP EXTENSION command.
     */
    if (extId == CurrentExtensionObject)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("cannot drop extension \"%s\" because it is being modified",
                        get_extension_name(extId))));

    rel = table_open(ExtensionRelationId, RowExclusiveLock);

    /* Look up the target row by its OID via the OID index */
    ScanKeyInit(&entry[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extId));
    scandesc = systable_beginscan(rel, ExtensionOidIndexId, true,
                                  NULL, 1, entry);

    tuple = systable_getnext(scandesc);

    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
        CatalogTupleDelete(rel, &tuple->t_self);

    systable_endscan(scandesc);

    table_close(rel, RowExclusiveLock);
}

/*
 * This function lists the available extensions (one row per primary control
 * file in the control directory).  We parse each control file and report the
 * interesting fields.
 *
 * The system view pg_available_extensions provides a user interface to this
 * SRF, adding information about whether the extensions are installed in the
 * current DB.
 */
Datum
pg_available_extensions(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return an
     * empty set.  Any other error will be reported by ReadDir.
 */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            ExtensionControlFile *control;
            char       *extname;
            Datum       values[3];
            bool        nulls[3];

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            control = read_extension_control_file(extname);

            memset(values, 0, sizeof(values));
            memset(nulls, 0, sizeof(nulls));

            /* name */
            values[0] = DirectFunctionCall1(namein,
                                            CStringGetDatum(control->name));
            /* default_version */
            if (control->default_version == NULL)
                nulls[1] = true;
            else
                values[1] = CStringGetTextDatum(control->default_version);
            /* comment */
            if (control->comment == NULL)
                nulls[2] = true;
            else
                values[2] = CStringGetTextDatum(control->comment);

            tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                                 values, nulls);
        }

        FreeDir(dir);
    }

    return (Datum) 0;
}

/*
 * This function lists the available extension versions (one row per
 * extension installation script).  For each version, we parse the related
 * control file(s) and report the interesting fields.
 *
 * The system view pg_available_extension_versions provides a user interface
 * to this SRF, adding information about which versions are installed in the
 * current DB.
 */
Datum
pg_available_extension_versions(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return an
     * empty set.  Any other error will be reported by ReadDir.
     */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            ExtensionControlFile *control;
            char       *extname;

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            /* read the control file */
            control = read_extension_control_file(extname);

            /* scan extension's script directory for install scripts */
            get_available_versions_for_extension(control, rsinfo->setResult,
                                                 rsinfo->setDesc);
        }

        FreeDir(dir);
    }

    return (Datum) 0;
}

/*
 * Inner loop for pg_available_extension_versions:
 * read versions of one extension, add rows to tupstore
 */
static void
get_available_versions_for_extension(ExtensionControlFile *pcontrol,
                                     Tuplestorestate *tupstore,
                                     TupleDesc tupdesc)
{
    List       *evi_list;
    ListCell   *lc;

    /* Extract the version update graph from the script directory */
    evi_list = get_ext_ver_list(pcontrol);

    /* For each installable version ... */
    foreach(lc, evi_list)
    {
        ExtensionVersionInfo *evi = (ExtensionVersionInfo *) lfirst(lc);
        ExtensionControlFile *control;
        Datum       values[8];
        bool        nulls[8];
        ListCell   *lc2;

        if (!evi->installable)
            continue;

        /*
         * Fetch parameters for specific version (pcontrol is not changed)
         */
        control = read_extension_aux_control_file(pcontrol, evi->name);

        memset(values, 0, sizeof(values));
        memset(nulls, 0, sizeof(nulls));

        /* name */
        values[0] = DirectFunctionCall1(namein,
                                        CStringGetDatum(control->name));
        /* version */
        values[1] = CStringGetTextDatum(evi->name);
        /* superuser */
        values[2] = BoolGetDatum(control->superuser);
        /* trusted */
        values[3] = BoolGetDatum(control->trusted);
        /* relocatable */
        values[4] = BoolGetDatum(control->relocatable);
        /* schema */
        if (control->schema == NULL)
            nulls[5] = true;
        else
            values[5] = DirectFunctionCall1(namein,
                                            CStringGetDatum(control->schema));
        /* requires */
        if (control->requires == NIL)
            nulls[6] = true;
        else
            values[6] = convert_requires_to_datum(control->requires);
        /* comment */
        if (control->comment == NULL)
            nulls[7] = true;
        else
            values[7] = CStringGetTextDatum(control->comment);

        tuplestore_putvalues(tupstore, tupdesc, values, nulls);

        /*
         * Find all non-directly-installable versions that would be installed
         * starting from this version, and report them, inheriting the
         * parameters that aren't changed in updates from this version.
 */
        foreach(lc2, evi_list)
        {
            ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
            List       *best_path;

            if (evi2->installable)
                continue;
            if (find_install_path(evi_list, evi2, &best_path) == evi)
            {
                /*
                 * Fetch parameters for this version (pcontrol is not changed)
                 */
                control = read_extension_aux_control_file(pcontrol, evi2->name);

                /*
                 * values[]/nulls[] still hold the parent installable
                 * version's entries; only the fields that can change in an
                 * update are overwritten below.
                 */
                /* name stays the same */
                /* version */
                values[1] = CStringGetTextDatum(evi2->name);
                /* superuser */
                values[2] = BoolGetDatum(control->superuser);
                /* trusted */
                values[3] = BoolGetDatum(control->trusted);
                /* relocatable */
                values[4] = BoolGetDatum(control->relocatable);
                /* schema stays the same */
                /* requires */
                if (control->requires == NIL)
                    nulls[6] = true;
                else
                {
                    values[6] = convert_requires_to_datum(control->requires);
                    nulls[6] = false;
                }
                /* comment stays the same */

                tuplestore_putvalues(tupdesc ? tupstore : tupstore, tupdesc, values, nulls);
            }
        }
    }
}

/*
 * Test whether the given extension exists (not whether it's installed)
 *
 * This checks for the existence of a matching control file in the extension
 * directory.  That's not a bulletproof check, since the file might be
 * invalid, but this is only used for hints so it doesn't have to be 100%
 * right.
 */
bool
extension_file_exists(const char *extensionName)
{
    bool        result = false;
    char       *location;
    DIR        *dir;
    struct dirent *de;

    location = get_extension_control_directory();
    dir = AllocateDir(location);

    /*
     * If the control directory doesn't exist, we want to silently return
     * false.  Any other error will be reported by ReadDir.
     */
    if (dir == NULL && errno == ENOENT)
    {
        /* do nothing */
    }
    else
    {
        while ((de = ReadDir(dir, location)) != NULL)
        {
            char       *extname;

            if (!is_extension_control_filename(de->d_name))
                continue;

            /* extract extension name from 'name.control' filename */
            extname = pstrdup(de->d_name);
            *strrchr(extname, '.') = '\0';

            /* ignore it if it's an auxiliary control file ("--" in name) */
            if (strstr(extname, "--"))
                continue;

            /* done if it matches request */
            if (strcmp(extname, extensionName) == 0)
            {
                result = true;
                break;
            }
        }

        FreeDir(dir);
    }

    return result;
}

/*
 * Convert a list of extension names to a name[] Datum
 */
static Datum
convert_requires_to_datum(List *requires)
{
    Datum      *datums;
    int         ndatums;
    ArrayType  *a;
    ListCell   *lc;

    ndatums = list_length(requires);
    datums = (Datum *) palloc(ndatums * sizeof(Datum));
    /* reuse ndatums as the fill counter while walking the list */
    ndatums = 0;
    foreach(lc, requires)
    {
        char       *curreq = (char *) lfirst(lc);

        datums[ndatums++] =
            DirectFunctionCall1(namein, CStringGetDatum(curreq));
    }
    a = construct_array(datums, ndatums,
                        NAMEOID,
                        NAMEDATALEN, false, TYPALIGN_CHAR);
    return PointerGetDatum(a);
}

/*
 * This function reports the version update paths that exist for the
 * specified extension.
 */
Datum
pg_extension_update_paths(PG_FUNCTION_ARGS)
{
    Name        extname = PG_GETARG_NAME(0);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    List       *evi_list;
    ExtensionControlFile *control;
    ListCell   *lc1;

    /* Check extension name validity before any filesystem access */
    check_valid_extension_name(NameStr(*extname));

    /* Build tuplestore to hold the result rows */
    InitMaterializedSRF(fcinfo, 0);

    /* Read the extension's control file */
    control = read_extension_control_file(NameStr(*extname));

    /* Extract the version update graph from the script directory */
    evi_list = get_ext_ver_list(control);

    /* Iterate over all pairs of versions */
    foreach(lc1, evi_list)
    {
        ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc1);
        ListCell   *lc2;

        foreach(lc2, evi_list)
        {
            ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
            List       *path;
            Datum       values[3];
            bool        nulls[3];

            if (evi1 == evi2)
                continue;

            /* Find shortest path from evi1 to evi2 */
            path = find_update_path(evi_list, evi1, evi2, false, true);

            /* Emit result row */
            memset(values, 0, sizeof(values));
            memset(nulls, 0, sizeof(nulls));

            /* source */
            values[0] = CStringGetTextDatum(evi1->name);
            /* target */
            values[1] = CStringGetTextDatum(evi2->name);
            /* path */
            if (path == NIL)
                nulls[2] = true;
            else
            {
                StringInfoData pathbuf;
                ListCell   *lcv;

                initStringInfo(&pathbuf);
                /* The path doesn't include start vertex, but show it */
                appendStringInfoString(&pathbuf, evi1->name);
                foreach(lcv, path)
                {
                    char       *versionName = (char *) lfirst(lcv);

                    appendStringInfoString(&pathbuf, "--");
                    appendStringInfoString(&pathbuf, versionName);
                }
                values[2] = CStringGetTextDatum(pathbuf.data);
                pfree(pathbuf.data);
            }

            tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
                                 values, nulls);
        }
    }

    return (Datum) 0;
}

/*
 * pg_extension_config_dump
 *
 * Record information about a
configuration table that belongs to an
 * extension being created, but whose contents should be dumped in whole
 * or in part during pg_dump.
 */
Datum
pg_extension_config_dump(PG_FUNCTION_ARGS)
{
    Oid         tableoid = PG_GETARG_OID(0);
    text       *wherecond = PG_GETARG_TEXT_PP(1);
    char       *tablename;
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Datum       arrayDatum;
    Datum       elementDatum;
    int         arrayLength;
    int         arrayIndex;
    bool        isnull;
    Datum       repl_val[Natts_pg_extension];
    bool        repl_null[Natts_pg_extension];
    bool        repl_repl[Natts_pg_extension];
    ArrayType  *a;

    /*
     * We only allow this to be called from an extension's SQL script. We
     * shouldn't need any permissions check beyond that.
     */
    if (!creating_extension)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("%s can only be called from an SQL script executed by CREATE EXTENSION",
                        "pg_extension_config_dump()")));

    /*
     * Check that the table exists and is a member of the extension being
     * created.  This ensures that we don't need to register an additional
     * dependency to protect the extconfig entry.
     */
    tablename = get_rel_name(tableoid);
    if (tablename == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_TABLE),
                 errmsg("OID %u does not refer to a table", tableoid)));
    if (getExtensionOfObject(RelationRelationId, tableoid) !=
        CurrentExtensionObject)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("table \"%s\" is not a member of the extension being created",
                        tablename)));

    /*
     * Add the table OID and WHERE condition to the extension's extconfig and
     * extcondition arrays.
     *
     * If the table is already in extconfig, treat this as an update of the
     * WHERE condition.
     */

    /* Find the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(CurrentExtensionObject));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             CurrentExtensionObject);

    memset(repl_val, 0, sizeof(repl_val));
    memset(repl_null, false, sizeof(repl_null));
    memset(repl_repl, false, sizeof(repl_repl));

    /* Build or modify the extconfig value */
    elementDatum = ObjectIdGetDatum(tableoid);

    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        /* Previously empty extconfig, so build 1-element array */
        arrayLength = 0;
        arrayIndex = 1;

        a = construct_array(&elementDatum, 1,
                            OIDOID,
                            sizeof(Oid), true, TYPALIGN_INT);
    }
    else
    {
        /* Modify or extend existing extconfig array */
        Oid        *arrayData;
        int         i;

        a = DatumGetArrayTypeP(arrayDatum);

        arrayLength = ARR_DIMS(a)[0];
        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            arrayLength < 0 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != OIDOID)
            elog(ERROR, "extconfig is not a 1-D Oid array");
        arrayData = (Oid *) ARR_DATA_PTR(a);

        /* arrayIndex is 1-based, as array_set expects */
        arrayIndex = arrayLength + 1;   /* set up to add after end */

        for (i = 0; i < arrayLength; i++)
        {
            if (arrayData[i] == tableoid)
            {
                arrayIndex = i + 1;     /* replace this element instead */
                break;
            }
        }

        a = array_set(a, 1, &arrayIndex,
                      elementDatum,
                      false,
                      -1 /* varlena array */ ,
                      sizeof(Oid) /* OID's typlen */ ,
                      true /* OID's typbyval */ ,
                      TYPALIGN_INT /* OID's typalign */ );
    }
    repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
    repl_repl[Anum_pg_extension_extconfig - 1] = true;

    /* Build or modify the extcondition value */
    /* elementDatum now holds the WHERE clause text, not the table OID */
    elementDatum = PointerGetDatum(wherecond);

    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        if (arrayLength != 0)
            elog(ERROR, "extconfig and extcondition arrays do not match");

        a = construct_array(&elementDatum, 1,
                            TEXTOID,
                            -1, false, TYPALIGN_INT);
    }
    else
    {
        a = DatumGetArrayTypeP(arrayDatum);

        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != TEXTOID)
            elog(ERROR, "extcondition is not a 1-D text array");
        if (ARR_DIMS(a)[0] != arrayLength)
            elog(ERROR, "extconfig and extcondition arrays do not match");

        /* Add or replace at same index as in extconfig */
        a = array_set(a, 1, &arrayIndex,
                      elementDatum,
                      false,
                      -1 /* varlena array */ ,
                      -1 /* TEXT's typlen */ ,
                      false /* TEXT's typbyval */ ,
                      TYPALIGN_INT /* TEXT's typalign */ );
    }
    repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
    repl_repl[Anum_pg_extension_extcondition - 1] = true;

    extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
                               repl_val, repl_null, repl_repl);

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    systable_endscan(extScan);

    table_close(extRel, RowExclusiveLock);

    PG_RETURN_VOID();
}

/*
 * extension_config_remove
 *
 * Remove the specified table OID from extension's extconfig, if present.
 * This is not currently exposed as a function, but it could be;
 * for now, we just invoke it from ALTER EXTENSION DROP.
 */
static void
extension_config_remove(Oid extensionoid, Oid tableoid)
{
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Datum       arrayDatum;
    int         arrayLength;
    int         arrayIndex;
    bool        isnull;
    Datum       repl_val[Natts_pg_extension];
    bool        repl_null[Natts_pg_extension];
    bool        repl_repl[Natts_pg_extension];
    ArrayType  *a;

    /* Find the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionoid));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             extensionoid);

    /* Search extconfig for the tableoid */
    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extconfig,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        /* nothing to do */
        a = NULL;
        arrayLength = 0;
        arrayIndex = -1;
    }
    else
    {
        Oid        *arrayData;
        int         i;

        a = DatumGetArrayTypeP(arrayDatum);

        arrayLength = ARR_DIMS(a)[0];
        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            arrayLength < 0 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != OIDOID)
            elog(ERROR, "extconfig is not a 1-D Oid array");
        arrayData = (Oid *) ARR_DATA_PTR(a);

        /* arrayIndex is the 0-based entry to remove, or -1 if not found */
        arrayIndex = -1;        /* flag for no deletion needed */

        for (i = 0; i < arrayLength; i++)
        {
            if (arrayData[i] == tableoid)
            {
                arrayIndex = i; /* index to remove */
                break;
            }
        }
    }

    /* If tableoid is not in extconfig, nothing to do */
    if (arrayIndex < 0)
    {
        systable_endscan(extScan);
        table_close(extRel, RowExclusiveLock);
        return;
    }

    /* Modify or delete the extconfig value */
    memset(repl_val, 0, sizeof(repl_val));
    memset(repl_null, false, sizeof(repl_null));
    memset(repl_repl, false, sizeof(repl_repl));

    if (arrayLength <= 1)
    {
        /* removing only element, just set array to null */
        repl_null[Anum_pg_extension_extconfig - 1] = true;
    }
    else
    {
        /* squeeze out the target element */
        Datum      *dvalues;
        int         nelems;
        int         i;

        /* We already checked there are no nulls */
        deconstruct_array(a, OIDOID, sizeof(Oid), true, TYPALIGN_INT,
                          &dvalues, NULL, &nelems);

        for (i = arrayIndex; i < arrayLength - 1; i++)
            dvalues[i] = dvalues[i + 1];

        a = construct_array(dvalues, arrayLength - 1,
                            OIDOID, sizeof(Oid), true, TYPALIGN_INT);

        repl_val[Anum_pg_extension_extconfig - 1] = PointerGetDatum(a);
    }
    repl_repl[Anum_pg_extension_extconfig - 1] = true;

    /* Modify or delete the extcondition value */
    arrayDatum = heap_getattr(extTup, Anum_pg_extension_extcondition,
                              RelationGetDescr(extRel), &isnull);
    if (isnull)
    {
        elog(ERROR, "extconfig and extcondition arrays do not match");
    }
    else
    {
        a = DatumGetArrayTypeP(arrayDatum);

        if (ARR_NDIM(a) != 1 ||
            ARR_LBOUND(a)[0] != 1 ||
            ARR_HASNULL(a) ||
            ARR_ELEMTYPE(a) != TEXTOID)
            elog(ERROR, "extcondition is not a 1-D text array");
        if (ARR_DIMS(a)[0] != arrayLength)
            elog(ERROR, "extconfig and extcondition arrays do not match");
    }

    if (arrayLength <= 1)
    {
        /* removing only element, just set array to null */
        repl_null[Anum_pg_extension_extcondition - 1] = true;
    }
    else
    {
        /* squeeze out the target element */
        Datum      *dvalues;
        int         nelems;
        int         i;

        /* We already checked there are no nulls */
        deconstruct_array(a, TEXTOID, -1, false, TYPALIGN_INT,
                          &dvalues, NULL, &nelems);

        for (i = arrayIndex; i < arrayLength - 1; i++)
            dvalues[i] = dvalues[i + 1];

        a = construct_array(dvalues, arrayLength - 1,
                            TEXTOID, -1, false, TYPALIGN_INT);

        repl_val[Anum_pg_extension_extcondition - 1] = PointerGetDatum(a);
    }
    repl_repl[Anum_pg_extension_extcondition - 1] = true;

    extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel),
                               repl_val, repl_null, repl_repl);

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    systable_endscan(extScan);

    table_close(extRel, RowExclusiveLock);
}

/*
 * Execute ALTER EXTENSION SET SCHEMA
 */
ObjectAddress
AlterExtensionNamespace(const char *extensionName, const char *newschema, Oid *oldschema)
{
    Oid         extensionOid;
    Oid         nspOid;
    Oid         oldNspOid;
    AclResult   aclresult;
    Relation    extRel;
    ScanKeyData key[2];
    SysScanDesc extScan;
    HeapTuple   extTup;
    Form_pg_extension extForm;
    Relation    depRel;
    SysScanDesc depScan;
    HeapTuple   depTup;
    ObjectAddresses *objsMoved;
    ObjectAddress extAddr;

    extensionOid = get_extension_oid(extensionName, false);

    nspOid = LookupCreationNamespace(newschema);

    /*
     * Permission check: must own extension.  Note that we don't bother to
     * check ownership of the individual member objects ...
     */
    if (!pg_extension_ownercheck(extensionOid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION,
                       extensionName);

    /* Permission check: must have creation rights in target namespace */
    aclresult = pg_namespace_aclcheck(nspOid, GetUserId(), ACL_CREATE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error(aclresult, OBJECT_SCHEMA, newschema);

    /*
     * If the schema is currently a member of the extension, disallow moving
     * the extension into the schema.  That would create a dependency loop.
 */
    if (getExtensionOfObject(NamespaceRelationId, nspOid) == extensionOid)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("cannot move extension \"%s\" into schema \"%s\" "
                        "because the extension contains the schema",
                        extensionName, newschema)));

    /* Locate the pg_extension tuple */
    extRel = table_open(ExtensionRelationId, RowExclusiveLock);

    ScanKeyInit(&key[0],
                Anum_pg_extension_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionOid));

    extScan = systable_beginscan(extRel, ExtensionOidIndexId, true,
                                 NULL, 1, key);

    extTup = systable_getnext(extScan);

    if (!HeapTupleIsValid(extTup))      /* should not happen */
        elog(ERROR, "could not find tuple for extension %u",
             extensionOid);

    /* Copy tuple so we can modify it below */
    extTup = heap_copytuple(extTup);
    extForm = (Form_pg_extension) GETSTRUCT(extTup);

    systable_endscan(extScan);

    /*
     * If the extension is already in the target schema, just silently do
     * nothing.
     */
    if (extForm->extnamespace == nspOid)
    {
        table_close(extRel, RowExclusiveLock);
        return InvalidObjectAddress;
    }

    /* Check extension is supposed to be relocatable */
    if (!extForm->extrelocatable)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("extension \"%s\" does not support SET SCHEMA",
                        NameStr(extForm->extname))));

    objsMoved = new_object_addresses();

    /* store the OID of the namespace to-be-changed */
    oldNspOid = extForm->extnamespace;

    /*
     * Scan pg_depend to find objects that depend directly on the extension,
     * and alter each one's schema.
     */
    depRel = table_open(DependRelationId, AccessShareLock);

    ScanKeyInit(&key[0],
                Anum_pg_depend_refclassid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(ExtensionRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_refobjid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(extensionOid));

    depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
                                 NULL, 2, key);

    while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
    {
        Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress dep;
        Oid         dep_oldNspOid;

        /*
         * Ignore non-membership dependencies.  (Currently, the only other
         * case we could see here is a normal dependency from another
         * extension.)
         */
        if (pg_depend->deptype != DEPENDENCY_EXTENSION)
            continue;

        dep.classId = pg_depend->classid;
        dep.objectId = pg_depend->objid;
        dep.objectSubId = pg_depend->objsubid;

        if (dep.objectSubId != 0)       /* should not happen */
            elog(ERROR, "extension should not have a sub-object dependency");

        /* Relocate the object */
        dep_oldNspOid = AlterObjectNamespace_oid(dep.classId,
                                                 dep.objectId,
                                                 nspOid,
                                                 objsMoved);

        /*
         * If not all the objects had the same old namespace (ignoring any
         * that are not in namespaces), complain.
         */
        if (dep_oldNspOid != InvalidOid && dep_oldNspOid != oldNspOid)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("extension \"%s\" does not support SET SCHEMA",
                            NameStr(extForm->extname)),
                     errdetail("%s is not in the extension's schema \"%s\"",
                               getObjectDescription(&dep, false),
                               get_namespace_name(oldNspOid))));
    }

    /* report old schema, if caller wants it */
    if (oldschema)
        *oldschema = oldNspOid;

    systable_endscan(depScan);

    relation_close(depRel, AccessShareLock);

    /* Now adjust pg_extension.extnamespace */
    extForm->extnamespace = nspOid;

    CatalogTupleUpdate(extRel, &extTup->t_self, extTup);

    table_close(extRel, RowExclusiveLock);

    /* update dependencies to point to the new schema */
    changeDependencyFor(ExtensionRelationId, extensionOid,
                        NamespaceRelationId, oldNspOid, nspOid);

    InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0);

    ObjectAddressSet(extAddr, ExtensionRelationId, extensionOid);

    return extAddr;
}

/*
 * Execute ALTER EXTENSION UPDATE
 */
ObjectAddress
ExecAlterExtensionStmt(ParseState *pstate, AlterExtensionStmt *stmt)
{
    DefElem    *d_new_version = NULL;
    char       *versionName;
    char       *oldVersionName;
    ExtensionControlFile *control;
    Oid         extensionOid;
    Relation    extRel;
    ScanKeyData key[1];
    SysScanDesc extScan;
    HeapTuple   extTup;
    List       *updateVersions;
    Datum       datum;
    bool        isnull;
    ListCell   *lc;
    ObjectAddress address;

    /*
     * We use global variables to track the extension being created, so we can
     * create/update only one extension at the same time.
+ */ + if (creating_extension) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nested ALTER EXTENSION is not supported"))); + + /* + * Look up the extension --- it must already exist in pg_extension + */ + extRel = table_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_extension_extname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->extname)); + + extScan = systable_beginscan(extRel, ExtensionNameIndexId, true, + NULL, 1, key); + + extTup = systable_getnext(extScan); + + if (!HeapTupleIsValid(extTup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("extension \"%s\" does not exist", + stmt->extname))); + + extensionOid = ((Form_pg_extension) GETSTRUCT(extTup))->oid; + + /* + * Determine the existing version we are updating from + */ + datum = heap_getattr(extTup, Anum_pg_extension_extversion, + RelationGetDescr(extRel), &isnull); + if (isnull) + elog(ERROR, "extversion is null"); + oldVersionName = text_to_cstring(DatumGetTextPP(datum)); + + systable_endscan(extScan); + + table_close(extRel, AccessShareLock); + + /* Permission check: must own extension */ + if (!pg_extension_ownercheck(extensionOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION, + stmt->extname); + + /* + * Read the primary control file. Note we assume that it does not contain + * any non-ASCII data, so there is no need to worry about encoding at this + * point. 
+ */ + control = read_extension_control_file(stmt->extname); + + /* + * Read the statement option list + */ + foreach(lc, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "new_version") == 0) + { + if (d_new_version) + errorConflictingDefElem(defel, pstate); + d_new_version = defel; + } + else + elog(ERROR, "unrecognized option: %s", defel->defname); + } + + /* + * Determine the version to update to + */ + if (d_new_version && d_new_version->arg) + versionName = strVal(d_new_version->arg); + else if (control->default_version) + versionName = control->default_version; + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("version to install must be specified"))); + versionName = NULL; /* keep compiler quiet */ + } + check_valid_version_name(versionName); + + /* + * If we're already at that version, just say so + */ + if (strcmp(oldVersionName, versionName) == 0) + { + ereport(NOTICE, + (errmsg("version \"%s\" of extension \"%s\" is already installed", + versionName, stmt->extname))); + return InvalidObjectAddress; + } + + /* + * Identify the series of update script files we need to execute + */ + updateVersions = identify_update_path(control, + oldVersionName, + versionName); + + /* + * Update the pg_extension row and execute the update scripts, one at a + * time + */ + ApplyExtensionUpdates(extensionOid, control, + oldVersionName, updateVersions, + NULL, false, false); + + ObjectAddressSet(address, ExtensionRelationId, extensionOid); + + return address; +} + +/* + * Apply a series of update scripts as though individual ALTER EXTENSION + * UPDATE commands had been given, including altering the pg_extension row + * and dependencies each time. + * + * This might be more work than necessary, but it ensures that old update + * scripts don't break if newer versions have different control parameters. 
+ */ +static void +ApplyExtensionUpdates(Oid extensionOid, + ExtensionControlFile *pcontrol, + const char *initialVersion, + List *updateVersions, + char *origSchemaName, + bool cascade, + bool is_create) +{ + const char *oldVersionName = initialVersion; + ListCell *lcv; + + foreach(lcv, updateVersions) + { + char *versionName = (char *) lfirst(lcv); + ExtensionControlFile *control; + char *schemaName; + Oid schemaOid; + List *requiredExtensions; + List *requiredSchemas; + Relation extRel; + ScanKeyData key[1]; + SysScanDesc extScan; + HeapTuple extTup; + Form_pg_extension extForm; + Datum values[Natts_pg_extension]; + bool nulls[Natts_pg_extension]; + bool repl[Natts_pg_extension]; + ObjectAddress myself; + ListCell *lc; + + /* + * Fetch parameters for specific version (pcontrol is not changed) + */ + control = read_extension_aux_control_file(pcontrol, versionName); + + /* Find the pg_extension tuple */ + extRel = table_open(ExtensionRelationId, RowExclusiveLock); + + ScanKeyInit(&key[0], + Anum_pg_extension_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(extensionOid)); + + extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, + NULL, 1, key); + + extTup = systable_getnext(extScan); + + if (!HeapTupleIsValid(extTup)) /* should not happen */ + elog(ERROR, "could not find tuple for extension %u", + extensionOid); + + extForm = (Form_pg_extension) GETSTRUCT(extTup); + + /* + * Determine the target schema (set by original install) + */ + schemaOid = extForm->extnamespace; + schemaName = get_namespace_name(schemaOid); + + /* + * Modify extrelocatable and extversion in the pg_extension tuple + */ + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + memset(repl, 0, sizeof(repl)); + + values[Anum_pg_extension_extrelocatable - 1] = + BoolGetDatum(control->relocatable); + repl[Anum_pg_extension_extrelocatable - 1] = true; + values[Anum_pg_extension_extversion - 1] = + CStringGetTextDatum(versionName); + 
repl[Anum_pg_extension_extversion - 1] = true; + + extTup = heap_modify_tuple(extTup, RelationGetDescr(extRel), + values, nulls, repl); + + CatalogTupleUpdate(extRel, &extTup->t_self, extTup); + + systable_endscan(extScan); + + table_close(extRel, RowExclusiveLock); + + /* + * Look up the prerequisite extensions for this version, install them + * if necessary, and build lists of their OIDs and the OIDs of their + * target schemas. + */ + requiredExtensions = NIL; + requiredSchemas = NIL; + foreach(lc, control->requires) + { + char *curreq = (char *) lfirst(lc); + Oid reqext; + Oid reqschema; + + reqext = get_required_extension(curreq, + control->name, + origSchemaName, + cascade, + NIL, + is_create); + reqschema = get_extension_schema(reqext); + requiredExtensions = lappend_oid(requiredExtensions, reqext); + requiredSchemas = lappend_oid(requiredSchemas, reqschema); + } + + /* + * Remove and recreate dependencies on prerequisite extensions + */ + deleteDependencyRecordsForClass(ExtensionRelationId, extensionOid, + ExtensionRelationId, + DEPENDENCY_NORMAL); + + myself.classId = ExtensionRelationId; + myself.objectId = extensionOid; + myself.objectSubId = 0; + + foreach(lc, requiredExtensions) + { + Oid reqext = lfirst_oid(lc); + ObjectAddress otherext; + + otherext.classId = ExtensionRelationId; + otherext.objectId = reqext; + otherext.objectSubId = 0; + + recordDependencyOn(&myself, &otherext, DEPENDENCY_NORMAL); + } + + InvokeObjectPostAlterHook(ExtensionRelationId, extensionOid, 0); + + /* + * Finally, execute the update script file + */ + execute_extension_script(extensionOid, control, + oldVersionName, versionName, + requiredSchemas, + schemaName, schemaOid); + + /* + * Update prior-version name and loop around. Since + * execute_sql_string did a final CommandCounterIncrement, we can + * update the pg_extension row again. 
+ */ + oldVersionName = versionName; + } +} + +/* + * Execute ALTER EXTENSION ADD/DROP + * + * Return value is the address of the altered extension. + * + * objAddr is an output argument which, if not NULL, is set to the address of + * the added/dropped object. + */ +ObjectAddress +ExecAlterExtensionContentsStmt(AlterExtensionContentsStmt *stmt, + ObjectAddress *objAddr) +{ + ObjectAddress extension; + ObjectAddress object; + Relation relation; + Oid oldExtension; + + switch (stmt->objtype) + { + case OBJECT_DATABASE: + case OBJECT_EXTENSION: + case OBJECT_INDEX: + case OBJECT_PUBLICATION: + case OBJECT_ROLE: + case OBJECT_STATISTIC_EXT: + case OBJECT_SUBSCRIPTION: + case OBJECT_TABLESPACE: + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot add an object of this type to an extension"))); + break; + default: + /* OK */ + break; + } + + /* + * Find the extension and acquire a lock on it, to ensure it doesn't get + * dropped concurrently. A sharable lock seems sufficient: there's no + * reason not to allow other sorts of manipulations, such as add/drop of + * other objects, to occur concurrently. Concurrently adding/dropping the + * *same* object would be bad, but we prevent that by using a non-sharable + * lock on the individual object, below. + */ + extension = get_object_address(OBJECT_EXTENSION, + (Node *) makeString(stmt->extname), + &relation, AccessShareLock, false); + + /* Permission check: must own extension */ + if (!pg_extension_ownercheck(extension.objectId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_EXTENSION, + stmt->extname); + + /* + * Translate the parser representation that identifies the object into an + * ObjectAddress. get_object_address() will throw an error if the object + * does not exist, and will also acquire a lock on the object to guard + * against concurrent DROP and ALTER EXTENSION ADD/DROP operations. 
+ */ + object = get_object_address(stmt->objtype, stmt->object, + &relation, ShareUpdateExclusiveLock, false); + + Assert(object.objectSubId == 0); + if (objAddr) + *objAddr = object; + + /* Permission check: must own target object, too */ + check_object_ownership(GetUserId(), stmt->objtype, object, + stmt->object, relation); + + /* + * Check existing extension membership. + */ + oldExtension = getExtensionOfObject(object.classId, object.objectId); + + if (stmt->action > 0) + { + /* + * ADD, so complain if object is already attached to some extension. + */ + if (OidIsValid(oldExtension)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("%s is already a member of extension \"%s\"", + getObjectDescription(&object, false), + get_extension_name(oldExtension)))); + + /* + * Prevent a schema from being added to an extension if the schema + * contains the extension. That would create a dependency loop. + */ + if (object.classId == NamespaceRelationId && + object.objectId == get_extension_schema(extension.objectId)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot add schema \"%s\" to extension \"%s\" " + "because the schema contains the extension", + get_namespace_name(object.objectId), + stmt->extname))); + + /* + * OK, add the dependency. + */ + recordDependencyOn(&object, &extension, DEPENDENCY_EXTENSION); + + /* + * Also record the initial ACL on the object, if any. + * + * Note that this will handle the object's ACLs, as well as any ACLs + * on object subIds. (In other words, when the object is a table, + * this will record the table's ACL and the ACLs for the columns on + * the table, if any). + */ + recordExtObjInitPriv(object.objectId, object.classId); + } + else + { + /* + * DROP, so complain if it's not a member. 
+ */ + if (oldExtension != extension.objectId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("%s is not a member of extension \"%s\"", + getObjectDescription(&object, false), + stmt->extname))); + + /* + * OK, drop the dependency. + */ + if (deleteDependencyRecordsForClass(object.classId, object.objectId, + ExtensionRelationId, + DEPENDENCY_EXTENSION) != 1) + elog(ERROR, "unexpected number of extension dependency records"); + + /* + * If it's a relation, it might have an entry in the extension's + * extconfig array, which we must remove. + */ + if (object.classId == RelationRelationId) + extension_config_remove(extension.objectId, object.objectId); + + /* + * Remove all the initial ACLs, if any. + * + * Note that this will remove the object's ACLs, as well as any ACLs + * on object subIds. (In other words, when the object is a table, + * this will remove the table's ACL and the ACLs for the columns on + * the table, if any). + */ + removeExtObjInitPriv(object.objectId, object.classId); + } + + InvokeObjectPostAlterHook(ExtensionRelationId, extension.objectId, 0); + + /* + * If get_object_address() opened the relation for us, we close it to keep + * the reference count correct - but we retain any locks acquired by + * get_object_address() until commit time, to guard against concurrent + * activity. + */ + if (relation != NULL) + relation_close(relation, NoLock); + + return extension; +} + +/* + * Read the whole of file into memory. + * + * The file contents are returned as a single palloc'd chunk. For convenience + * of the callers, an extra \0 byte is added to the end. 
+ */ +static char * +read_whole_file(const char *filename, int *length) +{ + char *buf; + FILE *file; + size_t bytes_to_read; + struct stat fst; + + if (stat(filename, &fst) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", filename))); + + if (fst.st_size > (MaxAllocSize - 1)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("file \"%s\" is too large", filename))); + bytes_to_read = (size_t) fst.st_size; + + if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + filename))); + + buf = (char *) palloc(bytes_to_read + 1); + + *length = fread(buf, 1, bytes_to_read, file); + + if (ferror(file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + + FreeFile(file); + + buf[*length] = '\0'; + return buf; +} diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c new file mode 100644 index 0000000..91f4dd3 --- /dev/null +++ b/src/backend/commands/foreigncmds.c @@ -0,0 +1,1617 @@ +/*------------------------------------------------------------------------- + * + * foreigncmds.c + * foreign-data wrapper/server creation/manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/commands/foreigncmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include 
"catalog/pg_user_mapping.h" +#include "commands/defrem.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +typedef struct +{ + char *tablename; + char *cmd; +} import_error_callback_arg; + +/* Internal functions */ +static void import_error_callback(void *arg); + + +/* + * Convert a DefElem list to the text array format that is used in + * pg_foreign_data_wrapper, pg_foreign_server, pg_user_mapping, and + * pg_foreign_table. + * + * Returns the array in the form of a Datum, or PointerGetDatum(NULL) + * if the list is empty. + * + * Note: The array is usually stored to database without further + * processing, hence any validation should be done before this + * conversion. + */ +static Datum +optionListToArray(List *options) +{ + ArrayBuildState *astate = NULL; + ListCell *cell; + + foreach(cell, options) + { + DefElem *def = lfirst(cell); + const char *value; + Size len; + text *t; + + value = defGetString(def); + len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value); + t = palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "%s=%s", def->defname, value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + if (astate) + return makeArrayResult(astate, CurrentMemoryContext); + + return PointerGetDatum(NULL); +} + + +/* + * Transform a list of DefElem into text array format. This is substantially + * the same thing as optionListToArray(), except we recognize SET/ADD/DROP + * actions for modifying an existing list of options, which is passed in + * Datum form as oldOptions. Also, if fdwvalidator isn't InvalidOid + * it specifies a validator function to call on the result. + * + * Returns the array in the form of a Datum, or PointerGetDatum(NULL) + * if the list is empty. 
+ * + * This is used by CREATE/ALTER of FOREIGN DATA WRAPPER/SERVER/USER MAPPING/ + * FOREIGN TABLE. + */ +Datum +transformGenericOptions(Oid catalogId, + Datum oldOptions, + List *options, + Oid fdwvalidator) +{ + List *resultOptions = untransformRelOptions(oldOptions); + ListCell *optcell; + Datum result; + + foreach(optcell, options) + { + DefElem *od = lfirst(optcell); + ListCell *cell; + + /* + * Find the element in resultOptions. We need this for validation in + * all cases. + */ + foreach(cell, resultOptions) + { + DefElem *def = lfirst(cell); + + if (strcmp(def->defname, od->defname) == 0) + break; + } + + /* + * It is possible to perform multiple SET/DROP actions on the same + * option. The standard permits this, as long as the options to be + * added are unique. Note that an unspecified action is taken to be + * ADD. + */ + switch (od->defaction) + { + case DEFELEM_DROP: + if (!cell) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("option \"%s\" not found", + od->defname))); + resultOptions = list_delete_cell(resultOptions, cell); + break; + + case DEFELEM_SET: + if (!cell) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("option \"%s\" not found", + od->defname))); + lfirst(cell) = od; + break; + + case DEFELEM_ADD: + case DEFELEM_UNSPEC: + if (cell) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("option \"%s\" provided more than once", + od->defname))); + resultOptions = lappend(resultOptions, od); + break; + + default: + elog(ERROR, "unrecognized action %d on option \"%s\"", + (int) od->defaction, od->defname); + break; + } + } + + result = optionListToArray(resultOptions); + + if (OidIsValid(fdwvalidator)) + { + Datum valarg = result; + + /* + * Pass a null options list as an empty array, so that validators + * don't have to be declared non-strict to handle the case. 
+ */ + if (DatumGetPointer(valarg) == NULL) + valarg = PointerGetDatum(construct_empty_array(TEXTOID)); + OidFunctionCall2(fdwvalidator, valarg, ObjectIdGetDatum(catalogId)); + } + + return result; +} + + +/* + * Internal workhorse for changing a data wrapper's owner. + * + * Allow this only for superusers; also the new owner must be a + * superuser. + */ +static void +AlterForeignDataWrapperOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_foreign_data_wrapper form; + Datum repl_val[Natts_pg_foreign_data_wrapper]; + bool repl_null[Natts_pg_foreign_data_wrapper]; + bool repl_repl[Natts_pg_foreign_data_wrapper]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup); + + /* Must be a superuser to change a FDW owner */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of foreign-data wrapper \"%s\"", + NameStr(form->fdwname)), + errhint("Must be superuser to change owner of a foreign-data wrapper."))); + + /* New owner must also be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of foreign-data wrapper \"%s\"", + NameStr(form->fdwname)), + errhint("The owner of a foreign-data wrapper must be a superuser."))); + + if (form->fdwowner != newOwnerId) + { + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_foreign_data_wrapper_fdwowner - 1] = true; + repl_val[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + Anum_pg_foreign_data_wrapper_fdwacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + form->fdwowner, newOwnerId); + repl_repl[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true; + 
repl_val[Anum_pg_foreign_data_wrapper_fdwacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(ForeignDataWrapperRelationId, + form->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(ForeignDataWrapperRelationId, + form->oid, 0); +} + +/* + * Change foreign-data wrapper owner -- by name + * + * Note restrictions in the "_internal" function, above. + */ +ObjectAddress +AlterForeignDataWrapperOwner(const char *name, Oid newOwnerId) +{ + Oid fdwId; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_foreign_data_wrapper form; + + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper \"%s\" does not exist", name))); + + form = (Form_pg_foreign_data_wrapper) GETSTRUCT(tup); + fdwId = form->oid; + + AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, ForeignDataWrapperRelationId, fdwId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change foreign-data wrapper owner -- by OID + * + * Note restrictions in the "_internal" function, above. 
+ */ +void +AlterForeignDataWrapperOwner_oid(Oid fwdId, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fwdId)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper with OID %u does not exist", fwdId))); + + AlterForeignDataWrapperOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Internal workhorse for changing a foreign server's owner + */ +static void +AlterForeignServerOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_foreign_server form; + Datum repl_val[Natts_pg_foreign_server]; + bool repl_null[Natts_pg_foreign_server]; + bool repl_repl[Natts_pg_foreign_server]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + form = (Form_pg_foreign_server) GETSTRUCT(tup); + + if (form->srvowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + Oid srvId; + AclResult aclresult; + + srvId = form->oid; + + /* Must be owner */ + if (!pg_foreign_server_ownercheck(srvId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER, + NameStr(form->srvname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have USAGE privilege on foreign-data wrapper */ + aclresult = pg_foreign_data_wrapper_aclcheck(form->srvfdw, newOwnerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + { + ForeignDataWrapper *fdw = GetForeignDataWrapper(form->srvfdw); + + aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname); + } + } + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_foreign_server_srvowner - 1] = true; + repl_val[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + 
Anum_pg_foreign_server_srvacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + form->srvowner, newOwnerId); + repl_repl[Anum_pg_foreign_server_srvacl - 1] = true; + repl_val[Anum_pg_foreign_server_srvacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(ForeignServerRelationId, form->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(ForeignServerRelationId, + form->oid, 0); +} + +/* + * Change foreign server owner -- by name + */ +ObjectAddress +AlterForeignServerOwner(const char *name, Oid newOwnerId) +{ + Oid servOid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_foreign_server form; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNSERVERNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("server \"%s\" does not exist", name))); + + form = (Form_pg_foreign_server) GETSTRUCT(tup); + servOid = form->oid; + + AlterForeignServerOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, ForeignServerRelationId, servOid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change foreign server owner -- by OID + */ +void +AlterForeignServerOwner_oid(Oid srvId, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(FOREIGNSERVEROID, ObjectIdGetDatum(srvId)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign server with OID %u does not exist", srvId))); + + AlterForeignServerOwner_internal(rel, tup, newOwnerId); + + 
heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Convert a handler function name passed from the parser to an Oid. + */ +static Oid +lookup_fdw_handler_func(DefElem *handler) +{ + Oid handlerOid; + + if (handler == NULL || handler->arg == NULL) + return InvalidOid; + + /* handlers have no arguments */ + handlerOid = LookupFuncName((List *) handler->arg, 0, NULL, false); + + /* check that handler has correct return type */ + if (get_func_rettype(handlerOid) != FDW_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + NameListToString((List *) handler->arg), "fdw_handler"))); + + return handlerOid; +} + +/* + * Convert a validator function name passed from the parser to an Oid. + */ +static Oid +lookup_fdw_validator_func(DefElem *validator) +{ + Oid funcargtypes[2]; + + if (validator == NULL || validator->arg == NULL) + return InvalidOid; + + /* validators take text[], oid */ + funcargtypes[0] = TEXTARRAYOID; + funcargtypes[1] = OIDOID; + + return LookupFuncName((List *) validator->arg, 2, funcargtypes, false); + /* validator's return value is ignored, so we don't check the type */ +} + +/* + * Process function options of CREATE/ALTER FDW + */ +static void +parse_func_options(ParseState *pstate, List *func_options, + bool *handler_given, Oid *fdwhandler, + bool *validator_given, Oid *fdwvalidator) +{ + ListCell *cell; + + *handler_given = false; + *validator_given = false; + /* return InvalidOid if not given */ + *fdwhandler = InvalidOid; + *fdwvalidator = InvalidOid; + + foreach(cell, func_options) + { + DefElem *def = (DefElem *) lfirst(cell); + + if (strcmp(def->defname, "handler") == 0) + { + if (*handler_given) + errorConflictingDefElem(def, pstate); + *handler_given = true; + *fdwhandler = lookup_fdw_handler_func(def); + } + else if (strcmp(def->defname, "validator") == 0) + { + if (*validator_given) + errorConflictingDefElem(def, pstate); + *validator_given = true; + 
*fdwvalidator = lookup_fdw_validator_func(def); + } + else + elog(ERROR, "option \"%s\" not recognized", + def->defname); + } +} + +/* + * Create a foreign-data wrapper + */ +ObjectAddress +CreateForeignDataWrapper(ParseState *pstate, CreateFdwStmt *stmt) +{ + Relation rel; + Datum values[Natts_pg_foreign_data_wrapper]; + bool nulls[Natts_pg_foreign_data_wrapper]; + HeapTuple tuple; + Oid fdwId; + bool handler_given; + bool validator_given; + Oid fdwhandler; + Oid fdwvalidator; + Datum fdwoptions; + Oid ownerId; + ObjectAddress myself; + ObjectAddress referenced; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create foreign-data wrapper \"%s\"", + stmt->fdwname), + errhint("Must be superuser to create a foreign-data wrapper."))); + + /* For now the owner cannot be specified on create. Use effective user ID. */ + ownerId = GetUserId(); + + /* + * Check that there is no other foreign-data wrapper by this name. + */ + if (GetForeignDataWrapperByName(stmt->fdwname, true) != NULL) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("foreign-data wrapper \"%s\" already exists", + stmt->fdwname))); + + /* + * Insert tuple into pg_foreign_data_wrapper. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + fdwId = GetNewOidWithIndex(rel, ForeignDataWrapperOidIndexId, + Anum_pg_foreign_data_wrapper_oid); + values[Anum_pg_foreign_data_wrapper_oid - 1] = ObjectIdGetDatum(fdwId); + values[Anum_pg_foreign_data_wrapper_fdwname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->fdwname)); + values[Anum_pg_foreign_data_wrapper_fdwowner - 1] = ObjectIdGetDatum(ownerId); + + /* Lookup handler and validator functions, if given */ + parse_func_options(pstate, stmt->func_options, + &handler_given, &fdwhandler, + &validator_given, &fdwvalidator); + + values[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler); + values[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator); + + nulls[Anum_pg_foreign_data_wrapper_fdwacl - 1] = true; + + fdwoptions = transformGenericOptions(ForeignDataWrapperRelationId, + PointerGetDatum(NULL), + stmt->options, + fdwvalidator); + + if (PointerIsValid(DatumGetPointer(fdwoptions))) + values[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = fdwoptions; + else + nulls[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + + tuple = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tuple); + + heap_freetuple(tuple); + + /* record dependencies */ + myself.classId = ForeignDataWrapperRelationId; + myself.objectId = fdwId; + myself.objectSubId = 0; + + if (OidIsValid(fdwhandler)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwhandler; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + if (OidIsValid(fdwvalidator)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwvalidator; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + recordDependencyOnOwner(ForeignDataWrapperRelationId, fdwId, ownerId); + + /* dependency on extension */ + 
recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new foreign data wrapper */ + InvokeObjectPostCreateHook(ForeignDataWrapperRelationId, fdwId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Alter foreign-data wrapper + */ +ObjectAddress +AlterForeignDataWrapper(ParseState *pstate, AlterFdwStmt *stmt) +{ + Relation rel; + HeapTuple tp; + Form_pg_foreign_data_wrapper fdwForm; + Datum repl_val[Natts_pg_foreign_data_wrapper]; + bool repl_null[Natts_pg_foreign_data_wrapper]; + bool repl_repl[Natts_pg_foreign_data_wrapper]; + Oid fdwId; + bool isnull; + Datum datum; + bool handler_given; + bool validator_given; + Oid fdwhandler; + Oid fdwvalidator; + ObjectAddress myself; + + rel = table_open(ForeignDataWrapperRelationId, RowExclusiveLock); + + /* Must be superuser */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to alter foreign-data wrapper \"%s\"", + stmt->fdwname), + errhint("Must be superuser to alter a foreign-data wrapper."))); + + tp = SearchSysCacheCopy1(FOREIGNDATAWRAPPERNAME, + CStringGetDatum(stmt->fdwname)); + + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign-data wrapper \"%s\" does not exist", stmt->fdwname))); + + fdwForm = (Form_pg_foreign_data_wrapper) GETSTRUCT(tp); + fdwId = fdwForm->oid; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + parse_func_options(pstate, stmt->func_options, + &handler_given, &fdwhandler, + &validator_given, &fdwvalidator); + + if (handler_given) + { + repl_val[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = ObjectIdGetDatum(fdwhandler); + repl_repl[Anum_pg_foreign_data_wrapper_fdwhandler - 1] = true; + + /* + * It could be that the behavior of accessing foreign table changes + * with the new handler. Warn about this. 
+ */ + ereport(WARNING, + (errmsg("changing the foreign-data wrapper handler can change behavior of existing foreign tables"))); + } + + if (validator_given) + { + repl_val[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = ObjectIdGetDatum(fdwvalidator); + repl_repl[Anum_pg_foreign_data_wrapper_fdwvalidator - 1] = true; + + /* + * It could be that existing options for the FDW or dependent SERVER, + * USER MAPPING or FOREIGN TABLE objects are no longer valid according + * to the new validator. Warn about this. + */ + if (OidIsValid(fdwvalidator)) + ereport(WARNING, + (errmsg("changing the foreign-data wrapper validator can cause " + "the options for dependent objects to become invalid"))); + } + else + { + /* + * Validator is not changed, but we need it for validating options. + */ + fdwvalidator = fdwForm->fdwvalidator; + } + + /* + * If options specified, validate and update. + */ + if (stmt->options) + { + /* Extract the current options */ + datum = SysCacheGetAttr(FOREIGNDATAWRAPPEROID, + tp, + Anum_pg_foreign_data_wrapper_fdwoptions, + &isnull); + if (isnull) + datum = PointerGetDatum(NULL); + + /* Transform the options */ + datum = transformGenericOptions(ForeignDataWrapperRelationId, + datum, + stmt->options, + fdwvalidator); + + if (PointerIsValid(DatumGetPointer(datum))) + repl_val[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = datum; + else + repl_null[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + + repl_repl[Anum_pg_foreign_data_wrapper_fdwoptions - 1] = true; + } + + /* Everything looks good - update the tuple */ + tp = heap_modify_tuple(tp, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(rel, &tp->t_self, tp); + + heap_freetuple(tp); + + ObjectAddressSet(myself, ForeignDataWrapperRelationId, fdwId); + + /* Update function dependencies if we changed them */ + if (handler_given || validator_given) + { + ObjectAddress referenced; + + /* + * Flush all existing dependency records of this FDW on functions; we + * 
assume there can be none other than the ones we are fixing. + */ + deleteDependencyRecordsForClass(ForeignDataWrapperRelationId, + fdwId, + ProcedureRelationId, + DEPENDENCY_NORMAL); + + /* And build new ones. */ + + if (OidIsValid(fdwhandler)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwhandler; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + if (OidIsValid(fdwvalidator)) + { + referenced.classId = ProcedureRelationId; + referenced.objectId = fdwvalidator; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + } + + InvokeObjectPostAlterHook(ForeignDataWrapperRelationId, fdwId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Create a foreign server + */ +ObjectAddress +CreateForeignServer(CreateForeignServerStmt *stmt) +{ + Relation rel; + Datum srvoptions; + Datum values[Natts_pg_foreign_server]; + bool nulls[Natts_pg_foreign_server]; + HeapTuple tuple; + Oid srvId; + Oid ownerId; + AclResult aclresult; + ObjectAddress myself; + ObjectAddress referenced; + ForeignDataWrapper *fdw; + + rel = table_open(ForeignServerRelationId, RowExclusiveLock); + + /* For now the owner cannot be specified on create. Use effective user ID. */ + ownerId = GetUserId(); + + /* + * Check that there is no other foreign server by this name. If there is + * one, do nothing if IF NOT EXISTS was specified. + */ + srvId = get_foreign_server_oid(stmt->servername, true); + if (OidIsValid(srvId)) + { + if (stmt->if_not_exists) + { + /* + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. 
+ */ + ObjectAddressSet(myself, ForeignServerRelationId, srvId); + checkMembershipInCurrentExtension(&myself); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("server \"%s\" already exists, skipping", + stmt->servername))); + table_close(rel, RowExclusiveLock); + return InvalidObjectAddress; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("server \"%s\" already exists", + stmt->servername))); + } + + /* + * Check that the FDW exists and that we have USAGE on it. Also get the + * actual FDW for option validation etc. + */ + fdw = GetForeignDataWrapperByName(stmt->fdwname, false); + + aclresult = pg_foreign_data_wrapper_aclcheck(fdw->fdwid, ownerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FDW, fdw->fdwname); + + /* + * Insert tuple into pg_foreign_server. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + srvId = GetNewOidWithIndex(rel, ForeignServerOidIndexId, + Anum_pg_foreign_server_oid); + values[Anum_pg_foreign_server_oid - 1] = ObjectIdGetDatum(srvId); + values[Anum_pg_foreign_server_srvname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->servername)); + values[Anum_pg_foreign_server_srvowner - 1] = ObjectIdGetDatum(ownerId); + values[Anum_pg_foreign_server_srvfdw - 1] = ObjectIdGetDatum(fdw->fdwid); + + /* Add server type if supplied */ + if (stmt->servertype) + values[Anum_pg_foreign_server_srvtype - 1] = + CStringGetTextDatum(stmt->servertype); + else + nulls[Anum_pg_foreign_server_srvtype - 1] = true; + + /* Add server version if supplied */ + if (stmt->version) + values[Anum_pg_foreign_server_srvversion - 1] = + CStringGetTextDatum(stmt->version); + else + nulls[Anum_pg_foreign_server_srvversion - 1] = true; + + /* Start with a blank acl */ + nulls[Anum_pg_foreign_server_srvacl - 1] = true; + + /* Add server options */ + srvoptions = transformGenericOptions(ForeignServerRelationId, + PointerGetDatum(NULL), + 
										 stmt->options,
										 fdw->fdwvalidator);

	/* A null Datum from transformGenericOptions means "no options". */
	if (PointerIsValid(DatumGetPointer(srvoptions)))
		values[Anum_pg_foreign_server_srvoptions - 1] = srvoptions;
	else
		nulls[Anum_pg_foreign_server_srvoptions - 1] = true;

	tuple = heap_form_tuple(rel->rd_att, values, nulls);

	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);

	/* record dependencies */
	myself.classId = ForeignServerRelationId;
	myself.objectId = srvId;
	myself.objectSubId = 0;

	/* the server depends on its foreign-data wrapper */
	referenced.classId = ForeignDataWrapperRelationId;
	referenced.objectId = fdw->fdwid;
	referenced.objectSubId = 0;
	recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);

	recordDependencyOnOwner(ForeignServerRelationId, srvId, ownerId);

	/* dependency on extension */
	recordDependencyOnCurrentExtension(&myself, false);

	/* Post creation hook for new foreign server */
	InvokeObjectPostCreateHook(ForeignServerRelationId, srvId, 0);

	table_close(rel, RowExclusiveLock);

	return myself;
}


/*
 * Alter foreign server
 *
 * Handles ALTER SERVER ... VERSION and/or OPTIONS.  Only the server's
 * owner (per pg_foreign_server_ownercheck, which also passes superusers)
 * may alter it.  Returns the object address of the altered server;
 * errors out if the server does not exist.
 */
ObjectAddress
AlterForeignServer(AlterForeignServerStmt *stmt)
{
	Relation	rel;
	HeapTuple	tp;
	Datum		repl_val[Natts_pg_foreign_server];
	bool		repl_null[Natts_pg_foreign_server];
	bool		repl_repl[Natts_pg_foreign_server];
	Oid			srvId;
	Form_pg_foreign_server srvForm;
	ObjectAddress address;

	rel = table_open(ForeignServerRelationId, RowExclusiveLock);

	/* Copy the tuple so heap_modify_tuple below works on private storage */
	tp = SearchSysCacheCopy1(FOREIGNSERVERNAME,
							 CStringGetDatum(stmt->servername));

	if (!HeapTupleIsValid(tp))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("server \"%s\" does not exist", stmt->servername)));

	srvForm = (Form_pg_foreign_server) GETSTRUCT(tp);
	srvId = srvForm->oid;

	/*
	 * Only owner or a superuser can ALTER a SERVER.
	 */
	if (!pg_foreign_server_ownercheck(srvId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER,
					   stmt->servername);

	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	if (stmt->has_version)
	{
		/*
		 * Change the server VERSION string.  A NULL stmt->version (DROP
		 * VERSION form) nulls out the attribute.
		 */
		if (stmt->version)
			repl_val[Anum_pg_foreign_server_srvversion - 1] =
				CStringGetTextDatum(stmt->version);
		else
			repl_null[Anum_pg_foreign_server_srvversion - 1] = true;

		repl_repl[Anum_pg_foreign_server_srvversion - 1] = true;
	}

	if (stmt->options)
	{
		/* validate new options with the FDW's validator function */
		ForeignDataWrapper *fdw = GetForeignDataWrapper(srvForm->srvfdw);
		Datum		datum;
		bool		isnull;

		/* Extract the current srvoptions */
		datum = SysCacheGetAttr(FOREIGNSERVEROID,
								tp,
								Anum_pg_foreign_server_srvoptions,
								&isnull);
		if (isnull)
			datum = PointerGetDatum(NULL);	/* treat NULL as empty option list */

		/* Prepare the options array */
		datum = transformGenericOptions(ForeignServerRelationId,
										datum,
										stmt->options,
										fdw->fdwvalidator);

		if (PointerIsValid(DatumGetPointer(datum)))
			repl_val[Anum_pg_foreign_server_srvoptions - 1] = datum;
		else
			repl_null[Anum_pg_foreign_server_srvoptions - 1] = true;

		repl_repl[Anum_pg_foreign_server_srvoptions - 1] = true;
	}

	/* Everything looks good - update the tuple */
	tp = heap_modify_tuple(tp, RelationGetDescr(rel),
						   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(rel, &tp->t_self, tp);

	InvokeObjectPostAlterHook(ForeignServerRelationId, srvId, 0);

	ObjectAddressSet(address, ForeignServerRelationId, srvId);

	heap_freetuple(tp);

	table_close(rel, RowExclusiveLock);

	return address;
}


/*
 * Common routine to check permission for user-mapping-related DDL
 * commands.  We allow server owners to operate on any mapping, and
 * users to operate on their own mapping.
+ */ +static void +user_mapping_ddl_aclcheck(Oid umuserid, Oid serverid, const char *servername) +{ + Oid curuserid = GetUserId(); + + if (!pg_foreign_server_ownercheck(serverid, curuserid)) + { + if (umuserid == curuserid) + { + AclResult aclresult; + + aclresult = pg_foreign_server_aclcheck(serverid, curuserid, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, servername); + } + else + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FOREIGN_SERVER, + servername); + } +} + + +/* + * Create user mapping + */ +ObjectAddress +CreateUserMapping(CreateUserMappingStmt *stmt) +{ + Relation rel; + Datum useoptions; + Datum values[Natts_pg_user_mapping]; + bool nulls[Natts_pg_user_mapping]; + HeapTuple tuple; + Oid useId; + Oid umId; + ObjectAddress myself; + ObjectAddress referenced; + ForeignServer *srv; + ForeignDataWrapper *fdw; + RoleSpec *role = (RoleSpec *) stmt->user; + + rel = table_open(UserMappingRelationId, RowExclusiveLock); + + if (role->roletype == ROLESPEC_PUBLIC) + useId = ACL_ID_PUBLIC; + else + useId = get_rolespec_oid(stmt->user, false); + + /* Check that the server exists. */ + srv = GetForeignServerByName(stmt->servername, false); + + user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername); + + /* + * Check that the user mapping is unique within server. + */ + umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid, + ObjectIdGetDatum(useId), + ObjectIdGetDatum(srv->serverid)); + + if (OidIsValid(umId)) + { + if (stmt->if_not_exists) + { + /* + * Since user mappings aren't members of extensions (see comments + * below), no need for checkMembershipInCurrentExtension here. 
+ */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("user mapping for \"%s\" already exists for server \"%s\", skipping", + MappingUserName(useId), + stmt->servername))); + + table_close(rel, RowExclusiveLock); + return InvalidObjectAddress; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("user mapping for \"%s\" already exists for server \"%s\"", + MappingUserName(useId), + stmt->servername))); + } + + fdw = GetForeignDataWrapper(srv->fdwid); + + /* + * Insert tuple into pg_user_mapping. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + umId = GetNewOidWithIndex(rel, UserMappingOidIndexId, + Anum_pg_user_mapping_oid); + values[Anum_pg_user_mapping_oid - 1] = ObjectIdGetDatum(umId); + values[Anum_pg_user_mapping_umuser - 1] = ObjectIdGetDatum(useId); + values[Anum_pg_user_mapping_umserver - 1] = ObjectIdGetDatum(srv->serverid); + + /* Add user options */ + useoptions = transformGenericOptions(UserMappingRelationId, + PointerGetDatum(NULL), + stmt->options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(useoptions))) + values[Anum_pg_user_mapping_umoptions - 1] = useoptions; + else + nulls[Anum_pg_user_mapping_umoptions - 1] = true; + + tuple = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tuple); + + heap_freetuple(tuple); + + /* Add dependency on the server */ + myself.classId = UserMappingRelationId; + myself.objectId = umId; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = srv->serverid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (OidIsValid(useId)) + { + /* Record the mapped user dependency */ + recordDependencyOnOwner(UserMappingRelationId, umId, useId); + } + + /* + * Perhaps someday there should be a recordDependencyOnCurrentExtension + * call here; but since roles aren't members of extensions, it seems like + * user mappings 
shouldn't be either. Note that the grammar and pg_dump + * would need to be extended too if we change this. + */ + + /* Post creation hook for new user mapping */ + InvokeObjectPostCreateHook(UserMappingRelationId, umId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * Alter user mapping + */ +ObjectAddress +AlterUserMapping(AlterUserMappingStmt *stmt) +{ + Relation rel; + HeapTuple tp; + Datum repl_val[Natts_pg_user_mapping]; + bool repl_null[Natts_pg_user_mapping]; + bool repl_repl[Natts_pg_user_mapping]; + Oid useId; + Oid umId; + ForeignServer *srv; + ObjectAddress address; + RoleSpec *role = (RoleSpec *) stmt->user; + + rel = table_open(UserMappingRelationId, RowExclusiveLock); + + if (role->roletype == ROLESPEC_PUBLIC) + useId = ACL_ID_PUBLIC; + else + useId = get_rolespec_oid(stmt->user, false); + + srv = GetForeignServerByName(stmt->servername, false); + + umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid, + ObjectIdGetDatum(useId), + ObjectIdGetDatum(srv->serverid)); + if (!OidIsValid(umId)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("user mapping for \"%s\" does not exist for server \"%s\"", + MappingUserName(useId), stmt->servername))); + + user_mapping_ddl_aclcheck(useId, srv->serverid, stmt->servername); + + tp = SearchSysCacheCopy1(USERMAPPINGOID, ObjectIdGetDatum(umId)); + + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for user mapping %u", umId); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + if (stmt->options) + { + ForeignDataWrapper *fdw; + Datum datum; + bool isnull; + + /* + * Process the options. 
	 */

		fdw = GetForeignDataWrapper(srv->fdwid);

		/* Extract the current umoptions */
		datum = SysCacheGetAttr(USERMAPPINGUSERSERVER,
								tp,
								Anum_pg_user_mapping_umoptions,
								&isnull);
		if (isnull)
			datum = PointerGetDatum(NULL);	/* treat NULL as empty option list */

		/* Prepare the options array */
		datum = transformGenericOptions(UserMappingRelationId,
										datum,
										stmt->options,
										fdw->fdwvalidator);

		if (PointerIsValid(DatumGetPointer(datum)))
			repl_val[Anum_pg_user_mapping_umoptions - 1] = datum;
		else
			repl_null[Anum_pg_user_mapping_umoptions - 1] = true;

		repl_repl[Anum_pg_user_mapping_umoptions - 1] = true;
	}

	/* Everything looks good - update the tuple */
	tp = heap_modify_tuple(tp, RelationGetDescr(rel),
						   repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(rel, &tp->t_self, tp);

	InvokeObjectPostAlterHook(UserMappingRelationId,
							  umId, 0);

	ObjectAddressSet(address, UserMappingRelationId, umId);

	heap_freetuple(tp);

	table_close(rel, RowExclusiveLock);

	return address;
}


/*
 * Drop user mapping
 *
 * Returns the OID of the mapping that was removed, or InvalidOid when
 * IF EXISTS was given and the role, server, or mapping was not found.
 */
Oid
RemoveUserMapping(DropUserMappingStmt *stmt)
{
	ObjectAddress object;
	Oid			useId;
	Oid			umId;
	ForeignServer *srv;
	RoleSpec   *role = (RoleSpec *) stmt->user;

	if (role->roletype == ROLESPEC_PUBLIC)
		useId = ACL_ID_PUBLIC;
	else
	{
		useId = get_rolespec_oid(stmt->user, stmt->missing_ok);
		if (!OidIsValid(useId))
		{
			/*
			 * IF EXISTS specified, role not found and not public. Notice this
			 * and leave.
			 */
			elog(NOTICE, "role \"%s\" does not exist, skipping",
				 role->rolename);
			return InvalidOid;
		}
	}

	srv = GetForeignServerByName(stmt->servername, true);

	if (!srv)
	{
		if (!stmt->missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("server \"%s\" does not exist",
							stmt->servername)));
		/* IF EXISTS, just note it */
		ereport(NOTICE,
				(errmsg("server \"%s\" does not exist, skipping",
						stmt->servername)));
		return InvalidOid;
	}

	/* Look up the mapping for this (user, server) pair */
	umId = GetSysCacheOid2(USERMAPPINGUSERSERVER, Anum_pg_user_mapping_oid,
						   ObjectIdGetDatum(useId),
						   ObjectIdGetDatum(srv->serverid));

	if (!OidIsValid(umId))
	{
		if (!stmt->missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("user mapping for \"%s\" does not exist for server \"%s\"",
							MappingUserName(useId), stmt->servername)));

		/* IF EXISTS specified, just note it */
		ereport(NOTICE,
				(errmsg("user mapping for \"%s\" does not exist for server \"%s\", skipping",
						MappingUserName(useId), stmt->servername)));
		return InvalidOid;
	}

	user_mapping_ddl_aclcheck(useId, srv->serverid, srv->servername);

	/*
	 * Do the deletion
	 */
	object.classId = UserMappingRelationId;
	object.objectId = umId;
	object.objectSubId = 0;

	performDeletion(&object, DROP_CASCADE, 0);

	return umId;
}


/*
 * Create a foreign table
 * call after DefineRelation().
 */
void
CreateForeignTable(CreateForeignTableStmt *stmt, Oid relid)
{
	Relation	ftrel;
	Datum		ftoptions;
	Datum		values[Natts_pg_foreign_table];
	bool		nulls[Natts_pg_foreign_table];
	HeapTuple	tuple;
	AclResult	aclresult;
	ObjectAddress myself;
	ObjectAddress referenced;
	Oid			ownerId;
	ForeignDataWrapper *fdw;
	ForeignServer *server;

	/*
	 * Advance command counter to ensure the pg_attribute tuple is visible;
	 * the tuple might be updated to add constraints in previous step.
+ */ + CommandCounterIncrement(); + + ftrel = table_open(ForeignTableRelationId, RowExclusiveLock); + + /* + * For now the owner cannot be specified on create. Use effective user ID. + */ + ownerId = GetUserId(); + + /* + * Check that the foreign server exists and that we have USAGE on it. Also + * get the actual FDW for option validation etc. + */ + server = GetForeignServerByName(stmt->servername, false); + aclresult = pg_foreign_server_aclcheck(server->serverid, ownerId, ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername); + + fdw = GetForeignDataWrapper(server->fdwid); + + /* + * Insert tuple into pg_foreign_table. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_foreign_table_ftrelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_foreign_table_ftserver - 1] = ObjectIdGetDatum(server->serverid); + /* Add table generic options */ + ftoptions = transformGenericOptions(ForeignTableRelationId, + PointerGetDatum(NULL), + stmt->options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(ftoptions))) + values[Anum_pg_foreign_table_ftoptions - 1] = ftoptions; + else + nulls[Anum_pg_foreign_table_ftoptions - 1] = true; + + tuple = heap_form_tuple(ftrel->rd_att, values, nulls); + + CatalogTupleInsert(ftrel, tuple); + + heap_freetuple(tuple); + + /* Add pg_class dependency on the server */ + myself.classId = RelationRelationId; + myself.objectId = relid; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = server->serverid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + table_close(ftrel, RowExclusiveLock); +} + +/* + * Import a foreign schema + */ +void +ImportForeignSchema(ImportForeignSchemaStmt *stmt) +{ + ForeignServer *server; + ForeignDataWrapper *fdw; + FdwRoutine *fdw_routine; + AclResult aclresult; + List *cmd_list; + ListCell *lc; + + /* 
Check that the foreign server exists and that we have USAGE on it */ + server = GetForeignServerByName(stmt->server_name, false); + aclresult = pg_foreign_server_aclcheck(server->serverid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, server->servername); + + /* Check that the schema exists and we have CREATE permissions on it */ + (void) LookupCreationNamespace(stmt->local_schema); + + /* Get the FDW and check it supports IMPORT */ + fdw = GetForeignDataWrapper(server->fdwid); + if (!OidIsValid(fdw->fdwhandler)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("foreign-data wrapper \"%s\" has no handler", + fdw->fdwname))); + fdw_routine = GetFdwRoutine(fdw->fdwhandler); + if (fdw_routine->ImportForeignSchema == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_NO_SCHEMAS), + errmsg("foreign-data wrapper \"%s\" does not support IMPORT FOREIGN SCHEMA", + fdw->fdwname))); + + /* Call FDW to get a list of commands */ + cmd_list = fdw_routine->ImportForeignSchema(stmt, server->serverid); + + /* Parse and execute each command */ + foreach(lc, cmd_list) + { + char *cmd = (char *) lfirst(lc); + import_error_callback_arg callback_arg; + ErrorContextCallback sqlerrcontext; + List *raw_parsetree_list; + ListCell *lc2; + + /* + * Setup error traceback support for ereport(). This is so that any + * error in the generated SQL will be displayed nicely. + */ + callback_arg.tablename = NULL; /* not known yet */ + callback_arg.cmd = cmd; + sqlerrcontext.callback = import_error_callback; + sqlerrcontext.arg = (void *) &callback_arg; + sqlerrcontext.previous = error_context_stack; + error_context_stack = &sqlerrcontext; + + /* + * Parse the SQL string into a list of raw parse trees. + */ + raw_parsetree_list = pg_parse_query(cmd); + + /* + * Process each parse tree (we allow the FDW to put more than one + * command per string, though this isn't really advised). 
+ */ + foreach(lc2, raw_parsetree_list) + { + RawStmt *rs = lfirst_node(RawStmt, lc2); + CreateForeignTableStmt *cstmt = (CreateForeignTableStmt *) rs->stmt; + PlannedStmt *pstmt; + + /* + * Because we only allow CreateForeignTableStmt, we can skip parse + * analysis, rewrite, and planning steps here. + */ + if (!IsA(cstmt, CreateForeignTableStmt)) + elog(ERROR, + "foreign-data wrapper \"%s\" returned incorrect statement type %d", + fdw->fdwname, (int) nodeTag(cstmt)); + + /* Ignore commands for tables excluded by filter options */ + if (!IsImportableForeignTable(cstmt->base.relation->relname, stmt)) + continue; + + /* Enable reporting of current table's name on error */ + callback_arg.tablename = cstmt->base.relation->relname; + + /* Ensure creation schema is the one given in IMPORT statement */ + cstmt->base.relation->schemaname = pstrdup(stmt->local_schema); + + /* No planning needed, just make a wrapper PlannedStmt */ + pstmt = makeNode(PlannedStmt); + pstmt->commandType = CMD_UTILITY; + pstmt->canSetTag = false; + pstmt->utilityStmt = (Node *) cstmt; + pstmt->stmt_location = rs->stmt_location; + pstmt->stmt_len = rs->stmt_len; + + /* Execute statement */ + ProcessUtility(pstmt, cmd, false, + PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, + None_Receiver, NULL); + + /* Be sure to advance the command counter between subcommands */ + CommandCounterIncrement(); + + callback_arg.tablename = NULL; + } + + error_context_stack = sqlerrcontext.previous; + } +} + +/* + * error context callback to let us supply the failing SQL statement's text + */ +static void +import_error_callback(void *arg) +{ + import_error_callback_arg *callback_arg = (import_error_callback_arg *) arg; + int syntaxerrposition; + + /* If it's a syntax error, convert to internal syntax error report */ + syntaxerrposition = geterrposition(); + if (syntaxerrposition > 0) + { + errposition(0); + internalerrposition(syntaxerrposition); + internalerrquery(callback_arg->cmd); + } + + if (callback_arg->tablename) 
+ errcontext("importing foreign table \"%s\"", + callback_arg->tablename); +} diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c new file mode 100644 index 0000000..00a6d28 --- /dev/null +++ b/src/backend/commands/functioncmds.c @@ -0,0 +1,2374 @@ +/*------------------------------------------------------------------------- + * + * functioncmds.c + * + * Routines for CREATE and DROP FUNCTION commands and CREATE and DROP + * CAST commands. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/functioncmds.c + * + * DESCRIPTION + * These routines take the parse tree and pick out the + * appropriate arguments/flags, and pass the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send procedures + * "create type": + * type + * "create operator": + * operators + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_transform.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "commands/extension.h" +#include "commands/proclang.h" +#include "executor/execdesc.h" +#include "executor/executor.h" +#include "executor/functions.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "parser/analyze.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "pgstat.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* + * Examine the RETURNS clause of the CREATE FUNCTION statement + * and return information about it as *prorettype_p and *returnsSet. + * + * This is more complex than the average typename lookup because we want to + * allow a shell type to be used, or even created if the specified return type + * doesn't exist yet. 
(Without this, there's no way to define the I/O procs + * for a new type.) But SQL function creation won't cope, so error out if + * the target language is SQL. (We do this here, not in the SQL-function + * validator, so as not to produce a NOTICE and then an ERROR for the same + * condition.) + */ +static void +compute_return_type(TypeName *returnType, Oid languageOid, + Oid *prorettype_p, bool *returnsSet_p) +{ + Oid rettype; + Type typtup; + AclResult aclresult; + + typtup = LookupTypeName(NULL, returnType, NULL, false); + + if (typtup) + { + if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined) + { + if (languageOid == SQLlanguageId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("SQL function cannot return shell type %s", + TypeNameToString(returnType)))); + else + ereport(NOTICE, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("return type %s is only a shell", + TypeNameToString(returnType)))); + } + rettype = typeTypeId(typtup); + ReleaseSysCache(typtup); + } + else + { + char *typnam = TypeNameToString(returnType); + Oid namespaceId; + AclResult aclresult; + char *typname; + ObjectAddress address; + + /* + * Only C-coded functions can be I/O functions. We enforce this + * restriction here mainly to prevent littering the catalogs with + * shell types due to simple typos in user-defined function + * definitions. 
+ */ + if (languageOid != INTERNALlanguageId && + languageOid != ClanguageId) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" does not exist", typnam))); + + /* Reject if there's typmod decoration, too */ + if (returnType->typmods != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type modifier cannot be specified for shell type \"%s\"", + typnam))); + + /* Otherwise, go ahead and make a shell type */ + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" is not yet defined", typnam), + errdetail("Creating a shell type definition."))); + namespaceId = QualifiedNameGetCreationNamespace(returnType->names, + &typname); + aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + address = TypeShellMake(typname, namespaceId, GetUserId()); + rettype = address.objectId; + Assert(OidIsValid(rettype)); + } + + aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, rettype); + + *prorettype_p = rettype; + *returnsSet_p = returnType->setof; +} + +/* + * Interpret the function parameter list of a CREATE FUNCTION, + * CREATE PROCEDURE, or CREATE AGGREGATE statement. + * + * Input parameters: + * parameters: list of FunctionParameter structs + * languageOid: OID of function language (InvalidOid if it's CREATE AGGREGATE) + * objtype: identifies type of object being created + * + * Results are stored into output parameters. parameterTypes must always + * be created, but the other arrays/lists can be NULL pointers if not needed. + * variadicArgType is set to the variadic array type if there's a VARIADIC + * parameter (there can be only one); or to InvalidOid if not. + * requiredResultType is set to InvalidOid if there are no OUT parameters, + * else it is set to the OID of the implied result type. 
 */
void
interpret_function_parameter_list(ParseState *pstate,
								  List *parameters,
								  Oid languageOid,
								  ObjectType objtype,
								  oidvector **parameterTypes,
								  List **parameterTypes_list,
								  ArrayType **allParameterTypes,
								  ArrayType **parameterModes,
								  ArrayType **parameterNames,
								  List **inParameterNames_list,
								  List **parameterDefaults,
								  Oid *variadicArgType,
								  Oid *requiredResultType)
{
	int			parameterCount = list_length(parameters);
	Oid		   *inTypes;
	int			inCount = 0;
	Datum	   *allTypes;
	Datum	   *paramModes;
	Datum	   *paramNames;
	int			outCount = 0;
	int			varCount = 0;
	bool		have_names = false;
	bool		have_defaults = false;
	ListCell   *x;
	int			i;

	*variadicArgType = InvalidOid;	/* default result */
	*requiredResultType = InvalidOid;	/* default result */

	/* palloc0 for paramNames so unnamed entries read as NULL datums below */
	inTypes = (Oid *) palloc(parameterCount * sizeof(Oid));
	allTypes = (Datum *) palloc(parameterCount * sizeof(Datum));
	paramModes = (Datum *) palloc(parameterCount * sizeof(Datum));
	paramNames = (Datum *) palloc0(parameterCount * sizeof(Datum));
	*parameterDefaults = NIL;

	/* Scan the list and extract data into work arrays */
	i = 0;
	foreach(x, parameters)
	{
		FunctionParameter *fp = (FunctionParameter *) lfirst(x);
		TypeName   *t = fp->argType;
		FunctionParameterMode fpmode = fp->mode;
		bool		isinput = false;
		Oid			toid;
		Type		typtup;
		AclResult	aclresult;

		/* For our purposes here, a defaulted mode spec is identical to IN */
		if (fpmode == FUNC_PARAM_DEFAULT)
			fpmode = FUNC_PARAM_IN;

		typtup = LookupTypeName(NULL, t, NULL, false);
		if (typtup)
		{
			if (!((Form_pg_type) GETSTRUCT(typtup))->typisdefined)
			{
				/* As above, hard error if language is SQL */
				if (languageOid == SQLlanguageId)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("SQL function cannot accept shell type %s",
									TypeNameToString(t))));
				/* We don't allow creating aggregates on shell types either */
				else if (objtype == OBJECT_AGGREGATE)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("aggregate cannot accept shell type %s",
									TypeNameToString(t))));
				else
					ereport(NOTICE,
							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
							 errmsg("argument type %s is only a shell",
									TypeNameToString(t))));
			}
			toid = typeTypeId(typtup);
			ReleaseSysCache(typtup);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("type %s does not exist",
							TypeNameToString(t))));
			toid = InvalidOid;	/* keep compiler quiet */
		}

		aclresult = pg_type_aclcheck(toid, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, toid);

		/* SETOF is never valid for an argument, whatever the object type */
		if (t->setof)
		{
			if (objtype == OBJECT_AGGREGATE)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("aggregates cannot accept set arguments")));
			else if (objtype == OBJECT_PROCEDURE)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("procedures cannot accept set arguments")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("functions cannot accept set arguments")));
		}

		/* handle input parameters */
		if (fpmode != FUNC_PARAM_OUT && fpmode != FUNC_PARAM_TABLE)
		{
			/* other input parameters can't follow a VARIADIC parameter */
			if (varCount > 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("VARIADIC parameter must be the last input parameter")));
			inTypes[inCount++] = toid;
			isinput = true;
			if (parameterTypes_list)
				*parameterTypes_list = lappend_oid(*parameterTypes_list, toid);
		}

		/* handle output parameters */
		if (fpmode != FUNC_PARAM_IN && fpmode != FUNC_PARAM_VARIADIC)
		{
			if (objtype == OBJECT_PROCEDURE)
			{
				/*
				 * We disallow OUT-after-VARIADIC only for procedures.  While
				 * such a case causes no confusion in ordinary function calls,
				 * it would cause confusion in a CALL statement.
				 */
				if (varCount > 0)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("VARIADIC parameter must be the last parameter")));
				/* Procedures with output parameters always return RECORD */
				*requiredResultType = RECORDOID;
			}
			else if (outCount == 0) /* save first output param's type */
				*requiredResultType = toid;
			outCount++;
		}

		if (fpmode == FUNC_PARAM_VARIADIC)
		{
			*variadicArgType = toid;
			varCount++;
			/* validate variadic parameter type */
			switch (toid)
			{
				case ANYARRAYOID:
				case ANYCOMPATIBLEARRAYOID:
				case ANYOID:
					/* okay */
					break;
				default:
					if (!OidIsValid(get_element_type(toid)))
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
								 errmsg("VARIADIC parameter must be an array")));
					break;
			}
		}

		allTypes[i] = ObjectIdGetDatum(toid);

		paramModes[i] = CharGetDatum(fpmode);

		if (fp->name && fp->name[0])
		{
			ListCell   *px;

			/*
			 * As of Postgres 9.0 we disallow using the same name for two
			 * input or two output function parameters.  Depending on the
			 * function's language, conflicting input and output names might
			 * be bad too, but we leave it to the PL to complain if so.
			 */
			foreach(px, parameters)
			{
				FunctionParameter *prevfp = (FunctionParameter *) lfirst(px);
				FunctionParameterMode prevfpmode;

				if (prevfp == fp)
					break;
				/* as above, default mode is IN */
				prevfpmode = prevfp->mode;
				if (prevfpmode == FUNC_PARAM_DEFAULT)
					prevfpmode = FUNC_PARAM_IN;
				/* pure in doesn't conflict with pure out */
				if ((fpmode == FUNC_PARAM_IN ||
					 fpmode == FUNC_PARAM_VARIADIC) &&
					(prevfpmode == FUNC_PARAM_OUT ||
					 prevfpmode == FUNC_PARAM_TABLE))
					continue;
				if ((prevfpmode == FUNC_PARAM_IN ||
					 prevfpmode == FUNC_PARAM_VARIADIC) &&
					(fpmode == FUNC_PARAM_OUT ||
					 fpmode == FUNC_PARAM_TABLE))
					continue;
				if (prevfp->name && prevfp->name[0] &&
					strcmp(prevfp->name, fp->name) == 0)
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
							 errmsg("parameter name \"%s\" used more than once",
									fp->name)));
			}

			paramNames[i] = CStringGetTextDatum(fp->name);
			have_names = true;
		}

		/* unnamed parameters are recorded as empty strings in this list */
		if (inParameterNames_list)
			*inParameterNames_list = lappend(*inParameterNames_list, makeString(fp->name ? fp->name : pstrdup("")));

		if (fp->defexpr)
		{
			Node	   *def;

			if (!isinput)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("only input parameters can have default values")));

			def = transformExpr(pstate, fp->defexpr,
								EXPR_KIND_FUNCTION_DEFAULT);
			def = coerce_to_specific_type(pstate, def, toid, "DEFAULT");
			assign_expr_collations(pstate, def);

			/*
			 * Make sure no variables are referred to (this is probably dead
			 * code now that add_missing_from is history).
			 */
			if (list_length(pstate->p_rtable) != 0 ||
				contain_var_clause(def))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
						 errmsg("cannot use table references in parameter default value")));

			/*
			 * transformExpr() should have already rejected subqueries,
			 * aggregates, and window functions, based on the EXPR_KIND_ for a
			 * default expression.
			 *
			 * It can't return a set either --- but coerce_to_specific_type
			 * already checked that for us.
			 *
			 * Note: the point of these restrictions is to ensure that an
			 * expression that, on its face, hasn't got subplans, aggregates,
			 * etc cannot suddenly have them after function default arguments
			 * are inserted.
			 */

			*parameterDefaults = lappend(*parameterDefaults, def);
			have_defaults = true;
		}
		else
		{
			if (isinput && have_defaults)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("input parameters after one with a default value must also have defaults")));

			/*
			 * For procedures, we also can't allow OUT parameters after one
			 * with a default, because the same sort of confusion arises in a
			 * CALL statement.
			 */
			if (objtype == OBJECT_PROCEDURE && have_defaults)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("procedure OUT parameters cannot appear after one with a default value")));
		}

		i++;
	}

	/* Now construct the proper outputs as needed */
	*parameterTypes = buildoidvector(inTypes, inCount);

	if (outCount > 0 || varCount > 0)
	{
		*allParameterTypes = construct_array(allTypes, parameterCount, OIDOID,
											 sizeof(Oid), true, TYPALIGN_INT);
		*parameterModes = construct_array(paramModes, parameterCount, CHAROID,
										  1, true, TYPALIGN_CHAR);
		if (outCount > 1)
			*requiredResultType = RECORDOID;
		/* otherwise we set requiredResultType correctly above */
	}
	else
	{
		/* all-IN, non-variadic signatures store SQL NULL for these columns */
		*allParameterTypes = NULL;
		*parameterModes = NULL;
	}

	if (have_names)
	{
		for (i = 0; i < parameterCount; i++)
		{
			/* fill in empty strings for unnamed slots; array can't have nulls */
			if (paramNames[i] == PointerGetDatum(NULL))
				paramNames[i] = CStringGetTextDatum("");
		}
		*parameterNames = construct_array(paramNames, parameterCount, TEXTOID,
										  -1, false, TYPALIGN_INT);
	}
	else
		*parameterNames = NULL;
}


/*
 * Recognize one of the options that can be passed to both CREATE
 * FUNCTION and ALTER FUNCTION and return it via one of the out
 * parameters.
Returns true if the passed option was recognized.  If
 * the out parameter we were going to assign to points to non-NULL,
 * raise a duplicate-clause error.  (We don't try to detect duplicate
 * SET parameters though --- if you're redundant, the last one wins.)
 */
static bool
compute_common_attribute(ParseState *pstate,
						 bool is_procedure,
						 DefElem *defel,
						 DefElem **volatility_item,
						 DefElem **strict_item,
						 DefElem **security_item,
						 DefElem **leakproof_item,
						 List **set_items,
						 DefElem **cost_item,
						 DefElem **rows_item,
						 DefElem **support_item,
						 DefElem **parallel_item)
{
	if (strcmp(defel->defname, "volatility") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*volatility_item)
			errorConflictingDefElem(defel, pstate);

		*volatility_item = defel;
	}
	else if (strcmp(defel->defname, "strict") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*strict_item)
			errorConflictingDefElem(defel, pstate);

		*strict_item = defel;
	}
	else if (strcmp(defel->defname, "security") == 0)
	{
		/* SECURITY DEFINER/INVOKER is valid for procedures too */
		if (*security_item)
			errorConflictingDefElem(defel, pstate);

		*security_item = defel;
	}
	else if (strcmp(defel->defname, "leakproof") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*leakproof_item)
			errorConflictingDefElem(defel, pstate);

		*leakproof_item = defel;
	}
	else if (strcmp(defel->defname, "set") == 0)
	{
		/* SET items accumulate; duplicates are resolved last-one-wins */
		*set_items = lappend(*set_items, defel->arg);
	}
	else if (strcmp(defel->defname, "cost") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*cost_item)
			errorConflictingDefElem(defel, pstate);

		*cost_item = defel;
	}
	else if (strcmp(defel->defname, "rows") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*rows_item)
			errorConflictingDefElem(defel, pstate);

		*rows_item = defel;
	}
	else if (strcmp(defel->defname, "support") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*support_item)
			errorConflictingDefElem(defel, pstate);

		*support_item = defel;
	}
	else if (strcmp(defel->defname, "parallel") == 0)
	{
		if (is_procedure)
			goto procedure_error;
		if (*parallel_item)
			errorConflictingDefElem(defel, pstate);

		*parallel_item = defel;
	}
	else
		return false;

	/* Recognized an option */
	return true;

procedure_error:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
			 errmsg("invalid attribute in procedure definition"),
			 parser_errposition(pstate, defel->location)));
	return false;
}

/*
 * Map a "volatility" DefElem string to the PROVOLATILE_* char stored in
 * pg_proc.provolatile.  The grammar should only produce the three values
 * handled here, hence elog (internal error) rather than ereport.
 */
static char
interpret_func_volatility(DefElem *defel)
{
	char	   *str = strVal(defel->arg);

	if (strcmp(str, "immutable") == 0)
		return PROVOLATILE_IMMUTABLE;
	else if (strcmp(str, "stable") == 0)
		return PROVOLATILE_STABLE;
	else if (strcmp(str, "volatile") == 0)
		return PROVOLATILE_VOLATILE;
	else
	{
		elog(ERROR, "invalid volatility \"%s\"", str);
		return 0;				/* keep compiler quiet */
	}
}

/*
 * Map a "parallel" DefElem string to the PROPARALLEL_* char stored in
 * pg_proc.proparallel.  Unlike volatility, arbitrary identifiers can reach
 * here, so a user-facing ereport is used for bad input.
 */
static char
interpret_func_parallel(DefElem *defel)
{
	char	   *str = strVal(defel->arg);

	if (strcmp(str, "safe") == 0)
		return PROPARALLEL_SAFE;
	else if (strcmp(str, "unsafe") == 0)
		return PROPARALLEL_UNSAFE;
	else if (strcmp(str, "restricted") == 0)
		return PROPARALLEL_RESTRICTED;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("parameter \"parallel\" must be SAFE, RESTRICTED, or UNSAFE")));
		return PROPARALLEL_UNSAFE;	/* keep compiler quiet */
	}
}

/*
 * Update a proconfig value according to a list of VariableSetStmt items.
 *
 * The input and result may be NULL to signify a null entry.
 */
static ArrayType *
update_proconfig_value(ArrayType *a, List *set_items)
{
	ListCell   *l;

	/* apply each SET/RESET left to right; later items override earlier */
	foreach(l, set_items)
	{
		VariableSetStmt *sstmt = lfirst_node(VariableSetStmt, l);

		if (sstmt->kind == VAR_RESET_ALL)
			a = NULL;			/* RESET ALL discards the whole array */
		else
		{
			char	   *valuestr = ExtractSetVariableArgs(sstmt);

			if (valuestr)
				a = GUCArrayAdd(a, sstmt->name, valuestr);
			else				/* RESET */
				a = GUCArrayDelete(a, sstmt->name);
		}
	}

	return a;
}

/*
 * Look up and validate the planner-support function named by a SUPPORT
 * clause.  Returns the support function's OID; errors out if the function
 * doesn't exist, has the wrong signature, or the caller isn't superuser.
 */
static Oid
interpret_func_support(DefElem *defel)
{
	List	   *procName = defGetQualifiedName(defel);
	Oid			procOid;
	Oid			argList[1];

	/*
	 * Support functions always take one INTERNAL argument and return
	 * INTERNAL.
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procName, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procName, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INTERNALOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("support function %s must return type %s",
						NameListToString(procName), "internal")));

	/*
	 * Someday we might want an ACL check here; but for now, we insist that
	 * you be superuser to specify a support function, so privilege on the
	 * support function is moot.
	 */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to specify a support function")));

	return procOid;
}


/*
 * Dissect the list of options assembled in gram.y into function
 * attributes.
 */
static void
compute_function_attributes(ParseState *pstate,
							bool is_procedure,
							List *options,
							List **as,
							char **language,
							Node **transform,
							bool *windowfunc_p,
							char *volatility_p,
							bool *strict_p,
							bool *security_definer,
							bool *leakproof_p,
							ArrayType **proconfig,
							float4 *procost,
							float4 *prorows,
							Oid *prosupport,
							char *parallel_p)
{
	ListCell   *option;
	DefElem    *as_item = NULL;
	DefElem    *language_item = NULL;
	DefElem    *transform_item = NULL;
	DefElem    *windowfunc_item = NULL;
	DefElem    *volatility_item = NULL;
	DefElem    *strict_item = NULL;
	DefElem    *security_item = NULL;
	DefElem    *leakproof_item = NULL;
	List	   *set_items = NIL;
	DefElem    *cost_item = NULL;
	DefElem    *rows_item = NULL;
	DefElem    *support_item = NULL;
	DefElem    *parallel_item = NULL;

	/* First pass: classify each option, rejecting duplicates */
	foreach(option, options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "as") == 0)
		{
			if (as_item)
				errorConflictingDefElem(defel, pstate);
			as_item = defel;
		}
		else if (strcmp(defel->defname, "language") == 0)
		{
			if (language_item)
				errorConflictingDefElem(defel, pstate);
			language_item = defel;
		}
		else if (strcmp(defel->defname, "transform") == 0)
		{
			if (transform_item)
				errorConflictingDefElem(defel, pstate);
			transform_item = defel;
		}
		else if (strcmp(defel->defname, "window") == 0)
		{
			if (windowfunc_item)
				errorConflictingDefElem(defel, pstate);
			if (is_procedure)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("invalid attribute in procedure definition"),
						 parser_errposition(pstate, defel->location)));
			windowfunc_item = defel;
		}
		else if (compute_common_attribute(pstate,
										  is_procedure,
										  defel,
										  &volatility_item,
										  &strict_item,
										  &security_item,
										  &leakproof_item,
										  &set_items,
										  &cost_item,
										  &rows_item,
										  &support_item,
										  &parallel_item))
		{
			/* recognized common option */
			continue;
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/* Second pass: convert the collected items into output values */
	if (as_item)
		*as = (List *) as_item->arg;
	if (language_item)
		*language = strVal(language_item->arg);
	if (transform_item)
		*transform = transform_item->arg;
	if (windowfunc_item)
		*windowfunc_p = boolVal(windowfunc_item->arg);
	if (volatility_item)
		*volatility_p = interpret_func_volatility(volatility_item);
	if (strict_item)
		*strict_p = boolVal(strict_item->arg);
	if (security_item)
		*security_definer = boolVal(security_item->arg);
	if (leakproof_item)
		*leakproof_p = boolVal(leakproof_item->arg);
	if (set_items)
		*proconfig = update_proconfig_value(NULL, set_items);
	if (cost_item)
	{
		*procost = defGetNumeric(cost_item);
		if (*procost <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("COST must be positive")));
	}
	if (rows_item)
	{
		*prorows = defGetNumeric(rows_item);
		if (*prorows <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("ROWS must be positive")));
	}
	if (support_item)
		*prosupport = interpret_func_support(support_item);
	if (parallel_item)
		*parallel_p = interpret_func_parallel(parallel_item);
}


/*
 * Interpret the AS clause (or the inline SQL body) of CREATE FUNCTION.
 *
 * For a dynamically linked C language object, the form of the clause is
 *
 *	   AS <object file name> [, <link symbol name>]
 *
 * In all other cases
 *
 *	   AS <definition string>
 *
 * Results go into *prosrc_str_p / *probin_str_p / *sql_body_out; exactly
 * which are set depends on the language, as commented below.
 */
static void
interpret_AS_clause(Oid languageOid, const char *languageName,
					char *funcname, List *as, Node *sql_body_in,
					List *parameterTypes, List *inParameterNames,
					char **prosrc_str_p, char **probin_str_p,
					Node **sql_body_out,
					const char *queryString)
{
	if (!sql_body_in && !as)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("no function body specified")));

	if (sql_body_in && as)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("duplicate function body specified")));

	if (sql_body_in && languageOid != SQLlanguageId)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("inline SQL function body only valid for language SQL")));

	*sql_body_out = NULL;

	if (languageOid == ClanguageId)
	{
		/*
		 * For "C" language, store the file name in probin and, when given,
		 * the link symbol name in prosrc.  If link symbol is omitted,
		 * substitute procedure name.  We also allow link symbol to be
		 * specified as "-", since that was the habit in PG versions before
		 * 8.4, and there might be dump files out there that don't translate
		 * that back to "omitted".
		 */
		*probin_str_p = strVal(linitial(as));
		if (list_length(as) == 1)
			*prosrc_str_p = funcname;
		else
		{
			*prosrc_str_p = strVal(lsecond(as));
			if (strcmp(*prosrc_str_p, "-") == 0)
				*prosrc_str_p = funcname;
		}
	}
	else if (sql_body_in)
	{
		/* SQL-standard body: parse it now and store the tree in prosqlbody */
		SQLFunctionParseInfoPtr pinfo;

		pinfo = (SQLFunctionParseInfoPtr) palloc0(sizeof(SQLFunctionParseInfo));

		pinfo->fname = funcname;
		pinfo->nargs = list_length(parameterTypes);
		pinfo->argtypes = (Oid *) palloc(pinfo->nargs * sizeof(Oid));
		pinfo->argnames = (char **) palloc(pinfo->nargs * sizeof(char *));
		for (int i = 0; i < list_length(parameterTypes); i++)
		{
			char	   *s = strVal(list_nth(inParameterNames, i));

			pinfo->argtypes[i] = list_nth_oid(parameterTypes, i);
			if (IsPolymorphicType(pinfo->argtypes[i]))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SQL function with unquoted function body cannot have polymorphic arguments")));

			/* empty string in the list means "no name" (see parameter scan) */
			if (s[0] != '\0')
				pinfo->argnames[i] = s;
			else
				pinfo->argnames[i] = NULL;
		}

		if (IsA(sql_body_in, List))
		{
			/* BEGIN ATOMIC ... END: a list of statements */
			List	   *stmts = linitial_node(List, castNode(List, sql_body_in));
			ListCell   *lc;
			List	   *transformed_stmts = NIL;

			foreach(lc, stmts)
			{
				Node	   *stmt = lfirst(lc);
				Query	   *q;
				ParseState *pstate = make_parsestate(NULL);

				pstate->p_sourcetext = queryString;
				sql_fn_parser_setup(pstate, pinfo);
				q = transformStmt(pstate, stmt);
				if (q->commandType == CMD_UTILITY)
					ereport(ERROR,
							errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							errmsg("%s is not yet supported in unquoted SQL function body",
								   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
				transformed_stmts = lappend(transformed_stmts, q);
				free_parsestate(pstate);
			}

			*sql_body_out = (Node *) list_make1(transformed_stmts);
		}
		else
		{
			/* RETURN <expr>: a single statement */
			Query	   *q;
			ParseState *pstate = make_parsestate(NULL);

			pstate->p_sourcetext = queryString;
			sql_fn_parser_setup(pstate, pinfo);
			q = transformStmt(pstate, sql_body_in);
			if (q->commandType == CMD_UTILITY)
				ereport(ERROR,
						errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("%s is not yet supported in unquoted SQL function body",
							   GetCommandTagName(CreateCommandTag(q->utilityStmt))));
			free_parsestate(pstate);

			*sql_body_out = (Node *) q;
		}

		/*
		 * We must put something in prosrc.  For the moment, just record an
		 * empty string.  It might be useful to store the original text of the
		 * CREATE FUNCTION statement --- but to make actual use of that in
		 * error reports, we'd also have to adjust readfuncs.c to not throw
		 * away node location fields when reading prosqlbody.
		 */
		*prosrc_str_p = pstrdup("");

		/* But we definitely don't need probin. */
		*probin_str_p = NULL;
	}
	else
	{
		/* Everything else wants the given string in prosrc. */
		*prosrc_str_p = strVal(linitial(as));
		*probin_str_p = NULL;

		if (list_length(as) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("only one AS item needed for language \"%s\"",
							languageName)));

		if (languageOid == INTERNALlanguageId)
		{
			/*
			 * In PostgreSQL versions before 6.5, the SQL name of the created
			 * function could not be different from the internal name, and
			 * "prosrc" wasn't used.  So there is code out there that does
			 * CREATE FUNCTION xyz AS '' LANGUAGE internal.  To preserve some
			 * modicum of backwards compatibility, accept an empty "prosrc"
			 * value as meaning the supplied SQL function name.
			 */
			if (strlen(*prosrc_str_p) == 0)
				*prosrc_str_p = funcname;
		}
	}
}


/*
 * CreateFunction
 *	 Execute a CREATE FUNCTION (or CREATE PROCEDURE) utility statement.
 *
 * Returns the ObjectAddress of the new (or replaced) pg_proc entry.
 */
ObjectAddress
CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt)
{
	char	   *probin_str;
	char	   *prosrc_str;
	Node	   *prosqlbody;
	Oid			prorettype;
	bool		returnsSet;
	char	   *language;
	Oid			languageOid;
	Oid			languageValidator;
	Node	   *transformDefElem = NULL;
	char	   *funcname;
	Oid			namespaceId;
	AclResult	aclresult;
	oidvector  *parameterTypes;
	List	   *parameterTypes_list = NIL;
	ArrayType  *allParameterTypes;
	ArrayType  *parameterModes;
	ArrayType  *parameterNames;
	List	   *inParameterNames_list = NIL;
	List	   *parameterDefaults;
	Oid			variadicArgType;
	List	   *trftypes_list = NIL;
	ArrayType  *trftypes;
	Oid			requiredResultType;
	bool		isWindowFunc,
				isStrict,
				security,
				isLeakProof;
	char		volatility;
	ArrayType  *proconfig;
	float4		procost;
	float4		prorows;
	Oid			prosupport;
	HeapTuple	languageTuple;
	Form_pg_language languageStruct;
	List	   *as_clause;
	char		parallel;

	/* Convert list of names to a name and namespace */
	namespaceId = QualifiedNameGetCreationNamespace(stmt->funcname,
													&funcname);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(namespaceId));

	/* Set default attributes */
	as_clause = NIL;
	language = NULL;
	isWindowFunc = false;
	isStrict = false;
	security = false;
	isLeakProof = false;
	volatility = PROVOLATILE_VOLATILE;
	proconfig = NULL;
	procost = -1;				/* indicates not set */
	prorows = -1;				/* indicates not set */
	prosupport = InvalidOid;
	parallel = PROPARALLEL_UNSAFE;

	/* Extract non-default attributes from stmt->options list */
	compute_function_attributes(pstate,
								stmt->is_procedure,
								stmt->options,
								&as_clause, &language,
								&transformDefElem,
								&isWindowFunc, &volatility,
								&isStrict, &security, &isLeakProof,
								&proconfig, &procost, &prorows,
								&prosupport, &parallel);

	if (!language)
	{
		/* a SQL-standard body implies LANGUAGE SQL */
		if (stmt->sql_body)
			language = "sql";
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("no language specified")));
	}

	/* Look up the language and validate permissions */
	languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language));
	if (!HeapTupleIsValid(languageTuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("language \"%s\" does not exist", language),
				 (extension_file_exists(language) ?
				  errhint("Use CREATE EXTENSION to load the language into the database.") : 0)));

	languageStruct = (Form_pg_language) GETSTRUCT(languageTuple);
	languageOid = languageStruct->oid;

	if (languageStruct->lanpltrusted)
	{
		/* if trusted language, need USAGE privilege */
		AclResult	aclresult;

		aclresult = pg_language_aclcheck(languageOid, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_LANGUAGE,
						   NameStr(languageStruct->lanname));
	}
	else
	{
		/* if untrusted language, must be superuser */
		if (!superuser())
			aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE,
						   NameStr(languageStruct->lanname));
	}

	languageValidator = languageStruct->lanvalidator;

	ReleaseSysCache(languageTuple);

	/*
	 * Only superuser is allowed to create leakproof functions because
	 * leakproof functions can see tuples which have not yet been filtered out
	 * by security barrier views or row-level security policies.
	 */
	if (isLeakProof && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("only superuser can define a leakproof function")));

	if (transformDefElem)
	{
		ListCell   *lc;

		/* resolve each TRANSFORM FOR TYPE entry to its base element type */
		foreach(lc, castNode(List, transformDefElem))
		{
			Oid			typeid = typenameTypeId(NULL,
												lfirst_node(TypeName, lc));
			Oid			elt = get_base_element_type(typeid);

			typeid = elt ? elt : typeid;

			/* just checks that the transform exists; errors out if not */
			get_transform_oid(typeid, languageOid, false);
			trftypes_list = lappend_oid(trftypes_list, typeid);
		}
	}

	/*
	 * Convert remaining parameters of CREATE to form wanted by
	 * ProcedureCreate.
	 */
	interpret_function_parameter_list(pstate,
									  stmt->parameters,
									  languageOid,
									  stmt->is_procedure ? OBJECT_PROCEDURE : OBJECT_FUNCTION,
									  &parameterTypes,
									  &parameterTypes_list,
									  &allParameterTypes,
									  &parameterModes,
									  &parameterNames,
									  &inParameterNames_list,
									  &parameterDefaults,
									  &variadicArgType,
									  &requiredResultType);

	if (stmt->is_procedure)
	{
		Assert(!stmt->returnType);
		prorettype = requiredResultType ? requiredResultType : VOIDOID;
		returnsSet = false;
	}
	else if (stmt->returnType)
	{
		/* explicit RETURNS clause */
		compute_return_type(stmt->returnType, languageOid,
							&prorettype, &returnsSet);
		if (OidIsValid(requiredResultType) && prorettype != requiredResultType)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
					 errmsg("function result type must be %s because of OUT parameters",
							format_type_be(requiredResultType))));
	}
	else if (OidIsValid(requiredResultType))
	{
		/* default RETURNS clause from OUT parameters */
		prorettype = requiredResultType;
		returnsSet = false;
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("function result type must be specified")));
		/* Alternative possibility: default to RETURNS VOID */
		prorettype = VOIDOID;
		returnsSet = false;
	}

	if (list_length(trftypes_list) > 0)
	{
		ListCell   *lc;
		Datum	   *arr;
		int			i;

		arr = palloc(list_length(trftypes_list) * sizeof(Datum));
		i = 0;
		foreach(lc, trftypes_list)
			arr[i++] = ObjectIdGetDatum(lfirst_oid(lc));
		trftypes = construct_array(arr, list_length(trftypes_list),
								   OIDOID, sizeof(Oid), true, TYPALIGN_INT);
	}
	else
	{
		/* store SQL NULL instead of empty array */
		trftypes = NULL;
	}

	interpret_AS_clause(languageOid, language, funcname, as_clause, stmt->sql_body,
						parameterTypes_list, inParameterNames_list,
						&prosrc_str, &probin_str, &prosqlbody,
						pstate->p_sourcetext);

	/*
	 * Set default values for COST and ROWS depending on other parameters;
	 * reject ROWS if it's not returnsSet.  NB: pg_dump knows these default
	 * values, keep it in sync if you change them.
	 */
	if (procost < 0)
	{
		/* SQL and PL-language functions are assumed more expensive */
		if (languageOid == INTERNALlanguageId ||
			languageOid == ClanguageId)
			procost = 1;
		else
			procost = 100;
	}
	if (prorows < 0)
	{
		if (returnsSet)
			prorows = 1000;
		else
			prorows = 0;		/* dummy value if not returnsSet */
	}
	else if (!returnsSet)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("ROWS is not applicable when function does not return a set")));

	/*
	 * And now that we have all the parameters, and know we're permitted to do
	 * so, go ahead and create the function.
	 */
	return ProcedureCreate(funcname,
						   namespaceId,
						   stmt->replace,
						   returnsSet,
						   prorettype,
						   GetUserId(),
						   languageOid,
						   languageValidator,
						   prosrc_str,	/* converted to text later */
						   probin_str,	/* converted to text later */
						   prosqlbody,
						   stmt->is_procedure ? PROKIND_PROCEDURE : (isWindowFunc ? PROKIND_WINDOW : PROKIND_FUNCTION),
						   security,
						   isLeakProof,
						   isStrict,
						   volatility,
						   parallel,
						   parameterTypes,
						   PointerGetDatum(allParameterTypes),
						   PointerGetDatum(parameterModes),
						   PointerGetDatum(parameterNames),
						   parameterDefaults,
						   PointerGetDatum(trftypes),
						   PointerGetDatum(proconfig),
						   prosupport,
						   procost,
						   prorows);
}

/*
 * Guts of function deletion.
 *
 * Note: this is also used for aggregate deletion, since the OIDs of
 * both functions and aggregates point to pg_proc.
 */
void
RemoveFunctionById(Oid funcOid)
{
	Relation	relation;
	HeapTuple	tup;
	char		prokind;

	/*
	 * Delete the pg_proc tuple.
	 */
	relation = table_open(ProcedureRelationId, RowExclusiveLock);

	tup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcOid));
	if (!HeapTupleIsValid(tup)) /* should not happen */
		elog(ERROR, "cache lookup failed for function %u", funcOid);

	/* remember prokind before the tuple goes away */
	prokind = ((Form_pg_proc) GETSTRUCT(tup))->prokind;

	CatalogTupleDelete(relation, &tup->t_self);

	ReleaseSysCache(tup);

	table_close(relation, RowExclusiveLock);

	/* drop any accumulated function-call statistics */
	pgstat_drop_function(funcOid);

	/*
	 * If there's a pg_aggregate tuple, delete that too.
	 */
	if (prokind == PROKIND_AGGREGATE)
	{
		relation = table_open(AggregateRelationId, RowExclusiveLock);

		tup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(funcOid));
		if (!HeapTupleIsValid(tup)) /* should not happen */
			elog(ERROR, "cache lookup failed for pg_aggregate tuple for function %u", funcOid);

		CatalogTupleDelete(relation, &tup->t_self);

		ReleaseSysCache(tup);

		table_close(relation, RowExclusiveLock);
	}
}

/*
 * Implements the ALTER FUNCTION utility command (except for the
 * RENAME and OWNER clauses, which are handled as part of the generic
 * ALTER framework).
+ */ +ObjectAddress +AlterFunction(ParseState *pstate, AlterFunctionStmt *stmt) +{ + HeapTuple tup; + Oid funcOid; + Form_pg_proc procForm; + bool is_procedure; + Relation rel; + ListCell *l; + DefElem *volatility_item = NULL; + DefElem *strict_item = NULL; + DefElem *security_def_item = NULL; + DefElem *leakproof_item = NULL; + List *set_items = NIL; + DefElem *cost_item = NULL; + DefElem *rows_item = NULL; + DefElem *support_item = NULL; + DefElem *parallel_item = NULL; + ObjectAddress address; + + rel = table_open(ProcedureRelationId, RowExclusiveLock); + + funcOid = LookupFuncWithArgs(stmt->objtype, stmt->func, false); + + ObjectAddressSet(address, ProcedureRelationId, funcOid); + + tup = SearchSysCacheCopy1(PROCOID, ObjectIdGetDatum(funcOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for function %u", funcOid); + + procForm = (Form_pg_proc) GETSTRUCT(tup); + + /* Permission check: must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, stmt->objtype, + NameListToString(stmt->func->objname)); + + if (procForm->prokind == PROKIND_AGGREGATE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an aggregate function", + NameListToString(stmt->func->objname)))); + + is_procedure = (procForm->prokind == PROKIND_PROCEDURE); + + /* Examine requested actions. 
*/ + foreach(l, stmt->actions) + { + DefElem *defel = (DefElem *) lfirst(l); + + if (compute_common_attribute(pstate, + is_procedure, + defel, + &volatility_item, + &strict_item, + &security_def_item, + &leakproof_item, + &set_items, + &cost_item, + &rows_item, + &support_item, + ¶llel_item) == false) + elog(ERROR, "option \"%s\" not recognized", defel->defname); + } + + if (volatility_item) + procForm->provolatile = interpret_func_volatility(volatility_item); + if (strict_item) + procForm->proisstrict = boolVal(strict_item->arg); + if (security_def_item) + procForm->prosecdef = boolVal(security_def_item->arg); + if (leakproof_item) + { + procForm->proleakproof = boolVal(leakproof_item->arg); + if (procForm->proleakproof && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser can define a leakproof function"))); + } + if (cost_item) + { + procForm->procost = defGetNumeric(cost_item); + if (procForm->procost <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COST must be positive"))); + } + if (rows_item) + { + procForm->prorows = defGetNumeric(rows_item); + if (procForm->prorows <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ROWS must be positive"))); + if (!procForm->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ROWS is not applicable when function does not return a set"))); + } + if (support_item) + { + /* interpret_func_support handles the privilege check */ + Oid newsupport = interpret_func_support(support_item); + + /* Add or replace dependency on support function */ + if (OidIsValid(procForm->prosupport)) + changeDependencyFor(ProcedureRelationId, funcOid, + ProcedureRelationId, procForm->prosupport, + newsupport); + else + { + ObjectAddress referenced; + + referenced.classId = ProcedureRelationId; + referenced.objectId = newsupport; + referenced.objectSubId = 0; + recordDependencyOn(&address, &referenced, 
DEPENDENCY_NORMAL); + } + + procForm->prosupport = newsupport; + } + if (parallel_item) + procForm->proparallel = interpret_func_parallel(parallel_item); + if (set_items) + { + Datum datum; + bool isnull; + ArrayType *a; + Datum repl_val[Natts_pg_proc]; + bool repl_null[Natts_pg_proc]; + bool repl_repl[Natts_pg_proc]; + + /* extract existing proconfig setting */ + datum = SysCacheGetAttr(PROCOID, tup, Anum_pg_proc_proconfig, &isnull); + a = isnull ? NULL : DatumGetArrayTypeP(datum); + + /* update according to each SET or RESET item, left to right */ + a = update_proconfig_value(a, set_items); + + /* update the tuple */ + memset(repl_repl, false, sizeof(repl_repl)); + repl_repl[Anum_pg_proc_proconfig - 1] = true; + + if (a == NULL) + { + repl_val[Anum_pg_proc_proconfig - 1] = (Datum) 0; + repl_null[Anum_pg_proc_proconfig - 1] = true; + } + else + { + repl_val[Anum_pg_proc_proconfig - 1] = PointerGetDatum(a); + repl_null[Anum_pg_proc_proconfig - 1] = false; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + } + /* DO NOT put more touches of procForm below here; it's now dangling. 
*/ + + /* Do the update */ + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(ProcedureRelationId, funcOid, 0); + + table_close(rel, NoLock); + heap_freetuple(tup); + + return address; +} + + +/* + * CREATE CAST + */ +ObjectAddress +CreateCast(CreateCastStmt *stmt) +{ + Oid sourcetypeid; + Oid targettypeid; + char sourcetyptype; + char targettyptype; + Oid funcid; + int nargs; + char castcontext; + char castmethod; + HeapTuple tuple; + AclResult aclresult; + ObjectAddress myself; + + sourcetypeid = typenameTypeId(NULL, stmt->sourcetype); + targettypeid = typenameTypeId(NULL, stmt->targettype); + sourcetyptype = get_typtype(sourcetypeid); + targettyptype = get_typtype(targettypeid); + + /* No pseudo-types allowed */ + if (sourcetyptype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("source data type %s is a pseudo-type", + TypeNameToString(stmt->sourcetype)))); + + if (targettyptype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("target data type %s is a pseudo-type", + TypeNameToString(stmt->targettype)))); + + /* Permission check */ + if (!pg_type_ownercheck(sourcetypeid, GetUserId()) + && !pg_type_ownercheck(targettypeid, GetUserId())) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be owner of type %s or type %s", + format_type_be(sourcetypeid), + format_type_be(targettypeid)))); + + aclresult = pg_type_aclcheck(sourcetypeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, sourcetypeid); + + aclresult = pg_type_aclcheck(targettypeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, targettypeid); + + /* Domains are allowed for historical reasons, but we warn */ + if (sourcetyptype == TYPTYPE_DOMAIN) + ereport(WARNING, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cast will be ignored because the source data type is a domain"))); + + else if 
(targettyptype == TYPTYPE_DOMAIN) + ereport(WARNING, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cast will be ignored because the target data type is a domain"))); + + /* Determine the cast method */ + if (stmt->func != NULL) + castmethod = COERCION_METHOD_FUNCTION; + else if (stmt->inout) + castmethod = COERCION_METHOD_INOUT; + else + castmethod = COERCION_METHOD_BINARY; + + if (castmethod == COERCION_METHOD_FUNCTION) + { + Form_pg_proc procstruct; + + funcid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->func, false); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + nargs = procstruct->pronargs; + if (nargs < 1 || nargs > 3) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must take one to three arguments"))); + if (!IsBinaryCoercible(sourcetypeid, procstruct->proargtypes.values[0])) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("argument of cast function must match or be binary-coercible from source data type"))); + if (nargs > 1 && procstruct->proargtypes.values[1] != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("second argument of cast function must be type %s", + "integer"))); + if (nargs > 2 && procstruct->proargtypes.values[2] != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("third argument of cast function must be type %s", + "boolean"))); + if (!IsBinaryCoercible(procstruct->prorettype, targettypeid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of cast function must match or be binary-coercible to target data type"))); + + /* + * Restricting the volatility of a cast function may or may not be a + * good idea in the abstract, but it definitely breaks many old + * user-defined types. 
Disable this check --- tgl 2/1/03 + */ +#ifdef NOT_USED + if (procstruct->provolatile == PROVOLATILE_VOLATILE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must not be volatile"))); +#endif + if (procstruct->prokind != PROKIND_FUNCTION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must be a normal function"))); + if (procstruct->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must not return a set"))); + + ReleaseSysCache(tuple); + } + else + { + funcid = InvalidOid; + nargs = 0; + } + + if (castmethod == COERCION_METHOD_BINARY) + { + int16 typ1len; + int16 typ2len; + bool typ1byval; + bool typ2byval; + char typ1align; + char typ2align; + + /* + * Must be superuser to create binary-compatible casts, since + * erroneous casts can easily crash the backend. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create a cast WITHOUT FUNCTION"))); + + /* + * Also, insist that the types match as to size, alignment, and + * pass-by-value attributes; this provides at least a crude check that + * they have similar representations. A pair of types that fail this + * test should certainly not be equated. + */ + get_typlenbyvalalign(sourcetypeid, &typ1len, &typ1byval, &typ1align); + get_typlenbyvalalign(targettypeid, &typ2len, &typ2byval, &typ2align); + if (typ1len != typ2len || + typ1byval != typ2byval || + typ1align != typ2align) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("source and target data types are not physically compatible"))); + + /* + * We know that composite, enum and array types are never binary- + * compatible with each other. They all have OIDs embedded in them. + * + * Theoretically you could build a user-defined base type that is + * binary-compatible with a composite, enum, or array type. 
But we + * disallow that too, as in practice such a cast is surely a mistake. + * You can always work around that by writing a cast function. + */ + if (sourcetyptype == TYPTYPE_COMPOSITE || + targettyptype == TYPTYPE_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("composite data types are not binary-compatible"))); + + if (sourcetyptype == TYPTYPE_ENUM || + targettyptype == TYPTYPE_ENUM) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("enum data types are not binary-compatible"))); + + if (OidIsValid(get_element_type(sourcetypeid)) || + OidIsValid(get_element_type(targettypeid))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("array data types are not binary-compatible"))); + + /* + * We also disallow creating binary-compatibility casts involving + * domains. Casting from a domain to its base type is already + * allowed, and casting the other way ought to go through domain + * coercion to permit constraint checking. Again, if you're intent on + * having your own semantics for that, create a no-op cast function. + * + * NOTE: if we were to relax this, the above checks for composites + * etc. would have to be modified to look through domains to their + * base types. + */ + if (sourcetyptype == TYPTYPE_DOMAIN || + targettyptype == TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("domain data types must not be marked binary-compatible"))); + } + + /* + * Allow source and target types to be same only for length coercion + * functions. We assume a multi-arg function does length coercion. 
+ */ + if (sourcetypeid == targettypeid && nargs < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("source data type and target data type are the same"))); + + /* convert CoercionContext enum to char value for castcontext */ + switch (stmt->context) + { + case COERCION_IMPLICIT: + castcontext = COERCION_CODE_IMPLICIT; + break; + case COERCION_ASSIGNMENT: + castcontext = COERCION_CODE_ASSIGNMENT; + break; + /* COERCION_PLPGSQL is intentionally not covered here */ + case COERCION_EXPLICIT: + castcontext = COERCION_CODE_EXPLICIT; + break; + default: + elog(ERROR, "unrecognized CoercionContext: %d", stmt->context); + castcontext = 0; /* keep compiler quiet */ + break; + } + + myself = CastCreate(sourcetypeid, targettypeid, funcid, castcontext, + castmethod, DEPENDENCY_NORMAL); + return myself; +} + + +static void +check_transform_function(Form_pg_proc procstruct) +{ + if (procstruct->provolatile == PROVOLATILE_VOLATILE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must not be volatile"))); + if (procstruct->prokind != PROKIND_FUNCTION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must be a normal function"))); + if (procstruct->proretset) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must not return a set"))); + if (procstruct->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transform function must take one argument"))); + if (procstruct->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("first argument of transform function must be type %s", + "internal"))); +} + + +/* + * CREATE TRANSFORM + */ +ObjectAddress +CreateTransform(CreateTransformStmt *stmt) +{ + Oid typeid; + char typtype; + Oid langid; + Oid fromsqlfuncid; + Oid tosqlfuncid; + AclResult aclresult; + Form_pg_proc procstruct; + Datum 
values[Natts_pg_transform]; + bool nulls[Natts_pg_transform]; + bool replaces[Natts_pg_transform]; + Oid transformid; + HeapTuple tuple; + HeapTuple newtuple; + Relation relation; + ObjectAddress myself, + referenced; + ObjectAddresses *addrs; + bool is_replace; + + /* + * Get the type + */ + typeid = typenameTypeId(NULL, stmt->type_name); + typtype = get_typtype(typeid); + + if (typtype == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("data type %s is a pseudo-type", + TypeNameToString(stmt->type_name)))); + + if (typtype == TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("data type %s is a domain", + TypeNameToString(stmt->type_name)))); + + if (!pg_type_ownercheck(typeid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeid); + + aclresult = pg_type_aclcheck(typeid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, typeid); + + /* + * Get the language + */ + langid = get_language_oid(stmt->lang, false); + + aclresult = pg_language_aclcheck(langid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_LANGUAGE, stmt->lang); + + /* + * Get the functions + */ + if (stmt->fromsql) + { + fromsqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->fromsql, false); + + if (!pg_proc_ownercheck(fromsqlfuncid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname)); + + aclresult = pg_proc_aclcheck(fromsqlfuncid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->fromsql->objname)); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(fromsqlfuncid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", fromsqlfuncid); + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + if (procstruct->prorettype != INTERNALOID) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of FROM SQL function must be %s", + "internal"))); + check_transform_function(procstruct); + ReleaseSysCache(tuple); + } + else + fromsqlfuncid = InvalidOid; + + if (stmt->tosql) + { + tosqlfuncid = LookupFuncWithArgs(OBJECT_FUNCTION, stmt->tosql, false); + + if (!pg_proc_ownercheck(tosqlfuncid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname)); + + aclresult = pg_proc_aclcheck(tosqlfuncid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, NameListToString(stmt->tosql->objname)); + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(tosqlfuncid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for function %u", tosqlfuncid); + procstruct = (Form_pg_proc) GETSTRUCT(tuple); + if (procstruct->prorettype != typeid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("return data type of TO SQL function must be the transform data type"))); + check_transform_function(procstruct); + ReleaseSysCache(tuple); + } + else + tosqlfuncid = InvalidOid; + + /* + * Ready to go + */ + values[Anum_pg_transform_trftype - 1] = ObjectIdGetDatum(typeid); + values[Anum_pg_transform_trflang - 1] = ObjectIdGetDatum(langid); + values[Anum_pg_transform_trffromsql - 1] = ObjectIdGetDatum(fromsqlfuncid); + values[Anum_pg_transform_trftosql - 1] = ObjectIdGetDatum(tosqlfuncid); + + MemSet(nulls, false, sizeof(nulls)); + + relation = table_open(TransformRelationId, RowExclusiveLock); + + tuple = SearchSysCache2(TRFTYPELANG, + ObjectIdGetDatum(typeid), + ObjectIdGetDatum(langid)); + if (HeapTupleIsValid(tuple)) + { + Form_pg_transform form = (Form_pg_transform) GETSTRUCT(tuple); + + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("transform for type %s language \"%s\" already exists", + format_type_be(typeid), + stmt->lang))); + + 
MemSet(replaces, false, sizeof(replaces)); + replaces[Anum_pg_transform_trffromsql - 1] = true; + replaces[Anum_pg_transform_trftosql - 1] = true; + + newtuple = heap_modify_tuple(tuple, RelationGetDescr(relation), values, nulls, replaces); + CatalogTupleUpdate(relation, &newtuple->t_self, newtuple); + + transformid = form->oid; + ReleaseSysCache(tuple); + is_replace = true; + } + else + { + transformid = GetNewOidWithIndex(relation, TransformOidIndexId, + Anum_pg_transform_oid); + values[Anum_pg_transform_oid - 1] = ObjectIdGetDatum(transformid); + newtuple = heap_form_tuple(RelationGetDescr(relation), values, nulls); + CatalogTupleInsert(relation, newtuple); + is_replace = false; + } + + if (is_replace) + deleteDependencyRecordsFor(TransformRelationId, transformid, true); + + addrs = new_object_addresses(); + + /* make dependency entries */ + ObjectAddressSet(myself, TransformRelationId, transformid); + + /* dependency on language */ + ObjectAddressSet(referenced, LanguageRelationId, langid); + add_exact_object_address(&referenced, addrs); + + /* dependency on type */ + ObjectAddressSet(referenced, TypeRelationId, typeid); + add_exact_object_address(&referenced, addrs); + + /* dependencies on functions */ + if (OidIsValid(fromsqlfuncid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, fromsqlfuncid); + add_exact_object_address(&referenced, addrs); + } + if (OidIsValid(tosqlfuncid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, tosqlfuncid); + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, is_replace); + + /* Post creation hook for new transform */ + InvokeObjectPostCreateHook(TransformRelationId, transformid, 0); + + heap_freetuple(newtuple); + + table_close(relation, RowExclusiveLock); + + return myself; +} + + +/* + * get_transform_oid - given type OID and 
language OID, look up a transform OID + * + * If missing_ok is false, throw an error if the transform is not found. If + * true, just return InvalidOid. + */ +Oid +get_transform_oid(Oid type_id, Oid lang_id, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid2(TRFTYPELANG, Anum_pg_transform_oid, + ObjectIdGetDatum(type_id), + ObjectIdGetDatum(lang_id)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("transform for type %s language \"%s\" does not exist", + format_type_be(type_id), + get_language_name(lang_id, false)))); + return oid; +} + + +/* + * Subroutine for ALTER FUNCTION/AGGREGATE SET SCHEMA/RENAME + * + * Is there a function with the given name and signature already in the given + * namespace? If so, raise an appropriate error message. + */ +void +IsThereFunctionInNamespace(const char *proname, int pronargs, + oidvector *proargtypes, Oid nspOid) +{ + /* check for duplicate name (more friendly than unique-index failure) */ + if (SearchSysCacheExists3(PROCNAMEARGSNSP, + CStringGetDatum(proname), + PointerGetDatum(proargtypes), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_FUNCTION), + errmsg("function %s already exists in schema \"%s\"", + funcname_signature_string(proname, pronargs, + NIL, proargtypes->values), + get_namespace_name(nspOid)))); +} + +/* + * ExecuteDoStmt + * Execute inline procedural-language code + * + * See at ExecuteCallStmt() about the atomic argument. 
+ */ +void +ExecuteDoStmt(ParseState *pstate, DoStmt *stmt, bool atomic) +{ + InlineCodeBlock *codeblock = makeNode(InlineCodeBlock); + ListCell *arg; + DefElem *as_item = NULL; + DefElem *language_item = NULL; + char *language; + Oid laninline; + HeapTuple languageTuple; + Form_pg_language languageStruct; + + /* Process options we got from gram.y */ + foreach(arg, stmt->args) + { + DefElem *defel = (DefElem *) lfirst(arg); + + if (strcmp(defel->defname, "as") == 0) + { + if (as_item) + errorConflictingDefElem(defel, pstate); + as_item = defel; + } + else if (strcmp(defel->defname, "language") == 0) + { + if (language_item) + errorConflictingDefElem(defel, pstate); + language_item = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (as_item) + codeblock->source_text = strVal(as_item->arg); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("no inline code specified"))); + + /* if LANGUAGE option wasn't specified, use the default */ + if (language_item) + language = strVal(language_item->arg); + else + language = "plpgsql"; + + /* Look up the language and validate permissions */ + languageTuple = SearchSysCache1(LANGNAME, PointerGetDatum(language)); + if (!HeapTupleIsValid(languageTuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("language \"%s\" does not exist", language), + (extension_file_exists(language) ? 
+ errhint("Use CREATE EXTENSION to load the language into the database.") : 0))); + + languageStruct = (Form_pg_language) GETSTRUCT(languageTuple); + codeblock->langOid = languageStruct->oid; + codeblock->langIsTrusted = languageStruct->lanpltrusted; + codeblock->atomic = atomic; + + if (languageStruct->lanpltrusted) + { + /* if trusted language, need USAGE privilege */ + AclResult aclresult; + + aclresult = pg_language_aclcheck(codeblock->langOid, GetUserId(), + ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_LANGUAGE, + NameStr(languageStruct->lanname)); + } + else + { + /* if untrusted language, must be superuser */ + if (!superuser()) + aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_LANGUAGE, + NameStr(languageStruct->lanname)); + } + + /* get the handler function's OID */ + laninline = languageStruct->laninline; + if (!OidIsValid(laninline)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("language \"%s\" does not support inline code execution", + NameStr(languageStruct->lanname)))); + + ReleaseSysCache(languageTuple); + + /* execute the inline handler */ + OidFunctionCall1(laninline, PointerGetDatum(codeblock)); +} + +/* + * Execute CALL statement + * + * Inside a top-level CALL statement, transaction-terminating commands such as + * COMMIT or a PL-specific equivalent are allowed. The terminology in the SQL + * standard is that CALL establishes a non-atomic execution context. Most + * other commands establish an atomic execution context, in which transaction + * control actions are not allowed. If there are nested executions of CALL, + * we want to track the execution context recursively, so that the nested + * CALLs can also do transaction control. Note, however, that for example in + * CALL -> SELECT -> CALL, the second call cannot do transaction control, + * because the SELECT in between establishes an atomic execution context. 
+ * + * So when ExecuteCallStmt() is called from the top level, we pass in atomic = + * false (recall that that means transactions = yes). We then create a + * CallContext node with content atomic = false, which is passed in the + * fcinfo->context field to the procedure invocation. The language + * implementation should then take appropriate measures to allow or prevent + * transaction commands based on that information, e.g., call + * SPI_connect_ext(SPI_OPT_NONATOMIC). The language should also pass on the + * atomic flag to any nested invocations to CALL. + * + * The expression data structures and execution context that we create + * within this function are children of the portalContext of the Portal + * that the CALL utility statement runs in. Therefore, any pass-by-ref + * values that we're passing to the procedure will survive transaction + * commits that might occur inside the procedure. + */ +void +ExecuteCallStmt(CallStmt *stmt, ParamListInfo params, bool atomic, DestReceiver *dest) +{ + LOCAL_FCINFO(fcinfo, FUNC_MAX_ARGS); + ListCell *lc; + FuncExpr *fexpr; + int nargs; + int i; + AclResult aclresult; + FmgrInfo flinfo; + CallContext *callcontext; + EState *estate; + ExprContext *econtext; + HeapTuple tp; + PgStat_FunctionCallUsage fcusage; + Datum retval; + + fexpr = stmt->funcexpr; + Assert(fexpr); + Assert(IsA(fexpr, FuncExpr)); + + aclresult = pg_proc_aclcheck(fexpr->funcid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_PROCEDURE, get_func_name(fexpr->funcid)); + + /* Prep the context object we'll pass to the procedure */ + callcontext = makeNode(CallContext); + callcontext->atomic = atomic; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(fexpr->funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", fexpr->funcid); + + /* + * If proconfig is set we can't allow transaction commands because of the + * way the GUC stacking works: The transaction boundary would have 
to pop + * the proconfig setting off the stack. That restriction could be lifted + * by redesigning the GUC nesting mechanism a bit. + */ + if (!heap_attisnull(tp, Anum_pg_proc_proconfig, NULL)) + callcontext->atomic = true; + + /* + * In security definer procedures, we can't allow transaction commands. + * StartTransaction() insists that the security context stack is empty, + * and AbortTransaction() resets the security context. This could be + * reorganized, but right now it doesn't work. + */ + if (((Form_pg_proc) GETSTRUCT(tp))->prosecdef) + callcontext->atomic = true; + + ReleaseSysCache(tp); + + /* safety check; see ExecInitFunc() */ + nargs = list_length(fexpr->args); + if (nargs > FUNC_MAX_ARGS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg_plural("cannot pass more than %d argument to a procedure", + "cannot pass more than %d arguments to a procedure", + FUNC_MAX_ARGS, + FUNC_MAX_ARGS))); + + /* Initialize function call structure */ + InvokeFunctionExecuteHook(fexpr->funcid); + fmgr_info(fexpr->funcid, &flinfo); + fmgr_info_set_expr((Node *) fexpr, &flinfo); + InitFunctionCallInfoData(*fcinfo, &flinfo, nargs, fexpr->inputcollid, + (Node *) callcontext, NULL); + + /* + * Evaluate procedure arguments inside a suitable execution context. Note + * we can't free this context till the procedure returns. + */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + econtext = CreateExprContext(estate); + + /* + * If we're called in non-atomic context, we also have to ensure that the + * argument expressions run with an up-to-date snapshot. Our caller will + * have provided a current snapshot in atomic contexts, but not in + * non-atomic contexts, because the possibility of a COMMIT/ROLLBACK + * destroying the snapshot makes higher-level management too complicated. 
+ */ + if (!atomic) + PushActiveSnapshot(GetTransactionSnapshot()); + + i = 0; + foreach(lc, fexpr->args) + { + ExprState *exprstate; + Datum val; + bool isnull; + + exprstate = ExecPrepareExpr(lfirst(lc), estate); + + val = ExecEvalExprSwitchContext(exprstate, econtext, &isnull); + + fcinfo->args[i].value = val; + fcinfo->args[i].isnull = isnull; + + i++; + } + + /* Get rid of temporary snapshot for arguments, if we made one */ + if (!atomic) + PopActiveSnapshot(); + + /* Here we actually call the procedure */ + pgstat_init_function_usage(fcinfo, &fcusage); + retval = FunctionCallInvoke(fcinfo); + pgstat_end_function_usage(&fcusage, true); + + /* Handle the procedure's outputs */ + if (fexpr->funcresulttype == VOIDOID) + { + /* do nothing */ + } + else if (fexpr->funcresulttype == RECORDOID) + { + /* send tuple to client */ + HeapTupleHeader td; + Oid tupType; + int32 tupTypmod; + TupleDesc retdesc; + HeapTupleData rettupdata; + TupOutputState *tstate; + TupleTableSlot *slot; + + if (fcinfo->isnull) + elog(ERROR, "procedure returned null record"); + + /* + * Ensure there's an active snapshot whilst we execute whatever's + * involved here. Note that this is *not* sufficient to make the + * world safe for TOAST pointers to be included in the returned data: + * the referenced data could have gone away while we didn't hold a + * snapshot. Hence, it's incumbent on PLs that can do COMMIT/ROLLBACK + * to not return TOAST pointers, unless those pointers were fetched + * after the last COMMIT/ROLLBACK in the procedure. + * + * XXX that is a really nasty, hard-to-test requirement. Is there a + * way to remove it? 
+ */ + EnsurePortalSnapshotExists(); + + td = DatumGetHeapTupleHeader(retval); + tupType = HeapTupleHeaderGetTypeId(td); + tupTypmod = HeapTupleHeaderGetTypMod(td); + retdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + + tstate = begin_tup_output_tupdesc(dest, retdesc, + &TTSOpsHeapTuple); + + rettupdata.t_len = HeapTupleHeaderGetDatumLength(td); + ItemPointerSetInvalid(&(rettupdata.t_self)); + rettupdata.t_tableOid = InvalidOid; + rettupdata.t_data = td; + + slot = ExecStoreHeapTuple(&rettupdata, tstate->slot, false); + tstate->dest->receiveSlot(slot, tstate->dest); + + end_tup_output(tstate); + + ReleaseTupleDesc(retdesc); + } + else + elog(ERROR, "unexpected result type for procedure: %u", + fexpr->funcresulttype); + + FreeExecutorState(estate); +} + +/* + * Construct the tuple descriptor for a CALL statement return + */ +TupleDesc +CallStmtResultDesc(CallStmt *stmt) +{ + FuncExpr *fexpr; + HeapTuple tuple; + TupleDesc tupdesc; + + fexpr = stmt->funcexpr; + + tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(fexpr->funcid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for procedure %u", fexpr->funcid); + + tupdesc = build_function_result_tupdesc_t(tuple); + + ReleaseSysCache(tuple); + + return tupdesc; +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c new file mode 100644 index 0000000..d3f7b09 --- /dev/null +++ b/src/backend/commands/indexcmds.c @@ -0,0 +1,4355 @@ +/*------------------------------------------------------------------------- + * + * indexcmds.c + * POSTGRES define and remove index code. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/indexcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/amapi.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/pg_am.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_type.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/progress.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_func.h" +#include "parser/parse_oper.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/sinvaladt.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/pg_rusage.h" +#include "utils/regproc.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* non-export function prototypes */ +static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts); +static void CheckPredicate(Expr 
*predicate); +static void ComputeIndexAttrs(IndexInfo *indexInfo, + Oid *typeOidP, + Oid *collationOidP, + Oid *classOidP, + int16 *colOptionP, + List *attList, + List *exclusionOpNames, + Oid relId, + const char *accessMethodName, Oid accessMethodId, + bool amcanorder, + bool isconstraint, + Oid ddl_userid, + int ddl_sec_context, + int *ddl_save_nestlevel); +static char *ChooseIndexName(const char *tabname, Oid namespaceId, + List *colnames, List *exclusionOpNames, + bool primary, bool isconstraint); +static char *ChooseIndexNameAddition(List *colnames); +static List *ChooseIndexColumnNames(List *indexElems); +static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params, + bool isTopLevel); +static void RangeVarCallbackForReindexIndex(const RangeVar *relation, + Oid relId, Oid oldRelId, void *arg); +static Oid ReindexTable(RangeVar *relation, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleTables(const char *objectName, + ReindexObjectType objectKind, ReindexParams *params); +static void reindex_error_callback(void *args); +static void ReindexPartitions(Oid relid, ReindexParams *params, + bool isTopLevel); +static void ReindexMultipleInternal(List *relids, + ReindexParams *params); +static bool ReindexRelationConcurrently(Oid relationOid, + ReindexParams *params); +static void update_relispartition(Oid relationId, bool newval); +static inline void set_indexsafe_procflags(void); + +/* + * callback argument type for RangeVarCallbackForReindexIndex() + */ +struct ReindexIndexCallbackState +{ + ReindexParams params; /* options from statement */ + Oid locked_table_oid; /* tracks previously locked table */ +}; + +/* + * callback arguments for reindex_error_callback() + */ +typedef struct ReindexErrorInfo +{ + char *relname; + char *relnamespace; + char relkind; +} ReindexErrorInfo; + +/* + * CheckIndexCompatible + * Determine whether an existing index definition is compatible with a + * prospective index definition, such that the 
existing index storage
 * could become the storage of the new index, avoiding a rebuild.
 *
 * 'oldId': the OID of the existing index
 * 'accessMethodName': name of the AM to use.
 * 'attributeList': a list of IndexElem specifying columns and expressions
 *		to index on.
 * 'exclusionOpNames': list of names of exclusion-constraint operators,
 *		or NIL if not an exclusion constraint.
 *
 * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
 * any indexes that depended on a changing column from their pg_get_indexdef
 * or pg_get_constraintdef definitions.  We omit some of the sanity checks of
 * DefineIndex.  We assume that the old and new indexes have the same number
 * of columns and that if one has an expression column or predicate, both do.
 * Errors arising from the attribute list still apply.
 *
 * Most column type changes that can skip a table rewrite do not invalidate
 * indexes.  We acknowledge this when all operator classes, collations and
 * exclusion operators match.  Though we could further permit intra-opfamily
 * changes for btree and hash indexes, that adds subtle complexity with no
 * concrete benefit for core types.  Note, that INCLUDE columns aren't
 * checked by this function, for them it's enough that table rewrite is
 * skipped.
 *
 * When a comparison or exclusion operator has a polymorphic input type, the
 * actual input types must also match.  This defends against the possibility
 * that operators could vary behavior in response to get_fn_expr_argtype().
 * At present, this hazard is theoretical: check_exclusion_constraint() and
 * all core index access methods decline to set fn_expr for such calls.
 *
 * We do not yet implement a test to verify compatibility of expression
 * columns or predicates, so assume any such index is incompatible.
 */
bool
CheckIndexCompatible(Oid oldId,
					 const char *accessMethodName,
					 List *attributeList,
					 List *exclusionOpNames)
{
	bool		isconstraint;
	Oid		   *typeObjectId;
	Oid		   *collationObjectId;
	Oid		   *classObjectId;
	Oid			accessMethodId;
	Oid			relationId;
	HeapTuple	tuple;
	Form_pg_index indexForm;
	Form_pg_am	accessMethodForm;
	IndexAmRoutine *amRoutine;
	bool		amcanorder;
	int16	   *coloptions;
	IndexInfo  *indexInfo;
	int			numberOfAttributes;
	int			old_natts;
	bool		isnull;
	bool		ret = true;		/* stays true only if every check passes */
	oidvector  *old_indclass;
	oidvector  *old_indcollation;
	Relation	irel;
	int			i;
	Datum		d;

	/* Caller should already have the relation locked in some way. */
	relationId = IndexGetRelation(oldId, false);

	/*
	 * We can pretend isconstraint = false unconditionally.  It only serves to
	 * decide the text of an error message that should never happen for us.
	 */
	isconstraint = false;

	numberOfAttributes = list_length(attributeList);
	Assert(numberOfAttributes > 0);
	Assert(numberOfAttributes <= INDEX_MAX_KEYS);

	/* look up the access method */
	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("access method \"%s\" does not exist",
						accessMethodName)));
	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
	accessMethodId = accessMethodForm->oid;
	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
	ReleaseSysCache(tuple);

	amcanorder = amRoutine->amcanorder;

	/*
	 * Compute the operator classes, collations, and exclusion operators for
	 * the new index, so we can test whether it's compatible with the existing
	 * one.  Note that ComputeIndexAttrs might fail here, but that's OK:
	 * DefineIndex would have failed later.  Our attributeList contains only
	 * key attributes, thus we're filling ii_NumIndexAttrs and
	 * ii_NumIndexKeyAttrs with same value.
	 */
	indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
							  accessMethodId, NIL, NIL, false, false, false, false);
	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
	ComputeIndexAttrs(indexInfo,
					  typeObjectId, collationObjectId, classObjectId,
					  coloptions, attributeList,
					  exclusionOpNames, relationId,
					  accessMethodName, accessMethodId,
					  amcanorder, isconstraint, InvalidOid, 0, NULL);


	/* Get the soon-obsolete pg_index tuple. */
	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", oldId);
	indexForm = (Form_pg_index) GETSTRUCT(tuple);

	/*
	 * We don't assess expressions or predicates; assume incompatibility.
	 * Also, if the index is invalid for any reason, treat it as incompatible.
	 */
	if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
		  heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
		  indexForm->indisvalid))
	{
		ReleaseSysCache(tuple);
		return false;
	}

	/* Any change in operator class or collation breaks compatibility. */
	old_natts = indexForm->indnkeyatts;
	Assert(old_natts == numberOfAttributes);

	/* indcollation/indclass are fixed-width columns, so never NULL */
	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
	Assert(!isnull);
	old_indcollation = (oidvector *) DatumGetPointer(d);

	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
	Assert(!isnull);
	old_indclass = (oidvector *) DatumGetPointer(d);

	ret = (memcmp(old_indclass->values, classObjectId,
				  old_natts * sizeof(Oid)) == 0 &&
		   memcmp(old_indcollation->values, collationObjectId,
				  old_natts * sizeof(Oid)) == 0);

	ReleaseSysCache(tuple);

	if (!ret)
		return false;

	/* For polymorphic opcintype, column type changes break compatibility. */
	irel = index_open(oldId, AccessShareLock);	/* caller probably has a lock */
	for (i = 0; i < old_natts; i++)
	{
		if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
			TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
		{
			ret = false;
			break;
		}
	}

	/* Any change in opclass options break compatibility. */
	if (ret)
	{
		Datum	   *opclassOptions = RelationGetIndexRawAttOptions(irel);

		ret = CompareOpclassOptions(opclassOptions,
									indexInfo->ii_OpclassOptions, old_natts);

		if (opclassOptions)
			pfree(opclassOptions);
	}

	/* Any change in exclusion operator selections breaks compatibility. */
	if (ret && indexInfo->ii_ExclusionOps != NULL)
	{
		Oid		   *old_operators,
				   *old_procs;
		uint16	   *old_strats;

		RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
		ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
					 old_natts * sizeof(Oid)) == 0;

		/* Require an exact input type match for polymorphic operators. */
		if (ret)
		{
			for (i = 0; i < old_natts && ret; i++)
			{
				Oid			left,
							right;

				op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
				if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
					TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
				{
					ret = false;
					break;
				}
			}
		}
	}

	/* NoLock: keep the AccessShareLock we took until transaction end */
	index_close(irel, NoLock);
	return ret;
}

/*
 * CompareOpclassOptions
 *
 * Compare per-column opclass options which are represented by arrays of text[]
 * datums.  Both elements of arrays and array themselves can be NULL.
 * Returns true iff the two option sets are equivalent column-by-column.
 */
static bool
CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
{
	int			i;

	/* Both arrays absent: trivially equal. */
	if (!opts1 && !opts2)
		return true;

	for (i = 0; i < natts; i++)
	{
		/* A missing array is treated as all-NULL elements. */
		Datum		opt1 = opts1 ? opts1[i] : (Datum) 0;
		Datum		opt2 = opts2 ? opts2[i] : (Datum) 0;

		if (opt1 == (Datum) 0)
		{
			if (opt2 == (Datum) 0)
				continue;
			else
				return false;
		}
		else if (opt2 == (Datum) 0)
			return false;

		/* Compare non-NULL text[] datums. */
		if (!DatumGetBool(DirectFunctionCall2(array_eq, opt1, opt2)))
			return false;
	}

	return true;
}

/*
 * WaitForOlderSnapshots
 *
 * Wait for transactions that might have an older snapshot than the given xmin
 * limit, because it might not contain tuples deleted just before it has
 * been taken.  Obtain a list of VXIDs of such transactions, and wait for them
 * individually.  This is used when building an index concurrently.
 *
 * We can exclude any running transactions that have xmin > the xmin given;
 * their oldest snapshot must be newer than our xmin limit.
 * We can also exclude any transactions that have xmin = zero, since they
 * evidently have no live snapshot at all (and any one they might be in
 * process of taking is certainly newer than ours).  Transactions in other
 * DBs can be ignored too, since they'll never even be able to see the
 * index being worked on.
 *
 * We can also exclude autovacuum processes and processes running manual
 * lazy VACUUMs, because they won't be fazed by missing index entries
 * either.  (Manual ANALYZEs, however, can't be excluded because they
 * might be within transactions that are going to do arbitrary operations
 * later.)  Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY
 * on indexes that are neither expressional nor partial are also safe to
 * ignore, since we know that those processes won't examine any data
 * outside the table they're indexing.
 *
 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
 * check for that.
 *
 * If a process goes idle-in-transaction with xmin zero, we do not need to
 * wait for it anymore, per the above argument.  We do not have the
 * infrastructure right now to stop waiting if that happens, but we can at
 * least avoid the folly of waiting when it is idle at the time we would
 * begin to wait.  We do this by repeatedly rechecking the output of
 * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
 * doesn't show up in the output, we know we can forget about it.
 */
void
WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
{
	int			n_old_snapshots;
	int			i;
	VirtualTransactionId *old_snapshots;

	/*
	 * Collect the initial set of VXIDs we may have to wait for; flags
	 * exclude autovacuum, lazy VACUUM, and safe-IC backends per the header
	 * comment above.
	 */
	old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
										  | PROC_IN_SAFE_IC,
										  &n_old_snapshots);
	if (progress)
		pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);

	for (i = 0; i < n_old_snapshots; i++)
	{
		if (!VirtualTransactionIdIsValid(old_snapshots[i]))
			continue;			/* found uninteresting in previous cycle */

		if (i > 0)
		{
			/* see if anything's changed ... */
			VirtualTransactionId *newer_snapshots;
			int			n_newer_snapshots;
			int			j;
			int			k;

			newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
													true, false,
													PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
													| PROC_IN_SAFE_IC,
													&n_newer_snapshots);

			/*
			 * Invalidate any not-yet-waited-for entry (j >= i) that no
			 * longer appears in the fresh list: that backend has since
			 * dropped its old snapshot, so we need not wait for it.
			 */
			for (j = i; j < n_old_snapshots; j++)
			{
				if (!VirtualTransactionIdIsValid(old_snapshots[j]))
					continue;	/* found uninteresting in previous cycle */
				for (k = 0; k < n_newer_snapshots; k++)
				{
					if (VirtualTransactionIdEquals(old_snapshots[j],
												   newer_snapshots[k]))
						break;
				}
				if (k >= n_newer_snapshots) /* not there anymore */
					SetInvalidVirtualTransactionId(old_snapshots[j]);
			}
			pfree(newer_snapshots);
		}

		if (VirtualTransactionIdIsValid(old_snapshots[i]))
		{
			/* If requested, publish who we're going to wait for. */
			if (progress)
			{
				PGPROC	   *holder = BackendIdGetProc(old_snapshots[i].backendId);

				if (holder)
					pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
												 holder->pid);
			}
			/* Block until that virtual transaction ends. */
			VirtualXactLock(old_snapshots[i], true);
		}

		if (progress)
			pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
	}
}


/*
 * DefineIndex
 *		Creates a new index.
 *
 * This function manages the current userid according to the needs of pg_dump.
 * Recreating old-database catalog entries in new-database is fine, regardless
 * of which users would have permission to recreate those entries now.  That's
 * just preservation of state.  Running opaque expressions, like calling a
 * function named in a catalog entry or evaluating a pg_node_tree in a catalog
 * entry, as anyone other than the object owner, is not fine.  To adhere to
 * those principles and to remain fail-safe, use the table owner userid for
 * most ACL checks.  Use the original userid for ACL checks reached without
 * traversing opaque expressions.  (pg_dump can predict such ACL checks from
 * catalogs.)  Overall, this is a mess.
Future DDL development should + * consider offering one DDL command for catalog setup and a separate DDL + * command for steps that run opaque expressions. + * + * 'relationId': the OID of the heap relation on which the index is to be + * created + * 'stmt': IndexStmt describing the properties of the new index. + * 'indexRelationId': normally InvalidOid, but during bootstrap can be + * nonzero to specify a preselected OID for the index. + * 'parentIndexId': the OID of the parent index; InvalidOid if not the child + * of a partitioned index. + * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not + * the child of a constraint (only used when recursing) + * 'is_alter_table': this is due to an ALTER rather than a CREATE operation. + * 'check_rights': check for CREATE rights in namespace and tablespace. (This + * should be true except when ALTER is deleting/recreating an index.) + * 'check_not_in_use': check for table not already in use in current session. + * This should be true unless caller is holding the table open, in which + * case the caller had better have checked it earlier. + * 'skip_build': make the catalog entries but don't create the index files + * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints. + * + * Returns the object address of the created index. 
+ */ +ObjectAddress +DefineIndex(Oid relationId, + IndexStmt *stmt, + Oid indexRelationId, + Oid parentIndexId, + Oid parentConstraintId, + bool is_alter_table, + bool check_rights, + bool check_not_in_use, + bool skip_build, + bool quiet) +{ + bool concurrent; + char *indexRelationName; + char *accessMethodName; + Oid *typeObjectId; + Oid *collationObjectId; + Oid *classObjectId; + Oid accessMethodId; + Oid namespaceId; + Oid tablespaceId; + Oid createdConstraintId = InvalidOid; + List *indexColNames; + List *allIndexParams; + Relation rel; + HeapTuple tuple; + Form_pg_am accessMethodForm; + IndexAmRoutine *amRoutine; + bool amcanorder; + amoptions_function amoptions; + bool partitioned; + bool safe_index; + Datum reloptions; + int16 *coloptions; + IndexInfo *indexInfo; + bits16 flags; + bits16 constr_flags; + int numberOfAttributes; + int numberOfKeyAttributes; + TransactionId limitXmin; + ObjectAddress address; + LockRelId heaprelid; + LOCKTAG heaplocktag; + LOCKMODE lockmode; + Snapshot snapshot; + Oid root_save_userid; + int root_save_sec_context; + int root_save_nestlevel; + int i; + + root_save_nestlevel = NewGUCNestLevel(); + + /* + * Some callers need us to run with an empty default_tablespace; this is a + * necessary hack to be able to reproduce catalog state accurately when + * recreating indexes after table-rewriting ALTER TABLE. + */ + if (stmt->reset_default_tblspc) + (void) set_config_option("default_tablespace", "", + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Force non-concurrent build on temporary relations, even if CONCURRENTLY + * was requested. Other backends can't access a temporary relation, so + * there's no harm in grabbing a stronger lock, and a non-concurrent DROP + * is more efficient. Do this before any use of the concurrent option is + * done. 
+ */ + if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP) + concurrent = true; + else + concurrent = false; + + /* + * Start progress report. If we're building a partition, this was already + * done. + */ + if (!OidIsValid(parentIndexId)) + { + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + relationId); + pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND, + concurrent ? + PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY : + PROGRESS_CREATEIDX_COMMAND_CREATE); + } + + /* + * No index OID to report yet + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID, + InvalidOid); + + /* + * count key attributes in index + */ + numberOfKeyAttributes = list_length(stmt->indexParams); + + /* + * Calculate the new list of index columns including both key columns and + * INCLUDE columns. Later we can determine which of these are key + * columns, and which are just part of the INCLUDE list by checking the + * list position. A list item in a position less than ii_NumIndexKeyAttrs + * is part of the key columns, and anything equal to and over is part of + * the INCLUDE columns. + */ + allIndexParams = list_concat_copy(stmt->indexParams, + stmt->indexIncludingParams); + numberOfAttributes = list_length(allIndexParams); + + if (numberOfKeyAttributes <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("must specify at least one column"))); + if (numberOfAttributes > INDEX_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot use more than %d columns in an index", + INDEX_MAX_KEYS))); + + /* + * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard + * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE + * (but not VACUUM). 
+ * + * NB: Caller is responsible for making sure that relationId refers to the + * relation on which the index should be built; except in bootstrap mode, + * this will typically require the caller to have already locked the + * relation. To avoid lock upgrade hazards, that lock should be at least + * as strong as the one we take here. + * + * NB: If the lock strength here ever changes, code that is run by + * parallel workers under the control of certain particular ambuild + * functions will need to be updated, too. + */ + lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock; + rel = table_open(relationId, lockmode); + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations. We + * already arranged to make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&root_save_userid, &root_save_sec_context); + SetUserIdAndSecContext(rel->rd_rel->relowner, + root_save_sec_context | SECURITY_RESTRICTED_OPERATION); + + namespaceId = RelationGetNamespace(rel); + + /* Ensure that it makes sense to index this kind of relation */ + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: + /* OK */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create index on relation \"%s\"", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + break; + } + + /* + * Establish behavior for partitioned tables, and verify sanity of + * parameters. + * + * We do not build an actual index in this case; we only create a few + * catalog entries. The actual indexes are built by recursing for each + * partition. + */ + partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + if (partitioned) + { + /* + * Note: we check 'stmt->concurrent' rather than 'concurrent', so that + * the error is thrown also for temporary tables. 
Seems better to be + * consistent, even though we could do it on temporary table because + * we're not actually doing it concurrently. + */ + if (stmt->concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create index on partitioned table \"%s\" concurrently", + RelationGetRelationName(rel)))); + if (stmt->excludeOpNames) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create exclusion constraints on partitioned table \"%s\"", + RelationGetRelationName(rel)))); + } + + /* + * Don't try to CREATE INDEX on temp tables of other backends. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create indexes on temporary tables of other sessions"))); + + /* + * Unless our caller vouches for having checked this already, insist that + * the table not be in use by our own session, either. Otherwise we might + * fail to make entries in the new index (for instance, if an INSERT or + * UPDATE is in progress and has already made its list of target indexes). + */ + if (check_not_in_use) + CheckTableNotInUse(rel, "CREATE INDEX"); + + /* + * Verify we (still) have CREATE rights in the rel's namespace. + * (Presumably we did when the rel was created, but maybe not anymore.) + * Skip check if caller doesn't want it. Also skip check if + * bootstrapping, since permissions machinery may not be working yet. + */ + if (check_rights && !IsBootstrapProcessingMode()) + { + AclResult aclresult; + + aclresult = pg_namespace_aclcheck(namespaceId, root_save_userid, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + + /* + * Select tablespace to use. If not specified, use default tablespace + * (which may in turn default to database's default). 
+ */ + if (stmt->tableSpace) + { + tablespaceId = get_tablespace_oid(stmt->tableSpace, false); + if (partitioned && tablespaceId == MyDatabaseTableSpace) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify default tablespace for partitioned relations"))); + } + else + { + tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence, + partitioned); + /* note InvalidOid is OK in this case */ + } + + /* Check tablespace permissions */ + if (check_rights && + OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace) + { + AclResult aclresult; + + aclresult = pg_tablespace_aclcheck(tablespaceId, root_save_userid, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + get_tablespace_name(tablespaceId)); + } + + /* + * Force shared indexes into the pg_global tablespace. This is a bit of a + * hack but seems simpler than marking them in the BKI commands. On the + * other hand, if it's not shared, don't allow it to be placed there. + */ + if (rel->rd_rel->relisshared) + tablespaceId = GLOBALTABLESPACE_OID; + else if (tablespaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* + * Choose the index column names. 
+ */ + indexColNames = ChooseIndexColumnNames(allIndexParams); + + /* + * Select name for index if caller didn't specify + */ + indexRelationName = stmt->idxname; + if (indexRelationName == NULL) + indexRelationName = ChooseIndexName(RelationGetRelationName(rel), + namespaceId, + indexColNames, + stmt->excludeOpNames, + stmt->primary, + stmt->isconstraint); + + /* + * look up the access method, verify it can handle the requested features + */ + accessMethodName = stmt->accessMethod; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + if (!HeapTupleIsValid(tuple)) + { + /* + * Hack to provide more-or-less-transparent updating of old RTREE + * indexes to GiST: if RTREE is requested and not found, use GIST. + */ + if (strcmp(accessMethodName, "rtree") == 0) + { + ereport(NOTICE, + (errmsg("substituting access method \"gist\" for obsolete method \"rtree\""))); + accessMethodName = "gist"; + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName)); + } + + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + accessMethodName))); + } + accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); + accessMethodId = accessMethodForm->oid; + amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); + + pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID, + accessMethodId); + + if (stmt->unique && !amRoutine->amcanunique) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support unique indexes", + accessMethodName))); + if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support included columns", + accessMethodName))); + if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support multicolumn 
indexes", + accessMethodName))); + if (stmt->excludeOpNames && amRoutine->amgettuple == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support exclusion constraints", + accessMethodName))); + + amcanorder = amRoutine->amcanorder; + amoptions = amRoutine->amoptions; + + pfree(amRoutine); + ReleaseSysCache(tuple); + + /* + * Validate predicate, if given + */ + if (stmt->whereClause) + CheckPredicate((Expr *) stmt->whereClause); + + /* + * Parse AM-specific options, convert to text array form, validate. + */ + reloptions = transformRelOptions((Datum) 0, stmt->options, + NULL, NULL, false, false); + + (void) index_reloptions(amoptions, reloptions, true); + + /* + * Prepare arguments for index_create, primarily an IndexInfo structure. + * Note that predicates must be in implicit-AND format. In a concurrent + * build, mark it not-ready-for-inserts. + */ + indexInfo = makeIndexInfo(numberOfAttributes, + numberOfKeyAttributes, + accessMethodId, + NIL, /* expressions, NIL for now */ + make_ands_implicit((Expr *) stmt->whereClause), + stmt->unique, + stmt->nulls_not_distinct, + !concurrent, + concurrent); + + typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); + coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16)); + ComputeIndexAttrs(indexInfo, + typeObjectId, collationObjectId, classObjectId, + coloptions, allIndexParams, + stmt->excludeOpNames, relationId, + accessMethodName, accessMethodId, + amcanorder, stmt->isconstraint, root_save_userid, + root_save_sec_context, &root_save_nestlevel); + + /* + * Extra checks when creating a PRIMARY KEY index. 
+ */ + if (stmt->primary) + index_check_primary_key(rel, indexInfo, is_alter_table, stmt); + + /* + * If this table is partitioned and we're creating a unique index or a + * primary key, make sure that the partition key is a subset of the + * index's columns. Otherwise it would be possible to violate uniqueness + * by putting values that ought to be unique in different partitions. + * + * We could lift this limitation if we had global indexes, but those have + * their own problems, so this is a useful feature combination. + */ + if (partitioned && (stmt->unique || stmt->primary)) + { + PartitionKey key = RelationGetPartitionKey(rel); + const char *constraint_type; + int i; + + if (stmt->primary) + constraint_type = "PRIMARY KEY"; + else if (stmt->unique) + constraint_type = "UNIQUE"; + else if (stmt->excludeOpNames != NIL) + constraint_type = "EXCLUDE"; + else + { + elog(ERROR, "unknown constraint type"); + constraint_type = NULL; /* keep compiler quiet */ + } + + /* + * Verify that all the columns in the partition key appear in the + * unique key definition, with the same notion of equality. + */ + for (i = 0; i < key->partnatts; i++) + { + bool found = false; + int eq_strategy; + Oid ptkey_eqop; + int j; + + /* + * Identify the equality operator associated with this partkey + * column. For list and range partitioning, partkeys use btree + * operator classes; hash partitioning uses hash operator classes. + * (Keep this in sync with ComputePartitionAttrs!) 
+ */ + if (key->strategy == PARTITION_STRATEGY_HASH) + eq_strategy = HTEqualStrategyNumber; + else + eq_strategy = BTEqualStrategyNumber; + + ptkey_eqop = get_opfamily_member(key->partopfamily[i], + key->partopcintype[i], + key->partopcintype[i], + eq_strategy); + if (!OidIsValid(ptkey_eqop)) + elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u", + eq_strategy, key->partopcintype[i], key->partopcintype[i], + key->partopfamily[i]); + + /* + * We'll need to be able to identify the equality operators + * associated with index columns, too. We know what to do with + * btree opclasses; if there are ever any other index types that + * support unique indexes, this logic will need extension. + */ + if (accessMethodId == BTREE_AM_OID) + eq_strategy = BTEqualStrategyNumber; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot match partition key to an index using access method \"%s\"", + accessMethodName))); + + /* + * It may be possible to support UNIQUE constraints when partition + * keys are expressions, but is it worth it? Give up for now. + */ + if (key->partattrs[i] == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported %s constraint with partition key definition", + constraint_type), + errdetail("%s constraints cannot be used when partition keys include expressions.", + constraint_type))); + + /* Search the index column(s) for a match */ + for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++) + { + if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j]) + { + /* Matched the column, now what about the equality op? 
*/ + Oid idx_opfamily; + Oid idx_opcintype; + + if (get_opclass_opfamily_and_input_type(classObjectId[j], + &idx_opfamily, + &idx_opcintype)) + { + Oid idx_eqop; + + idx_eqop = get_opfamily_member(idx_opfamily, + idx_opcintype, + idx_opcintype, + eq_strategy); + if (ptkey_eqop == idx_eqop) + { + found = true; + break; + } + } + } + } + + if (!found) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(rel), + key->partattrs[i] - 1); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraint on partitioned table must include all partitioning columns"), + errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.", + constraint_type, RelationGetRelationName(rel), + NameStr(att->attname)))); + } + } + } + + + /* + * We disallow indexes on system columns. They would not necessarily get + * updated correctly, and they don't seem useful anyway. + */ + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i]; + + if (attno < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index creation on system columns is not supported"))); + } + + /* + * Also check for system columns used in expressions or predicates. + */ + if (indexInfo->ii_Expressions || indexInfo->ii_Predicate) + { + Bitmapset *indexattrs = NULL; + + pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); + pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); + + for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++) + { + if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber, + indexattrs)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index creation on system columns is not supported"))); + } + } + + /* Is index safe for others to ignore? 
See set_indexsafe_procflags() */ + safe_index = indexInfo->ii_Expressions == NIL && + indexInfo->ii_Predicate == NIL; + + /* + * Report index creation if appropriate (delay this till after most of the + * error checks) + */ + if (stmt->isconstraint && !quiet) + { + const char *constraint_type; + + if (stmt->primary) + constraint_type = "PRIMARY KEY"; + else if (stmt->unique) + constraint_type = "UNIQUE"; + else if (stmt->excludeOpNames != NIL) + constraint_type = "EXCLUDE"; + else + { + elog(ERROR, "unknown constraint type"); + constraint_type = NULL; /* keep compiler quiet */ + } + + ereport(DEBUG1, + (errmsg_internal("%s %s will create implicit index \"%s\" for table \"%s\"", + is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /", + constraint_type, + indexRelationName, RelationGetRelationName(rel)))); + } + + /* + * A valid stmt->oldNode implies that we already have a built form of the + * index. The caller should also decline any index build. + */ + Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent)); + + /* + * Make the catalog entries for the index, including constraints. This + * step also actually builds the index, except if caller requested not to + * or in concurrent mode, in which case it'll be done later, or doing a + * partitioned index (because those don't have storage). + */ + flags = constr_flags = 0; + if (stmt->isconstraint) + flags |= INDEX_CREATE_ADD_CONSTRAINT; + if (skip_build || concurrent || partitioned) + flags |= INDEX_CREATE_SKIP_BUILD; + if (stmt->if_not_exists) + flags |= INDEX_CREATE_IF_NOT_EXISTS; + if (concurrent) + flags |= INDEX_CREATE_CONCURRENT; + if (partitioned) + flags |= INDEX_CREATE_PARTITIONED; + if (stmt->primary) + flags |= INDEX_CREATE_IS_PRIMARY; + + /* + * If the table is partitioned, and recursion was declined but partitions + * exist, mark the index as invalid. 
+ */ + if (partitioned && stmt->relation && !stmt->relation->inh) + { + PartitionDesc pd = RelationGetPartitionDesc(rel, true); + + if (pd->nparts != 0) + flags |= INDEX_CREATE_INVALID; + } + + if (stmt->deferrable) + constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; + if (stmt->initdeferred) + constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED; + + indexRelationId = + index_create(rel, indexRelationName, indexRelationId, parentIndexId, + parentConstraintId, + stmt->oldNode, indexInfo, indexColNames, + accessMethodId, tablespaceId, + collationObjectId, classObjectId, + coloptions, reloptions, + flags, constr_flags, + allowSystemTableMods, !check_rights, + &createdConstraintId); + + ObjectAddressSet(address, RelationRelationId, indexRelationId); + + if (!OidIsValid(indexRelationId)) + { + /* + * Roll back any GUC changes executed by index functions. Also revert + * to original default_tablespace if we changed it above. + */ + AtEOXact_GUC(false, root_save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + + table_close(rel, NoLock); + + /* If this is the top-level index, we're done */ + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + + return address; + } + + /* + * Roll back any GUC changes executed by index functions, and keep + * subsequent changes local to this command. This is essential if some + * index function changed a behavior-affecting GUC, e.g. search_path. + */ + AtEOXact_GUC(false, root_save_nestlevel); + root_save_nestlevel = NewGUCNestLevel(); + + /* Add any requested comment */ + if (stmt->idxcomment != NULL) + CreateComments(indexRelationId, RelationRelationId, 0, + stmt->idxcomment); + + if (partitioned) + { + PartitionDesc partdesc; + + /* + * Unless caller specified to skip this step (via ONLY), process each + * partition to make sure they all contain a corresponding index. + * + * If we're called internally (no stmt->relation), recurse always. 
+ */ + partdesc = RelationGetPartitionDesc(rel, true); + if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0) + { + int nparts = partdesc->nparts; + Oid *part_oids = palloc(sizeof(Oid) * nparts); + bool invalidate_parent = false; + Relation parentIndex; + TupleDesc parentDesc; + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL, + nparts); + + /* Make a local copy of partdesc->oids[], just for safety */ + memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + + /* + * We'll need an IndexInfo describing the parent index. The one + * built above is almost good enough, but not quite, because (for + * example) its predicate expression if any hasn't been through + * expression preprocessing. The most reliable way to get an + * IndexInfo that will match those for child indexes is to build + * it the same way, using BuildIndexInfo(). + */ + parentIndex = index_open(indexRelationId, lockmode); + indexInfo = BuildIndexInfo(parentIndex); + + parentDesc = RelationGetDescr(rel); + + /* + * For each partition, scan all existing indexes; if one matches + * our index definition and is not already attached to some other + * parent index, attach it to the one we just created. + * + * If none matches, build a new index by calling ourselves + * recursively with the same options (except for the index name). + */ + for (i = 0; i < nparts; i++) + { + Oid childRelid = part_oids[i]; + Relation childrel; + Oid child_save_userid; + int child_save_sec_context; + int child_save_nestlevel; + List *childidxs; + ListCell *cell; + AttrMap *attmap; + bool found = false; + + childrel = table_open(childRelid, lockmode); + + GetUserIdAndSecContext(&child_save_userid, + &child_save_sec_context); + SetUserIdAndSecContext(childrel->rd_rel->relowner, + child_save_sec_context | SECURITY_RESTRICTED_OPERATION); + child_save_nestlevel = NewGUCNestLevel(); + + /* + * Don't try to create indexes on foreign tables, though. 
Skip + * those if a regular index, or fail if trying to create a + * constraint index. + */ + if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (stmt->unique || stmt->primary) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create unique index on partitioned table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Table \"%s\" contains partitions that are foreign tables.", + RelationGetRelationName(rel)))); + + AtEOXact_GUC(false, child_save_nestlevel); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + table_close(childrel, lockmode); + continue; + } + + childidxs = RelationGetIndexList(childrel); + attmap = + build_attrmap_by_name(RelationGetDescr(childrel), + parentDesc); + + foreach(cell, childidxs) + { + Oid cldidxid = lfirst_oid(cell); + Relation cldidx; + IndexInfo *cldIdxInfo; + + /* this index is already partition of another one */ + if (has_superclass(cldidxid)) + continue; + + cldidx = index_open(cldidxid, lockmode); + cldIdxInfo = BuildIndexInfo(cldidx); + if (CompareIndexInfo(cldIdxInfo, indexInfo, + cldidx->rd_indcollation, + parentIndex->rd_indcollation, + cldidx->rd_opfamily, + parentIndex->rd_opfamily, + attmap)) + { + Oid cldConstrOid = InvalidOid; + + /* + * Found a match. + * + * If this index is being created in the parent + * because of a constraint, then the child needs to + * have a constraint also, so look for one. If there + * is no such constraint, this index is no good, so + * keep looking. + */ + if (createdConstraintId != InvalidOid) + { + cldConstrOid = + get_relation_idx_constraint_oid(childRelid, + cldidxid); + if (cldConstrOid == InvalidOid) + { + index_close(cldidx, lockmode); + continue; + } + } + + /* Attach index to parent and we're done. 
*/ + IndexSetParentIndex(cldidx, indexRelationId); + if (createdConstraintId != InvalidOid) + ConstraintSetParentConstraint(cldConstrOid, + createdConstraintId, + childRelid); + + if (!cldidx->rd_index->indisvalid) + invalidate_parent = true; + + found = true; + /* keep lock till commit */ + index_close(cldidx, NoLock); + break; + } + + index_close(cldidx, lockmode); + } + + list_free(childidxs); + AtEOXact_GUC(false, child_save_nestlevel); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + table_close(childrel, NoLock); + + /* + * If no matching index was found, create our own. + */ + if (!found) + { + IndexStmt *childStmt = copyObject(stmt); + bool found_whole_row; + ListCell *lc; + ObjectAddress childAddr; + + /* + * We can't use the same index name for the child index, + * so clear idxname to let the recursive invocation choose + * a new name. Likewise, the existing target relation + * field is wrong, and if indexOid or oldNode are set, + * they mustn't be applied to the child either. + */ + childStmt->idxname = NULL; + childStmt->relation = NULL; + childStmt->indexOid = InvalidOid; + childStmt->oldNode = InvalidOid; + childStmt->oldCreateSubid = InvalidSubTransactionId; + childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId; + + /* + * Adjust any Vars (both in expressions and in the index's + * WHERE clause) to match the partition's column numbering + * in case it's different from the parent's. + */ + foreach(lc, childStmt->indexParams) + { + IndexElem *ielem = lfirst(lc); + + /* + * If the index parameter is an expression, we must + * translate it to contain child Vars. 
+ */ + if (ielem->expr) + { + ielem->expr = + map_variable_attnos((Node *) ielem->expr, + 1, 0, attmap, + InvalidOid, + &found_whole_row); + if (found_whole_row) + elog(ERROR, "cannot convert whole-row table reference"); + } + } + childStmt->whereClause = + map_variable_attnos(stmt->whereClause, 1, 0, + attmap, + InvalidOid, &found_whole_row); + if (found_whole_row) + elog(ERROR, "cannot convert whole-row table reference"); + + /* + * Recurse as the starting user ID. Callee will use that + * for permission checks, then switch again. + */ + Assert(GetUserId() == child_save_userid); + SetUserIdAndSecContext(root_save_userid, + root_save_sec_context); + childAddr = + DefineIndex(childRelid, childStmt, + InvalidOid, /* no predefined OID */ + indexRelationId, /* this is our child */ + createdConstraintId, + is_alter_table, check_rights, + check_not_in_use, + skip_build, quiet); + SetUserIdAndSecContext(child_save_userid, + child_save_sec_context); + + /* + * Check if the index just created is valid or not, as it + * could be possible that it has been switched as invalid + * when recursing across multiple partition levels. + */ + if (!get_index_isvalid(childAddr.objectId)) + invalidate_parent = true; + } + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + i + 1); + free_attrmap(attmap); + } + + index_close(parentIndex, lockmode); + + /* + * The pg_index row we inserted for this index was marked + * indisvalid=true. But if we attached an existing index that is + * invalid, this is incorrect, so update our row to invalid too. 
+ */ + if (invalidate_parent) + { + Relation pg_index = table_open(IndexRelationId, RowExclusiveLock); + HeapTuple tup, + newtup; + + tup = SearchSysCache1(INDEXRELID, + ObjectIdGetDatum(indexRelationId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for index %u", + indexRelationId); + newtup = heap_copytuple(tup); + ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false; + CatalogTupleUpdate(pg_index, &tup->t_self, newtup); + ReleaseSysCache(tup); + table_close(pg_index, RowExclusiveLock); + heap_freetuple(newtup); + + /* + * CCI here to make this update visible, in case this recurses + * across multiple partition levels. + */ + CommandCounterIncrement(); + } + } + + /* + * Indexes on partitioned tables are not themselves built, so we're + * done here. + */ + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + table_close(rel, NoLock); + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + return address; + } + + AtEOXact_GUC(false, root_save_nestlevel); + SetUserIdAndSecContext(root_save_userid, root_save_sec_context); + + if (!concurrent) + { + /* Close the heap and we're done, in the non-concurrent case */ + table_close(rel, NoLock); + + /* If this is the top-level index, we're done. */ + if (!OidIsValid(parentIndexId)) + pgstat_progress_end_command(); + + return address; + } + + /* save lockrelid and locktag for below, then close rel */ + heaprelid = rel->rd_lockInfo.lockRelId; + SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId); + table_close(rel, NoLock); + + /* + * For a concurrent build, it's important to make the catalog entries + * visible to other transactions before we start to build the index. That + * will prevent them from making incompatible HOT updates. The new index + * will be marked not indisready and not indisvalid, so that no one else + * tries to either insert into it or use it for queries. 
+ * + * We must commit our current transaction so that the index becomes + * visible; then start another. Note that all the data structures we just + * built are lost in the commit. The only data we keep past here are the + * relation IDs. + * + * Before committing, get a session-level lock on the table, to ensure + * that neither it nor the index can be dropped before we finish. This + * cannot block, even if someone else is waiting for access, because we + * already have the same lock within our transaction. + * + * Note: we don't currently bother with a session lock on the index, + * because there are no operations that could change its state while we + * hold lock on the parent table. This might need to change later. + */ + LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* + * The index is now visible, so we can report the OID. While on it, + * include the report for the beginning of phase 2. + */ + { + const int progress_cols[] = { + PROGRESS_CREATEIDX_INDEX_OID, + PROGRESS_CREATEIDX_PHASE + }; + const int64 progress_vals[] = { + indexRelationId, + PROGRESS_CREATEIDX_PHASE_WAIT_1 + }; + + pgstat_progress_update_multi_param(2, progress_cols, progress_vals); + } + + /* + * Phase 2 of concurrent index build (see comments for validate_index() + * for an overview of how this works) + * + * Now we must wait until no running transaction could have the table open + * with the old list of indexes. Use ShareLock to consider running + * transactions that hold locks that permit writing to the table. Note we + * do not need to worry about xacts that open the table for writing after + * this point; they will see the new index when they open it. 
+ * + * Note: the reason we use actual lock acquisition here, rather than just + * checking the ProcArray and sleeping, is that deadlock is possible if + * one of the transactions in question is blocked trying to acquire an + * exclusive lock on our table. The lock code will detect deadlock and + * error out properly. + */ + WaitForLockers(heaplocktag, ShareLock, true); + + /* + * At this moment we are sure that there are no transactions with the + * table open for write that don't have this new index in their list of + * indexes. We have waited out all the existing transactions and any new + * transaction will have the new index in its list, but the index is still + * marked as "not-ready-for-inserts". The index is consulted while + * deciding HOT-safety though. This arrangement ensures that no new HOT + * chains can be created where the new tuple and the old tuple in the + * chain have different index keys. + * + * We now take a new snapshot, and build the index using all tuples that + * are visible in this snapshot. We can be sure that any HOT updates to + * these tuples will be compatible with the index, since any updates made + * by transactions that didn't know about the index are now committed or + * rolled back. Thus, each visible tuple is either the end of its + * HOT-chain or the extension of the chain is HOT-safe for this index. + */ + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Perform concurrent build of index */ + index_concurrently_build(relationId, indexRelationId); + + /* we can do away with our snapshot */ + PopActiveSnapshot(); + + /* + * Commit this transaction to make the indisready update visible. 
+ */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* + * Phase 3 of concurrent index build + * + * We once again wait until no transaction can have the table open with + * the index marked as read-only for updates. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_2); + WaitForLockers(heaplocktag, ShareLock, true); + + /* + * Now take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. Beware! There might still be snapshots in + * use that treat some transaction as in-progress that our reference + * snapshot treats as committed. If such a recently-committed transaction + * deleted tuples in the table, we will not include them in the index; yet + * those transactions which see the deleting one as still-in-progress will + * expect such tuples to be there once we mark the index as valid. + * + * We solve this by waiting for all endangered transactions to exit before + * we mark the index as valid. + * + * We also set ActiveSnapshot to this snap, since functions in indexes may + * need a snapshot. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + /* + * Scan the index and the heap, insert any missing index entries. + */ + validate_index(relationId, indexRelationId, snapshot); + + /* + * Drop the reference snapshot. We must do this before waiting out other + * snapshot holders, else we will deadlock against other processes also + * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one + * they must wait for. But first, save the snapshot's xmin to use as + * limitXmin for GetCurrentVirtualXIDs(). 
+ */ + limitXmin = snapshot->xmin; + + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + + /* + * The snapshot subsystem could still contain registered snapshots that + * are holding back our process's advertised xmin; in particular, if + * default_transaction_isolation = serializable, there is a transaction + * snapshot that is still active. The CatalogSnapshot is likewise a + * hazard. To ensure no deadlocks, we must commit and start yet another + * transaction, and do our wait before any snapshot has been taken in it. + */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Tell concurrent index builds to ignore us, if index qualifies */ + if (safe_index) + set_indexsafe_procflags(); + + /* We should now definitely not be advertising any xmin. */ + Assert(MyProc->xmin == InvalidTransactionId); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. But since it might not contain tuples deleted just + * before the reference snap was taken, we have to wait out any + * transactions that might have older snapshots. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_3); + WaitForOlderSnapshots(limitXmin, true); + + /* + * Index can now be marked valid -- update its pg_index entry + */ + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); + + /* + * The pg_index update will cause backends (including this one) to update + * relcache entries for the index itself, but we should also send a + * relcache inval on the parent table to force replanning of cached plans. + * Otherwise existing sessions might fail to use the new index where it + * would be useful. (Note that our earlier commits did not create reasons + * to replan; so relcache flush on the index itself was sufficient.) + */ + CacheInvalidateRelcacheByRelid(heaprelid.relId); + + /* + * Last thing to do is release the session-level lock on the parent table. 
 */
	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);

	pgstat_progress_end_command();

	return address;
}


/*
 * CheckMutability
 *		Test whether given expression is mutable
 *
 * Returns true if the expression (after planning) may still contain
 * mutable functions, i.e. it is not safe to rely on it producing the
 * same result for the same input every time.
 */
static bool
CheckMutability(Expr *expr)
{
	/*
	 * First run the expression through the planner.  This has a couple of
	 * important consequences.  First, function default arguments will get
	 * inserted, which may affect volatility (consider "default now()").
	 * Second, inline-able functions will get inlined, which may allow us to
	 * conclude that the function is really less volatile than it's marked. As
	 * an example, polymorphic functions must be marked with the most volatile
	 * behavior that they have for any input type, but once we inline the
	 * function we may be able to conclude that it's not so volatile for the
	 * particular input type we're dealing with.
	 *
	 * We assume here that expression_planner() won't scribble on its input.
	 */
	expr = expression_planner(expr);

	/* Now we can search for non-immutable functions */
	return contain_mutable_functions((Node *) expr);
}


/*
 * CheckPredicate
 *		Checks that the given partial-index predicate is valid.
 *
 * Raises an error (does not return) if the predicate is unacceptable.
 *
 * This used to also constrain the form of the predicate to forms that
 * indxpath.c could do something with.  However, that seems overly
 * restrictive.  One useful application of partial indexes is to apply
 * a UNIQUE constraint across a subset of a table, and in that scenario
 * any evaluable predicate will work.  So accept any predicate here
 * (except ones requiring a plan), and let indxpath.c fend for itself.
 */
static void
CheckPredicate(Expr *predicate)
{
	/*
	 * transformExpr() should have already rejected subqueries, aggregates,
	 * and window functions, based on the EXPR_KIND_ for a predicate.
	 */

	/*
	 * A predicate using mutable functions is probably wrong, for the same
	 * reasons that we don't allow an index expression to use one.
 */
	if (CheckMutability(predicate))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("functions in index predicate must be marked IMMUTABLE")));
}

/*
 * Compute per-index-column information, including indexed column numbers
 * or index expressions, opclasses and their options. Note, all output vectors
 * should be allocated for all columns, including "including" ones.
 *
 * The exclusion-constraint arrays (ii_ExclusionOps et al.) are filled in
 * only when exclusionOpNames is non-NIL.
 *
 * If the caller switched to the table owner, ddl_userid is the role for ACL
 * checks reached without traversing opaque expressions.  Otherwise, it's
 * InvalidOid, and other ddl_* arguments are undefined.
 */
static void
ComputeIndexAttrs(IndexInfo *indexInfo,
				  Oid *typeOidP,
				  Oid *collationOidP,
				  Oid *classOidP,
				  int16 *colOptionP,
				  List *attList,	/* list of IndexElem's */
				  List *exclusionOpNames,
				  Oid relId,
				  const char *accessMethodName,
				  Oid accessMethodId,
				  bool amcanorder,
				  bool isconstraint,
				  Oid ddl_userid,
				  int ddl_sec_context,
				  int *ddl_save_nestlevel)
{
	ListCell   *nextExclOp;
	ListCell   *lc;
	int			attn;			/* index of the column being processed */
	int			nkeycols = indexInfo->ii_NumIndexKeyAttrs;
	Oid			save_userid;
	int			save_sec_context;

	/* Allocate space for exclusion operator info, if needed */
	if (exclusionOpNames)
	{
		Assert(list_length(exclusionOpNames) == nkeycols);
		indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
		indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
		indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
		nextExclOp = list_head(exclusionOpNames);
	}
	else
		nextExclOp = NULL;

	/* Remember the caller's identity so we can switch back after each hop */
	if (OidIsValid(ddl_userid))
		GetUserIdAndSecContext(&save_userid, &save_sec_context);

	/*
	 * process attributeList
	 */
	attn = 0;
	foreach(lc, attList)
	{
		IndexElem  *attribute = (IndexElem *) lfirst(lc);
		Oid			atttype;
		Oid			attcollation;

		/*
		 * Process the column-or-expression to be indexed.
 */
		if (attribute->name != NULL)
		{
			/* Simple index attribute */
			HeapTuple	atttuple;
			Form_pg_attribute attform;

			Assert(attribute->expr == NULL);
			atttuple = SearchSysCacheAttName(relId, attribute->name);
			if (!HeapTupleIsValid(atttuple))
			{
				/* difference in error message spellings is historical */
				if (isconstraint)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_COLUMN),
							 errmsg("column \"%s\" named in key does not exist",
									attribute->name)));
				else
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_COLUMN),
							 errmsg("column \"%s\" does not exist",
									attribute->name)));
			}
			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
			indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
			atttype = attform->atttypid;
			attcollation = attform->attcollation;
			ReleaseSysCache(atttuple);
		}
		else
		{
			/* Index expression */
			Node	   *expr = attribute->expr;

			Assert(expr != NULL);

			if (attn >= nkeycols)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("expressions are not supported in included columns")));
			atttype = exprType(expr);
			attcollation = exprCollation(expr);

			/*
			 * Strip any top-level COLLATE clause.  This ensures that we treat
			 * "x COLLATE y" and "(x COLLATE y)" alike.
			 */
			while (IsA(expr, CollateExpr))
				expr = (Node *) ((CollateExpr *) expr)->arg;

			if (IsA(expr, Var) &&
				((Var *) expr)->varattno != InvalidAttrNumber)
			{
				/*
				 * User wrote "(column)" or "(column COLLATE something)".
				 * Treat it like simple attribute anyway.
				 */
				indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
			}
			else
			{
				indexInfo->ii_IndexAttrNumbers[attn] = 0;	/* marks expression */
				indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
													expr);

				/*
				 * transformExpr() should have already rejected subqueries,
				 * aggregates, and window functions, based on the EXPR_KIND_
				 * for an index expression.
 */

				/*
				 * An expression using mutable functions is probably wrong,
				 * since if you aren't going to get the same result for the
				 * same data every time, it's not clear what the index entries
				 * mean at all.
				 */
				if (CheckMutability((Expr *) expr))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("functions in index expression must be marked IMMUTABLE")));
			}
		}

		typeOidP[attn] = atttype;

		/*
		 * Included columns have no collation, no opclass and no ordering
		 * options.
		 */
		if (attn >= nkeycols)
		{
			if (attribute->collation)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support a collation")));
			if (attribute->opclass)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support an operator class")));
			if (attribute->ordering != SORTBY_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support ASC/DESC options")));
			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("including column does not support NULLS FIRST/LAST options")));

			classOidP[attn] = InvalidOid;
			colOptionP[attn] = 0;
			collationOidP[attn] = InvalidOid;
			attn++;

			continue;
		}

		/*
		 * Apply collation override if any.  Use of ddl_userid is necessary
		 * due to ACL checks therein, and it's safe because collations don't
		 * contain opaque expressions (or non-opaque expressions).
 */
		if (attribute->collation)
		{
			if (OidIsValid(ddl_userid))
			{
				AtEOXact_GUC(false, *ddl_save_nestlevel);
				SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
			}
			attcollation = get_collation_oid(attribute->collation, false);
			if (OidIsValid(ddl_userid))
			{
				SetUserIdAndSecContext(save_userid, save_sec_context);
				*ddl_save_nestlevel = NewGUCNestLevel();
			}
		}

		/*
		 * Check we have a collation iff it's a collatable type.  The only
		 * expected failures here are (1) COLLATE applied to a noncollatable
		 * type, or (2) index expression had an unresolved collation.  But we
		 * might as well code this to be a complete consistency check.
		 */
		if (type_is_collatable(atttype))
		{
			if (!OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for index expression"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
		}
		else
		{
			if (OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("collations are not supported by type %s",
								format_type_be(atttype))));
		}

		collationOidP[attn] = attcollation;

		/*
		 * Identify the opclass to use.  Use of ddl_userid is necessary due to
		 * ACL checks therein.  This is safe despite opclasses containing
		 * opaque expressions (specifically, functions), because only
		 * superusers can define opclasses.
		 */
		if (OidIsValid(ddl_userid))
		{
			AtEOXact_GUC(false, *ddl_save_nestlevel);
			SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
		}
		classOidP[attn] = ResolveOpClass(attribute->opclass,
										 atttype,
										 accessMethodName,
										 accessMethodId);
		if (OidIsValid(ddl_userid))
		{
			SetUserIdAndSecContext(save_userid, save_sec_context);
			*ddl_save_nestlevel = NewGUCNestLevel();
		}

		/*
		 * Identify the exclusion operator, if any.
 */
		if (nextExclOp)
		{
			List	   *opname = (List *) lfirst(nextExclOp);
			Oid			opid;
			Oid			opfamily;
			int			strat;

			/*
			 * Find the operator --- it must accept the column datatype
			 * without runtime coercion (but binary compatibility is OK).
			 * Operators contain opaque expressions (specifically, functions).
			 * compatible_oper_opid() boils down to oper() and
			 * IsBinaryCoercible().  PostgreSQL would have security problems
			 * elsewhere if oper() started calling opaque expressions.
			 */
			if (OidIsValid(ddl_userid))
			{
				AtEOXact_GUC(false, *ddl_save_nestlevel);
				SetUserIdAndSecContext(ddl_userid, ddl_sec_context);
			}
			opid = compatible_oper_opid(opname, atttype, atttype, false);
			if (OidIsValid(ddl_userid))
			{
				SetUserIdAndSecContext(save_userid, save_sec_context);
				*ddl_save_nestlevel = NewGUCNestLevel();
			}

			/*
			 * Only allow commutative operators to be used in exclusion
			 * constraints. If X conflicts with Y, but Y does not conflict
			 * with X, bad things will happen.
			 */
			if (get_commutator(opid) != opid)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("operator %s is not commutative",
								format_operator(opid)),
						 errdetail("Only commutative operators can be used in exclusion constraints.")));

			/*
			 * Operator must be a member of the right opfamily, too
			 */
			opfamily = get_opclass_family(classOidP[attn]);
			strat = get_op_opfamily_strategy(opid, opfamily);
			if (strat == 0)
			{
				HeapTuple	opftuple;
				Form_pg_opfamily opfform;

				/*
				 * attribute->opclass might not explicitly name the opfamily,
				 * so fetch the name of the selected opfamily for use in the
				 * error message.
 */
				opftuple = SearchSysCache1(OPFAMILYOID,
										   ObjectIdGetDatum(opfamily));
				if (!HeapTupleIsValid(opftuple))
					elog(ERROR, "cache lookup failed for opfamily %u",
						 opfamily);
				opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);

				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("operator %s is not a member of operator family \"%s\"",
								format_operator(opid),
								NameStr(opfform->opfname)),
						 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
			}

			indexInfo->ii_ExclusionOps[attn] = opid;
			indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
			indexInfo->ii_ExclusionStrats[attn] = strat;
			nextExclOp = lnext(exclusionOpNames, nextExclOp);
		}

		/*
		 * Set up the per-column options (indoption field).  For now, this is
		 * zero for any un-ordered index, while ordered indexes have DESC and
		 * NULLS FIRST/LAST options.
		 */
		colOptionP[attn] = 0;
		if (amcanorder)
		{
			/* default ordering is ASC */
			if (attribute->ordering == SORTBY_DESC)
				colOptionP[attn] |= INDOPTION_DESC;
			/* default null ordering is LAST for ASC, FIRST for DESC */
			if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
			{
				if (attribute->ordering == SORTBY_DESC)
					colOptionP[attn] |= INDOPTION_NULLS_FIRST;
			}
			else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
				colOptionP[attn] |= INDOPTION_NULLS_FIRST;
		}
		else
		{
			/* index AM does not support ordering */
			if (attribute->ordering != SORTBY_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("access method \"%s\" does not support ASC/DESC options",
								accessMethodName)));
			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
								accessMethodName)));
		}

		/* Set up the per-column opclass options (attoptions field).
+ */
+		if (attribute->opclassopts)
+		{
+			Assert(attn < nkeycols);
+
+			if (!indexInfo->ii_OpclassOptions)
+				indexInfo->ii_OpclassOptions =
+					palloc0(sizeof(Datum) * indexInfo->ii_NumIndexAttrs);
+
+			indexInfo->ii_OpclassOptions[attn] =
+				transformRelOptions((Datum) 0, attribute->opclassopts,
+									NULL, NULL, false, false);
+		}
+
+		attn++;
+	}
+}
+
+/*
+ * Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specifications in index and
+ * partition key definitions.
+ */
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+			   const char *accessMethodName, Oid accessMethodId)
+{
+	char	   *schemaname;
+	char	   *opcname;
+	HeapTuple	tuple;
+	Form_pg_opclass opform;
+	Oid			opClassId,
+				opInputType;
+
+	if (opclass == NIL)
+	{
+		/* no operator class specified, so find the default */
+		opClassId = GetDefaultOpClass(attrType, accessMethodId);
+		if (!OidIsValid(opClassId))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("data type %s has no default operator class for access method \"%s\"",
+							format_type_be(attrType), accessMethodName),
+					 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
+		return opClassId;
+	}
+
+	/*
+	 * Specific opclass name given, so look up the opclass.
+	 */
+
+	/* deconstruct the name list */
+	DeconstructQualifiedName(opclass, &schemaname, &opcname);
+
+	if (schemaname)
+	{
+		/* Look in specific schema only */
+		Oid			namespaceId;
+
+		namespaceId = LookupExplicitNamespace(schemaname, false);
+		tuple = SearchSysCache3(CLAAMNAMENSP,
+								ObjectIdGetDatum(accessMethodId),
+								PointerGetDatum(opcname),
+								ObjectIdGetDatum(namespaceId));
+	}
+	else
+	{
+		/* Unqualified opclass name, so search the search path */
+		opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
+		if (!OidIsValid(opClassId))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+							opcname, accessMethodName)));
+		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
+	}
+
+	/* covers the schema-qualified lookup failing to find a match, too */
+	if (!HeapTupleIsValid(tuple))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
+						NameListToString(opclass), accessMethodName)));
+
+	/*
+	 * Verify that the index operator class accepts this datatype.  Note we
+	 * will accept binary compatibility.
+	 */
+	opform = (Form_pg_opclass) GETSTRUCT(tuple);
+	opClassId = opform->oid;
+	opInputType = opform->opcintype;
+
+	if (!IsBinaryCoercible(attrType, opInputType))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("operator class \"%s\" does not accept data type %s",
+						NameListToString(opclass), format_type_be(attrType))));
+
+	ReleaseSysCache(tuple);
+
+	return opClassId;
+}
+
+/*
+ * GetDefaultOpClass
+ *
+ * Given the OIDs of a datatype and an access method, find the default
+ * operator class, if any.  Returns InvalidOid if there is none.
+ */
+Oid
+GetDefaultOpClass(Oid type_id, Oid am_id)
+{
+	Oid			result = InvalidOid;
+	int			nexact = 0;		/* number of exact-match default opclasses */
+	int			ncompatible = 0;	/* binary-compatible, non-preferred type */
+	int			ncompatiblepreferred = 0;	/* binary-compatible, preferred type */
+	Relation	rel;
+	ScanKeyData skey[1];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	TYPCATEGORY tcategory;
+
+	/* If it's a domain, look at the base type instead */
+	type_id = getBaseType(type_id);
+
+	tcategory = TypeCategory(type_id);
+
+	/*
+	 * We scan through all the opclasses available for the access method,
+	 * looking for one that is marked default and matches the target type
+	 * (either exactly or binary-compatibly, but prefer an exact match).
+	 *
+	 * We could find more than one binary-compatible match.  If just one is
+	 * for a preferred type, use that one; otherwise we fail, forcing the user
+	 * to specify which one he wants.  (The preferred-type special case is a
+	 * kluge for varchar: it's binary-compatible to both text and bpchar, so
+	 * we need a tiebreaker.)  If we find more than one exact match, then
+	 * someone put bogus entries in pg_opclass.
+ */
+	rel = table_open(OperatorClassRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_opclass_opcmethod,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(am_id));
+
+	scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
+							  NULL, 1, skey);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
+
+		/* ignore altogether if not a default opclass */
+		if (!opclass->opcdefault)
+			continue;
+		if (opclass->opcintype == type_id)
+		{
+			/* exact match: remember it, but keep counting duplicates */
+			nexact++;
+			result = opclass->oid;
+		}
+		else if (nexact == 0 &&
+				 IsBinaryCoercible(type_id, opclass->opcintype))
+		{
+			if (IsPreferredType(tcategory, opclass->opcintype))
+			{
+				ncompatiblepreferred++;
+				result = opclass->oid;
+			}
+			else if (ncompatiblepreferred == 0)
+			{
+				ncompatible++;
+				result = opclass->oid;
+			}
+		}
+	}
+
+	systable_endscan(scan);
+
+	table_close(rel, AccessShareLock);
+
+	/* raise error if pg_opclass contains inconsistent data */
+	if (nexact > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("there are multiple default operator classes for data type %s",
+						format_type_be(type_id))));
+
+	if (nexact == 1 ||
+		ncompatiblepreferred == 1 ||
+		(ncompatiblepreferred == 0 && ncompatible == 1))
+		return result;
+
+	return InvalidOid;
+}
+
+/*
+ * makeObjectName()
+ *
+ * Create a name for an implicitly created index, sequence, constraint,
+ * extended statistics, etc.
+ *
+ * The parameters are typically: the original table name, the original field
+ * name, and a "type" string (such as "seq" or "pkey").  The field name
+ * and/or type can be NULL if not relevant.
+ *
+ * The result is a palloc'd string.
+ *
+ * The basic result we want is "name1_name2_label", omitting "_name2" or
+ * "_label" when those parameters are NULL.  However, we must generate
+ * a name with less than NAMEDATALEN characters!  So, we truncate one or
+ * both names if necessary to make a short-enough string.  The label part
+ * is never truncated (so it had better be reasonably short).
+ *
+ * The caller is responsible for checking uniqueness of the generated
+ * name and retrying as needed; retrying will be done by altering the
+ * "label" string (which is why we never truncate that part).
+ */
+char *
+makeObjectName(const char *name1, const char *name2, const char *label)
+{
+	char	   *name;
+	int			overhead = 0;	/* chars needed for label and underscores */
+	int			availchars;		/* chars available for name(s) */
+	int			name1chars;		/* chars allocated to name1 */
+	int			name2chars;		/* chars allocated to name2 */
+	int			ndx;
+
+	name1chars = strlen(name1);
+	if (name2)
+	{
+		name2chars = strlen(name2);
+		overhead++;				/* allow for separating underscore */
+	}
+	else
+		name2chars = 0;
+	if (label)
+		overhead += strlen(label) + 1;
+
+	availchars = NAMEDATALEN - 1 - overhead;
+	Assert(availchars > 0);		/* else caller chose a bad label */
+
+	/*
+	 * If we must truncate, preferentially truncate the longer name. This
+	 * logic could be expressed without a loop, but it's simple and obvious as
+	 * a loop.
+	 */
+	while (name1chars + name2chars > availchars)
+	{
+		if (name1chars > name2chars)
+			name1chars--;
+		else
+			name2chars--;
+	}
+
+	/* make sure we don't truncate in the middle of a multibyte character */
+	name1chars = pg_mbcliplen(name1, name1chars, name1chars);
+	if (name2)
+		name2chars = pg_mbcliplen(name2, name2chars, name2chars);
+
+	/* Now construct the string using the chosen lengths */
+	name = palloc(name1chars + name2chars + overhead + 1);
+	memcpy(name, name1, name1chars);
+	ndx = name1chars;
+	if (name2)
+	{
+		name[ndx++] = '_';
+		memcpy(name + ndx, name2, name2chars);
+		ndx += name2chars;
+	}
+	if (label)
+	{
+		name[ndx++] = '_';
+		strcpy(name + ndx, label);
+	}
+	else
+		name[ndx] = '\0';
+
+	return name;
+}
+
+/*
+ * Select a nonconflicting name for a new relation.  This is ordinarily
+ * used to choose index names (which is why it's here) but it can also
+ * be used for sequences, or any autogenerated relation kind.
+ *
+ * name1, name2, and label are used the same way as for makeObjectName(),
+ * except that the label can't be NULL; digits will be appended to the label
+ * if needed to create a name that is unique within the specified namespace.
+ *
+ * If isconstraint is true, we also avoid choosing a name matching any
+ * existing constraint in the same namespace.  (This is stricter than what
+ * Postgres itself requires, but the SQL standard says that constraint names
+ * should be unique within schemas, so we follow that for autogenerated
+ * constraint names.)
+ *
+ * Note: it is theoretically possible to get a collision anyway, if someone
+ * else chooses the same name concurrently.  This is fairly unlikely to be
+ * a problem in practice, especially if one is holding an exclusive lock on
+ * the relation identified by name1.  However, if choosing multiple names
+ * within a single command, you'd better create the new object and do
+ * CommandCounterIncrement before choosing the next one!
+ *
+ * Returns a palloc'd string.
+ */
+char *
+ChooseRelationName(const char *name1, const char *name2,
+				   const char *label, Oid namespaceid,
+				   bool isconstraint)
+{
+	int			pass = 0;
+	char	   *relname = NULL;
+	char		modlabel[NAMEDATALEN];
+
+	/* try the unmodified label first */
+	strlcpy(modlabel, label, sizeof(modlabel));
+
+	for (;;)
+	{
+		relname = makeObjectName(name1, name2, modlabel);
+
+		if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+		{
+			if (!isconstraint ||
+				!ConstraintNameExists(relname, namespaceid))
+				break;
+		}
+
+		/* found a conflict, so try a new name component */
+		pfree(relname);
+		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
+	}
+
+	return relname;
+}
+
+/*
+ * Select the name to be used for an index.
+ *
+ * The argument list is pretty ad-hoc :-(
+ */
+static char *
+ChooseIndexName(const char *tabname, Oid namespaceId,
+				List *colnames, List *exclusionOpNames,
+				bool primary, bool isconstraint)
+{
+	char	   *indexname;
+
+	if (primary)
+	{
+		/* the primary key's name does not depend on the specific column(s) */
+		indexname = ChooseRelationName(tabname,
+									   NULL,
+									   "pkey",
+									   namespaceId,
+									   true);
+	}
+	else if (exclusionOpNames != NIL)
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "excl",
+									   namespaceId,
+									   true);
+	}
+	else if (isconstraint)
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "key",
+									   namespaceId,
+									   true);
+	}
+	else
+	{
+		indexname = ChooseRelationName(tabname,
+									   ChooseIndexNameAddition(colnames),
+									   "idx",
+									   namespaceId,
+									   false);
+	}
+
+	return indexname;
+}
+
+/*
+ * Generate "name2" for a new index given the list of column names for it
+ * (as produced by ChooseIndexColumnNames).  This will be passed to
+ * ChooseRelationName along with the parent table name and a suitable label.
+ *
+ * We know that less than NAMEDATALEN characters will actually be used,
+ * so we can truncate the result once we've generated that many.
+ *
+ * XXX See also ChooseForeignKeyConstraintNameAddition and
+ * ChooseExtendedStatisticNameAddition.
+ */
+static char *
+ChooseIndexNameAddition(List *colnames)
+{
+	char		buf[NAMEDATALEN * 2];
+	int			buflen = 0;
+	ListCell   *lc;
+
+	buf[0] = '\0';
+	foreach(lc, colnames)
+	{
+		const char *name = (const char *) lfirst(lc);
+
+		if (buflen > 0)
+			buf[buflen++] = '_';	/* insert _ between names */
+
+		/*
+		 * At this point we have buflen <= NAMEDATALEN.  name should be less
+		 * than NAMEDATALEN already, but use strlcpy for paranoia.
+		 */
+		strlcpy(buf + buflen, name, NAMEDATALEN);
+		buflen += strlen(buf + buflen);
+		if (buflen >= NAMEDATALEN)
+			break;
+	}
+	return pstrdup(buf);
+}
+
+/*
+ * Select the actual names to be used for the columns of an index, given the
+ * list of IndexElems for the columns.  This is mostly about ensuring the
+ * names are unique so we don't get a conflicting-attribute-names error.
+ *
+ * Returns a List of plain strings (char *, not String nodes).
+ */
+static List *
+ChooseIndexColumnNames(List *indexElems)
+{
+	List	   *result = NIL;
+	ListCell   *lc;
+
+	foreach(lc, indexElems)
+	{
+		IndexElem  *ielem = (IndexElem *) lfirst(lc);
+		const char *origname;
+		const char *curname;
+		int			i;
+		char		buf[NAMEDATALEN];
+
+		/* Get the preliminary name from the IndexElem */
+		if (ielem->indexcolname)
+			origname = ielem->indexcolname; /* caller-specified name */
+		else if (ielem->name)
+			origname = ielem->name; /* simple column reference */
+		else
+			origname = "expr";	/* default name for expression */
+
+		/* If it conflicts with any previous column, tweak it */
+		curname = origname;
+		for (i = 1;; i++)
+		{
+			ListCell   *lc2;
+			char		nbuf[32];
+			int			nlen;
+
+			/* does curname match any name already chosen? */
+			foreach(lc2, result)
+			{
+				if (strcmp(curname, (char *) lfirst(lc2)) == 0)
+					break;
+			}
+			if (lc2 == NULL)
+				break;			/* found nonconflicting name */
+
+			sprintf(nbuf, "%d", i);
+
+			/* Ensure generated names are shorter than NAMEDATALEN */
+			nlen = pg_mbcliplen(origname, strlen(origname),
+								NAMEDATALEN - 1 - strlen(nbuf));
+			memcpy(buf, origname, nlen);
+			strcpy(buf + nlen, nbuf);
+			curname = buf;
+		}
+
+		/* And attach to the result list */
+		result = lappend(result, pstrdup(curname));
+	}
+	return result;
+}
+
+/*
+ * ExecReindex
+ *
+ * Primary entry point for manual REINDEX commands.  This is mainly a
+ * preparation wrapper for the real operations that will happen in
+ * each subroutine of REINDEX.
+ */
+void
+ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
+{
+	ReindexParams params = {0};
+	ListCell   *lc;
+	bool		concurrently = false;
+	bool		verbose = false;
+	char	   *tablespacename = NULL;
+
+	/* Parse option list */
+	foreach(lc, stmt->params)
+	{
+		DefElem    *opt = (DefElem *) lfirst(lc);
+
+		if (strcmp(opt->defname, "verbose") == 0)
+			verbose = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "concurrently") == 0)
+			concurrently = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "tablespace") == 0)
+			tablespacename = defGetString(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized REINDEX option \"%s\"",
+							opt->defname),
+					 parser_errposition(pstate, opt->location)));
+	}
+
+	if (concurrently)
+		PreventInTransactionBlock(isTopLevel,
+								  "REINDEX CONCURRENTLY");
+
+	params.options =
+		(verbose ? REINDEXOPT_VERBOSE : 0) |
+		(concurrently ? REINDEXOPT_CONCURRENTLY : 0);
+
+	/*
+	 * Assign the tablespace OID to move indexes to, with InvalidOid to do
+	 * nothing.
+	 */
+	if (tablespacename != NULL)
+	{
+		params.tablespaceOid = get_tablespace_oid(tablespacename, false);
+
+		/* Check permissions except when moving to database's default */
+		if (OidIsValid(params.tablespaceOid) &&
+			params.tablespaceOid != MyDatabaseTableSpace)
+		{
+			AclResult	aclresult;
+
+			aclresult = pg_tablespace_aclcheck(params.tablespaceOid,
+											   GetUserId(), ACL_CREATE);
+			if (aclresult != ACLCHECK_OK)
+				aclcheck_error(aclresult, OBJECT_TABLESPACE,
+							   get_tablespace_name(params.tablespaceOid));
+		}
+	}
+	else
+		params.tablespaceOid = InvalidOid;
+
+	switch (stmt->kind)
+	{
+		case REINDEX_OBJECT_INDEX:
+			ReindexIndex(stmt->relation, &params, isTopLevel);
+			break;
+		case REINDEX_OBJECT_TABLE:
+			ReindexTable(stmt->relation, &params, isTopLevel);
+			break;
+		case REINDEX_OBJECT_SCHEMA:
+		case REINDEX_OBJECT_SYSTEM:
+		case REINDEX_OBJECT_DATABASE:
+
+			/*
+			 * This cannot run inside a user transaction block; if we were
+			 * inside a transaction, then its commit- and
+			 * start-transaction-command calls would not have the intended
+			 * effect!
+			 */
+			PreventInTransactionBlock(isTopLevel,
+									  (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
+									  (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
+									  "REINDEX DATABASE");
+			ReindexMultipleTables(stmt->name, stmt->kind, &params);
+			break;
+		default:
+			elog(ERROR, "unrecognized object type: %d",
+				 (int) stmt->kind);
+			break;
+	}
+}
+
+/*
+ * ReindexIndex
+ *		Recreate a specific index.
+ */
+static void
+ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
+{
+	struct ReindexIndexCallbackState state;
+	Oid			indOid;
+	char		persistence;
+	char		relkind;
+
+	/*
+	 * Find and lock index, and check permissions on table; use callback to
+	 * obtain lock on table first, to avoid deadlock hazard.  The lock level
+	 * used here must match the index lock obtained in reindex_index().
+	 *
+	 * If it's a temporary index, we will perform a non-concurrent reindex,
+	 * even if CONCURRENTLY was requested.  In that case, reindex_index() will
+	 * upgrade the lock, but that's OK, because other sessions can't hold
+	 * locks on our temporary table.
+	 */
+	state.params = *params;
+	state.locked_table_oid = InvalidOid;
+	indOid = RangeVarGetRelidExtended(indexRelation,
+									  (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+									  ShareUpdateExclusiveLock : AccessExclusiveLock,
+									  0,
+									  RangeVarCallbackForReindexIndex,
+									  &state);
+
+	/*
+	 * Obtain the current persistence and kind of the existing index.  We
+	 * already hold a lock on the index.
+	 */
+	persistence = get_rel_persistence(indOid);
+	relkind = get_rel_relkind(indOid);
+
+	if (relkind == RELKIND_PARTITIONED_INDEX)
+		ReindexPartitions(indOid, params, isTopLevel);
+	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			 persistence != RELPERSISTENCE_TEMP)
+		ReindexRelationConcurrently(indOid, params);
+	else
+	{
+		ReindexParams newparams = *params;
+
+		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
+		reindex_index(indOid, false, persistence, &newparams);
+	}
+}
+
+/*
+ * Check permissions on table before acquiring relation lock; also lock
+ * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
+ * deadlocks.
+ */
+static void
+RangeVarCallbackForReindexIndex(const RangeVar *relation,
+								Oid relId, Oid oldRelId, void *arg)
+{
+	char		relkind;
+	struct ReindexIndexCallbackState *state = arg;
+	LOCKMODE	table_lockmode;
+
+	/*
+	 * Lock level here should match table lock in reindex_index() for
+	 * non-concurrent case and table locks used by index_concurrently_*() for
+	 * concurrent case.
+	 */
+	table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ?
+		ShareUpdateExclusiveLock : ShareLock;
+
+	/*
+	 * If we previously locked some other index's heap, and the name we're
+	 * looking up no longer refers to that relation, release the now-useless
+	 * lock.
+	 *
+	 * NOTE(review): if the earlier pass found the index's table already
+	 * dropped, locked_table_oid can still be InvalidOid here even though
+	 * oldRelId is valid — presumably harmless, but confirm against lmgr
+	 * behavior for unlocking a lock that was never taken.
+	 */
+	if (relId != oldRelId && OidIsValid(oldRelId))
+	{
+		UnlockRelationOid(state->locked_table_oid, table_lockmode);
+		state->locked_table_oid = InvalidOid;
+	}
+
+	/* If the relation does not exist, there's nothing more to do. */
+	if (!OidIsValid(relId))
+		return;
+
+	/*
+	 * If the relation does exist, check whether it's an index.  But note that
+	 * the relation might have been dropped between the time we did the name
+	 * lookup and now.  In that case, there's nothing to do.
+	 */
+	relkind = get_rel_relkind(relId);
+	if (!relkind)
+		return;
+	if (relkind != RELKIND_INDEX &&
+		relkind != RELKIND_PARTITIONED_INDEX)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index", relation->relname)));
+
+	/* Check permissions */
+	if (!pg_class_ownercheck(relId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
+
+	/* Lock heap before index to avoid deadlock. */
+	if (relId != oldRelId)
+	{
+		Oid			table_oid = IndexGetRelation(relId, true);
+
+		/*
+		 * If the OID isn't valid, it means the index was concurrently
+		 * dropped, which is not a problem for us; just return normally.
+		 */
+		if (OidIsValid(table_oid))
+		{
+			LockRelationOid(table_oid, table_lockmode);
+			state->locked_table_oid = table_oid;
+		}
+	}
+}
+
+/*
+ * ReindexTable
+ *		Recreate all indexes of a table (and of its toast table, if any)
+ */
+static Oid
+ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel)
+{
+	Oid			heapOid;
+	bool		result;
+
+	/*
+	 * The lock level used here should match reindex_relation().
+	 *
+	 * If it's a temporary table, we will perform a non-concurrent reindex,
+	 * even if CONCURRENTLY was requested.  In that case, reindex_relation()
+	 * will upgrade the lock, but that's OK, because other sessions can't hold
+	 * locks on our temporary table.
+	 */
+	heapOid = RangeVarGetRelidExtended(relation,
+									   (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
+									   ShareUpdateExclusiveLock : ShareLock,
+									   0,
+									   RangeVarCallbackOwnsTable, NULL);
+
+	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
+		ReindexPartitions(heapOid, params, isTopLevel);
+	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			 get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
+	{
+		result = ReindexRelationConcurrently(heapOid, params);
+
+		if (!result)
+			ereport(NOTICE,
+					(errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
+							relation->relname)));
+	}
+	else
+	{
+		ReindexParams newparams = *params;
+
+		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
+		result = reindex_relation(heapOid,
+								  REINDEX_REL_PROCESS_TOAST |
+								  REINDEX_REL_CHECK_CONSTRAINTS,
+								  &newparams);
+		if (!result)
+			ereport(NOTICE,
+					(errmsg("table \"%s\" has no indexes to reindex",
+							relation->relname)));
+	}
+
+	return heapOid;
+}
+
+/*
+ * ReindexMultipleTables
+ *		Recreate indexes of tables selected by objectName/objectKind.
+ *
+ * To reduce the probability of deadlocks, each table is reindexed in a
+ * separate transaction, so we can release the lock on it right away.
+ * That means this must not be called within a user transaction block!
+ */
+static void
+ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
+					  ReindexParams *params)
+{
+	Oid			objectOid;
+	Relation	relationRelation;
+	TableScanDesc scan;
+	ScanKeyData scan_keys[1];
+	HeapTuple	tuple;
+	MemoryContext private_context;
+	MemoryContext old;
+	List	   *relids = NIL;
+	int			num_keys;
+	bool		concurrent_warning = false; /* emit each WARNING only once */
+	bool		tablespace_warning = false; /* ditto */
+
+	AssertArg(objectName);
+	Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
+		   objectKind == REINDEX_OBJECT_SYSTEM ||
+		   objectKind == REINDEX_OBJECT_DATABASE);
+
+	if (objectKind == REINDEX_OBJECT_SYSTEM &&
+		(params->options & REINDEXOPT_CONCURRENTLY) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot reindex system catalogs concurrently")));
+
+	/*
+	 * Get OID of object to reindex, being the database currently being used
+	 * by session for a database or for system catalogs, or the schema defined
+	 * by caller.  At the same time do permission checks that need different
+	 * processing depending on the object type.
+	 */
+	if (objectKind == REINDEX_OBJECT_SCHEMA)
+	{
+		objectOid = get_namespace_oid(objectName, false);
+
+		if (!pg_namespace_ownercheck(objectOid, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
+						   objectName);
+	}
+	else
+	{
+		objectOid = MyDatabaseId;
+
+		if (strcmp(objectName, get_database_name(objectOid)) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("can only reindex the currently open database")));
+		if (!pg_database_ownercheck(objectOid, GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
+						   objectName);
+	}
+
+	/*
+	 * Create a memory context that will survive forced transaction commits we
+	 * do below.  Since it is a child of PortalContext, it will go away
+	 * eventually even if we suffer an error; there's no need for special
+	 * abort cleanup logic.
+	 */
+	private_context = AllocSetContextCreate(PortalContext,
+											"ReindexMultipleTables",
+											ALLOCSET_SMALL_SIZES);
+
+	/*
+	 * Define the search keys to find the objects to reindex.  For a schema, we
+	 * select target relations using relnamespace, something not necessary for
+	 * a database-wide operation.
+	 */
+	if (objectKind == REINDEX_OBJECT_SCHEMA)
+	{
+		num_keys = 1;
+		ScanKeyInit(&scan_keys[0],
+					Anum_pg_class_relnamespace,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(objectOid));
+	}
+	else
+		num_keys = 0;
+
+	/*
+	 * Scan pg_class to build a list of the relations we need to reindex.
+	 *
+	 * We only consider plain relations and materialized views here (toast
+	 * rels will be processed indirectly by reindex_relation).
+	 */
+	relationRelation = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
+		Oid			relid = classtuple->oid;
+
+		/*
+		 * Only regular tables and matviews can have indexes, so ignore any
+		 * other kind of relation.
+		 *
+		 * Partitioned tables/indexes are skipped but matching leaf partitions
+		 * are processed.
+		 */
+		if (classtuple->relkind != RELKIND_RELATION &&
+			classtuple->relkind != RELKIND_MATVIEW)
+			continue;
+
+		/* Skip temp tables of other backends; we can't reindex them at all */
+		if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
+			!isTempNamespace(classtuple->relnamespace))
+			continue;
+
+		/* Check user/system classification, and optionally skip */
+		if (objectKind == REINDEX_OBJECT_SYSTEM &&
+			!IsSystemClass(relid, classtuple))
+			continue;
+
+		/*
+		 * The table can be reindexed if the user is superuser, the table
+		 * owner, or the database/schema owner (but in the latter case, only
+		 * if it's not a shared relation).  pg_class_ownercheck includes the
+		 * superuser case, and depending on objectKind we already know that
+		 * the user has permission to run REINDEX on this database or schema
+		 * per the permission checks at the beginning of this routine.
+		 */
+		if (classtuple->relisshared &&
+			!pg_class_ownercheck(relid, GetUserId()))
+			continue;
+
+		/*
+		 * Skip system tables, since index_create() would reject indexing them
+		 * concurrently (and it would likely fail if we tried).
+		 */
+		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			IsCatalogRelationOid(relid))
+		{
+			if (!concurrent_warning)
+				ereport(WARNING,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot reindex system catalogs concurrently, skipping all")));
+			concurrent_warning = true;
+			continue;
+		}
+
+		/*
+		 * If a new tablespace is set, check if this relation has to be
+		 * skipped.
+		 */
+		if (OidIsValid(params->tablespaceOid))
+		{
+			bool		skip_rel = false;
+
+			/*
+			 * Mapped relations cannot be moved to different tablespaces (in
+			 * particular this eliminates all shared catalogs.).
+			 */
+			if (RELKIND_HAS_STORAGE(classtuple->relkind) &&
+				!OidIsValid(classtuple->relfilenode))
+				skip_rel = true;
+
+			/*
+			 * A system relation is always skipped, even with
+			 * allow_system_table_mods enabled.
+			 */
+			if (IsSystemClass(relid, classtuple))
+				skip_rel = true;
+
+			if (skip_rel)
+			{
+				if (!tablespace_warning)
+					ereport(WARNING,
+							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+							 errmsg("cannot move system relations, skipping all")));
+				tablespace_warning = true;
+				continue;
+			}
+		}
+
+		/* Save the list of relation OIDs in private context */
+		old = MemoryContextSwitchTo(private_context);
+
+		/*
+		 * We always want to reindex pg_class first if it's selected to be
+		 * reindexed.  This ensures that if there is any corruption in
+		 * pg_class' indexes, they will be fixed before we process any other
+		 * tables.  This is critical because reindexing itself will try to
+		 * update pg_class.
+		 */
+		if (relid == RelationRelationId)
+			relids = lcons_oid(relid, relids);
+		else
+			relids = lappend_oid(relids, relid);
+
+		MemoryContextSwitchTo(old);
+	}
+	table_endscan(scan);
+	table_close(relationRelation, AccessShareLock);
+
+	/*
+	 * Process each relation listed in a separate transaction.  Note that this
+	 * commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(relids, params);
+
+	MemoryContextDelete(private_context);
+}
+
+/*
+ * Error callback specific to ReindexPartitions().
+ */
+static void
+reindex_error_callback(void *arg)
+{
+	ReindexErrorInfo *errinfo = (ReindexErrorInfo *) arg;
+
+	Assert(RELKIND_HAS_PARTITIONS(errinfo->relkind));
+
+	if (errinfo->relkind == RELKIND_PARTITIONED_TABLE)
+		errcontext("while reindexing partitioned table \"%s.%s\"",
+				   errinfo->relnamespace, errinfo->relname);
+	else if (errinfo->relkind == RELKIND_PARTITIONED_INDEX)
+		errcontext("while reindexing partitioned index \"%s.%s\"",
+				   errinfo->relnamespace, errinfo->relname);
+}
+
+/*
+ * ReindexPartitions
+ *
+ * Reindex a set of partitions, per the partitioned index or table given
+ * by the caller.
+ */
+static void
+ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
+{
+	List	   *partitions = NIL;
+	char		relkind = get_rel_relkind(relid);
+	char	   *relname = get_rel_name(relid);
+	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
+	MemoryContext reindex_context;
+	List	   *inhoids;
+	ListCell   *lc;
+	ErrorContextCallback errcallback;
+	ReindexErrorInfo errinfo;
+
+	Assert(RELKIND_HAS_PARTITIONS(relkind));
+
+	/*
+	 * Check if this runs in a transaction block, with an error callback to
+	 * provide more context under which a problem happens.
+ */
+	errinfo.relname = pstrdup(relname);
+	errinfo.relnamespace = pstrdup(relnamespace);
+	errinfo.relkind = relkind;
+	errcallback.callback = reindex_error_callback;
+	errcallback.arg = (void *) &errinfo;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
+	PreventInTransactionBlock(isTopLevel,
+							  relkind == RELKIND_PARTITIONED_TABLE ?
+							  "REINDEX TABLE" : "REINDEX INDEX");
+
+	/* Pop the error context stack */
+	error_context_stack = errcallback.previous;
+
+	/*
+	 * Create special memory context for cross-transaction storage.
+	 *
+	 * Since it is a child of PortalContext, it will go away eventually even
+	 * if we suffer an error so there is no need for special abort cleanup
+	 * logic.
+	 */
+	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
+											ALLOCSET_DEFAULT_SIZES);
+
+	/* ShareLock is enough to prevent schema modifications */
+	inhoids = find_all_inheritors(relid, ShareLock, NULL);
+
+	/*
+	 * The list of relations to reindex are the physical partitions of the
+	 * tree so discard any partitioned table or index.
+	 */
+	foreach(lc, inhoids)
+	{
+		Oid			partoid = lfirst_oid(lc);
+		char		partkind = get_rel_relkind(partoid);
+		MemoryContext old_context;
+
+		/*
+		 * This discards partitioned tables, partitioned indexes and foreign
+		 * tables.
+		 */
+		if (!RELKIND_HAS_STORAGE(partkind))
+			continue;
+
+		Assert(partkind == RELKIND_INDEX ||
+			   partkind == RELKIND_RELATION);
+
+		/* Save partition OID */
+		old_context = MemoryContextSwitchTo(reindex_context);
+		partitions = lappend_oid(partitions, partoid);
+		MemoryContextSwitchTo(old_context);
+	}
+
+	/*
+	 * Process each partition listed in a separate transaction.  Note that
+	 * this commits and then starts a new transaction immediately.
+	 */
+	ReindexMultipleInternal(partitions, params);
+
+	/*
+	 * Clean up working storage --- note we must do this after
+	 * StartTransactionCommand, else we might be trying to delete the active
+	 * context!
+	 */
+	MemoryContextDelete(reindex_context);
+}
+
+/*
+ * ReindexMultipleInternal
+ *
+ * Reindex a list of relations, each one being processed in its own
+ * transaction.  This commits the existing transaction immediately,
+ * and starts a new transaction when finished.
+ */
+static void
+ReindexMultipleInternal(List *relids, ReindexParams *params)
+{
+	ListCell   *l;
+
+	/* exit the caller's transaction; each relation gets its own below */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	foreach(l, relids)
+	{
+		Oid			relid = lfirst_oid(l);
+		char		relkind;
+		char		relpersistence;
+
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* check if the relation still exists */
+		if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
+		{
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+			continue;
+		}
+
+		/*
+		 * Check permissions except when moving to database's default if a new
+		 * tablespace is chosen.  Note that this check also happens in
+		 * ExecReindex(), but we do an extra check here as this runs across
+		 * multiple transactions.
+		 */
+		if (OidIsValid(params->tablespaceOid) &&
+			params->tablespaceOid != MyDatabaseTableSpace)
+		{
+			AclResult	aclresult;
+
+			aclresult = pg_tablespace_aclcheck(params->tablespaceOid,
+											   GetUserId(), ACL_CREATE);
+			if (aclresult != ACLCHECK_OK)
+				aclcheck_error(aclresult, OBJECT_TABLESPACE,
+							   get_tablespace_name(params->tablespaceOid));
+		}
+
+		relkind = get_rel_relkind(relid);
+		relpersistence = get_rel_persistence(relid);
+
+		/*
+		 * Partitioned tables and indexes can never be processed directly, and
+		 * a list of their leaves should be built first.
+		 */
+		Assert(!RELKIND_HAS_PARTITIONS(relkind));
+
+		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
+			relpersistence != RELPERSISTENCE_TEMP)
+		{
+			ReindexParams newparams = *params;
+
+			newparams.options |= REINDEXOPT_MISSING_OK;
+			(void) ReindexRelationConcurrently(relid, &newparams);
+			/* ReindexRelationConcurrently() does the verbose output */
+		}
+		else if (relkind == RELKIND_INDEX)
+		{
+			ReindexParams newparams = *params;
+
+			newparams.options |=
+				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
+			reindex_index(relid, false, relpersistence, &newparams);
+			PopActiveSnapshot();
+			/* reindex_index() does the verbose output */
+		}
+		else
+		{
+			bool		result;
+			ReindexParams newparams = *params;
+
+			newparams.options |=
+				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
+			result = reindex_relation(relid,
+									  REINDEX_REL_PROCESS_TOAST |
+									  REINDEX_REL_CHECK_CONSTRAINTS,
+									  &newparams);
+
+			if (result && (params->options & REINDEXOPT_VERBOSE) != 0)
+				ereport(INFO,
+						(errmsg("table \"%s.%s\" was reindexed",
+								get_namespace_name(get_rel_namespace(relid)),
+								get_rel_name(relid))));
+
+			PopActiveSnapshot();
+		}
+
+		CommitTransactionCommand();
+	}
+
+	StartTransactionCommand();
+}
+
+
+/*
+ * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
+ * relation OID
+ *
+ * 'relationOid' can either belong to an index, a table or a materialized
+ * view.  For tables and materialized views, all its indexes will be rebuilt,
+ * excluding invalid indexes and any indexes used in exclusion constraints,
+ * but including its associated toast table indexes.  For indexes, the index
+ * itself will be rebuilt.
+ *
+ * The locks taken on parent tables and involved indexes are kept until the
+ * transaction is committed, at which point a session lock is taken on each
+ * relation.  Both of these protect against concurrent schema changes.
+ * + * Returns true if any indexes have been rebuilt (including toast table's + * indexes, when relevant), otherwise returns false. + * + * NOTE: This cannot be used on temporary relations. A concurrent build would + * cause issues with ON COMMIT actions triggered by the transactions of the + * concurrent build. Temporary relations are not subject to concurrent + * concerns, so there's no need for the more complicated concurrent build, + * anyway, and a non-concurrent reindex is more efficient. + */ +static bool +ReindexRelationConcurrently(Oid relationOid, ReindexParams *params) +{ + typedef struct ReindexIndexInfo + { + Oid indexId; + Oid tableId; + Oid amId; + bool safe; /* for set_indexsafe_procflags */ + } ReindexIndexInfo; + List *heapRelationIds = NIL; + List *indexIds = NIL; + List *newIndexIds = NIL; + List *relationLocks = NIL; + List *lockTags = NIL; + ListCell *lc, + *lc2; + MemoryContext private_context; + MemoryContext oldcontext; + char relkind; + char *relationName = NULL; + char *relationNamespace = NULL; + PGRUsage ru0; + const int progress_index[] = { + PROGRESS_CREATEIDX_COMMAND, + PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_INDEX_OID, + PROGRESS_CREATEIDX_ACCESS_METHOD_OID + }; + int64 progress_vals[4]; + + /* + * Create a memory context that will survive forced transaction commits we + * do below. Since it is a child of PortalContext, it will go away + * eventually even if we suffer an error; there's no need for special + * abort cleanup logic. 
+ */ + private_context = AllocSetContextCreate(PortalContext, + "ReindexConcurrent", + ALLOCSET_SMALL_SIZES); + + if ((params->options & REINDEXOPT_VERBOSE) != 0) + { + /* Save data needed by REINDEX VERBOSE in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + relationName = get_rel_name(relationOid); + relationNamespace = get_namespace_name(get_rel_namespace(relationOid)); + + pg_rusage_init(&ru0); + + MemoryContextSwitchTo(oldcontext); + } + + relkind = get_rel_relkind(relationOid); + + /* + * Extract the list of indexes that are going to be rebuilt based on the + * relation Oid given by caller. + */ + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_TOASTVALUE: + { + /* + * In the case of a relation, find all its indexes including + * toast indexes. + */ + Relation heapRelation; + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, relationOid); + + MemoryContextSwitchTo(oldcontext); + + if (IsCatalogRelationOid(relationOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex system catalogs concurrently"))); + + /* Open relation to get its indexes */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + { + heapRelation = try_table_open(relationOid, + ShareUpdateExclusiveLock); + /* leave if relation does not exist */ + if (!heapRelation) + break; + } + else + heapRelation = table_open(relationOid, + ShareUpdateExclusiveLock); + + if (OidIsValid(params->tablespaceOid) && + IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + RelationGetRelationName(heapRelation)))); + + /* Add all the valid indexes of relation to list */ + foreach(lc, RelationGetIndexList(heapRelation)) + { + Oid cellOid = lfirst_oid(lc); + Relation 
indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else if (indexRelation->rd_index->indisexclusion) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + ReindexIndexInfo *idx; + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + /* other fields set later */ + + indexIds = lappend(indexIds, idx); + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + /* Also add the toast indexes */ + if (OidIsValid(heapRelation->rd_rel->reltoastrelid)) + { + Oid toastOid = heapRelation->rd_rel->reltoastrelid; + Relation toastRelation = table_open(toastOid, + ShareUpdateExclusiveLock); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, toastOid); + + MemoryContextSwitchTo(oldcontext); + + foreach(lc2, RelationGetIndexList(toastRelation)) + { + Oid cellOid = lfirst_oid(lc2); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + ReindexIndexInfo *idx; + + /* + * Save the list of relation OIDs in private + * context + */ + 
oldcontext = MemoryContextSwitchTo(private_context); + + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = cellOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + table_close(toastRelation, NoLock); + } + + table_close(heapRelation, NoLock); + break; + } + case RELKIND_INDEX: + { + Oid heapId = IndexGetRelation(relationOid, + (params->options & REINDEXOPT_MISSING_OK) != 0); + Relation heapRelation; + ReindexIndexInfo *idx; + + /* if relation is missing, leave */ + if (!OidIsValid(heapId)) + break; + + if (IsCatalogRelationOid(heapId)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex system catalogs concurrently"))); + + /* + * Don't allow reindex for an invalid index on TOAST table, as + * if rebuilt it would not be possible to drop it. Match + * error message in reindex_index(). + */ + if (IsToastNamespace(get_rel_namespace(relationOid)) && + !get_index_isvalid(relationOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex invalid index on TOAST table"))); + + /* + * Check if parent relation can be locked and if it exists, + * this needs to be done at this stage as the list of indexes + * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK + * should not be used once all the session locks are taken. 
+ */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + { + heapRelation = try_table_open(heapId, + ShareUpdateExclusiveLock); + /* leave if relation does not exist */ + if (!heapRelation) + break; + } + else + heapRelation = table_open(heapId, + ShareUpdateExclusiveLock); + + if (OidIsValid(params->tablespaceOid) && + IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + get_rel_name(relationOid)))); + + table_close(heapRelation, NoLock); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track the heap relation of this index for session locks */ + heapRelationIds = list_make1_oid(heapId); + + /* + * Save the list of relation OIDs in private context. Note + * that invalid indexes are allowed here. + */ + idx = palloc(sizeof(ReindexIndexInfo)); + idx->indexId = relationOid; + indexIds = lappend(indexIds, idx); + /* other fields set later */ + + MemoryContextSwitchTo(oldcontext); + break; + } + + case RELKIND_PARTITIONED_TABLE: + case RELKIND_PARTITIONED_INDEX: + default: + /* Return error if type of relation is not supported */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reindex this type of relation concurrently"))); + break; + } + + /* + * Definitely no indexes, so leave. Any checks based on + * REINDEXOPT_MISSING_OK should be done only while the list of indexes to + * work on is built as the session locks taken before this transaction + * commits will make sure that they cannot be dropped by a concurrent + * session until this operation completes. 
+ */ + if (indexIds == NIL) + { + PopActiveSnapshot(); + return false; + } + + /* It's not a shared catalog, so refuse to move it to shared tablespace */ + if (params->tablespaceOid == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move non-shared relation to tablespace \"%s\"", + get_tablespace_name(params->tablespaceOid)))); + + Assert(heapRelationIds != NIL); + + /*----- + * Now we have all the indexes we want to process in indexIds. + * + * The phases now are: + * + * 1. create new indexes in the catalog + * 2. build new indexes + * 3. let new indexes catch up with tuples inserted in the meantime + * 4. swap index names + * 5. mark old indexes as dead + * 6. drop old indexes + * + * We process each phase for all indexes before moving to the next phase, + * for efficiency. + */ + + /* + * Phase 1 of REINDEX CONCURRENTLY + * + * Create a new index with the same properties as the old one, but it is + * only registered in catalogs and will be built later. Then get session + * locks on all involved tables. See analogous code in DefineIndex() for + * more detailed comments. + */ + + foreach(lc, indexIds) + { + char *concurrentName; + ReindexIndexInfo *idx = lfirst(lc); + ReindexIndexInfo *newidx; + Oid newIndexId; + Relation indexRel; + Relation heapRel; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + Relation newIndexRel; + LockRelId *lockrelid; + Oid tablespaceid; + + indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock); + heapRel = table_open(indexRel->rd_index->indrelid, + ShareUpdateExclusiveLock); + + /* + * Switch to the table owner's userid, so that any index functions are + * run as that user. Also lock down security-restricted operations + * and arrange to make GUC variable changes local to this command. 
+ */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(heapRel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* determine safety of this index for set_indexsafe_procflags */ + idx->safe = (indexRel->rd_indexprs == NIL && + indexRel->rd_indpred == NIL); + idx->tableId = RelationGetRelid(heapRel); + idx->amId = indexRel->rd_rel->relam; + + /* This function shouldn't be called for temporary relations. */ + if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + elog(ERROR, "cannot reindex a temporary table concurrently"); + + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + idx->tableId); + + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = 0; /* initializing */ + progress_vals[2] = idx->indexId; + progress_vals[3] = idx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + /* Choose a temporary relation name for the new index */ + concurrentName = ChooseRelationName(get_rel_name(idx->indexId), + NULL, + "ccnew", + get_rel_namespace(indexRel->rd_index->indrelid), + false); + + /* Choose the new tablespace, indexes of toast tables are not moved */ + if (OidIsValid(params->tablespaceOid) && + heapRel->rd_rel->relkind != RELKIND_TOASTVALUE) + tablespaceid = params->tablespaceOid; + else + tablespaceid = indexRel->rd_rel->reltablespace; + + /* Create new index definition based on given index */ + newIndexId = index_concurrently_create_copy(heapRel, + idx->indexId, + tablespaceid, + concurrentName); + + /* + * Now open the relation of the new index, a session-level lock is + * also needed on it. 
+ */ + newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock); + + /* + * Save the list of OIDs and locks in private context + */ + oldcontext = MemoryContextSwitchTo(private_context); + + newidx = palloc(sizeof(ReindexIndexInfo)); + newidx->indexId = newIndexId; + newidx->safe = idx->safe; + newidx->tableId = idx->tableId; + newidx->amId = idx->amId; + + newIndexIds = lappend(newIndexIds, newidx); + + /* + * Save lockrelid to protect each relation from drop then close + * relations. The lockrelid on parent relation is not taken here to + * avoid multiple locks taken on the same relation, instead we rely on + * parentRelationIds built earlier. + */ + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = indexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = newIndexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + + MemoryContextSwitchTo(oldcontext); + + index_close(indexRel, NoLock); + index_close(newIndexRel, NoLock); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + table_close(heapRel, NoLock); + } + + /* + * Save the heap lock for following visibility checks with other backends + * might conflict with this session. 
+ */ + foreach(lc, heapRelationIds) + { + Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock); + LockRelId *lockrelid; + LOCKTAG *heaplocktag; + + /* Save the list of locks in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Add lockrelid of heap relation to the list of locked relations */ + lockrelid = palloc(sizeof(*lockrelid)); + *lockrelid = heapRelation->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, lockrelid); + + heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG)); + + /* Save the LOCKTAG for this parent relation for the wait phase */ + SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId); + lockTags = lappend(lockTags, heaplocktag); + + MemoryContextSwitchTo(oldcontext); + + /* Close heap relation */ + table_close(heapRelation, NoLock); + } + + /* Get a session-level lock on each table. */ + foreach(lc, relationLocks) + { + LockRelId *lockrelid = (LockRelId *) lfirst(lc); + + LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Because we don't take a snapshot in this transaction, there's no need + * to set the PROC_IN_SAFE_IC flag here. + */ + + /* + * Phase 2 of REINDEX CONCURRENTLY + * + * Build the new indexes in a separate transaction for each index to avoid + * having open transactions for an unnecessary long time. But before + * doing that, wait until no running transactions could have the table of + * the index open with the old list of indexes. See "phase 2" in + * DefineIndex() for more details. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_1); + WaitForLockersMultiple(lockTags, ShareLock, true); + CommitTransactionCommand(); + + foreach(lc, newIndexIds) + { + ReindexIndexInfo *newidx = lfirst(lc); + + /* Start new transaction for this index's concurrent build */ + StartTransactionCommand(); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Update progress for the index to build, with the correct parent + * table involved. + */ + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId); + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + /* Perform concurrent build of new index */ + index_concurrently_build(newidx->tableId, newidx->indexId); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } + + StartTransactionCommand(); + + /* + * Because we don't take a snapshot or Xid in this transaction, there's no + * need to set the PROC_IN_SAFE_IC flag here. + */ + + /* + * Phase 3 of REINDEX CONCURRENTLY + * + * During this phase the old indexes catch up with any new tuples that + * were created during the previous phase. See "phase 3" in DefineIndex() + * for more details. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_2); + WaitForLockersMultiple(lockTags, ShareLock, true); + CommitTransactionCommand(); + + foreach(lc, newIndexIds) + { + ReindexIndexInfo *newidx = lfirst(lc); + TransactionId limitXmin; + Snapshot snapshot; + + StartTransactionCommand(); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Tell concurrent indexing to ignore us, if index qualifies */ + if (newidx->safe) + set_indexsafe_procflags(); + + /* + * Take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + /* + * Update progress for the index to build, with the correct parent + * table involved. + */ + pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, + newidx->tableId); + progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY; + progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN; + progress_vals[2] = newidx->indexId; + progress_vals[3] = newidx->amId; + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + + validate_index(newidx->tableId, newidx->indexId, snapshot); + + /* + * We can now do away with our active snapshot, we still need to save + * the xmin limit to wait for older snapshots. + */ + limitXmin = snapshot->xmin; + + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + + /* + * To ensure no deadlocks, we must commit and start yet another + * transaction, and do our wait before any snapshot has been taken in + * it. + */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. 
But since it might not contain tuples deleted + * just before the reference snap was taken, we have to wait out any + * transactions that might have older snapshots. + * + * Because we don't take a snapshot or Xid in this transaction, + * there's no need to set the PROC_IN_SAFE_IC flag here. + */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_3); + WaitForOlderSnapshots(limitXmin, true); + + CommitTransactionCommand(); + } + + /* + * Phase 4 of REINDEX CONCURRENTLY + * + * Now that the new indexes have been validated, swap each new index with + * its corresponding old index. + * + * We mark the new indexes as valid and the old indexes as not valid at + * the same time to make sure we only get constraint violations from the + * indexes with the correct names. + */ + + StartTransactionCommand(); + + /* + * Because this transaction only does catalog manipulations and doesn't do + * any index operations, we can set the PROC_IN_SAFE_IC flag here + * unconditionally. + */ + set_indexsafe_procflags(); + + forboth(lc, indexIds, lc2, newIndexIds) + { + ReindexIndexInfo *oldidx = lfirst(lc); + ReindexIndexInfo *newidx = lfirst(lc2); + char *oldName; + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + /* Choose a relation name for old index */ + oldName = ChooseRelationName(get_rel_name(oldidx->indexId), + NULL, + "ccold", + get_rel_namespace(oldidx->tableId), + false); + + /* + * Swap old index with the new one. This also marks the new one as + * valid and the old one as not valid. + */ + index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName); + + /* + * Invalidate the relcache for the table, so that after this commit + * all sessions will refresh any cached plans that might reference the + * index. 
+ */ + CacheInvalidateRelcacheByRelid(oldidx->tableId); + + /* + * CCI here so that subsequent iterations see the oldName in the + * catalog and can choose a nonconflicting name for their oldName. + * Otherwise, this could lead to conflicts if a table has two indexes + * whose names are equal for the first NAMEDATALEN-minus-a-few + * characters. + */ + CommandCounterIncrement(); + } + + /* Commit this transaction and make index swaps visible */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + + /* + * Phase 5 of REINDEX CONCURRENTLY + * + * Mark the old indexes as dead. First we must wait until no running + * transaction could be using the index for a query. See also + * index_drop() for more details. + */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_4); + WaitForLockersMultiple(lockTags, AccessExclusiveLock, true); + + foreach(lc, indexIds) + { + ReindexIndexInfo *oldidx = lfirst(lc); + + /* + * Check for user-requested abort. This is inside a transaction so as + * xact.c does not issue a useless WARNING, and ensures that + * session-level locks are cleaned up on abort. + */ + CHECK_FOR_INTERRUPTS(); + + index_concurrently_set_dead(oldidx->tableId, oldidx->indexId); + } + + /* Commit this transaction to make the updates visible. */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no + * real need for that, because we only acquire an Xid after the wait is + * done, and that lasts for a very short period. + */ + + /* + * Phase 6 of REINDEX CONCURRENTLY + * + * Drop the old indexes. 
+ */ + + pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, + PROGRESS_CREATEIDX_PHASE_WAIT_5); + WaitForLockersMultiple(lockTags, AccessExclusiveLock, true); + + PushActiveSnapshot(GetTransactionSnapshot()); + + { + ObjectAddresses *objects = new_object_addresses(); + + foreach(lc, indexIds) + { + ReindexIndexInfo *idx = lfirst(lc); + ObjectAddress object; + + object.classId = RelationRelationId; + object.objectId = idx->indexId; + object.objectSubId = 0; + + add_exact_object_address(&object, objects); + } + + /* + * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the + * right lock level. + */ + performMultipleDeletions(objects, DROP_RESTRICT, + PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * Finally, release the session-level lock on the table. + */ + foreach(lc, relationLocks) + { + LockRelId *lockrelid = (LockRelId *) lfirst(lc); + + UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock); + } + + /* Start a new transaction to finish process properly */ + StartTransactionCommand(); + + /* Log what we did */ + if ((params->options & REINDEXOPT_VERBOSE) != 0) + { + if (relkind == RELKIND_INDEX) + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + else + { + foreach(lc, newIndexIds) + { + ReindexIndexInfo *idx = lfirst(lc); + Oid indOid = idx->indexId; + + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + get_namespace_name(get_rel_namespace(indOid)), + get_rel_name(indOid)))); + /* Don't show rusage here, since it's not per index. 
*/ + } + + ereport(INFO, + (errmsg("table \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + } + } + + MemoryContextDelete(private_context); + + pgstat_progress_end_command(); + + return true; +} + +/* + * Insert or delete an appropriate pg_inherits tuple to make the given index + * be a partition of the indicated parent index. + * + * This also corrects the pg_depend information for the affected index. + */ +void +IndexSetParentIndex(Relation partitionIdx, Oid parentOid) +{ + Relation pg_inherits; + ScanKeyData key[2]; + SysScanDesc scan; + Oid partRelid = RelationGetRelid(partitionIdx); + HeapTuple tuple; + bool fix_dependencies; + + /* Make sure this is an index */ + Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX || + partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* + * Scan pg_inherits for rows linking our index to some parent. + */ + pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partRelid)); + ScanKeyInit(&key[1], + Anum_pg_inherits_inhseqno, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(1)); + scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true, + NULL, 2, key); + tuple = systable_getnext(scan); + + if (!HeapTupleIsValid(tuple)) + { + if (parentOid == InvalidOid) + { + /* + * No pg_inherits row, and no parent wanted: nothing to do in this + * case. + */ + fix_dependencies = false; + } + else + { + StoreSingleInheritance(partRelid, parentOid, 1); + fix_dependencies = true; + } + } + else + { + Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple); + + if (parentOid == InvalidOid) + { + /* + * There exists a pg_inherits row, which we want to clear; do so. + */ + CatalogTupleDelete(pg_inherits, &tuple->t_self); + fix_dependencies = true; + } + else + { + /* + * A pg_inherits row exists. 
If it's the same we want, then we're + * good; if it differs, that amounts to a corrupt catalog and + * should not happen. + */ + if (inhForm->inhparent != parentOid) + { + /* unexpected: we should not get called in this case */ + elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u", + inhForm->inhrelid, inhForm->inhparent); + } + + /* already in the right state */ + fix_dependencies = false; + } + } + + /* done with pg_inherits */ + systable_endscan(scan); + relation_close(pg_inherits, RowExclusiveLock); + + /* set relhassubclass if an index partition has been added to the parent */ + if (OidIsValid(parentOid)) + SetRelationHasSubclass(parentOid, true); + + /* set relispartition correctly on the partition */ + update_relispartition(partRelid, OidIsValid(parentOid)); + + if (fix_dependencies) + { + /* + * Insert/delete pg_depend rows. If setting a parent, add PARTITION + * dependencies on the parent index and the table; if removing a + * parent, delete PARTITION dependencies. + */ + if (OidIsValid(parentOid)) + { + ObjectAddress partIdx; + ObjectAddress parentIdx; + ObjectAddress partitionTbl; + + ObjectAddressSet(partIdx, RelationRelationId, partRelid); + ObjectAddressSet(parentIdx, RelationRelationId, parentOid); + ObjectAddressSet(partitionTbl, RelationRelationId, + partitionIdx->rd_index->indrelid); + recordDependencyOn(&partIdx, &parentIdx, + DEPENDENCY_PARTITION_PRI); + recordDependencyOn(&partIdx, &partitionTbl, + DEPENDENCY_PARTITION_SEC); + } + else + { + deleteDependencyRecordsForClass(RelationRelationId, partRelid, + RelationRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(RelationRelationId, partRelid, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + } + + /* make our updates visible */ + CommandCounterIncrement(); + } +} + +/* + * Subroutine of IndexSetParentIndex to update the relispartition flag of the + * given index to the given value. 
+ */ +static void +update_relispartition(Oid relationId, bool newval) +{ + HeapTuple tup; + Relation classRel; + + classRel = table_open(RelationRelationId, RowExclusiveLock); + tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for relation %u", relationId); + Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval); + ((Form_pg_class) GETSTRUCT(tup))->relispartition = newval; + CatalogTupleUpdate(classRel, &tup->t_self, tup); + heap_freetuple(tup); + table_close(classRel, RowExclusiveLock); +} + +/* + * Set the PROC_IN_SAFE_IC flag in MyProc->statusFlags. + * + * When doing concurrent index builds, we can set this flag + * to tell other processes concurrently running CREATE + * INDEX CONCURRENTLY or REINDEX CONCURRENTLY to ignore us when + * doing their waits for concurrent snapshots. On one hand it + * avoids pointlessly waiting for a process that's not interesting + * anyway; but more importantly it avoids deadlocks in some cases. + * + * This can be done safely only for indexes that don't execute any + * expressions that could access other tables, so index must not be + * expressional nor partial. Caller is responsible for only calling + * this routine when that assumption holds true. + * + * (The flag is reset automatically at transaction end, so it must be + * set for each transaction.) + */ +static inline void +set_indexsafe_procflags(void) +{ + /* + * This should only be called before installing xid or xmin in MyProc; + * otherwise, concurrent processes could see an Xmin that moves backwards. 
+ */ + Assert(MyProc->xid == InvalidTransactionId && + MyProc->xmin == InvalidTransactionId); + + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_SAFE_IC; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); +} diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c new file mode 100644 index 0000000..b97b8b0 --- /dev/null +++ b/src/backend/commands/lockcmds.c @@ -0,0 +1,306 @@ +/*------------------------------------------------------------------------- + * + * lockcmds.c + * LOCK command support code + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/lockcmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/table.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/pg_inherits.h" +#include "commands/lockcmds.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + +static void LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait); +static AclResult LockTableAclCheck(Oid relid, LOCKMODE lockmode, Oid userid); +static void RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, + Oid oldrelid, void *arg); +static void LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait, + List *ancestor_views); + +/* + * LOCK TABLE + */ +void +LockTableCommand(LockStmt *lockstmt) +{ + ListCell *p; + + /* + * Iterate over the list and process the named relations one at a time + */ + foreach(p, lockstmt->relations) + { + RangeVar *rv = (RangeVar *) lfirst(p); + bool recurse = rv->inh; + Oid reloid; + + reloid = 
RangeVarGetRelidExtended(rv, lockstmt->mode, + lockstmt->nowait ? RVR_NOWAIT : 0, + RangeVarCallbackForLockTable, + (void *) &lockstmt->mode); + + if (get_rel_relkind(reloid) == RELKIND_VIEW) + LockViewRecurse(reloid, lockstmt->mode, lockstmt->nowait, NIL); + else if (recurse) + LockTableRecurse(reloid, lockstmt->mode, lockstmt->nowait); + } +} + +/* + * Before acquiring a table lock on the named table, check whether we have + * permission to do so. + */ +static void +RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + LOCKMODE lockmode = *(LOCKMODE *) arg; + char relkind; + char relpersistence; + AclResult aclresult; + + if (!OidIsValid(relid)) + return; /* doesn't exist, so no permissions check */ + relkind = get_rel_relkind(relid); + if (!relkind) + return; /* woops, concurrently dropped; no permissions + * check */ + + /* Currently, we only allow plain tables or views to be locked */ + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot lock relation \"%s\"", + rv->relname), + errdetail_relkind_not_supported(relkind))); + + /* + * Make note if a temporary relation has been accessed in this + * transaction. + */ + relpersistence = get_rel_persistence(relid); + if (relpersistence == RELPERSISTENCE_TEMP) + MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE; + + /* Check permissions. */ + aclresult = LockTableAclCheck(relid, lockmode, GetUserId()); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); +} + +/* + * Apply LOCK TABLE recursively over an inheritance tree + * + * This doesn't check permission to perform LOCK TABLE on the child tables, + * because getting here means that the user has permission to lock the + * parent which is enough. 
+ */ +static void +LockTableRecurse(Oid reloid, LOCKMODE lockmode, bool nowait) +{ + List *children; + ListCell *lc; + + children = find_all_inheritors(reloid, NoLock, NULL); + + foreach(lc, children) + { + Oid childreloid = lfirst_oid(lc); + + /* Parent already locked. */ + if (childreloid == reloid) + continue; + + if (!nowait) + LockRelationOid(childreloid, lockmode); + else if (!ConditionalLockRelationOid(childreloid, lockmode)) + { + /* try to throw error by name; relation could be deleted... */ + char *relname = get_rel_name(childreloid); + + if (!relname) + continue; /* child concurrently dropped, just skip it */ + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on relation \"%s\"", + relname))); + } + + /* + * Even if we got the lock, child might have been concurrently + * dropped. If so, we can skip it. + */ + if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(childreloid))) + { + /* Release useless lock */ + UnlockRelationOid(childreloid, lockmode); + continue; + } + } +} + +/* + * Apply LOCK TABLE recursively over a view + * + * All tables and views appearing in the view definition query are locked + * recursively with the same lock mode. + */ + +typedef struct +{ + LOCKMODE lockmode; /* lock mode to use */ + bool nowait; /* no wait mode */ + Oid check_as_user; /* user for checking the privilege */ + Oid viewoid; /* OID of the view to be locked */ + List *ancestor_views; /* OIDs of ancestor views */ +} LockViewRecurse_context; + +static bool +LockViewRecurse_walker(Node *node, LockViewRecurse_context *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Query)) + { + Query *query = (Query *) node; + ListCell *rtable; + + foreach(rtable, query->rtable) + { + RangeTblEntry *rte = lfirst(rtable); + AclResult aclresult; + + Oid relid = rte->relid; + char relkind = rte->relkind; + char *relname = get_rel_name(relid); + + /* + * The OLD and NEW placeholder entries in the view's rtable are + * skipped. 
+ */ + if (relid == context->viewoid && + (strcmp(rte->eref->aliasname, "old") == 0 || + strcmp(rte->eref->aliasname, "new") == 0)) + continue; + + /* Currently, we only allow plain tables or views to be locked. */ + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_VIEW) + continue; + + /* + * We might be dealing with a self-referential view. If so, we + * can just stop recursing, since we already locked it. + */ + if (list_member_oid(context->ancestor_views, relid)) + continue; + + /* + * Check permissions as the specified user. This will either be + * the view owner or the current user. + */ + aclresult = LockTableAclCheck(relid, context->lockmode, + context->check_as_user); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(relkind), relname); + + /* We have enough rights to lock the relation; do so. */ + if (!context->nowait) + LockRelationOid(relid, context->lockmode); + else if (!ConditionalLockRelationOid(relid, context->lockmode)) + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on relation \"%s\"", + relname))); + + if (relkind == RELKIND_VIEW) + LockViewRecurse(relid, context->lockmode, context->nowait, + context->ancestor_views); + else if (rte->inh) + LockTableRecurse(relid, context->lockmode, context->nowait); + } + + return query_tree_walker(query, + LockViewRecurse_walker, + context, + QTW_IGNORE_JOINALIASES); + } + + return expression_tree_walker(node, + LockViewRecurse_walker, + context); +} + +static void +LockViewRecurse(Oid reloid, LOCKMODE lockmode, bool nowait, + List *ancestor_views) +{ + LockViewRecurse_context context; + Relation view; + Query *viewquery; + + /* caller has already locked the view */ + view = table_open(reloid, NoLock); + viewquery = get_view_query(view); + + /* + * If the view has the security_invoker property set, check permissions as + * the current user. Otherwise, check permissions as the view owner. 
+ */ + context.lockmode = lockmode; + context.nowait = nowait; + if (RelationHasSecurityInvoker(view)) + context.check_as_user = GetUserId(); + else + context.check_as_user = view->rd_rel->relowner; + context.viewoid = reloid; + context.ancestor_views = lappend_oid(ancestor_views, reloid); + + LockViewRecurse_walker((Node *) viewquery, &context); + + context.ancestor_views = list_delete_last(context.ancestor_views); + + table_close(view, NoLock); +} + +/* + * Check whether the current user is permitted to lock this relation. + */ +static AclResult +LockTableAclCheck(Oid reloid, LOCKMODE lockmode, Oid userid) +{ + AclResult aclresult; + AclMode aclmask; + + /* Verify adequate privilege */ + if (lockmode == AccessShareLock) + aclmask = ACL_SELECT; + else if (lockmode == RowExclusiveLock) + aclmask = ACL_INSERT | ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE; + else + aclmask = ACL_UPDATE | ACL_DELETE | ACL_TRUNCATE; + + aclresult = pg_class_aclcheck(reloid, userid, aclmask); + + return aclresult; +} diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c new file mode 100644 index 0000000..d1ee106 --- /dev/null +++ b/src/backend/commands/matview.c @@ -0,0 +1,936 @@ +/*------------------------------------------------------------------------- + * + * matview.c + * materialized view support + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/matview.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "catalog/pg_opclass.h" 
+#include "catalog/pg_operator.h" +#include "commands/cluster.h" +#include "commands/matview.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "parser/parse_relation.h" +#include "pgstat.h" +#include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + Oid transientoid; /* OID of new heap into which to store */ + /* These fields are filled by transientrel_startup: */ + Relation transientrel; /* relation to write to */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_transientrel; + +static int matview_maintenance_depth = 0; + +static void transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); +static bool transientrel_receive(TupleTableSlot *slot, DestReceiver *self); +static void transientrel_shutdown(DestReceiver *self); +static void transientrel_destroy(DestReceiver *self); +static uint64 refresh_matview_datafill(DestReceiver *dest, Query *query, + const char *queryString); +static char *make_temptable_name_n(char *tempname, int n); +static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, + int save_sec_context); +static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence); +static bool is_usable_unique_index(Relation indexRel); +static void OpenMatViewIncrementalMaintenance(void); +static void CloseMatViewIncrementalMaintenance(void); + +/* + * SetMatViewPopulatedState + * Mark a materialized view as populated, or not. 
+ * + * NOTE: caller must be holding an appropriate lock on the relation. + */ +void +SetMatViewPopulatedState(Relation relation, bool newstate) +{ + Relation pgrel; + HeapTuple tuple; + + Assert(relation->rd_rel->relkind == RELKIND_MATVIEW); + + /* + * Update relation's pg_class entry. Crucial side-effect: other backends + * (and this one too!) are sent SI message to make them rebuild relcache + * entries. + */ + pgrel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(relation)); + + ((Form_pg_class) GETSTRUCT(tuple))->relispopulated = newstate; + + CatalogTupleUpdate(pgrel, &tuple->t_self, tuple); + + heap_freetuple(tuple); + table_close(pgrel, RowExclusiveLock); + + /* + * Advance command counter to make the updated pg_class row locally + * visible. + */ + CommandCounterIncrement(); +} + +/* + * ExecRefreshMatView -- execute a REFRESH MATERIALIZED VIEW command + * + * This refreshes the materialized view by creating a new table and swapping + * the relfilenodes of the new table and the old materialized view, so the OID + * of the original materialized view is preserved. Thus we do not lose GRANT + * nor references to this materialized view. + * + * If WITH NO DATA was specified, this is effectively like a TRUNCATE; + * otherwise it is like a TRUNCATE followed by an INSERT using the SELECT + * statement associated with the materialized view. The statement node's + * skipData field shows whether the clause was used. + * + * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading + * the new heap, it's better to create the indexes afterwards than to fill them + * incrementally while we load. + * + * The matview's "populated" state is changed based on whether the contents + * reflect the result set of the materialized view's query. 
+ */ +ObjectAddress +ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, + ParamListInfo params, QueryCompletion *qc) +{ + Oid matviewOid; + Relation matviewRel; + RewriteRule *rule; + List *actions; + Query *dataQuery; + Oid tableSpace; + Oid relowner; + Oid OIDNewHeap; + DestReceiver *dest; + uint64 processed = 0; + bool concurrent; + LOCKMODE lockmode; + char relpersistence; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + ObjectAddress address; + + /* Determine strength of lock needed. */ + concurrent = stmt->concurrent; + lockmode = concurrent ? ExclusiveLock : AccessExclusiveLock; + + /* + * Get a lock until end of transaction. + */ + matviewOid = RangeVarGetRelidExtended(stmt->relation, + lockmode, 0, + RangeVarCallbackOwnsTable, NULL); + matviewRel = table_open(matviewOid, NoLock); + relowner = matviewRel->rd_rel->relowner; + + /* + * Switch to the owner's userid, so that any functions are run as that + * user. Also lock down security-restricted operations and arrange to + * make GUC variable changes local to this command. + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* Make sure it is a materialized view. */ + if (matviewRel->rd_rel->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a materialized view", + RelationGetRelationName(matviewRel)))); + + /* Check that CONCURRENTLY is not specified if not populated. */ + if (concurrent && !RelationIsPopulated(matviewRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("CONCURRENTLY cannot be used when the materialized view is not populated"))); + + /* Check that conflicting options have not been specified. 
*/ + if (concurrent && stmt->skipData) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s and %s options cannot be used together", + "CONCURRENTLY", "WITH NO DATA"))); + + /* + * Check that everything is correct for a refresh. Problems at this point + * are internal errors, so elog is sufficient. + */ + if (matviewRel->rd_rel->relhasrules == false || + matviewRel->rd_rules->numLocks < 1) + elog(ERROR, + "materialized view \"%s\" is missing rewrite information", + RelationGetRelationName(matviewRel)); + + if (matviewRel->rd_rules->numLocks > 1) + elog(ERROR, + "materialized view \"%s\" has too many rules", + RelationGetRelationName(matviewRel)); + + rule = matviewRel->rd_rules->rules[0]; + if (rule->event != CMD_SELECT || !(rule->isInstead)) + elog(ERROR, + "the rule for materialized view \"%s\" is not a SELECT INSTEAD OF rule", + RelationGetRelationName(matviewRel)); + + actions = rule->actions; + if (list_length(actions) != 1) + elog(ERROR, + "the rule for materialized view \"%s\" is not a single action", + RelationGetRelationName(matviewRel)); + + /* + * Check that there is a unique index with no WHERE clause on one or more + * columns of the materialized view if CONCURRENTLY is specified. 
+ */ + if (concurrent) + { + List *indexoidlist = RelationGetIndexList(matviewRel); + ListCell *indexoidscan; + bool hasUniqueIndex = false; + + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indexRel; + + indexRel = index_open(indexoid, AccessShareLock); + hasUniqueIndex = is_usable_unique_index(indexRel); + index_close(indexRel, AccessShareLock); + if (hasUniqueIndex) + break; + } + + list_free(indexoidlist); + + if (!hasUniqueIndex) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot refresh materialized view \"%s\" concurrently", + quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel))), + errhint("Create a unique index with no WHERE clause on one or more columns of the materialized view."))); + } + + /* + * The stored query was rewritten at the time of the MV definition, but + * has not been scribbled on by the planner. + */ + dataQuery = linitial_node(Query, actions); + + /* + * Check for active uses of the relation in the current transaction, such + * as open scans. + * + * NB: We count on this to protect us against problems with refreshing the + * data using TABLE_INSERT_FROZEN. + */ + CheckTableNotInUse(matviewRel, "REFRESH MATERIALIZED VIEW"); + + /* + * Tentatively mark the matview as populated or not (this will roll back + * if we fail later). + */ + SetMatViewPopulatedState(matviewRel, !stmt->skipData); + + /* Concurrent refresh builds new data in temp tablespace, and does diff. */ + if (concurrent) + { + tableSpace = GetDefaultTablespace(RELPERSISTENCE_TEMP, false); + relpersistence = RELPERSISTENCE_TEMP; + } + else + { + tableSpace = matviewRel->rd_rel->reltablespace; + relpersistence = matviewRel->rd_rel->relpersistence; + } + + /* + * Create the transient table that will receive the regenerated data. Lock + * it against access by any other process until commit (by which time it + * will be gone). 
+ */ + OIDNewHeap = make_new_heap(matviewOid, tableSpace, + matviewRel->rd_rel->relam, + relpersistence, ExclusiveLock); + LockRelationOid(OIDNewHeap, AccessExclusiveLock); + dest = CreateTransientRelDestReceiver(OIDNewHeap); + + /* Generate the data, if wanted. */ + if (!stmt->skipData) + processed = refresh_matview_datafill(dest, dataQuery, queryString); + + /* Make the matview match the newly generated data. */ + if (concurrent) + { + int old_depth = matview_maintenance_depth; + + PG_TRY(); + { + refresh_by_match_merge(matviewOid, OIDNewHeap, relowner, + save_sec_context); + } + PG_CATCH(); + { + matview_maintenance_depth = old_depth; + PG_RE_THROW(); + } + PG_END_TRY(); + Assert(matview_maintenance_depth == old_depth); + } + else + { + refresh_by_heap_swap(matviewOid, OIDNewHeap, relpersistence); + + /* + * Inform cumulative stats system about our activity: basically, we + * truncated the matview and inserted some new data. (The concurrent + * code path above doesn't need to worry about this because the + * inserts and deletes it issues get counted by lower-level code.) + */ + pgstat_count_truncate(matviewRel); + if (!stmt->skipData) + pgstat_count_heap_insert(matviewRel, processed); + } + + table_close(matviewRel, NoLock); + + /* Roll back any GUC changes */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + ObjectAddressSet(address, RelationRelationId, matviewOid); + + /* + * Save the rowcount so that pg_stat_statements can track the total number + * of rows processed by REFRESH MATERIALIZED VIEW command. Note that we + * still don't display the rowcount in the command completion tag output, + * i.e., the display_rowcount flag of CMDTAG_REFRESH_MATERIALIZED_VIEW + * command tag is left false in cmdtaglist.h. Otherwise, the change of + * completion tag output might break applications using it. 
+ */ + if (qc) + SetQueryCompletion(qc, CMDTAG_REFRESH_MATERIALIZED_VIEW, processed); + + return address; +} + +/* + * refresh_matview_datafill + * + * Execute the given query, sending result rows to "dest" (which will + * insert them into the target matview). + * + * Returns number of rows inserted. + */ +static uint64 +refresh_matview_datafill(DestReceiver *dest, Query *query, + const char *queryString) +{ + List *rewritten; + PlannedStmt *plan; + QueryDesc *queryDesc; + Query *copied_query; + uint64 processed; + + /* Lock and rewrite, using a copy to preserve the original query. */ + copied_query = copyObject(query); + AcquireRewriteLocks(copied_query, true, false); + rewritten = QueryRewrite(copied_query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for REFRESH MATERIALIZED VIEW"); + query = (Query *) linitial(rewritten); + + /* Check for user-requested abort. */ + CHECK_FOR_INTERRUPTS(); + + /* Plan the query which will generate data for the refresh. */ + plan = pg_plan_query(query, queryString, CURSOR_OPT_PARALLEL_OK, NULL); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. (This could only matter if + * the planner executed an allegedly-stable function that changed the + * database contents, but let's do it anyway to be safe.) 
+ */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create a QueryDesc, redirecting output to our tuple receiver */ + queryDesc = CreateQueryDesc(plan, queryString, + GetActiveSnapshot(), InvalidSnapshot, + dest, NULL, NULL, 0); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, 0); + + /* run the plan */ + ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); + + processed = queryDesc->estate->es_processed; + + /* and clean up */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + + return processed; +} + +DestReceiver * +CreateTransientRelDestReceiver(Oid transientoid) +{ + DR_transientrel *self = (DR_transientrel *) palloc0(sizeof(DR_transientrel)); + + self->pub.receiveSlot = transientrel_receive; + self->pub.rStartup = transientrel_startup; + self->pub.rShutdown = transientrel_shutdown; + self->pub.rDestroy = transientrel_destroy; + self->pub.mydest = DestTransientRel; + self->transientoid = transientoid; + + return (DestReceiver *) self; +} + +/* + * transientrel_startup --- executor startup + */ +static void +transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + DR_transientrel *myState = (DR_transientrel *) self; + Relation transientrel; + + transientrel = table_open(myState->transientoid, NoLock); + + /* + * Fill private fields of myState for use by later routines + */ + myState->transientrel = transientrel; + myState->output_cid = GetCurrentCommandId(true); + myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN; + myState->bistate = GetBulkInsertState(); + + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. 
+ */ + Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber); +} + +/* + * transientrel_receive --- receive one tuple + */ +static bool +transientrel_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_transientrel *myState = (DR_transientrel *) self; + + /* + * Note that the input slot might not be of the type of the target + * relation. That's supported by table_tuple_insert(), but slightly less + * efficient than inserting with the right slot - but the alternative + * would be to copy into a slot of the right type, which would not be + * cheap either. This also doesn't allow accessing per-AM data (say a + * tuple's xmin), but since we don't do that here... + */ + + table_tuple_insert(myState->transientrel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + + /* We know this is a newly created relation, so there are no indexes */ + + return true; +} + +/* + * transientrel_shutdown --- executor end + */ +static void +transientrel_shutdown(DestReceiver *self) +{ + DR_transientrel *myState = (DR_transientrel *) self; + + FreeBulkInsertState(myState->bistate); + + table_finish_bulk_insert(myState->transientrel, myState->ti_options); + + /* close transientrel, but keep lock until commit */ + table_close(myState->transientrel, NoLock); + myState->transientrel = NULL; +} + +/* + * transientrel_destroy --- release DestReceiver object + */ +static void +transientrel_destroy(DestReceiver *self) +{ + pfree(self); +} + + +/* + * Given a qualified temporary table name, append an underscore followed by + * the given integer, to make a new table name based on the old one. + * The result is a palloc'd string. + * + * As coded, this would fail to make a valid SQL name if the given name were, + * say, "FOO"."BAR". Currently, the table name portion of the input will + * never be double-quoted because it's of the form "pg_temp_NNN", cf + * make_new_heap(). But we might have to work harder someday. 
+ */ +static char * +make_temptable_name_n(char *tempname, int n) +{ + StringInfoData namebuf; + + initStringInfo(&namebuf); + appendStringInfoString(&namebuf, tempname); + appendStringInfo(&namebuf, "_%d", n); + return namebuf.data; +} + +/* + * refresh_by_match_merge + * + * Refresh a materialized view with transactional semantics, while allowing + * concurrent reads. + * + * This is called after a new version of the data has been created in a + * temporary table. It performs a full outer join against the old version of + * the data, producing "diff" results. This join cannot work if there are any + * duplicated rows in either the old or new versions, in the sense that every + * column would compare as equal between the two rows. It does work correctly + * in the face of rows which have at least one NULL value, with all non-NULL + * columns equal. The behavior of NULLs on equality tests and on UNIQUE + * indexes turns out to be quite convenient here; the tests we need to make + * are consistent with default behavior. If there is at least one UNIQUE + * index on the materialized view, we have exactly the guarantee we need. + * + * The temporary table used to hold the diff results contains just the TID of + * the old record (if matched) and the ROW from the new table as a single + * column of complex record type (if matched). + * + * Once we have the diff table, we perform set-based DELETE and INSERT + * operations against the materialized view, and discard both temporary + * tables. + * + * Everything from the generation of the new data to applying the differences + * takes place under cover of an ExclusiveLock, since it seems as though we + * would want to prohibit not only concurrent REFRESH operations, but also + * incremental maintenance. It also doesn't seem reasonable or safe to allow + * SELECT FOR UPDATE or SELECT FOR SHARE on rows being updated or deleted by + * this command. 
+ */ +static void +refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, + int save_sec_context) +{ + StringInfoData querybuf; + Relation matviewRel; + Relation tempRel; + char *matviewname; + char *tempname; + char *diffname; + TupleDesc tupdesc; + bool foundUniqueIndex; + List *indexoidlist; + ListCell *indexoidscan; + int16 relnatts; + Oid *opUsedForQual; + + initStringInfo(&querybuf); + matviewRel = table_open(matviewOid, NoLock); + matviewname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel)); + tempRel = table_open(tempOid, NoLock); + tempname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(tempRel)), + RelationGetRelationName(tempRel)); + diffname = make_temptable_name_n(tempname, 2); + + relnatts = RelationGetNumberOfAttributes(matviewRel); + + /* Open SPI context. */ + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "SPI_connect failed"); + + /* Analyze the temp table with the new contents. */ + appendStringInfo(&querybuf, "ANALYZE %s", tempname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* + * We need to ensure that there are not duplicate rows without NULLs in + * the new data set before we can count on the "diff" results. Check for + * that in a way that allows showing the first duplicated row found. Even + * after we pass this test, a unique index on the materialized view may + * find a duplicate key problem. + * + * Note: here and below, we use "tablename.*::tablerowtype" as a hack to + * keep ".*" from being expanded into multiple columns in a SELECT list. + * Compare ruleutils.c's get_variable(). 
+ */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "SELECT newdata.*::%s FROM %s newdata " + "WHERE newdata.* IS NOT NULL AND EXISTS " + "(SELECT 1 FROM %s newdata2 WHERE newdata2.* IS NOT NULL " + "AND newdata2.* OPERATOR(pg_catalog.*=) newdata.* " + "AND newdata2.ctid OPERATOR(pg_catalog.<>) " + "newdata.ctid)", + tempname, tempname, tempname); + if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + if (SPI_processed > 0) + { + /* + * Note that this ereport() is returning data to the user. Generally, + * we would want to make sure that the user has been granted access to + * this data. However, REFRESH MAT VIEW is only able to be run by the + * owner of the mat view (or a superuser) and therefore there is no + * need to check for access to data in the mat view. + */ + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("new data for materialized view \"%s\" contains duplicate rows without any null columns", + RelationGetRelationName(matviewRel)), + errdetail("Row: %s", + SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1)))); + } + + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + + /* Start building the query for creating the diff table. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "CREATE TEMP TABLE %s AS " + "SELECT mv.ctid AS tid, newdata.*::%s AS newdata " + "FROM %s mv FULL JOIN %s newdata ON (", + diffname, tempname, matviewname, tempname); + + /* + * Get the list of index OIDs for the table from the relcache, and look up + * each one in the pg_index syscache. We will test for equality on all + * columns present in all unique indexes which only reference columns and + * include all rows. 
+ */ + tupdesc = matviewRel->rd_att; + opUsedForQual = (Oid *) palloc0(sizeof(Oid) * relnatts); + foundUniqueIndex = false; + + indexoidlist = RelationGetIndexList(matviewRel); + + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indexRel; + + indexRel = index_open(indexoid, RowExclusiveLock); + if (is_usable_unique_index(indexRel)) + { + Form_pg_index indexStruct = indexRel->rd_index; + int indnkeyatts = indexStruct->indnkeyatts; + oidvector *indclass; + Datum indclassDatum; + bool isnull; + int i; + + /* Must get indclass the hard way. */ + indclassDatum = SysCacheGetAttr(INDEXRELID, + indexRel->rd_indextuple, + Anum_pg_index_indclass, + &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + /* Add quals for all columns from this index. */ + for (i = 0; i < indnkeyatts; i++) + { + int attnum = indexStruct->indkey.values[i]; + Oid opclass = indclass->values[i]; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + Oid attrtype = attr->atttypid; + HeapTuple cla_ht; + Form_pg_opclass cla_tup; + Oid opfamily; + Oid opcintype; + Oid op; + const char *leftop; + const char *rightop; + + /* + * Identify the equality operator associated with this index + * column. First we need to look up the column's opclass. 
+ */ + cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(cla_ht)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht); + Assert(cla_tup->opcmethod == BTREE_AM_OID); + opfamily = cla_tup->opcfamily; + opcintype = cla_tup->opcintype; + ReleaseSysCache(cla_ht); + + op = get_opfamily_member(opfamily, opcintype, opcintype, + BTEqualStrategyNumber); + if (!OidIsValid(op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, opcintype, opcintype, opfamily); + + /* + * If we find the same column with the same equality semantics + * in more than one index, we only need to emit the equality + * clause once. + * + * Since we only remember the last equality operator, this + * code could be fooled into emitting duplicate clauses given + * multiple indexes with several different opclasses ... but + * that's so unlikely it doesn't seem worth spending extra + * code to avoid. + */ + if (opUsedForQual[attnum - 1] == op) + continue; + opUsedForQual[attnum - 1] = op; + + /* + * Actually add the qual, ANDed with any others. + */ + if (foundUniqueIndex) + appendStringInfoString(&querybuf, " AND "); + + leftop = quote_qualified_identifier("newdata", + NameStr(attr->attname)); + rightop = quote_qualified_identifier("mv", + NameStr(attr->attname)); + + generate_operator_clause(&querybuf, + leftop, attrtype, + op, + rightop, attrtype); + + foundUniqueIndex = true; + } + } + + /* Keep the locks, since we're about to run DML which needs them. */ + index_close(indexRel, NoLock); + } + + list_free(indexoidlist); + + /* + * There must be at least one usable unique index on the matview. + * + * ExecRefreshMatView() checks that after taking the exclusive lock on the + * matview. So at least one unique index is guaranteed to exist here + * because the lock is still being held; so an Assert seems sufficient. 
+ */ + Assert(foundUniqueIndex); + + appendStringInfoString(&querybuf, + " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) " + "WHERE newdata.* IS NULL OR mv.* IS NULL " + "ORDER BY tid"); + + /* Create the temporary "diff" table. */ + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + SetUserIdAndSecContext(relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + + /* + * We have no further use for data from the "full-data" temp table, but we + * must keep it around because its type is referenced from the diff table. + */ + + /* Analyze the diff table. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, "ANALYZE %s", diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + OpenMatViewIncrementalMaintenance(); + + /* Deletes must come before inserts; do them first. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "DELETE FROM %s mv WHERE ctid OPERATOR(pg_catalog.=) ANY " + "(SELECT diff.tid FROM %s diff " + "WHERE diff.tid IS NOT NULL " + "AND diff.newdata IS NULL)", + matviewname, diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_DELETE) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* Inserts go last. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, + "INSERT INTO %s SELECT (diff.newdata).* " + "FROM %s diff WHERE tid IS NULL", + matviewname, diffname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_INSERT) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* We're done maintaining the materialized view. */ + CloseMatViewIncrementalMaintenance(); + table_close(tempRel, NoLock); + table_close(matviewRel, NoLock); + + /* Clean up temp tables. */ + resetStringInfo(&querybuf); + appendStringInfo(&querybuf, "DROP TABLE %s, %s", diffname, tempname); + if (SPI_exec(querybuf.data, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + /* Close SPI context. 
 */ + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed"); +} + +/* + * Swap the physical files of the target and transient tables, then rebuild + * the target's indexes and throw away the transient table. Security context + * swapping is handled by the called function, so it is not needed here. + */ +static void +refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap, char relpersistence) +{ + finish_heap_swap(matviewOid, OIDNewHeap, false, false, true, true, + RecentXmin, ReadNextMultiXactId(), relpersistence); +} + +/* + * Check whether specified index is usable for match merge. + */ +static bool +is_usable_unique_index(Relation indexRel) +{ + Form_pg_index indexStruct = indexRel->rd_index; + + /* + * Must be unique, valid, immediate, non-partial, and be defined over + * plain user columns (not expressions). We also require it to be a + * btree. Even if we had any other unique index kinds, we'd not know how + * to identify the corresponding equality operator, nor could we be sure + * that the planner could implement the required FULL JOIN with non-btree + * operators. + */ + if (indexStruct->indisunique && + indexStruct->indimmediate && + indexRel->rd_rel->relam == BTREE_AM_OID && + indexStruct->indisvalid && + RelationGetIndexPredicate(indexRel) == NIL && + indexStruct->indnatts > 0) + { + /* + * The point of groveling through the index columns individually is to + * reject both index expressions and system columns. Currently, + * matviews couldn't have OID columns so there's no way to create an + * index on a system column; but maybe someday that wouldn't be true, + * so let's be safe. + */ + int numatts = indexStruct->indnatts; + int i; + + for (i = 0; i < numatts; i++) + { + int attnum = indexStruct->indkey.values[i]; + + if (attnum <= 0) + return false; /* expression or system column */ + } + return true; + } + return false; +} + + +/* + * This should be used to test whether the backend is in a context where it is + * OK to allow DML statements to modify materialized views.
 We only want to + * allow that for internal code driven by the materialized view definition, + * not for arbitrary user-supplied code. + * + * While the function names reflect the fact that their main intended use is + * incremental maintenance of materialized views (in response to changes to + * the data in referenced relations), they are initially used to allow REFRESH + * without blocking concurrent reads. + */ +bool +MatViewIncrementalMaintenanceIsEnabled(void) +{ + return matview_maintenance_depth > 0; /* nonzero while matview-driven DML is allowed */ +} + +static void +OpenMatViewIncrementalMaintenance(void) +{ + matview_maintenance_depth++; +} + +static void +CloseMatViewIncrementalMaintenance(void) +{ + matview_maintenance_depth--; + Assert(matview_maintenance_depth >= 0); /* catch unbalanced close */ +} diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c new file mode 100644 index 0000000..7a931ab --- /dev/null +++ b/src/backend/commands/opclasscmds.c @@ -0,0 +1,1745 @@ +/*------------------------------------------------------------------------- + * + * opclasscmds.c + * + * Routines for opclass (and opfamily) manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/opclasscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/genam.h" +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include
"catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/alter.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "parser/parse_oper.h" +#include "parser/parse_type.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, + Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, + int opclassOptsProcNumber, List *items); +static void AlterOpFamilyDrop(AlterOpFamilyStmt *stmt, + Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, + List *items); +static void processTypesSpec(List *args, Oid *lefttype, Oid *righttype); +static void assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid); +static void assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid, + int opclassOptsProcNum); +static void addFamilyMember(List **list, OpFamilyMember *member); +static void storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators, bool isAdd); +static void storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures, bool isAdd); +static void dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators); +static void dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures); + +/* + * OpFamilyCacheLookup + * Look up an existing opfamily by name. + * + * Returns a syscache tuple reference, or NULL if not found. 
+ */ +static HeapTuple +OpFamilyCacheLookup(Oid amID, List *opfamilyname, bool missing_ok) +{ + char *schemaname; + char *opfname; + HeapTuple htup; + + /* deconstruct the name list */ + DeconstructQualifiedName(opfamilyname, &schemaname, &opfname); + + if (schemaname) + { + /* Look in specific schema only */ + Oid namespaceId; + + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (!OidIsValid(namespaceId)) + htup = NULL; /* schema lookup failed with missing_ok */ + else + htup = SearchSysCache3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amID), + PointerGetDatum(opfname), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* Unqualified opfamily name, so search the search path */ + Oid opfID = OpfamilynameGetOpfid(amID, opfname); + + if (!OidIsValid(opfID)) + htup = NULL; + else + htup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfID)); + } + + if (!HeapTupleIsValid(htup) && !missing_ok) + { + HeapTuple amtup; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID)); + if (!HeapTupleIsValid(amtup)) + elog(ERROR, "cache lookup failed for access method %u", amID); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator family \"%s\" does not exist for access method \"%s\"", + NameListToString(opfamilyname), + NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)))); + } + + return htup; /* if valid, caller must ReleaseSysCache it */ +} + +/* + * get_opfamily_oid + * find an opfamily OID by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error. + */ +Oid +get_opfamily_oid(Oid amID, List *opfamilyname, bool missing_ok) +{ + HeapTuple htup; + Form_pg_opfamily opfamform; + Oid opfID; + + htup = OpFamilyCacheLookup(amID, opfamilyname, missing_ok); + if (!HeapTupleIsValid(htup)) + return InvalidOid; + opfamform = (Form_pg_opfamily) GETSTRUCT(htup); + opfID = opfamform->oid; + ReleaseSysCache(htup); + + return opfID; +} + +/* + * OpClassCacheLookup + * Look up an existing opclass by name. + * + * Returns a syscache tuple reference, or NULL if not found.
+ */ +static HeapTuple +OpClassCacheLookup(Oid amID, List *opclassname, bool missing_ok) +{ + char *schemaname; + char *opcname; + HeapTuple htup; + + /* deconstruct the name list */ + DeconstructQualifiedName(opclassname, &schemaname, &opcname); + + if (schemaname) + { + /* Look in specific schema only */ + Oid namespaceId; + + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (!OidIsValid(namespaceId)) + htup = NULL; /* schema lookup failed with missing_ok */ + else + htup = SearchSysCache3(CLAAMNAMENSP, + ObjectIdGetDatum(amID), + PointerGetDatum(opcname), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* Unqualified opclass name, so search the search path */ + Oid opcID = OpclassnameGetOpcid(amID, opcname); + + if (!OidIsValid(opcID)) + htup = NULL; + else + htup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opcID)); + } + + if (!HeapTupleIsValid(htup) && !missing_ok) + { + HeapTuple amtup; + + amtup = SearchSysCache1(AMOID, ObjectIdGetDatum(amID)); + if (!HeapTupleIsValid(amtup)) + elog(ERROR, "cache lookup failed for access method %u", amID); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator class \"%s\" does not exist for access method \"%s\"", + NameListToString(opclassname), + NameStr(((Form_pg_am) GETSTRUCT(amtup))->amname)))); + } + + return htup; /* if valid, caller must ReleaseSysCache it */ +} + +/* + * get_opclass_oid + * find an opclass OID by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error. + */ +Oid +get_opclass_oid(Oid amID, List *opclassname, bool missing_ok) +{ + HeapTuple htup; + Form_pg_opclass opcform; + Oid opcID; + + htup = OpClassCacheLookup(amID, opclassname, missing_ok); + if (!HeapTupleIsValid(htup)) + return InvalidOid; + opcform = (Form_pg_opclass) GETSTRUCT(htup); + opcID = opcform->oid; + ReleaseSysCache(htup); + + return opcID; +} + +/* + * CreateOpFamily + * Internal routine to make the catalog entry for a new operator family. + * + * Caller must have done permissions checks etc. already.
+ */ +static ObjectAddress +CreateOpFamily(CreateOpFamilyStmt *stmt, const char *opfname, + Oid namespaceoid, Oid amoid) +{ + Oid opfamilyoid; + Relation rel; + HeapTuple tup; + Datum values[Natts_pg_opfamily]; + bool nulls[Natts_pg_opfamily]; + NameData opfName; + ObjectAddress myself, + referenced; + + rel = table_open(OperatorFamilyRelationId, RowExclusiveLock); + + /* + * Make sure there is no existing opfamily of this name (this is just to + * give a more friendly error message than "duplicate key"). + */ + if (SearchSysCacheExists3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amoid), + CStringGetDatum(opfname), + ObjectIdGetDatum(namespaceoid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator family \"%s\" for access method \"%s\" already exists", + opfname, stmt->amname))); + + /* + * Okay, let's create the pg_opfamily entry. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + opfamilyoid = GetNewOidWithIndex(rel, OpfamilyOidIndexId, + Anum_pg_opfamily_oid); + values[Anum_pg_opfamily_oid - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_opfamily_opfmethod - 1] = ObjectIdGetDatum(amoid); + namestrcpy(&opfName, opfname); + values[Anum_pg_opfamily_opfname - 1] = NameGetDatum(&opfName); + values[Anum_pg_opfamily_opfnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_opfamily_opfowner - 1] = ObjectIdGetDatum(GetUserId()); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); /* insert the new pg_opfamily row */ + + heap_freetuple(tup); + + /* + * Create dependencies for the opfamily proper. 
+ */ + myself.classId = OperatorFamilyRelationId; + myself.objectId = opfamilyoid; + myself.objectSubId = 0; + + /* dependency on access method */ + referenced.classId = AccessMethodRelationId; + referenced.objectId = amoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = namespaceoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on owner */ + recordDependencyOnOwner(OperatorFamilyRelationId, opfamilyoid, GetUserId()); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + /* Report the new operator family to possibly interested event triggers */ + EventTriggerCollectSimpleCommand(myself, InvalidObjectAddress, + (Node *) stmt); + + /* Post creation hook for new operator family */ + InvokeObjectPostCreateHook(OperatorFamilyRelationId, opfamilyoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * DefineOpClass + * Define a new index operator class. 
+ */ +ObjectAddress +DefineOpClass(CreateOpClassStmt *stmt) +{ + char *opcname; /* name of opclass we're creating */ + Oid amoid, /* our AM's oid */ + typeoid, /* indexable datatype oid */ + storageoid, /* storage datatype oid, if any */ + namespaceoid, /* namespace to create opclass in */ + opfamilyoid, /* oid of containing opfamily */ + opclassoid; /* oid of opclass we create */ + int maxOpNumber, /* amstrategies value */ + optsProcNumber, /* amoptsprocnum value */ + maxProcNumber; /* amsupport value */ + bool amstorage; /* amstorage flag */ + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + Relation rel; + HeapTuple tup; + Form_pg_am amform; + IndexAmRoutine *amroutine; + Datum values[Natts_pg_opclass]; + bool nulls[Natts_pg_opclass]; + AclResult aclresult; + NameData opcName; + ObjectAddress myself, + referenced; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(stmt->opclassname, + &opcname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* Get necessary info about access method */ + tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + stmt->amname))); + + amform = (Form_pg_am) GETSTRUCT(tup); + amoid = amform->oid; + amroutine = GetIndexAmRoutineByAmId(amoid, false); + ReleaseSysCache(tup); + + maxOpNumber = amroutine->amstrategies; + /* if amstrategies is zero, just enforce that op numbers fit in int16 */ + if (maxOpNumber <= 0) + maxOpNumber = SHRT_MAX; + maxProcNumber = amroutine->amsupport; + optsProcNumber = amroutine->amoptsprocnum; + amstorage = amroutine->amstorage; /* AM allows a storage type distinct from the input type */ + + /* XXX Should we make any privilege check against the AM? */ + + /* + * The question of appropriate permissions for CREATE OPERATOR CLASS is + * interesting. Creating an opclass is tantamount to granting public + * execute access on the functions involved, since the index machinery + * generally does not check access permission before using the functions. + * A minimum expectation therefore is that the caller have execute + * privilege with grant option. Since we don't have a way to make the + * opclass go away if the grant option is revoked, we choose instead to + * require ownership of the functions. It's also not entirely clear what + * permissions should be required on the datatype, but ownership seems + * like a safe choice. + * + * Currently, we require superuser privileges to create an opclass. This + * seems necessary because we have no way to validate that the offered set + * of operators and functions are consistent with the AM's expectations. + * It would be nice to provide such a check someday, if it can be done + * without solving the halting problem :-( + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create an operator class"))); + + /* Look up the datatype */ + typeoid = typenameTypeId(NULL, stmt->datatype); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have ownership of the datatype */ + if (!pg_type_ownercheck(typeoid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeoid); +#endif + + /* + * Look up the containing operator family, or create one if FAMILY option + * was omitted and there's not a match already. 
+ */ + if (stmt->opfamilyname) + { + opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false); + } + else + { + /* Lookup existing family of same name and namespace */ + tup = SearchSysCache3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(amoid), + PointerGetDatum(opcname), + ObjectIdGetDatum(namespaceoid)); + if (HeapTupleIsValid(tup)) + { + opfamilyoid = ((Form_pg_opfamily) GETSTRUCT(tup))->oid; + + /* + * XXX given the superuser check above, there's no need for an + * ownership check here + */ + ReleaseSysCache(tup); + } + else + { + CreateOpFamilyStmt *opfstmt; + ObjectAddress tmpAddr; + + opfstmt = makeNode(CreateOpFamilyStmt); + opfstmt->opfamilyname = stmt->opclassname; + opfstmt->amname = stmt->amname; + + /* + * Create it ... again no need for more permissions ... + */ + tmpAddr = CreateOpFamily(opfstmt, opcname, namespaceoid, amoid); + opfamilyoid = tmpAddr.objectId; + } + } + + operators = NIL; + procedures = NIL; /* OpFamilyMember lists accumulated below */ + + /* Storage datatype is optional */ + storageoid = InvalidOid; + + /* + * Scan the "items" list to obtain additional info. 
+ */ + foreach(l, stmt->items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid operOid; + Oid funcOid; + Oid sortfamilyOid; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + if (item->name->objargs != NIL) + operOid = LookupOperWithArgs(item->name, false); + else + { + /* Default to binary op on input datatype */ + operOid = LookupOperName(NULL, item->name->objname, + typeoid, typeoid, + false, -1); + } + + if (item->order_family) + sortfamilyOid = get_opfamily_oid(BTREE_AM_OID, + item->order_family, + false); + else + sortfamilyOid = InvalidOid; + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own operator and its underlying function */ + if (!pg_oper_ownercheck(operOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + get_opname(operOid)); + funcOid = get_opcode(operOid); + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->object = operOid; + member->number = item->number; + member->sortfamily = sortfamilyOid; + assignOperTypes(member, amoid, typeoid); + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false); +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* 
 Caller must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->object = funcOid; + member->number = item->number; + + /* allow overriding of the function's actual arg types */ + if (item->class_args) + processTypesSpec(item->class_args, + &member->lefttype, &member->righttype); + + assignProcTypes(member, amoid, typeoid, optsProcNumber); + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + if (OidIsValid(storageoid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("storage type specified more than once"))); + storageoid = typenameTypeId(NULL, item->storedtype); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have ownership of the datatype */ + if (!pg_type_ownercheck(storageoid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, storageoid); +#endif + break; + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * If storagetype is specified, make sure it's legal. + */ + if (OidIsValid(storageoid)) + { + /* Just drop the spec if same as column datatype */ + if (storageoid == typeoid) + storageoid = InvalidOid; + else if (!amstorage) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("storage type cannot be different from data type for access method \"%s\"", + stmt->amname))); + } + + rel = table_open(OperatorClassRelationId, RowExclusiveLock); + + /* + * Make sure there is no existing opclass of this name (this is just to + * give a more friendly error message than "duplicate key"). 
+ */ + if (SearchSysCacheExists3(CLAAMNAMENSP, + ObjectIdGetDatum(amoid), + CStringGetDatum(opcname), + ObjectIdGetDatum(namespaceoid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator class \"%s\" for access method \"%s\" already exists", + opcname, stmt->amname))); + + /* + * If we are creating a default opclass, check there isn't one already. + * (Note we do not restrict this test to visible opclasses; this ensures + * that typcache.c can find unique solutions to its questions.) + */ + if (stmt->isDefault) + { + ScanKeyData skey[1]; + SysScanDesc scan; + + ScanKeyInit(&skey[0], + Anum_pg_opclass_opcmethod, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(amoid)); + + scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup); + + if (opclass->opcintype == typeoid && opclass->opcdefault) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("could not make operator class \"%s\" be default for type %s", + opcname, + TypeNameToString(stmt->datatype)), + errdetail("Operator class \"%s\" already is the default.", + NameStr(opclass->opcname)))); + } + + systable_endscan(scan); + } + + /* + * Okay, let's create the pg_opclass entry. 
+ */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + opclassoid = GetNewOidWithIndex(rel, OpclassOidIndexId, + Anum_pg_opclass_oid); + values[Anum_pg_opclass_oid - 1] = ObjectIdGetDatum(opclassoid); + values[Anum_pg_opclass_opcmethod - 1] = ObjectIdGetDatum(amoid); + namestrcpy(&opcName, opcname); + values[Anum_pg_opclass_opcname - 1] = NameGetDatum(&opcName); + values[Anum_pg_opclass_opcnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_opclass_opcowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_opclass_opcfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_opclass_opcintype - 1] = ObjectIdGetDatum(typeoid); + values[Anum_pg_opclass_opcdefault - 1] = BoolGetDatum(stmt->isDefault); + values[Anum_pg_opclass_opckeytype - 1] = ObjectIdGetDatum(storageoid); /* InvalidOid when storage type equals input type */ + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* + * Now that we have the opclass OID, set up default dependency info for + * the pg_amop and pg_amproc entries. Historically, CREATE OPERATOR CLASS + * has created hard dependencies on the opclass, so that's what we use. + */ + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + + op->ref_is_hard = true; + op->ref_is_family = false; + op->refobjid = opclassoid; + } + foreach(l, procedures) + { + OpFamilyMember *proc = (OpFamilyMember *) lfirst(l); + + proc->ref_is_hard = true; + proc->ref_is_family = false; + proc->refobjid = opclassoid; + } + + /* + * Let the index AM editorialize on the dependency choices. It could also + * do further validation on the operators and functions, if it likes. + */ + if (amroutine->amadjustmembers) + amroutine->amadjustmembers(opfamilyoid, + opclassoid, + operators, + procedures); + + /* + * Now add tuples to pg_amop and pg_amproc tying in the operators and + * functions. Dependencies on them are inserted, too. 
+ */ + storeOperators(stmt->opfamilyname, amoid, opfamilyoid, + operators, false); + storeProcedures(stmt->opfamilyname, amoid, opfamilyoid, + procedures, false); + + /* let event triggers know what happened */ + EventTriggerCollectCreateOpClass(stmt, opclassoid, operators, procedures); + + /* + * Create dependencies for the opclass proper. Note: we do not need a + * dependency link to the AM, because that exists through the opfamily. + */ + myself.classId = OperatorClassRelationId; + myself.objectId = opclassoid; + myself.objectSubId = 0; + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = namespaceoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on opfamily */ + referenced.classId = OperatorFamilyRelationId; + referenced.objectId = opfamilyoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* dependency on indexed datatype */ + referenced.classId = TypeRelationId; + referenced.objectId = typeoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on storage datatype */ + if (OidIsValid(storageoid)) + { + referenced.classId = TypeRelationId; + referenced.objectId = storageoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + /* dependency on owner */ + recordDependencyOnOwner(OperatorClassRelationId, opclassoid, GetUserId()); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + /* Post creation hook for new operator class */ + InvokeObjectPostCreateHook(OperatorClassRelationId, opclassoid, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + + +/* + * DefineOpFamily + * Define a new index operator family. 
+ */ +ObjectAddress +DefineOpFamily(CreateOpFamilyStmt *stmt) +{ + char *opfname; /* name of opfamily we're creating */ + Oid amoid, /* our AM's oid */ + namespaceoid; /* namespace to create opfamily in */ + AclResult aclresult; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(stmt->opfamilyname, + &opfname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* Get access method OID, throwing an error if it doesn't exist. */ + amoid = get_index_am_oid(stmt->amname, false); + + /* XXX Should we make any privilege check against the AM? */ + + /* + * Currently, we require superuser privileges to create an opfamily. See + * comments in DefineOpClass. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create an operator family"))); + + /* Insert pg_opfamily catalog entry */ + return CreateOpFamily(stmt, opfname, namespaceoid, amoid); /* also records dependencies and runs hooks */ +} + + +/* + * AlterOpFamily + * Add or remove operators/procedures within an existing operator family. + * + * Note: this implements only ALTER OPERATOR FAMILY ... ADD/DROP. Some + * other commands called ALTER OPERATOR FAMILY exist, but go through + * different code paths. 
+ */ +Oid +AlterOpFamily(AlterOpFamilyStmt *stmt) +{ + Oid amoid, /* our AM's oid */ + opfamilyoid; /* oid of opfamily */ + int maxOpNumber, /* amstrategies value */ + optsProcNumber, /* amoptsprocnum value */ + maxProcNumber; /* amsupport value */ + HeapTuple tup; + Form_pg_am amform; + IndexAmRoutine *amroutine; + + /* Get necessary info about access method */ + tup = SearchSysCache1(AMNAME, CStringGetDatum(stmt->amname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("access method \"%s\" does not exist", + stmt->amname))); + + amform = (Form_pg_am) GETSTRUCT(tup); + amoid = amform->oid; + amroutine = GetIndexAmRoutineByAmId(amoid, false); + ReleaseSysCache(tup); + + maxOpNumber = amroutine->amstrategies; + /* if amstrategies is zero, just enforce that op numbers fit in int16 */ + if (maxOpNumber <= 0) + maxOpNumber = SHRT_MAX; + maxProcNumber = amroutine->amsupport; + optsProcNumber = amroutine->amoptsprocnum; + + /* XXX Should we make any privilege check against the AM? */ + + /* Look up the opfamily */ + opfamilyoid = get_opfamily_oid(amoid, stmt->opfamilyname, false); + + /* + * Currently, we require superuser privileges to alter an opfamily. + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter an operator family"))); + + /* + * ADD and DROP cases need separate code from here on down. 
+ */ + if (stmt->isDrop) + AlterOpFamilyDrop(stmt, amoid, opfamilyoid, + maxOpNumber, maxProcNumber, stmt->items); + else + AlterOpFamilyAdd(stmt, amoid, opfamilyoid, + maxOpNumber, maxProcNumber, optsProcNumber, + stmt->items); + + return opfamilyoid; +} + +/* + * ADD part of ALTER OP FAMILY + */ +static void +AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, int optsProcNumber, + List *items) +{ + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + + operators = NIL; + procedures = NIL; + + /* + * Scan the "items" list to obtain additional info. + */ + foreach(l, items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid operOid; + Oid funcOid; + Oid sortfamilyOid; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + if (item->name->objargs != NIL) + operOid = LookupOperWithArgs(item->name, false); + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator argument types must be specified in ALTER OPERATOR FAMILY"))); + operOid = InvalidOid; /* keep compiler quiet */ + } + + if (item->order_family) + sortfamilyOid = get_opfamily_oid(BTREE_AM_OID, + item->order_family, + false); + else + sortfamilyOid = InvalidOid; + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own operator and its underlying function */ + if (!pg_oper_ownercheck(operOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + get_opname(operOid)); + funcOid = get_opcode(operOid); + if (!pg_proc_ownercheck(funcOid, 
GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->object = operOid; + member->number = item->number; + member->sortfamily = sortfamilyOid; + /* We can set up dependency fields immediately */ + /* Historically, ALTER ADD has created soft dependencies */ + member->ref_is_hard = false; + member->ref_is_family = true; + member->refobjid = opfamilyoid; + assignOperTypes(member, amoid, InvalidOid); + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + funcOid = LookupFuncWithArgs(OBJECT_FUNCTION, item->name, false); +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Caller must own function */ + if (!pg_proc_ownercheck(funcOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + get_func_name(funcOid)); +#endif + + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->object = funcOid; + member->number = item->number; + /* We can set up dependency fields immediately */ + /* Historically, ALTER ADD has created soft dependencies */ + member->ref_is_hard = false; + member->ref_is_family = true; + member->refobjid = opfamilyoid; + + /* allow overriding of the function's actual arg types */ + if (item->class_args) + processTypesSpec(item->class_args, + &member->lefttype, &member->righttype); + + assignProcTypes(member, amoid, InvalidOid, optsProcNumber); + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("STORAGE cannot be specified in ALTER OPERATOR 
FAMILY"))); + break; + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * Let the index AM editorialize on the dependency choices. It could also + * do further validation on the operators and functions, if it likes. + */ + if (amroutine->amadjustmembers) + amroutine->amadjustmembers(opfamilyoid, + InvalidOid, /* no specific opclass */ + operators, + procedures); + + /* + * Add tuples to pg_amop and pg_amproc tying in the operators and + * functions. Dependencies on them are inserted, too. + */ + storeOperators(stmt->opfamilyname, amoid, opfamilyoid, + operators, true); + storeProcedures(stmt->opfamilyname, amoid, opfamilyoid, + procedures, true); + + /* make information available to event triggers */ + EventTriggerCollectAlterOpFam(stmt, opfamilyoid, + operators, procedures); +} + +/* + * DROP part of ALTER OP FAMILY + */ +static void +AlterOpFamilyDrop(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid, + int maxOpNumber, int maxProcNumber, List *items) +{ + List *operators; /* OpFamilyMember list for operators */ + List *procedures; /* OpFamilyMember list for support procs */ + ListCell *l; + + operators = NIL; + procedures = NIL; + + /* + * Scan the "items" list to obtain additional info. 
+ */ + foreach(l, items) + { + CreateOpClassItem *item = lfirst_node(CreateOpClassItem, l); + Oid lefttype, + righttype; + OpFamilyMember *member; + + switch (item->itemtype) + { + case OPCLASS_ITEM_OPERATOR: + if (item->number <= 0 || item->number > maxOpNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator number %d," + " must be between 1 and %d", + item->number, maxOpNumber))); + processTypesSpec(item->class_args, &lefttype, &righttype); + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = false; + member->number = item->number; + member->lefttype = lefttype; + member->righttype = righttype; + addFamilyMember(&operators, member); + break; + case OPCLASS_ITEM_FUNCTION: + if (item->number <= 0 || item->number > maxProcNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid function number %d," + " must be between 1 and %d", + item->number, maxProcNumber))); + processTypesSpec(item->class_args, &lefttype, &righttype); + /* Save the info */ + member = (OpFamilyMember *) palloc0(sizeof(OpFamilyMember)); + member->is_func = true; + member->number = item->number; + member->lefttype = lefttype; + member->righttype = righttype; + addFamilyMember(&procedures, member); + break; + case OPCLASS_ITEM_STORAGETYPE: + /* grammar prevents this from appearing */ + default: + elog(ERROR, "unrecognized item type: %d", item->itemtype); + break; + } + } + + /* + * Remove tuples from pg_amop and pg_amproc. 
+ */ + dropOperators(stmt->opfamilyname, amoid, opfamilyoid, operators); + dropProcedures(stmt->opfamilyname, amoid, opfamilyoid, procedures); + + /* make information available to event triggers */ + EventTriggerCollectAlterOpFam(stmt, opfamilyoid, + operators, procedures); +} + + +/* + * Deal with explicit arg types used in ALTER ADD/DROP + */ +static void +processTypesSpec(List *args, Oid *lefttype, Oid *righttype) +{ + TypeName *typeName; + + Assert(args != NIL); + + typeName = (TypeName *) linitial(args); + *lefttype = typenameTypeId(NULL, typeName); + + if (list_length(args) > 1) + { + typeName = (TypeName *) lsecond(args); + *righttype = typenameTypeId(NULL, typeName); + } + else + *righttype = *lefttype; + + if (list_length(args) > 2) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("one or two argument types must be specified"))); +} + + +/* + * Determine the lefttype/righttype to assign to an operator, + * and do any validity checking we can manage. + */ +static void +assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) +{ + Operator optup; + Form_pg_operator opform; + + /* Fetch the operator definition */ + optup = SearchSysCache1(OPEROID, ObjectIdGetDatum(member->object)); + if (!HeapTupleIsValid(optup)) + elog(ERROR, "cache lookup failed for operator %u", member->object); + opform = (Form_pg_operator) GETSTRUCT(optup); + + /* + * Opfamily operators must be binary. + */ + if (opform->oprkind != 'b') + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("index operators must be binary"))); + + if (OidIsValid(member->sortfamily)) + { + /* + * Ordering op, check index supports that. (We could perhaps also + * check that the operator returns a type supported by the sortfamily, + * but that seems more trouble than it's worth here. If it does not, + * the operator will never be matchable to any ORDER BY clause, but no + * worse consequences can ensue. 
Also, trying to check that would + * create an ordering hazard during dump/reload: it's possible that + * the family has been created but not yet populated with the required + * operators.) + */ + IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false); + + if (!amroutine->amcanorderbyop) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("access method \"%s\" does not support ordering operators", + get_am_name(amoid)))); + } + else + { + /* + * Search operators must return boolean. + */ + if (opform->oprresult != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("index search operators must return boolean"))); + } + + /* + * If lefttype/righttype isn't specified, use the operator's input types + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = opform->oprleft; + if (!OidIsValid(member->righttype)) + member->righttype = opform->oprright; + + ReleaseSysCache(optup); +} + +/* + * Determine the lefttype/righttype to assign to a support procedure, + * and do any validity checking we can manage. 
+ */ +static void +assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid, + int opclassOptsProcNum) +{ + HeapTuple proctup; + Form_pg_proc procform; + + /* Fetch the procedure definition */ + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(member->object)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", member->object); + procform = (Form_pg_proc) GETSTRUCT(proctup); + + /* Check the signature of the opclass options parsing function */ + if (member->number == opclassOptsProcNum) + { + if (OidIsValid(typeoid)) + { + if ((OidIsValid(member->lefttype) && member->lefttype != typeoid) || + (OidIsValid(member->righttype) && member->righttype != typeoid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("associated data types for operator class options parsing functions must match opclass input type"))); + } + else + { + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("left and right associated data types for operator class options parsing functions must match"))); + } + + if (procform->prorettype != VOIDOID || + procform->pronargs != 1 || + procform->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("invalid operator class options parsing function"), + errhint("Valid signature of operator class options parsing function is %s.", + "(internal) RETURNS void"))); + } + + /* + * btree comparison procs must be 2-arg procs returning int4. btree + * sortsupport procs must take internal and return void. btree in_range + * procs must be 5-arg procs returning bool. btree equalimage procs must + * take 1 arg and return bool. hash support proc 1 must be a 1-arg proc + * returning int4, while proc 2 must be a 2-arg proc returning int8. + * Otherwise we don't know. 
+ */ + else if (amoid == BTREE_AM_OID) + { + if (member->number == BTORDER_PROC) + { + if (procform->pronargs != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree comparison functions must have two arguments"))); + if (procform->prorettype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree comparison functions must return integer"))); + + /* + * If lefttype/righttype isn't specified, use the proc's input + * types + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[1]; + } + else if (member->number == BTSORTSUPPORT_PROC) + { + if (procform->pronargs != 1 || + procform->proargtypes.values[0] != INTERNALOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree sort support functions must accept type \"internal\""))); + if (procform->prorettype != VOIDOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree sort support functions must return void"))); + + /* + * Can't infer lefttype/righttype from proc, so use default rule + */ + } + else if (member->number == BTINRANGE_PROC) + { + if (procform->pronargs != 5) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree in_range functions must have five arguments"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree in_range functions must return boolean"))); + + /* + * If lefttype/righttype isn't specified, use the proc's input + * types (we look at the test-value and offset arguments) + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[2]; + } + else if (member->number == BTEQUALIMAGE_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, 
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must have one argument"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must return boolean"))); + + /* + * pg_amproc functions are indexed by (lefttype, righttype), but + * an equalimage function can only be called at CREATE INDEX time. + * The same opclass opcintype OID is always used for leftype and + * righttype. Providing a cross-type routine isn't sensible. + * Reject cross-type ALTER OPERATOR FAMILY ... ADD FUNCTION 4 + * statements here. + */ + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must not be cross-type"))); + } + } + else if (amoid == HASH_AM_OID) + { + if (member->number == HASHSTANDARD_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 1 must have one argument"))); + if (procform->prorettype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 1 must return integer"))); + } + else if (member->number == HASHEXTENDED_PROC) + { + if (procform->pronargs != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 2 must have two arguments"))); + if (procform->prorettype != INT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash function 2 must return bigint"))); + } + + /* + * If lefttype/righttype isn't specified, use the proc's input type + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = procform->proargtypes.values[0]; + if (!OidIsValid(member->righttype)) + member->righttype = procform->proargtypes.values[0]; + } + + /* + * The default in CREATE OPERATOR CLASS is to use the class' opcintype as + * lefttype and righttype. 
In CREATE or ALTER OPERATOR FAMILY, opcintype + * isn't available, so make the user specify the types. + */ + if (!OidIsValid(member->lefttype)) + member->lefttype = typeoid; + if (!OidIsValid(member->righttype)) + member->righttype = typeoid; + + if (!OidIsValid(member->lefttype) || !OidIsValid(member->righttype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("associated data types must be specified for index support function"))); + + ReleaseSysCache(proctup); +} + +/* + * Add a new family member to the appropriate list, after checking for + * duplicated strategy or proc number. + */ +static void +addFamilyMember(List **list, OpFamilyMember *member) +{ + ListCell *l; + + foreach(l, *list) + { + OpFamilyMember *old = (OpFamilyMember *) lfirst(l); + + if (old->number == member->number && + old->lefttype == member->lefttype && + old->righttype == member->righttype) + { + if (member->is_func) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function number %d for (%s,%s) appears more than once", + member->number, + format_type_be(member->lefttype), + format_type_be(member->righttype)))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator number %d for (%s,%s) appears more than once", + member->number, + format_type_be(member->lefttype), + format_type_be(member->righttype)))); + } + } + *list = lappend(*list, member); +} + +/* + * Dump the operators to pg_amop + * + * We also make dependency entries in pg_depend for the pg_amop entries. 
+ */ +static void +storeOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators, bool isAdd) +{ + Relation rel; + Datum values[Natts_pg_amop]; + bool nulls[Natts_pg_amop]; + HeapTuple tup; + Oid entryoid; + ObjectAddress myself, + referenced; + ListCell *l; + + rel = table_open(AccessMethodOperatorRelationId, RowExclusiveLock); + + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + char oppurpose; + + /* + * If adding to an existing family, check for conflict with an + * existing pg_amop entry (just to give a nicer error message) + */ + if (isAdd && + SearchSysCacheExists4(AMOPSTRATEGY, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator %d(%s,%s) already exists in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + oppurpose = OidIsValid(op->sortfamily) ? 
AMOP_ORDER : AMOP_SEARCH; + + /* Create the pg_amop entry */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + entryoid = GetNewOidWithIndex(rel, AccessMethodOperatorOidIndexId, + Anum_pg_amop_oid); + values[Anum_pg_amop_oid - 1] = ObjectIdGetDatum(entryoid); + values[Anum_pg_amop_amopfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_amop_amoplefttype - 1] = ObjectIdGetDatum(op->lefttype); + values[Anum_pg_amop_amoprighttype - 1] = ObjectIdGetDatum(op->righttype); + values[Anum_pg_amop_amopstrategy - 1] = Int16GetDatum(op->number); + values[Anum_pg_amop_amoppurpose - 1] = CharGetDatum(oppurpose); + values[Anum_pg_amop_amopopr - 1] = ObjectIdGetDatum(op->object); + values[Anum_pg_amop_amopmethod - 1] = ObjectIdGetDatum(amoid); + values[Anum_pg_amop_amopsortfamily - 1] = ObjectIdGetDatum(op->sortfamily); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* Make its dependencies */ + myself.classId = AccessMethodOperatorRelationId; + myself.objectId = entryoid; + myself.objectSubId = 0; + + referenced.classId = OperatorRelationId; + referenced.objectId = op->object; + referenced.objectSubId = 0; + + /* see comments in amapi.h about dependency strength */ + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + + referenced.classId = op->ref_is_family ? OperatorFamilyRelationId : + OperatorClassRelationId; + referenced.objectId = op->refobjid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO); + + /* A search operator also needs a dep on the referenced opfamily */ + if (OidIsValid(op->sortfamily)) + { + referenced.classId = OperatorFamilyRelationId; + referenced.objectId = op->sortfamily; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + op->ref_is_hard ? 
DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + } + + /* Post create hook of this access method operator */ + InvokeObjectPostCreateHook(AccessMethodOperatorRelationId, + entryoid, 0); + } + + table_close(rel, RowExclusiveLock); +} + +/* + * Dump the procedures (support routines) to pg_amproc + * + * We also make dependency entries in pg_depend for the pg_amproc entries. + */ +static void +storeProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures, bool isAdd) +{ + Relation rel; + Datum values[Natts_pg_amproc]; + bool nulls[Natts_pg_amproc]; + HeapTuple tup; + Oid entryoid; + ObjectAddress myself, + referenced; + ListCell *l; + + rel = table_open(AccessMethodProcedureRelationId, RowExclusiveLock); + + foreach(l, procedures) + { + OpFamilyMember *proc = (OpFamilyMember *) lfirst(l); + + /* + * If adding to an existing family, check for conflict with an + * existing pg_amproc entry (just to give a nicer error message) + */ + if (isAdd && + SearchSysCacheExists4(AMPROCNUM, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(proc->lefttype), + ObjectIdGetDatum(proc->righttype), + Int16GetDatum(proc->number))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("function %d(%s,%s) already exists in operator family \"%s\"", + proc->number, + format_type_be(proc->lefttype), + format_type_be(proc->righttype), + NameListToString(opfamilyname)))); + + /* Create the pg_amproc entry */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + entryoid = GetNewOidWithIndex(rel, AccessMethodProcedureOidIndexId, + Anum_pg_amproc_oid); + values[Anum_pg_amproc_oid - 1] = ObjectIdGetDatum(entryoid); + values[Anum_pg_amproc_amprocfamily - 1] = ObjectIdGetDatum(opfamilyoid); + values[Anum_pg_amproc_amproclefttype - 1] = ObjectIdGetDatum(proc->lefttype); + values[Anum_pg_amproc_amprocrighttype - 1] = ObjectIdGetDatum(proc->righttype); + values[Anum_pg_amproc_amprocnum - 1] = Int16GetDatum(proc->number); + values[Anum_pg_amproc_amproc - 
1] = ObjectIdGetDatum(proc->object); + + tup = heap_form_tuple(rel->rd_att, values, nulls); + + CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + + /* Make its dependencies */ + myself.classId = AccessMethodProcedureRelationId; + myself.objectId = entryoid; + myself.objectSubId = 0; + + referenced.classId = ProcedureRelationId; + referenced.objectId = proc->object; + referenced.objectSubId = 0; + + /* see comments in amapi.h about dependency strength */ + recordDependencyOn(&myself, &referenced, + proc->ref_is_hard ? DEPENDENCY_NORMAL : DEPENDENCY_AUTO); + + referenced.classId = proc->ref_is_family ? OperatorFamilyRelationId : + OperatorClassRelationId; + referenced.objectId = proc->refobjid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, + proc->ref_is_hard ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO); + + /* Post create hook of access method procedure */ + InvokeObjectPostCreateHook(AccessMethodProcedureRelationId, + entryoid, 0); + } + + table_close(rel, RowExclusiveLock); +} + + +/* + * Remove operator entries from an opfamily. + * + * Note: this is only allowed for "loose" members of an opfamily, hence + * behavior is always RESTRICT. 
+ */ +static void +dropOperators(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *operators) +{ + ListCell *l; + + foreach(l, operators) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + Oid amopid; + ObjectAddress object; + + amopid = GetSysCacheOid4(AMOPSTRATEGY, Anum_pg_amop_oid, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number)); + if (!OidIsValid(amopid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("operator %d(%s,%s) does not exist in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + object.classId = AccessMethodOperatorRelationId; + object.objectId = amopid; + object.objectSubId = 0; + + performDeletion(&object, DROP_RESTRICT, 0); + } +} + +/* + * Remove procedure entries from an opfamily. + * + * Note: this is only allowed for "loose" members of an opfamily, hence + * behavior is always RESTRICT. 
+ */ +static void +dropProcedures(List *opfamilyname, Oid amoid, Oid opfamilyoid, + List *procedures) +{ + ListCell *l; + + foreach(l, procedures) + { + OpFamilyMember *op = (OpFamilyMember *) lfirst(l); + Oid amprocid; + ObjectAddress object; + + amprocid = GetSysCacheOid4(AMPROCNUM, Anum_pg_amproc_oid, + ObjectIdGetDatum(opfamilyoid), + ObjectIdGetDatum(op->lefttype), + ObjectIdGetDatum(op->righttype), + Int16GetDatum(op->number)); + if (!OidIsValid(amprocid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("function %d(%s,%s) does not exist in operator family \"%s\"", + op->number, + format_type_be(op->lefttype), + format_type_be(op->righttype), + NameListToString(opfamilyname)))); + + object.classId = AccessMethodProcedureRelationId; + object.objectId = amprocid; + object.objectSubId = 0; + + performDeletion(&object, DROP_RESTRICT, 0); + } +} + +/* + * Subroutine for ALTER OPERATOR CLASS SET SCHEMA/RENAME + * + * Is there an operator class with the given name and signature already + * in the given namespace? If so, raise an appropriate error message. + */ +void +IsThereOpClassInNamespace(const char *opcname, Oid opcmethod, + Oid opcnamespace) +{ + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists3(CLAAMNAMENSP, + ObjectIdGetDatum(opcmethod), + CStringGetDatum(opcname), + ObjectIdGetDatum(opcnamespace))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator class \"%s\" for access method \"%s\" already exists in schema \"%s\"", + opcname, + get_am_name(opcmethod), + get_namespace_name(opcnamespace)))); +} + +/* + * Subroutine for ALTER OPERATOR FAMILY SET SCHEMA/RENAME + * + * Is there an operator family with the given name and signature already + * in the given namespace? If so, raise an appropriate error message. 
+ */ +void +IsThereOpFamilyInNamespace(const char *opfname, Oid opfmethod, + Oid opfnamespace) +{ + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists3(OPFAMILYAMNAMENSP, + ObjectIdGetDatum(opfmethod), + CStringGetDatum(opfname), + ObjectIdGetDatum(opfnamespace))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("operator family \"%s\" for access method \"%s\" already exists in schema \"%s\"", + opfname, + get_am_name(opfmethod), + get_namespace_name(opfnamespace)))); +} diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c new file mode 100644 index 0000000..a5924d7 --- /dev/null +++ b/src/backend/commands/operatorcmds.c @@ -0,0 +1,552 @@ +/*------------------------------------------------------------------------- + * + * operatorcmds.c + * + * Routines for operator manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/operatorcmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
 *
 * NOTES
 *	  These things must be defined and committed in the following order:
 *		"create function":
 *			input/output, recv/send functions
 *		"create type":
 *			type
 *		"create operator":
 *			operators
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "commands/alter.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_type.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"

static Oid	ValidateRestrictionEstimator(List *restrictionName);
static Oid	ValidateJoinEstimator(List *joinName);

/*
 * DefineOperator
 *		this function extracts all the information from the
 *		parameter list generated by the parser and then has
 *		OperatorCreate() do all the actual work.
 *
 * 'parameters' is a list of DefElem
 */
ObjectAddress
DefineOperator(List *names, List *parameters)
{
	char	   *oprName;
	Oid			oprNamespace;
	AclResult	aclresult;
	bool		canMerge = false;	/* operator merges */
	bool		canHash = false;	/* operator hashes */
	List	   *functionName = NIL; /* function for operator */
	TypeName   *typeName1 = NULL;	/* first type name */
	TypeName   *typeName2 = NULL;	/* second type name */
	Oid			typeId1 = InvalidOid;	/* types converted to OID */
	Oid			typeId2 = InvalidOid;
	Oid			rettype;
	List	   *commutatorName = NIL;	/* optional commutator operator name */
	List	   *negatorName = NIL;	/* optional negator operator name */
	List	   *restrictionName = NIL;	/* optional restrict. sel. function */
	List	   *joinName = NIL; /* optional join sel. function */
	Oid			functionOid;	/* functions converted to OID */
	Oid			restrictionOid;
	Oid			joinOid;
	Oid			typeId[2];		/* to hold left and right arg */
	int			nargs;
	ListCell   *pl;

	/* Convert list of names to a name and namespace */
	oprNamespace = QualifiedNameGetCreationNamespace(names, &oprName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(oprNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(oprNamespace));

	/*
	 * loop over the definition list and extract the information we need.
	 */
	foreach(pl, parameters)
	{
		DefElem    *defel = (DefElem *) lfirst(pl);

		if (strcmp(defel->defname, "leftarg") == 0)
		{
			typeName1 = defGetTypeName(defel);
			if (typeName1->setof)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SETOF type not allowed for operator argument")));
		}
		else if (strcmp(defel->defname, "rightarg") == 0)
		{
			typeName2 = defGetTypeName(defel);
			if (typeName2->setof)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
						 errmsg("SETOF type not allowed for operator argument")));
		}
		/* "function" and "procedure" are equivalent here */
		else if (strcmp(defel->defname, "function") == 0)
			functionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "procedure") == 0)
			functionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "commutator") == 0)
			commutatorName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "negator") == 0)
			negatorName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "restrict") == 0)
			restrictionName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "join") == 0)
			joinName = defGetQualifiedName(defel);
		else if (strcmp(defel->defname, "hashes") == 0)
			canHash = defGetBoolean(defel);
		else if (strcmp(defel->defname, "merges") == 0)
			canMerge = defGetBoolean(defel);
		/* These obsolete options are taken as meaning canMerge */
		else if (strcmp(defel->defname, "sort1") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "sort2") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "ltcmp") == 0)
			canMerge = true;
		else if (strcmp(defel->defname, "gtcmp") == 0)
			canMerge = true;
		else
		{
			/* WARNING, not ERROR, for historical backwards-compatibility */
			ereport(WARNING,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("operator attribute \"%s\" not recognized",
							defel->defname)));
		}
	}

	/*
	 * make sure we have our required definitions
	 */
	if (functionName == NIL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator function must be specified")));

	/* Transform type names to type OIDs */
	if (typeName1)
		typeId1 = typenameTypeId(NULL, typeName1);
	if (typeName2)
		typeId2 = typenameTypeId(NULL, typeName2);

	/*
	 * If only the right argument is missing, the user is likely trying to
	 * create a postfix operator, so give them a hint about why that does not
	 * work.  But if both arguments are missing, do not mention postfix
	 * operators, as the user most likely simply neglected to mention the
	 * arguments.
	 */
	if (!OidIsValid(typeId1) && !OidIsValid(typeId2))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator argument types must be specified")));
	if (!OidIsValid(typeId2))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
				 errmsg("operator right argument type must be specified"),
				 errdetail("Postfix operators are not supported.")));

	if (typeName1)
	{
		aclresult = pg_type_aclcheck(typeId1, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, typeId1);
	}

	if (typeName2)
	{
		aclresult = pg_type_aclcheck(typeId2, GetUserId(), ACL_USAGE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error_type(aclresult, typeId2);
	}

	/*
	 * Look up the operator's underlying function.
	 *
	 * A missing left argument means a one-argument (prefix) operator; only
	 * then do we look up a 1-arg function.  (Postfix was rejected above.)
	 */
	if (!OidIsValid(typeId1))
	{
		typeId[0] = typeId2;
		nargs = 1;
	}
	else if (!OidIsValid(typeId2))
	{
		typeId[0] = typeId1;
		nargs = 1;
	}
	else
	{
		typeId[0] = typeId1;
		typeId[1] = typeId2;
		nargs = 2;
	}
	functionOid = LookupFuncName(functionName, nargs, typeId, false);

	/*
	 * We require EXECUTE rights for the function.  This isn't strictly
	 * necessary, since EXECUTE will be checked at any attempted use of the
	 * operator, but it seems like a good idea anyway.
	 */
	aclresult = pg_proc_aclcheck(functionOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(functionName));

	rettype = get_func_rettype(functionOid);
	aclresult = pg_type_aclcheck(rettype, GetUserId(), ACL_USAGE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error_type(aclresult, rettype);

	/*
	 * Look up restriction and join estimators if specified
	 */
	if (restrictionName)
		restrictionOid = ValidateRestrictionEstimator(restrictionName);
	else
		restrictionOid = InvalidOid;
	if (joinName)
		joinOid = ValidateJoinEstimator(joinName);
	else
		joinOid = InvalidOid;

	/*
	 * now have OperatorCreate do all the work..
	 */
	return
		OperatorCreate(oprName, /* operator name */
					   oprNamespace,	/* namespace */
					   typeId1, /* left type id */
					   typeId2, /* right type id */
					   functionOid, /* function for operator */
					   commutatorName,	/* optional commutator operator name */
					   negatorName, /* optional negator operator name */
					   restrictionOid,	/* optional restrict. sel. function */
					   joinOid, /* optional join sel. function name */
					   canMerge,	/* operator merges */
					   canHash);	/* operator hashes */
}

/*
 * Look up a restriction estimator function by name, and verify that it has
 * the correct signature and we have the permissions to attach it to an
 * operator.
 */
static Oid
ValidateRestrictionEstimator(List *restrictionName)
{
	Oid			typeId[4];
	Oid			restrictionOid;
	AclResult	aclresult;

	typeId[0] = INTERNALOID;	/* PlannerInfo */
	typeId[1] = OIDOID;			/* operator OID */
	typeId[2] = INTERNALOID;	/* args list */
	typeId[3] = INT4OID;		/* varRelid */

	restrictionOid = LookupFuncName(restrictionName, 4, typeId, false);

	/* estimators must return float8 */
	if (get_func_rettype(restrictionOid) != FLOAT8OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("restriction estimator function %s must return type %s",
						NameListToString(restrictionName), "float8")));

	/* Require EXECUTE rights for the estimator */
	aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(restrictionName));

	return restrictionOid;
}

/*
 * Look up a join estimator function by name, and verify that it has the
 * correct signature and we have the permissions to attach it to an
 * operator.
 */
static Oid
ValidateJoinEstimator(List *joinName)
{
	Oid			typeId[5];
	Oid			joinOid;
	Oid			joinOid2;
	AclResult	aclresult;

	typeId[0] = INTERNALOID;	/* PlannerInfo */
	typeId[1] = OIDOID;			/* operator OID */
	typeId[2] = INTERNALOID;	/* args list */
	typeId[3] = INT2OID;		/* jointype */
	typeId[4] = INTERNALOID;	/* SpecialJoinInfo */

	/*
	 * As of Postgres 8.4, the preferred signature for join estimators has 5
	 * arguments, but we still allow the old 4-argument form.  Whine about
	 * ambiguity if both forms exist.
	 *
	 * (The 4-argument lookup simply ignores typeId[4].)
	 */
	joinOid = LookupFuncName(joinName, 5, typeId, true);
	joinOid2 = LookupFuncName(joinName, 4, typeId, true);
	if (OidIsValid(joinOid))
	{
		if (OidIsValid(joinOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("join estimator function %s has multiple matches",
							NameListToString(joinName))));
	}
	else
	{
		joinOid = joinOid2;
		/* If not found, reference the 5-argument signature in error msg */
		if (!OidIsValid(joinOid))
			joinOid = LookupFuncName(joinName, 5, typeId, false);
	}

	/* estimators must return float8 */
	if (get_func_rettype(joinOid) != FLOAT8OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("join estimator function %s must return type %s",
						NameListToString(joinName), "float8")));

	/* Require EXECUTE rights for the estimator */
	aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_FUNCTION,
					   NameListToString(joinName));

	return joinOid;
}

/*
 * Guts of operator deletion.
+ */ +void +RemoveOperatorById(Oid operOid) +{ + Relation relation; + HeapTuple tup; + Form_pg_operator op; + + relation = table_open(OperatorRelationId, RowExclusiveLock); + + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for operator %u", operOid); + op = (Form_pg_operator) GETSTRUCT(tup); + + /* + * Reset links from commutator and negator, if any. In case of a + * self-commutator or self-negator, this means we have to re-fetch the + * updated tuple. (We could optimize away updates on the tuple we're + * about to drop, but it doesn't seem worth convoluting the logic for.) + */ + if (OidIsValid(op->oprcom) || OidIsValid(op->oprnegate)) + { + OperatorUpd(operOid, op->oprcom, op->oprnegate, true); + if (operOid == op->oprcom || operOid == op->oprnegate) + { + ReleaseSysCache(tup); + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operOid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for operator %u", operOid); + } + } + + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + +/* + * AlterOperator + * routine implementing ALTER OPERATOR SET (option = ...). + * + * Currently, only RESTRICT and JOIN estimator functions can be changed. + */ +ObjectAddress +AlterOperator(AlterOperatorStmt *stmt) +{ + ObjectAddress address; + Oid oprId; + Relation catalog; + HeapTuple tup; + Form_pg_operator oprForm; + int i; + ListCell *pl; + Datum values[Natts_pg_operator]; + bool nulls[Natts_pg_operator]; + bool replaces[Natts_pg_operator]; + List *restrictionName = NIL; /* optional restrict. sel. function */ + bool updateRestriction = false; + Oid restrictionOid; + List *joinName = NIL; /* optional join sel. 
function */ + bool updateJoin = false; + Oid joinOid; + + /* Look up the operator */ + oprId = LookupOperWithArgs(stmt->opername, false); + catalog = table_open(OperatorRelationId, RowExclusiveLock); + tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(oprId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for operator %u", oprId); + oprForm = (Form_pg_operator) GETSTRUCT(tup); + + /* Process options */ + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + List *param; + + if (defel->arg == NULL) + param = NIL; /* NONE, removes the function */ + else + param = defGetQualifiedName(defel); + + if (strcmp(defel->defname, "restrict") == 0) + { + restrictionName = param; + updateRestriction = true; + } + else if (strcmp(defel->defname, "join") == 0) + { + joinName = param; + updateJoin = true; + } + + /* + * The rest of the options that CREATE accepts cannot be changed. + * Check for them so that we can give a meaningful error message. + */ + else if (strcmp(defel->defname, "leftarg") == 0 || + strcmp(defel->defname, "rightarg") == 0 || + strcmp(defel->defname, "function") == 0 || + strcmp(defel->defname, "procedure") == 0 || + strcmp(defel->defname, "commutator") == 0 || + strcmp(defel->defname, "negator") == 0 || + strcmp(defel->defname, "hashes") == 0 || + strcmp(defel->defname, "merges") == 0) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator attribute \"%s\" cannot be changed", + defel->defname))); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operator attribute \"%s\" not recognized", + defel->defname))); + } + + /* Check permissions. Must be owner. 
*/ + if (!pg_oper_ownercheck(oprId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_OPERATOR, + NameStr(oprForm->oprname)); + + /* + * Look up restriction and join estimators if specified + */ + if (restrictionName) + restrictionOid = ValidateRestrictionEstimator(restrictionName); + else + restrictionOid = InvalidOid; + if (joinName) + joinOid = ValidateJoinEstimator(joinName); + else + joinOid = InvalidOid; + + /* Perform additional checks, like OperatorCreate does */ + if (!(OidIsValid(oprForm->oprleft) && OidIsValid(oprForm->oprright))) + { + /* If it's not a binary op, these things mustn't be set: */ + if (OidIsValid(joinOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only binary operators can have join selectivity"))); + } + + if (oprForm->oprresult != BOOLOID) + { + if (OidIsValid(restrictionOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have restriction selectivity"))); + if (OidIsValid(joinOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only boolean operators can have join selectivity"))); + } + + /* Update the tuple */ + for (i = 0; i < Natts_pg_operator; ++i) + { + values[i] = (Datum) 0; + replaces[i] = false; + nulls[i] = false; + } + if (updateRestriction) + { + replaces[Anum_pg_operator_oprrest - 1] = true; + values[Anum_pg_operator_oprrest - 1] = restrictionOid; + } + if (updateJoin) + { + replaces[Anum_pg_operator_oprjoin - 1] = true; + values[Anum_pg_operator_oprjoin - 1] = joinOid; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(catalog), + values, nulls, replaces); + + CatalogTupleUpdate(catalog, &tup->t_self, tup); + + address = makeOperatorDependencies(tup, false, true); + + InvokeObjectPostAlterHook(OperatorRelationId, oprId, 0); + + table_close(catalog, NoLock); + + return address; +} diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c new file mode 100644 index 
0000000..a59ee3b --- /dev/null +++ b/src/backend/commands/policy.c @@ -0,0 +1,1285 @@ +/*------------------------------------------------------------------------- + * + * policy.c + * Commands for manipulating policies. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/commands/policy.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_policy.h" +#include "catalog/pg_type.h" +#include "commands/policy.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/pg_list.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_node.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "storage/lock.h" +#include "utils/acl.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void RangeVarCallbackForPolicy(const RangeVar *rv, + Oid relid, Oid oldrelid, void *arg); +static char parse_policy_command(const char *cmd_name); +static Datum *policy_role_list_to_array(List *roles, int *num_roles); + +/* + * Callback to RangeVarGetRelidExtended(). + * + * Checks the following: + * - the relation specified is a table. + * - current user owns the table. + * - the table is not a system table. 
 *
 * If any of these checks fails then an error is raised.
 */
static void
RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
						  void *arg)
{
	HeapTuple	tuple;
	Form_pg_class classform;
	char		relkind;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		return;					/* rel gone (e.g. concurrently dropped?);
								 * nothing to check */

	classform = (Form_pg_class) GETSTRUCT(tuple);
	relkind = classform->relkind;

	/* Must own relation. */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);

	/* No system table modifications unless explicitly allowed. */
	if (!allowSystemTableMods && IsSystemClass(relid, classform))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rv->relname)));

	/* Relation type MUST be a table. */
	if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table", rv->relname)));

	ReleaseSysCache(tuple);
}

/*
 * parse_policy_command -
 *	 helper function to convert full command strings to their char
 *	 representation.
 *
 * cmd_name - full string command name.  Valid values are 'all', 'select',
 * 'insert', 'update' and 'delete'.
+ * + */ +static char +parse_policy_command(const char *cmd_name) +{ + char polcmd; + + if (!cmd_name) + elog(ERROR, "unrecognized policy command"); + + if (strcmp(cmd_name, "all") == 0) + polcmd = '*'; + else if (strcmp(cmd_name, "select") == 0) + polcmd = ACL_SELECT_CHR; + else if (strcmp(cmd_name, "insert") == 0) + polcmd = ACL_INSERT_CHR; + else if (strcmp(cmd_name, "update") == 0) + polcmd = ACL_UPDATE_CHR; + else if (strcmp(cmd_name, "delete") == 0) + polcmd = ACL_DELETE_CHR; + else + elog(ERROR, "unrecognized policy command"); + + return polcmd; +} + +/* + * policy_role_list_to_array + * helper function to convert a list of RoleSpecs to an array of + * role id Datums. + */ +static Datum * +policy_role_list_to_array(List *roles, int *num_roles) +{ + Datum *role_oids; + ListCell *cell; + int i = 0; + + /* Handle no roles being passed in as being for public */ + if (roles == NIL) + { + *num_roles = 1; + role_oids = (Datum *) palloc(*num_roles * sizeof(Datum)); + role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC); + + return role_oids; + } + + *num_roles = list_length(roles); + role_oids = (Datum *) palloc(*num_roles * sizeof(Datum)); + + foreach(cell, roles) + { + RoleSpec *spec = lfirst(cell); + + /* + * PUBLIC covers all roles, so it only makes sense alone. + */ + if (spec->roletype == ROLESPEC_PUBLIC) + { + if (*num_roles != 1) + { + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring specified roles other than PUBLIC"), + errhint("All roles are members of the PUBLIC role."))); + *num_roles = 1; + } + role_oids[0] = ObjectIdGetDatum(ACL_ID_PUBLIC); + + return role_oids; + } + else + role_oids[i++] = + ObjectIdGetDatum(get_rolespec_oid(spec, false)); + } + + return role_oids; +} + +/* + * Load row security policy from the catalog, and store it in + * the relation's relcache entry. + * + * Note that caller should have verified that pg_class.relrowsecurity + * is true for this relation. 
 */
void
RelationBuildRowSecurity(Relation relation)
{
	MemoryContext rscxt;
	MemoryContext oldcxt = CurrentMemoryContext;
	RowSecurityDesc *rsdesc;
	Relation	catalog;
	ScanKeyData skey;
	SysScanDesc sscan;
	HeapTuple	tuple;

	/*
	 * Create a memory context to hold everything associated with this
	 * relation's row security policy.  This makes it easy to clean up during
	 * a relcache flush.  However, to cover the possibility of an error
	 * partway through, we don't make the context long-lived till we're done.
	 */
	rscxt = AllocSetContextCreate(CurrentMemoryContext,
								  "row security descriptor",
								  ALLOCSET_SMALL_SIZES);
	MemoryContextCopyAndSetIdentifier(rscxt,
									  RelationGetRelationName(relation));

	rsdesc = MemoryContextAllocZero(rscxt, sizeof(RowSecurityDesc));
	rsdesc->rscxt = rscxt;

	/*
	 * Now scan pg_policy for RLS policies associated with this relation.
	 * Because we use the index on (polrelid, polname), we should consistently
	 * visit the rel's policies in name order, at least when system indexes
	 * aren't disabled.  This simplifies equalRSDesc().
	 */
	catalog = table_open(PolicyRelationId, AccessShareLock);

	ScanKeyInit(&skey,
				Anum_pg_policy_polrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(relation)));

	sscan = systable_beginscan(catalog, PolicyPolrelidPolnameIndexId, true,
							   NULL, 1, &skey);

	while (HeapTupleIsValid(tuple = systable_getnext(sscan)))
	{
		Form_pg_policy policy_form = (Form_pg_policy) GETSTRUCT(tuple);
		RowSecurityPolicy *policy;
		Datum		datum;
		bool		isnull;
		char	   *str_value;

		policy = MemoryContextAllocZero(rscxt, sizeof(RowSecurityPolicy));

		/*
		 * Note: we must be sure that pass-by-reference data gets copied into
		 * rscxt.  We avoid making that context current over wider spans than
		 * we have to, though.
		 */

		/* Get policy command */
		policy->polcmd = policy_form->polcmd;

		/* Get policy, permissive or restrictive */
		policy->permissive = policy_form->polpermissive;

		/* Get policy name */
		policy->policy_name =
			MemoryContextStrdup(rscxt, NameStr(policy_form->polname));

		/* Get policy roles */
		datum = heap_getattr(tuple, Anum_pg_policy_polroles,
							 RelationGetDescr(catalog), &isnull);
		/* shouldn't be null, but let's check for luck */
		if (isnull)
			elog(ERROR, "unexpected null value in pg_policy.polroles");
		MemoryContextSwitchTo(rscxt);
		policy->roles = DatumGetArrayTypePCopy(datum);
		MemoryContextSwitchTo(oldcxt);

		/* Get policy qual (stored as a nodeToString text column) */
		datum = heap_getattr(tuple, Anum_pg_policy_polqual,
							 RelationGetDescr(catalog), &isnull);
		if (!isnull)
		{
			str_value = TextDatumGetCString(datum);
			MemoryContextSwitchTo(rscxt);
			policy->qual = (Expr *) stringToNode(str_value);
			MemoryContextSwitchTo(oldcxt);
			pfree(str_value);
		}
		else
			policy->qual = NULL;

		/* Get WITH CHECK qual */
		datum = heap_getattr(tuple, Anum_pg_policy_polwithcheck,
							 RelationGetDescr(catalog), &isnull);
		if (!isnull)
		{
			str_value = TextDatumGetCString(datum);
			MemoryContextSwitchTo(rscxt);
			policy->with_check_qual = (Expr *) stringToNode(str_value);
			MemoryContextSwitchTo(oldcxt);
			pfree(str_value);
		}
		else
			policy->with_check_qual = NULL;

		/* We want to cache whether there are SubLinks in these expressions */
		policy->hassublinks = checkExprHasSubLink((Node *) policy->qual) ||
			checkExprHasSubLink((Node *) policy->with_check_qual);

		/*
		 * Add this object to list.  For historical reasons, the list is built
		 * in reverse order.
		 */
		MemoryContextSwitchTo(rscxt);
		rsdesc->policies = lcons(policy, rsdesc->policies);
		MemoryContextSwitchTo(oldcxt);
	}

	systable_endscan(sscan);
	table_close(catalog, AccessShareLock);

	/*
	 * Success.  Reparent the descriptor's memory context under
	 * CacheMemoryContext so that it will live indefinitely, then attach the
	 * policy descriptor to the relcache entry.
	 */
	MemoryContextSetParent(rscxt, CacheMemoryContext);

	relation->rd_rsdesc = rsdesc;
}

/*
 * RemovePolicyById -
 *	 remove a policy by its OID.  If a policy does not exist with the provided
 *	 oid, then an error is raised.
 *
 * policy_id - the oid of the policy.
 */
void
RemovePolicyById(Oid policy_id)
{
	Relation	pg_policy_rel;
	SysScanDesc sscan;
	ScanKeyData skey[1];
	HeapTuple	tuple;
	Oid			relid;
	Relation	rel;

	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/*
	 * Find the policy to delete.
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(policy_id));

	sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
							   NULL, 1, skey);

	tuple = systable_getnext(sscan);

	/* If the policy exists, then remove it, otherwise raise an error. */
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for policy %u", policy_id);

	/*
	 * Open and exclusive-lock the relation the policy belongs to.  (We need
	 * exclusive lock to lock out queries that might otherwise depend on the
	 * set of policies the rel has; furthermore we've got to hold the lock
	 * till commit.)
	 */
	relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;

	rel = table_open(relid, AccessExclusiveLock);
	if (rel->rd_rel->relkind != RELKIND_RELATION &&
		rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table",
						RelationGetRelationName(rel))));

	if (!allowSystemTableMods && IsSystemRelation(rel))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						RelationGetRelationName(rel))));

	CatalogTupleDelete(pg_policy_rel, &tuple->t_self);

	systable_endscan(sscan);

	/*
	 * Note that, unlike some of the other flags in pg_class, relrowsecurity
	 * is not just an indication of if policies exist.  When relrowsecurity is
	 * set by a user, then all access to the relation must be through a
	 * policy.  If no policy is defined for the relation then a default-deny
	 * policy is created and all records are filtered (except for queries from
	 * the owner).
	 */
	CacheInvalidateRelcache(rel);

	/* keep the AccessExclusiveLock till commit */
	table_close(rel, NoLock);

	/* Clean up */
	table_close(pg_policy_rel, RowExclusiveLock);
}

/*
 * RemoveRoleFromObjectPolicy -
 *	 remove a role from a policy's applicable-roles list.
 *
 * Returns true if the role was successfully removed from the policy.
 * Returns false if the role was not removed because it would have left
 * polroles empty (which is disallowed, though perhaps it should not be).
 * On false return, the caller should instead drop the policy altogether.
 *
 * roleid - the oid of the role to remove
 * classid - should always be PolicyRelationId
 * policy_id - the oid of the policy.
 */
bool
RemoveRoleFromObjectPolicy(Oid roleid, Oid classid, Oid policy_id)
{
	Relation	pg_policy_rel;
	SysScanDesc sscan;
	ScanKeyData skey[1];
	HeapTuple	tuple;
	Oid			relid;
	ArrayType  *policy_roles;
	Datum		roles_datum;
	Oid		   *roles;
	int			num_roles;
	Datum	   *role_oids;
	bool		attr_isnull;
	bool		keep_policy = true;
	int			i,
				j;

	Assert(classid == PolicyRelationId);

	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/*
	 * Find the policy to update.
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(policy_id));

	sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true,
							   NULL, 1, skey);

	tuple = systable_getnext(sscan);

	/* Raise an error if we don't find the policy. */
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for policy %u", policy_id);

	/* Identify rel the policy belongs to */
	relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;

	/* Get the current set of roles */
	roles_datum = heap_getattr(tuple,
							   Anum_pg_policy_polroles,
							   RelationGetDescr(pg_policy_rel),
							   &attr_isnull);

	Assert(!attr_isnull);

	policy_roles = DatumGetArrayTypePCopy(roles_datum);
	roles = (Oid *) ARR_DATA_PTR(policy_roles);
	num_roles = ARR_DIMS(policy_roles)[0];

	/*
	 * Rebuild the polroles array, without any mentions of the target role.
	 * Ordinarily there'd be exactly one, but we must cope with duplicate
	 * mentions, since CREATE/ALTER POLICY historically have allowed that.
	 */
	role_oids = (Datum *) palloc(num_roles * sizeof(Datum));
	for (i = 0, j = 0; i < num_roles; i++)
	{
		if (roles[i] != roleid)
			role_oids[j++] = ObjectIdGetDatum(roles[i]);
	}
	num_roles = j;

	/* If any roles remain, update the policy entry. */
	if (num_roles > 0)
	{
		ArrayType  *role_ids;
		Datum		values[Natts_pg_policy];
		bool		isnull[Natts_pg_policy];
		bool		replaces[Natts_pg_policy];
		HeapTuple	new_tuple;
		HeapTuple	reltup;
		ObjectAddress target;
		ObjectAddress myself;

		/* zero-clear */
		memset(values, 0, sizeof(values));
		memset(replaces, 0, sizeof(replaces));
		memset(isnull, 0, sizeof(isnull));

		/* This is the array for the new tuple */
		role_ids = construct_array(role_oids, num_roles, OIDOID,
								   sizeof(Oid), true, TYPALIGN_INT);

		replaces[Anum_pg_policy_polroles - 1] = true;
		values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);

		new_tuple = heap_modify_tuple(tuple,
									  RelationGetDescr(pg_policy_rel),
									  values, isnull, replaces);
		CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple);

		/* Remove all the old shared dependencies (roles) */
		deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0);

		/* Record the new shared dependencies (roles) */
		myself.classId = PolicyRelationId;
		myself.objectId = policy_id;
		myself.objectSubId = 0;

		target.classId = AuthIdRelationId;
		target.objectSubId = 0;
		for (i = 0; i < num_roles; i++)
		{
			target.objectId = DatumGetObjectId(role_oids[i]);
			/* no need for dependency on the public role */
			if (target.objectId != ACL_ID_PUBLIC)
				recordSharedDependencyOn(&myself, &target,
										 SHARED_DEPENDENCY_POLICY);
		}

		InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0);

		heap_freetuple(new_tuple);

		/* Make updates visible */
		CommandCounterIncrement();

		/*
		 * Invalidate relcache entry for rel the policy belongs to, to force
		 * redoing any dependent plans.  In case of a race condition where the
		 * rel was just dropped, we need do nothing.
		 */
		reltup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
		if (HeapTupleIsValid(reltup))
		{
			CacheInvalidateRelcacheByTuple(reltup);
			ReleaseSysCache(reltup);
		}
	}
	else
	{
		/* No roles would remain, so drop the policy instead. */
		keep_policy = false;
	}

	/* Clean up. */
	systable_endscan(sscan);

	table_close(pg_policy_rel, RowExclusiveLock);

	return keep_policy;
}

/*
 * CreatePolicy -
 *	 handles the execution of the CREATE POLICY command.
 *
 * stmt - the CreatePolicyStmt that describes the policy to create.
 */
ObjectAddress
CreatePolicy(CreatePolicyStmt *stmt)
{
	Relation	pg_policy_rel;
	Oid			policy_id;
	Relation	target_table;
	Oid			table_id;
	char		polcmd;
	Datum	   *role_oids;
	int			nitems = 0;
	ArrayType  *role_ids;
	ParseState *qual_pstate;
	ParseState *with_check_pstate;
	ParseNamespaceItem *nsitem;
	Node	   *qual;
	Node	   *with_check_qual;
	ScanKeyData skey[2];
	SysScanDesc sscan;
	HeapTuple	policy_tuple;
	Datum		values[Natts_pg_policy];
	bool		isnull[Natts_pg_policy];
	ObjectAddress target;
	ObjectAddress myself;
	int			i;

	/* Parse command */
	polcmd = parse_policy_command(stmt->cmd_name);

	/*
	 * If the command is SELECT or DELETE then WITH CHECK should be NULL.
	 */
	if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR)
		&& stmt->with_check != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("WITH CHECK cannot be applied to SELECT or DELETE")));

	/*
	 * If the command is INSERT then WITH CHECK should be the only expression
	 * provided.
	 */
	if (polcmd == ACL_INSERT_CHR && stmt->qual != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("only WITH CHECK expression allowed for INSERT")));

	/* Collect role ids */
	role_oids = policy_role_list_to_array(stmt->roles, &nitems);
	role_ids = construct_array(role_oids, nitems, OIDOID,
							   sizeof(Oid), true, TYPALIGN_INT);

	/* Parse the supplied clause */
	qual_pstate = make_parsestate(NULL);
	with_check_pstate = make_parsestate(NULL);

	/* zero-clear */
	memset(values, 0, sizeof(values));
	memset(isnull, 0, sizeof(isnull));

	/* Get id of table.  Also handles permissions checks. */
	table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock,
										0,
										RangeVarCallbackForPolicy,
										(void *) stmt);

	/* Open target_table to build quals. No additional lock is necessary. */
	target_table = relation_open(table_id, NoLock);

	/* Add for the regular security quals */
	nsitem = addRangeTableEntryForRelation(qual_pstate, target_table,
										   AccessShareLock,
										   NULL, false, false);
	addNSItemToQuery(qual_pstate, nsitem, false, true, true);

	/* Add for the with-check quals */
	nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table,
										   AccessShareLock,
										   NULL, false, false);
	addNSItemToQuery(with_check_pstate, nsitem, false, true, true);

	qual = transformWhereClause(qual_pstate,
								stmt->qual,
								EXPR_KIND_POLICY,
								"POLICY");

	with_check_qual = transformWhereClause(with_check_pstate,
										   stmt->with_check,
										   EXPR_KIND_POLICY,
										   "POLICY");

	/* Fix up collation information */
	assign_expr_collations(qual_pstate, qual);
	assign_expr_collations(with_check_pstate, with_check_qual);

	/* Open pg_policy catalog */
	pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock);

	/* Set key - policy's relation id. */
	ScanKeyInit(&skey[0],
				Anum_pg_policy_polrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(table_id));

	/* Set key - policy's name. */
	ScanKeyInit(&skey[1],
				Anum_pg_policy_polname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->policy_name));

	sscan = systable_beginscan(pg_policy_rel,
							   PolicyPolrelidPolnameIndexId, true, NULL, 2,
							   skey);

	policy_tuple = systable_getnext(sscan);

	/* Complain if the policy name already exists for the table */
	if (HeapTupleIsValid(policy_tuple))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("policy \"%s\" for table \"%s\" already exists",
						stmt->policy_name, RelationGetRelationName(target_table))));

	policy_id = GetNewOidWithIndex(pg_policy_rel, PolicyOidIndexId,
								   Anum_pg_policy_oid);
	values[Anum_pg_policy_oid - 1] = ObjectIdGetDatum(policy_id);
	values[Anum_pg_policy_polrelid - 1] = ObjectIdGetDatum(table_id);
	values[Anum_pg_policy_polname - 1] = DirectFunctionCall1(namein,
															 CStringGetDatum(stmt->policy_name));
	values[Anum_pg_policy_polcmd - 1] = CharGetDatum(polcmd);
	values[Anum_pg_policy_polpermissive - 1] = BoolGetDatum(stmt->permissive);
	values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);

	/* Add qual if present (stored in nodeToString text form). */
	if (qual)
		values[Anum_pg_policy_polqual - 1] = CStringGetTextDatum(nodeToString(qual));
	else
		isnull[Anum_pg_policy_polqual - 1] = true;

	/* Add WITH CHECK qual if present */
	if (with_check_qual)
		values[Anum_pg_policy_polwithcheck - 1] = CStringGetTextDatum(nodeToString(with_check_qual));
	else
		isnull[Anum_pg_policy_polwithcheck - 1] = true;

	policy_tuple = heap_form_tuple(RelationGetDescr(pg_policy_rel), values,
								   isnull);

	CatalogTupleInsert(pg_policy_rel, policy_tuple);

	/* Record Dependencies */
	target.classId = RelationRelationId;
	target.objectId = table_id;
	target.objectSubId = 0;

	myself.classId = PolicyRelationId;
	myself.objectId = policy_id;
	myself.objectSubId = 0;

	recordDependencyOn(&myself, &target, DEPENDENCY_AUTO);

	recordDependencyOnExpr(&myself, qual, qual_pstate->p_rtable,
						   DEPENDENCY_NORMAL);

	recordDependencyOnExpr(&myself, with_check_qual,
						   with_check_pstate->p_rtable, DEPENDENCY_NORMAL);

	/* Register role dependencies */
	target.classId = AuthIdRelationId;
	target.objectSubId = 0;
	for (i = 0; i < nitems; i++)
	{
		target.objectId = DatumGetObjectId(role_oids[i]);
		/* no dependency if public */
		if (target.objectId != ACL_ID_PUBLIC)
			recordSharedDependencyOn(&myself, &target,
									 SHARED_DEPENDENCY_POLICY);
	}

	InvokeObjectPostCreateHook(PolicyRelationId, policy_id, 0);

	/* Invalidate Relation Cache */
	CacheInvalidateRelcache(target_table);

	/* Clean up. */
	heap_freetuple(policy_tuple);
	free_parsestate(qual_pstate);
	free_parsestate(with_check_pstate);
	systable_endscan(sscan);
	relation_close(target_table, NoLock);
	table_close(pg_policy_rel, RowExclusiveLock);

	return myself;
}

/*
 * AlterPolicy -
 *	 handles the execution of the ALTER POLICY command.
 *
 * stmt - the AlterPolicyStmt that describes the policy and how to alter it.
+ */ +ObjectAddress +AlterPolicy(AlterPolicyStmt *stmt) +{ + Relation pg_policy_rel; + Oid policy_id; + Relation target_table; + Oid table_id; + Datum *role_oids = NULL; + int nitems = 0; + ArrayType *role_ids = NULL; + List *qual_parse_rtable = NIL; + List *with_check_parse_rtable = NIL; + Node *qual = NULL; + Node *with_check_qual = NULL; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + HeapTuple new_tuple; + Datum values[Natts_pg_policy]; + bool isnull[Natts_pg_policy]; + bool replaces[Natts_pg_policy]; + ObjectAddress target; + ObjectAddress myself; + Datum polcmd_datum; + char polcmd; + bool polcmd_isnull; + int i; + + /* Parse role_ids */ + if (stmt->roles != NULL) + { + role_oids = policy_role_list_to_array(stmt->roles, &nitems); + role_ids = construct_array(role_oids, nitems, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + } + + /* Get id of table. Also handles permissions checks. */ + table_id = RangeVarGetRelidExtended(stmt->table, AccessExclusiveLock, + 0, + RangeVarCallbackForPolicy, + (void *) stmt); + + target_table = relation_open(table_id, NoLock); + + /* Parse the using policy clause */ + if (stmt->qual) + { + ParseNamespaceItem *nsitem; + ParseState *qual_pstate = make_parsestate(NULL); + + nsitem = addRangeTableEntryForRelation(qual_pstate, target_table, + AccessShareLock, + NULL, false, false); + + addNSItemToQuery(qual_pstate, nsitem, false, true, true); + + qual = transformWhereClause(qual_pstate, stmt->qual, + EXPR_KIND_POLICY, + "POLICY"); + + /* Fix up collation information */ + assign_expr_collations(qual_pstate, qual); + + qual_parse_rtable = qual_pstate->p_rtable; + free_parsestate(qual_pstate); + } + + /* Parse the with-check policy clause */ + if (stmt->with_check) + { + ParseNamespaceItem *nsitem; + ParseState *with_check_pstate = make_parsestate(NULL); + + nsitem = addRangeTableEntryForRelation(with_check_pstate, target_table, + AccessShareLock, + NULL, false, false); + + addNSItemToQuery(with_check_pstate, 
nsitem, false, true, true); + + with_check_qual = transformWhereClause(with_check_pstate, + stmt->with_check, + EXPR_KIND_POLICY, + "POLICY"); + + /* Fix up collation information */ + assign_expr_collations(with_check_pstate, with_check_qual); + + with_check_parse_rtable = with_check_pstate->p_rtable; + free_parsestate(with_check_pstate); + } + + /* zero-clear */ + memset(values, 0, sizeof(values)); + memset(replaces, 0, sizeof(replaces)); + memset(isnull, 0, sizeof(isnull)); + + /* Find policy to update. */ + pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock); + + /* Set key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Set key - policy's name. */ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->policy_name)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + /* Check that the policy is found, raise an error if not. */ + if (!HeapTupleIsValid(policy_tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + stmt->policy_name, + RelationGetRelationName(target_table)))); + + /* Get policy command */ + polcmd_datum = heap_getattr(policy_tuple, Anum_pg_policy_polcmd, + RelationGetDescr(pg_policy_rel), + &polcmd_isnull); + Assert(!polcmd_isnull); + polcmd = DatumGetChar(polcmd_datum); + + /* + * If the command is SELECT or DELETE then WITH CHECK should be NULL. + */ + if ((polcmd == ACL_SELECT_CHR || polcmd == ACL_DELETE_CHR) + && stmt->with_check != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("only USING expression allowed for SELECT, DELETE"))); + + /* + * If the command is INSERT then WITH CHECK should be the only expression + * provided. 
+ */ + if ((polcmd == ACL_INSERT_CHR) + && stmt->qual != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("only WITH CHECK expression allowed for INSERT"))); + + policy_id = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + if (role_ids != NULL) + { + replaces[Anum_pg_policy_polroles - 1] = true; + values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids); + } + else + { + Oid *roles; + Datum roles_datum; + bool attr_isnull; + ArrayType *policy_roles; + + /* + * We need to pull the set of roles this policy applies to from what's + * in the catalog, so that we can recreate the dependencies correctly + * for the policy. + */ + + roles_datum = heap_getattr(policy_tuple, Anum_pg_policy_polroles, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + Assert(!attr_isnull); + + policy_roles = DatumGetArrayTypePCopy(roles_datum); + + roles = (Oid *) ARR_DATA_PTR(policy_roles); + + nitems = ARR_DIMS(policy_roles)[0]; + + role_oids = (Datum *) palloc(nitems * sizeof(Datum)); + + for (i = 0; i < nitems; i++) + role_oids[i] = ObjectIdGetDatum(roles[i]); + } + + if (qual != NULL) + { + replaces[Anum_pg_policy_polqual - 1] = true; + values[Anum_pg_policy_polqual - 1] + = CStringGetTextDatum(nodeToString(qual)); + } + else + { + Datum value_datum; + bool attr_isnull; + + /* + * We need to pull the USING expression and build the range table for + * the policy from what's in the catalog, so that we can recreate the + * dependencies correctly for the policy. 
+ */ + + /* Check if the policy has a USING expr */ + value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polqual, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + if (!attr_isnull) + { + char *qual_value; + ParseState *qual_pstate; + + /* parsestate is built just to build the range table */ + qual_pstate = make_parsestate(NULL); + + qual_value = TextDatumGetCString(value_datum); + qual = stringToNode(qual_value); + + /* Add this rel to the parsestate's rangetable, for dependencies */ + (void) addRangeTableEntryForRelation(qual_pstate, target_table, + AccessShareLock, + NULL, false, false); + + qual_parse_rtable = qual_pstate->p_rtable; + free_parsestate(qual_pstate); + } + } + + if (with_check_qual != NULL) + { + replaces[Anum_pg_policy_polwithcheck - 1] = true; + values[Anum_pg_policy_polwithcheck - 1] + = CStringGetTextDatum(nodeToString(with_check_qual)); + } + else + { + Datum value_datum; + bool attr_isnull; + + /* + * We need to pull the WITH CHECK expression and build the range table + * for the policy from what's in the catalog, so that we can recreate + * the dependencies correctly for the policy. 
+ */ + + /* Check if the policy has a WITH CHECK expr */ + value_datum = heap_getattr(policy_tuple, Anum_pg_policy_polwithcheck, + RelationGetDescr(pg_policy_rel), + &attr_isnull); + if (!attr_isnull) + { + char *with_check_value; + ParseState *with_check_pstate; + + /* parsestate is built just to build the range table */ + with_check_pstate = make_parsestate(NULL); + + with_check_value = TextDatumGetCString(value_datum); + with_check_qual = stringToNode(with_check_value); + + /* Add this rel to the parsestate's rangetable, for dependencies */ + (void) addRangeTableEntryForRelation(with_check_pstate, + target_table, + AccessShareLock, + NULL, false, false); + + with_check_parse_rtable = with_check_pstate->p_rtable; + free_parsestate(with_check_pstate); + } + } + + new_tuple = heap_modify_tuple(policy_tuple, + RelationGetDescr(pg_policy_rel), + values, isnull, replaces); + CatalogTupleUpdate(pg_policy_rel, &new_tuple->t_self, new_tuple); + + /* Update Dependencies. */ + deleteDependencyRecordsFor(PolicyRelationId, policy_id, false); + + /* Record Dependencies */ + target.classId = RelationRelationId; + target.objectId = table_id; + target.objectSubId = 0; + + myself.classId = PolicyRelationId; + myself.objectId = policy_id; + myself.objectSubId = 0; + + recordDependencyOn(&myself, &target, DEPENDENCY_AUTO); + + recordDependencyOnExpr(&myself, qual, qual_parse_rtable, DEPENDENCY_NORMAL); + + recordDependencyOnExpr(&myself, with_check_qual, with_check_parse_rtable, + DEPENDENCY_NORMAL); + + /* Register role dependencies */ + deleteSharedDependencyRecordsFor(PolicyRelationId, policy_id, 0); + target.classId = AuthIdRelationId; + target.objectSubId = 0; + for (i = 0; i < nitems; i++) + { + target.objectId = DatumGetObjectId(role_oids[i]); + /* no dependency if public */ + if (target.objectId != ACL_ID_PUBLIC) + recordSharedDependencyOn(&myself, &target, + SHARED_DEPENDENCY_POLICY); + } + + InvokeObjectPostAlterHook(PolicyRelationId, policy_id, 0); + + 
heap_freetuple(new_tuple); + + /* Invalidate Relation Cache */ + CacheInvalidateRelcache(target_table); + + /* Clean up. */ + systable_endscan(sscan); + relation_close(target_table, NoLock); + table_close(pg_policy_rel, RowExclusiveLock); + + return myself; +} + +/* + * rename_policy - + * change the name of a policy on a relation + */ +ObjectAddress +rename_policy(RenameStmt *stmt) +{ + Relation pg_policy_rel; + Relation target_table; + Oid table_id; + Oid opoloid; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + ObjectAddress address; + + /* Get id of table. Also handles permissions checks. */ + table_id = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + 0, + RangeVarCallbackForPolicy, + (void *) stmt); + + target_table = relation_open(table_id, NoLock); + + pg_policy_rel = table_open(PolicyRelationId, RowExclusiveLock); + + /* First pass -- check for conflict */ + + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Add key - policy's name. */ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->newname)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + if (HeapTupleIsValid(systable_getnext(sscan))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("policy \"%s\" for table \"%s\" already exists", + stmt->newname, RelationGetRelationName(target_table)))); + + systable_endscan(sscan); + + /* Second pass -- find existing policy and update */ + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(table_id)); + + /* Add key - policy's name. 
*/ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->subname)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + /* Complain if we did not find the policy */ + if (!HeapTupleIsValid(policy_tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + stmt->subname, RelationGetRelationName(target_table)))); + + opoloid = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + policy_tuple = heap_copytuple(policy_tuple); + + namestrcpy(&((Form_pg_policy) GETSTRUCT(policy_tuple))->polname, + stmt->newname); + + CatalogTupleUpdate(pg_policy_rel, &policy_tuple->t_self, policy_tuple); + + InvokeObjectPostAlterHook(PolicyRelationId, opoloid, 0); + + ObjectAddressSet(address, PolicyRelationId, opoloid); + + /* + * Invalidate relation's relcache entry so that other backends (and this + * one too!) are sent SI message to make them rebuild relcache entries. + * (Ideally this should happen automatically...) + */ + CacheInvalidateRelcache(target_table); + + /* Clean up. */ + systable_endscan(sscan); + table_close(pg_policy_rel, RowExclusiveLock); + relation_close(target_table, NoLock); + + return address; +} + +/* + * get_relation_policy_oid - Look up a policy by name to find its OID + * + * If missing_ok is false, throw an error if policy not found. If + * true, just return InvalidOid. + */ +Oid +get_relation_policy_oid(Oid relid, const char *policy_name, bool missing_ok) +{ + Relation pg_policy_rel; + ScanKeyData skey[2]; + SysScanDesc sscan; + HeapTuple policy_tuple; + Oid policy_oid; + + pg_policy_rel = table_open(PolicyRelationId, AccessShareLock); + + /* Add key - policy's relation id. */ + ScanKeyInit(&skey[0], + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + /* Add key - policy's name. 
*/ + ScanKeyInit(&skey[1], + Anum_pg_policy_polname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(policy_name)); + + sscan = systable_beginscan(pg_policy_rel, + PolicyPolrelidPolnameIndexId, true, NULL, 2, + skey); + + policy_tuple = systable_getnext(sscan); + + if (!HeapTupleIsValid(policy_tuple)) + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("policy \"%s\" for table \"%s\" does not exist", + policy_name, get_rel_name(relid)))); + + policy_oid = InvalidOid; + } + else + policy_oid = ((Form_pg_policy) GETSTRUCT(policy_tuple))->oid; + + /* Clean up. */ + systable_endscan(sscan); + table_close(pg_policy_rel, AccessShareLock); + + return policy_oid; +} + +/* + * relation_has_policies - Determine if relation has any policies + */ +bool +relation_has_policies(Relation rel) +{ + Relation catalog; + ScanKeyData skey; + SysScanDesc sscan; + HeapTuple policy_tuple; + bool ret = false; + + catalog = table_open(PolicyRelationId, AccessShareLock); + ScanKeyInit(&skey, + Anum_pg_policy_polrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + sscan = systable_beginscan(catalog, PolicyPolrelidPolnameIndexId, true, + NULL, 1, &skey); + policy_tuple = systable_getnext(sscan); + if (HeapTupleIsValid(policy_tuple)) + ret = true; + + systable_endscan(sscan); + table_close(catalog, AccessShareLock); + + return ret; +} diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c new file mode 100644 index 0000000..9902c5c --- /dev/null +++ b/src/backend/commands/portalcmds.c @@ -0,0 +1,496 @@ +/*------------------------------------------------------------------------- + * + * portalcmds.c + * Utility commands affecting portals (that is, SQL cursor commands) + * + * Note: see also tcop/pquery.c, which implements portal operations for + * the FE/BE protocol. This module uses pquery.c for some operations. 
+ * And both modules depend on utils/mmgr/portalmem.c, which controls + * storage management for portals (but doesn't run any queries in them). + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/portalcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/xact.h" +#include "commands/portalcmds.h" +#include "executor/executor.h" +#include "executor/tstoreReceiver.h" +#include "miscadmin.h" +#include "rewrite/rewriteHandler.h" +#include "tcop/pquery.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" +#include "utils/snapmgr.h" + + +/* + * PerformCursorOpen + * Execute SQL DECLARE CURSOR command. + */ +void +PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo params, + bool isTopLevel) +{ + Query *query = castNode(Query, cstmt->query); + List *rewritten; + PlannedStmt *plan; + Portal portal; + MemoryContext oldContext; + char *queryString; + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (!cstmt->portalname || cstmt->portalname[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* + * If this is a non-holdable cursor, we require that this statement has + * been executed inside a transaction block (or else, it would have no + * user-visible effect). + */ + if (!(cstmt->options & CURSOR_OPT_HOLD)) + RequireTransactionBlock(isTopLevel, "DECLARE CURSOR"); + else if (InSecurityRestrictedOperation()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot create a cursor WITH HOLD within security-restricted operation"))); + + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. 
We do not do AcquireRewriteLocks: we assume the query either + * came straight from the parser, or suitable locks were acquired by + * plancache.c. + */ + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one query */ + if (list_length(rewritten) != 1) + elog(ERROR, "non-SELECT statement in DECLARE CURSOR"); + + query = linitial_node(Query, rewritten); + + if (query->commandType != CMD_SELECT) + elog(ERROR, "non-SELECT statement in DECLARE CURSOR"); + + /* Plan the query, applying the specified options */ + plan = pg_plan_query(query, pstate->p_sourcetext, cstmt->options, params); + + /* + * Create a portal and copy the plan and query string into its memory. + */ + portal = CreatePortal(cstmt->portalname, false, false); + + oldContext = MemoryContextSwitchTo(portal->portalContext); + + plan = copyObject(plan); + + queryString = pstrdup(pstate->p_sourcetext); + + PortalDefineQuery(portal, + NULL, + queryString, + CMDTAG_SELECT, /* cursor's query is always a SELECT */ + list_make1(plan), + NULL); + + /*---------- + * Also copy the outer portal's parameter list into the inner portal's + * memory context. We want to pass down the parameter values in case we + * had a command like + * DECLARE c CURSOR FOR SELECT ... WHERE foo = $1 + * This will have been parsed using the outer parameter set and the + * parameter value needs to be preserved for use when the cursor is + * executed. + *---------- + */ + params = copyParamList(params); + + MemoryContextSwitchTo(oldContext); + + /* + * Set up options for portal. + * + * If the user didn't specify a SCROLL type, allow or disallow scrolling + * based on whether it would require any additional runtime overhead to do + * so. Also, we disallow scrolling for FOR UPDATE cursors. 
+ */ + portal->cursorOptions = cstmt->options; + if (!(portal->cursorOptions & (CURSOR_OPT_SCROLL | CURSOR_OPT_NO_SCROLL))) + { + if (plan->rowMarks == NIL && + ExecSupportsBackwardScan(plan->planTree)) + portal->cursorOptions |= CURSOR_OPT_SCROLL; + else + portal->cursorOptions |= CURSOR_OPT_NO_SCROLL; + } + + /* + * Start execution, inserting parameters if any. + */ + PortalStart(portal, params, 0, GetActiveSnapshot()); + + Assert(portal->strategy == PORTAL_ONE_SELECT); + + /* + * We're done; the query won't actually be run until PerformPortalFetch is + * called. + */ +} + +/* + * PerformPortalFetch + * Execute SQL FETCH or MOVE command. + * + * stmt: parsetree node for command + * dest: where to send results + * qc: where to store a command completion status data. + * + * qc may be NULL if caller doesn't want status data. + */ +void +PerformPortalFetch(FetchStmt *stmt, + DestReceiver *dest, + QueryCompletion *qc) +{ + Portal portal; + uint64 nprocessed; + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (!stmt->portalname || stmt->portalname[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* get the portal from the portal name */ + portal = GetPortalByName(stmt->portalname); + if (!PortalIsValid(portal)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", stmt->portalname))); + return; /* keep compiler happy */ + } + + /* Adjust dest if needed. MOVE wants destination DestNone */ + if (stmt->ismove) + dest = None_Receiver; + + /* Do it */ + nprocessed = PortalRunFetch(portal, + stmt->direction, + stmt->howMany, + dest); + + /* Return command status if wanted */ + if (qc) + SetQueryCompletion(qc, stmt->ismove ? CMDTAG_MOVE : CMDTAG_FETCH, + nprocessed); +} + +/* + * PerformPortalClose + * Close a cursor. 
+ */ +void +PerformPortalClose(const char *name) +{ + Portal portal; + + /* NULL means CLOSE ALL */ + if (name == NULL) + { + PortalHashTableDeleteAll(); + return; + } + + /* + * Disallow empty-string cursor name (conflicts with protocol-level + * unnamed portal). + */ + if (name[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_NAME), + errmsg("invalid cursor name: must not be empty"))); + + /* + * get the portal from the portal name + */ + portal = GetPortalByName(name); + if (!PortalIsValid(portal)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_CURSOR), + errmsg("cursor \"%s\" does not exist", name))); + return; /* keep compiler happy */ + } + + /* + * Note: PortalCleanup is called as a side-effect, if not already done. + */ + PortalDrop(portal, false); +} + +/* + * PortalCleanup + * + * Clean up a portal when it's dropped. This is the standard cleanup hook + * for portals. + * + * Note: if portal->status is PORTAL_FAILED, we are probably being called + * during error abort, and must be careful to avoid doing anything that + * is likely to fail again. + */ +void +PortalCleanup(Portal portal) +{ + QueryDesc *queryDesc; + + /* + * sanity checks + */ + AssertArg(PortalIsValid(portal)); + AssertArg(portal->cleanup == PortalCleanup); + + /* + * Shut down executor, if still running. We skip this during error abort, + * since other mechanisms will take care of releasing executor resources, + * and we can't be sure that ExecutorEnd itself wouldn't fail. + */ + queryDesc = portal->queryDesc; + if (queryDesc) + { + /* + * Reset the queryDesc before anything else. This prevents us from + * trying to shut down the executor twice, in case of an error below. + * The transaction abort mechanisms will take care of resource cleanup + * in such a case. 
+ */ + portal->queryDesc = NULL; + + if (portal->status != PORTAL_FAILED) + { + ResourceOwner saveResourceOwner; + + /* We must make the portal's resource owner current */ + saveResourceOwner = CurrentResourceOwner; + if (portal->resowner) + CurrentResourceOwner = portal->resowner; + + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + CurrentResourceOwner = saveResourceOwner; + } + } +} + +/* + * PersistHoldablePortal + * + * Prepare the specified Portal for access outside of the current + * transaction. When this function returns, all future accesses to the + * portal must be done via the Tuplestore (not by invoking the + * executor). + */ +void +PersistHoldablePortal(Portal portal) +{ + QueryDesc *queryDesc = portal->queryDesc; + Portal saveActivePortal; + ResourceOwner saveResourceOwner; + MemoryContext savePortalContext; + MemoryContext oldcxt; + + /* + * If we're preserving a holdable portal, we had better be inside the + * transaction that originally created it. + */ + Assert(portal->createSubid != InvalidSubTransactionId); + Assert(queryDesc != NULL); + + /* + * Caller must have created the tuplestore already ... but not a snapshot. + */ + Assert(portal->holdContext != NULL); + Assert(portal->holdStore != NULL); + Assert(portal->holdSnapshot == NULL); + + /* + * Before closing down the executor, we must copy the tupdesc into + * long-term memory, since it was created in executor memory. + */ + oldcxt = MemoryContextSwitchTo(portal->holdContext); + + portal->tupDesc = CreateTupleDescCopy(portal->tupDesc); + + MemoryContextSwitchTo(oldcxt); + + /* + * Check for improper portal use, and mark portal active. + */ + MarkPortalActive(portal); + + /* + * Set up global portal context pointers. 
+ */ + saveActivePortal = ActivePortal; + saveResourceOwner = CurrentResourceOwner; + savePortalContext = PortalContext; + PG_TRY(); + { + ScanDirection direction = ForwardScanDirection; + + ActivePortal = portal; + if (portal->resowner) + CurrentResourceOwner = portal->resowner; + PortalContext = portal->portalContext; + + MemoryContextSwitchTo(PortalContext); + + PushActiveSnapshot(queryDesc->snapshot); + + /* + * If the portal is marked scrollable, we need to store the entire + * result set in the tuplestore, so that subsequent backward FETCHs + * can be processed. Otherwise, store only the not-yet-fetched rows. + * (The latter is not only more efficient, but avoids semantic + * problems if the query's output isn't stable.) + * + * In the no-scroll case, tuple indexes in the tuplestore will not + * match the cursor's nominal position (portalPos). Currently this + * causes no difficulty because we only navigate in the tuplestore by + * relative position, except for the tuplestore_skiptuples call below + * and the tuplestore_rescan call in DoPortalRewind, both of which are + * disabled for no-scroll cursors. But someday we might need to track + * the offset between the holdStore and the cursor's nominal position + * explicitly. + */ + if (portal->cursorOptions & CURSOR_OPT_SCROLL) + { + ExecutorRewind(queryDesc); + } + else + { + /* + * If we already reached end-of-query, set the direction to + * NoMovement to avoid trying to fetch any tuples. (This check + * exists because not all plan node types are robust about being + * called again if they've already returned NULL once.) We'll + * still set up an empty tuplestore, though, to keep this from + * being a special case later. + */ + if (portal->atEnd) + direction = NoMovementScanDirection; + } + + /* + * Change the destination to output to the tuplestore. Note we tell + * the tuplestore receiver to detoast all data passed through it; this + * makes it safe to not keep a snapshot associated with the data. 
+ */ + queryDesc->dest = CreateDestReceiver(DestTuplestore); + SetTuplestoreDestReceiverParams(queryDesc->dest, + portal->holdStore, + portal->holdContext, + true, + NULL, + NULL); + + /* Fetch the result set into the tuplestore */ + ExecutorRun(queryDesc, direction, 0L, false); + + queryDesc->dest->rDestroy(queryDesc->dest); + queryDesc->dest = NULL; + + /* + * Now shut down the inner executor. + */ + portal->queryDesc = NULL; /* prevent double shutdown */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + FreeQueryDesc(queryDesc); + + /* + * Set the position in the result set. + */ + MemoryContextSwitchTo(portal->holdContext); + + if (portal->atEnd) + { + /* + * Just force the tuplestore forward to its end. The size of the + * skip request here is arbitrary. + */ + while (tuplestore_skiptuples(portal->holdStore, 1000000, true)) + /* continue */ ; + } + else + { + tuplestore_rescan(portal->holdStore); + + /* + * In the no-scroll case, the start of the tuplestore is exactly + * where we want to be, so no repositioning is wanted. + */ + if (portal->cursorOptions & CURSOR_OPT_SCROLL) + { + if (!tuplestore_skiptuples(portal->holdStore, + portal->portalPos, + true)) + elog(ERROR, "unexpected end of tuple stream"); + } + } + } + PG_CATCH(); + { + /* Uncaught error while executing portal: mark it dead */ + MarkPortalFailed(portal); + + /* Restore global vars and propagate error */ + ActivePortal = saveActivePortal; + CurrentResourceOwner = saveResourceOwner; + PortalContext = savePortalContext; + + PG_RE_THROW(); + } + PG_END_TRY(); + + MemoryContextSwitchTo(oldcxt); + + /* Mark portal not active */ + portal->status = PORTAL_READY; + + ActivePortal = saveActivePortal; + CurrentResourceOwner = saveResourceOwner; + PortalContext = savePortalContext; + + PopActiveSnapshot(); + + /* + * We can now release any subsidiary memory of the portal's context; we'll + * never use it again. 
The executor already dropped its context, but this + * will clean up anything that glommed onto the portal's context via + * PortalContext. + */ + MemoryContextDeleteChildren(portal->portalContext); +} diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c new file mode 100644 index 0000000..fc5c7f9 --- /dev/null +++ b/src/backend/commands/prepare.c @@ -0,0 +1,729 @@ +/*------------------------------------------------------------------------- + * + * prepare.c + * Prepareable SQL statements via PREPARE, EXECUTE and DEALLOCATE + * + * This module also implements storage of prepared statements that are + * accessed via the extended FE/BE query protocol. + * + * + * Copyright (c) 2002-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/commands/prepare.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/xact.h" +#include "catalog/pg_type.h" +#include "commands/createas.h" +#include "commands/prepare.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_type.h" +#include "rewrite/rewriteHandler.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" + + +/* + * The hash table in which prepared queries are stored. This is + * per-backend: query plans are not shared between backends. + * The keys for this hash table are the arguments to PREPARE and EXECUTE + * (statement names); the entries are PreparedStatement structs. 
+ */ +static HTAB *prepared_queries = NULL; + +static void InitQueryHashTable(void); +static ParamListInfo EvaluateParams(ParseState *pstate, + PreparedStatement *pstmt, List *params, + EState *estate); +static Datum build_regtype_array(Oid *param_types, int num_params); + +/* + * Implements the 'PREPARE' utility statement. + */ +void +PrepareQuery(ParseState *pstate, PrepareStmt *stmt, + int stmt_location, int stmt_len) +{ + RawStmt *rawstmt; + CachedPlanSource *plansource; + Oid *argtypes = NULL; + int nargs; + List *query_list; + + /* + * Disallow empty-string statement name (conflicts with protocol-level + * unnamed statement). + */ + if (!stmt->name || stmt->name[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PSTATEMENT_DEFINITION), + errmsg("invalid statement name: must not be empty"))); + + /* + * Need to wrap the contained statement in a RawStmt node to pass it to + * parse analysis. + */ + rawstmt = makeNode(RawStmt); + rawstmt->stmt = stmt->query; + rawstmt->stmt_location = stmt_location; + rawstmt->stmt_len = stmt_len; + + /* + * Create the CachedPlanSource before we do parse analysis, since it needs + * to see the unmodified raw parse tree. + */ + plansource = CreateCachedPlan(rawstmt, pstate->p_sourcetext, + CreateCommandTag(stmt->query)); + + /* Transform list of TypeNames to array of type OIDs */ + nargs = list_length(stmt->argtypes); + + if (nargs) + { + int i; + ListCell *l; + + argtypes = (Oid *) palloc(nargs * sizeof(Oid)); + i = 0; + + foreach(l, stmt->argtypes) + { + TypeName *tn = lfirst(l); + Oid toid = typenameTypeId(pstate, tn); + + argtypes[i++] = toid; + } + } + + /* + * Analyze the statement using these parameter types (any parameters + * passed in from above us will not be visible to it), allowing + * information about unknown parameters to be deduced from context. + * Rewrite the query. The result could be 0, 1, or many queries. 
+ */ + query_list = pg_analyze_and_rewrite_varparams(rawstmt, pstate->p_sourcetext, + &argtypes, &nargs, NULL); + + /* Finish filling in the CachedPlanSource */ + CompleteCachedPlan(plansource, + query_list, + NULL, + argtypes, + nargs, + NULL, + NULL, + CURSOR_OPT_PARALLEL_OK, /* allow parallel mode */ + true); /* fixed result */ + + /* + * Save the results. + */ + StorePreparedStatement(stmt->name, + plansource, + true); +} + +/* + * ExecuteQuery --- implement the 'EXECUTE' utility statement. + * + * This code also supports CREATE TABLE ... AS EXECUTE. That case is + * indicated by passing a non-null intoClause. The DestReceiver is already + * set up correctly for CREATE TABLE AS, but we still have to make a few + * other adjustments here. + */ +void +ExecuteQuery(ParseState *pstate, + ExecuteStmt *stmt, IntoClause *intoClause, + ParamListInfo params, + DestReceiver *dest, QueryCompletion *qc) +{ + PreparedStatement *entry; + CachedPlan *cplan; + List *plan_list; + ParamListInfo paramLI = NULL; + EState *estate = NULL; + Portal portal; + char *query_string; + int eflags; + long count; + + /* Look it up in the hash table */ + entry = FetchPreparedStatement(stmt->name, true); + + /* Shouldn't find a non-fixed-result cached plan */ + if (!entry->plansource->fixed_result) + elog(ERROR, "EXECUTE does not support variable-result cached plans"); + + /* Evaluate parameters, if any */ + if (entry->plansource->num_params > 0) + { + /* + * Need an EState to evaluate parameters; must not delete it till end + * of query, in case parameters are pass-by-reference. Note that the + * passed-in "params" could possibly be referenced in the parameter + * expressions. 
+ */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + paramLI = EvaluateParams(pstate, entry, stmt->params, estate); + } + + /* Create a new portal to run the query in */ + portal = CreateNewPortal(); + /* Don't display the portal in pg_cursors, it is for internal use only */ + portal->visible = false; + + /* Copy the plan's saved query string into the portal's memory */ + query_string = MemoryContextStrdup(portal->portalContext, + entry->plansource->query_string); + + /* Replan if needed, and increment plan refcount for portal */ + cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL); + plan_list = cplan->stmt_list; + + /* + * DO NOT add any logic that could possibly throw an error between + * GetCachedPlan and PortalDefineQuery, or you'll leak the plan refcount. + */ + PortalDefineQuery(portal, + NULL, + query_string, + entry->plansource->commandTag, + plan_list, + cplan); + + /* + * For CREATE TABLE ... AS EXECUTE, we must verify that the prepared + * statement is one that produces tuples. Currently we insist that it be + * a plain old SELECT. In future we might consider supporting other + * things such as INSERT ... RETURNING, but there are a couple of issues + * to be settled first, notably how WITH NO DATA should be handled in such + * a case (do we really want to suppress execution?) and how to pass down + * the OID-determining eflags (PortalStart won't handle them in such a + * case, and for that matter it's not clear the executor will either). + * + * For CREATE TABLE ... AS EXECUTE, we also have to ensure that the proper + * eflags and fetch count are passed to PortalStart/PortalRun. 
+ */ + if (intoClause) + { + PlannedStmt *pstmt; + + if (list_length(plan_list) != 1) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("prepared statement is not a SELECT"))); + pstmt = linitial_node(PlannedStmt, plan_list); + if (pstmt->commandType != CMD_SELECT) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("prepared statement is not a SELECT"))); + + /* Set appropriate eflags */ + eflags = GetIntoRelEFlags(intoClause); + + /* And tell PortalRun whether to run to completion or not */ + if (intoClause->skipData) + count = 0; + else + count = FETCH_ALL; + } + else + { + /* Plain old EXECUTE */ + eflags = 0; + count = FETCH_ALL; + } + + /* + * Run the portal as appropriate. + */ + PortalStart(portal, paramLI, eflags, GetActiveSnapshot()); + + (void) PortalRun(portal, count, false, true, dest, dest, qc); + + PortalDrop(portal, false); + + if (estate) + FreeExecutorState(estate); + + /* No need to pfree other memory, MemoryContext will be reset */ +} + +/* + * EvaluateParams: evaluate a list of parameters. + * + * pstate: parse state + * pstmt: statement we are getting parameters for. + * params: list of given parameter expressions (raw parser output!) + * estate: executor state to use. + * + * Returns a filled-in ParamListInfo -- this can later be passed to + * CreateQueryDesc(), which allows the executor to make use of the parameters + * during query execution. 
+ */ +static ParamListInfo +EvaluateParams(ParseState *pstate, PreparedStatement *pstmt, List *params, + EState *estate) +{ + Oid *param_types = pstmt->plansource->param_types; + int num_params = pstmt->plansource->num_params; + int nparams = list_length(params); + ParamListInfo paramLI; + List *exprstates; + ListCell *l; + int i; + + if (nparams != num_params) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("wrong number of parameters for prepared statement \"%s\"", + pstmt->stmt_name), + errdetail("Expected %d parameters but got %d.", + num_params, nparams))); + + /* Quick exit if no parameters */ + if (num_params == 0) + return NULL; + + /* + * We have to run parse analysis for the expressions. Since the parser is + * not cool about scribbling on its input, copy first. + */ + params = copyObject(params); + + i = 0; + foreach(l, params) + { + Node *expr = lfirst(l); + Oid expected_type_id = param_types[i]; + Oid given_type_id; + + expr = transformExpr(pstate, expr, EXPR_KIND_EXECUTE_PARAMETER); + + given_type_id = exprType(expr); + + expr = coerce_to_target_type(pstate, expr, given_type_id, + expected_type_id, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (expr == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("parameter $%d of type %s cannot be coerced to the expected type %s", + i + 1, + format_type_be(given_type_id), + format_type_be(expected_type_id)), + errhint("You will need to rewrite or cast the expression."), + parser_errposition(pstate, exprLocation(lfirst(l))))); + + /* Take care of collations in the finished expression. 
*/ + assign_expr_collations(pstate, expr); + + lfirst(l) = expr; + i++; + } + + /* Prepare the expressions for execution */ + exprstates = ExecPrepareExprList(params, estate); + + paramLI = makeParamList(num_params); + + i = 0; + foreach(l, exprstates) + { + ExprState *n = (ExprState *) lfirst(l); + ParamExternData *prm = ¶mLI->params[i]; + + prm->ptype = param_types[i]; + prm->pflags = PARAM_FLAG_CONST; + prm->value = ExecEvalExprSwitchContext(n, + GetPerTupleExprContext(estate), + &prm->isnull); + + i++; + } + + return paramLI; +} + + +/* + * Initialize query hash table upon first use. + */ +static void +InitQueryHashTable(void) +{ + HASHCTL hash_ctl; + + hash_ctl.keysize = NAMEDATALEN; + hash_ctl.entrysize = sizeof(PreparedStatement); + + prepared_queries = hash_create("Prepared Queries", + 32, + &hash_ctl, + HASH_ELEM | HASH_STRINGS); +} + +/* + * Store all the data pertaining to a query in the hash table using + * the specified key. The passed CachedPlanSource should be "unsaved" + * in case we get an error here; we'll save it once we've created the hash + * table entry. 
+ */ +void +StorePreparedStatement(const char *stmt_name, + CachedPlanSource *plansource, + bool from_sql) +{ + PreparedStatement *entry; + TimestampTz cur_ts = GetCurrentStatementStartTimestamp(); + bool found; + + /* Initialize the hash table, if necessary */ + if (!prepared_queries) + InitQueryHashTable(); + + /* Add entry to hash table */ + entry = (PreparedStatement *) hash_search(prepared_queries, + stmt_name, + HASH_ENTER, + &found); + + /* Shouldn't get a duplicate entry */ + if (found) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_PSTATEMENT), + errmsg("prepared statement \"%s\" already exists", + stmt_name))); + + /* Fill in the hash table entry */ + entry->plansource = plansource; + entry->from_sql = from_sql; + entry->prepare_time = cur_ts; + + /* Now it's safe to move the CachedPlanSource to permanent memory */ + SaveCachedPlan(plansource); +} + +/* + * Lookup an existing query in the hash table. If the query does not + * actually exist, throw ereport(ERROR) or return NULL per second parameter. + * + * Note: this does not force the referenced plancache entry to be valid, + * since not all callers care. + */ +PreparedStatement * +FetchPreparedStatement(const char *stmt_name, bool throwError) +{ + PreparedStatement *entry; + + /* + * If the hash table hasn't been initialized, it can't be storing + * anything, therefore it couldn't possibly store our plan. + */ + if (prepared_queries) + entry = (PreparedStatement *) hash_search(prepared_queries, + stmt_name, + HASH_FIND, + NULL); + else + entry = NULL; + + if (!entry && throwError) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_PSTATEMENT), + errmsg("prepared statement \"%s\" does not exist", + stmt_name))); + + return entry; +} + +/* + * Given a prepared statement, determine the result tupledesc it will + * produce. Returns NULL if the execution will not return tuples. + * + * Note: the result is created or copied into current memory context. 
+ */ +TupleDesc +FetchPreparedStatementResultDesc(PreparedStatement *stmt) +{ + /* + * Since we don't allow prepared statements' result tupdescs to change, + * there's no need to worry about revalidating the cached plan here. + */ + Assert(stmt->plansource->fixed_result); + if (stmt->plansource->resultDesc) + return CreateTupleDescCopy(stmt->plansource->resultDesc); + else + return NULL; +} + +/* + * Given a prepared statement that returns tuples, extract the query + * targetlist. Returns NIL if the statement doesn't have a determinable + * targetlist. + * + * Note: this is pretty ugly, but since it's only used in corner cases like + * Describe Statement on an EXECUTE command, we don't worry too much about + * efficiency. + */ +List * +FetchPreparedStatementTargetList(PreparedStatement *stmt) +{ + List *tlist; + + /* Get the plan's primary targetlist */ + tlist = CachedPlanGetTargetList(stmt->plansource, NULL); + + /* Copy into caller's context in case plan gets invalidated */ + return copyObject(tlist); +} + +/* + * Implements the 'DEALLOCATE' utility statement: deletes the + * specified plan from storage. + */ +void +DeallocateQuery(DeallocateStmt *stmt) +{ + if (stmt->name) + DropPreparedStatement(stmt->name, true); + else + DropAllPreparedStatements(); +} + +/* + * Internal version of DEALLOCATE + * + * If showError is false, dropping a nonexistent statement is a no-op. + */ +void +DropPreparedStatement(const char *stmt_name, bool showError) +{ + PreparedStatement *entry; + + /* Find the query's hash table entry; raise error if wanted */ + entry = FetchPreparedStatement(stmt_name, showError); + + if (entry) + { + /* Release the plancache entry */ + DropCachedPlan(entry->plansource); + + /* Now we can remove the hash table entry */ + hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL); + } +} + +/* + * Drop all cached statements. 
+ */ +void +DropAllPreparedStatements(void) +{ + HASH_SEQ_STATUS seq; + PreparedStatement *entry; + + /* nothing cached */ + if (!prepared_queries) + return; + + /* walk over cache */ + hash_seq_init(&seq, prepared_queries); + while ((entry = hash_seq_search(&seq)) != NULL) + { + /* Release the plancache entry */ + DropCachedPlan(entry->plansource); + + /* Now we can remove the hash table entry */ + hash_search(prepared_queries, entry->stmt_name, HASH_REMOVE, NULL); + } +} + +/* + * Implements the 'EXPLAIN EXECUTE' utility statement. + * + * "into" is NULL unless we are doing EXPLAIN CREATE TABLE AS EXECUTE, + * in which case executing the query should result in creating that table. + * + * Note: the passed-in queryString is that of the EXPLAIN EXECUTE, + * not the original PREPARE; we get the latter string from the plancache. + */ +void +ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, + QueryEnvironment *queryEnv) +{ + PreparedStatement *entry; + const char *query_string; + CachedPlan *cplan; + List *plan_list; + ListCell *p; + ParamListInfo paramLI = NULL; + EState *estate = NULL; + instr_time planstart; + instr_time planduration; + BufferUsage bufusage_start, + bufusage; + + if (es->buffers) + bufusage_start = pgBufferUsage; + INSTR_TIME_SET_CURRENT(planstart); + + /* Look it up in the hash table */ + entry = FetchPreparedStatement(execstmt->name, true); + + /* Shouldn't find a non-fixed-result cached plan */ + if (!entry->plansource->fixed_result) + elog(ERROR, "EXPLAIN EXECUTE does not support variable-result cached plans"); + + query_string = entry->plansource->query_string; + + /* Evaluate parameters, if any */ + if (entry->plansource->num_params) + { + ParseState *pstate; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Need an EState to evaluate parameters; must not delete it till end + * of query, in case parameters are pass-by-reference. 
Note that the + * passed-in "params" could possibly be referenced in the parameter + * expressions. + */ + estate = CreateExecutorState(); + estate->es_param_list_info = params; + + paramLI = EvaluateParams(pstate, entry, execstmt->params, estate); + } + + /* Replan if needed, and acquire a transient refcount */ + cplan = GetCachedPlan(entry->plansource, paramLI, + CurrentResourceOwner, queryEnv); + + INSTR_TIME_SET_CURRENT(planduration); + INSTR_TIME_SUBTRACT(planduration, planstart); + + /* calc differences of buffer counters. */ + if (es->buffers) + { + memset(&bufusage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); + } + + plan_list = cplan->stmt_list; + + /* Explain each query */ + foreach(p, plan_list) + { + PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); + + if (pstmt->commandType != CMD_UTILITY) + ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv, + &planduration, (es->buffers ? &bufusage : NULL)); + else + ExplainOneUtility(pstmt->utilityStmt, into, es, query_string, + paramLI, queryEnv); + + /* No need for CommandCounterIncrement, as ExplainOnePlan did it */ + + /* Separate plans with an appropriate separator */ + if (lnext(plan_list, p) != NULL) + ExplainSeparatePlans(es); + } + + if (estate) + FreeExecutorState(estate); + + ReleaseCachedPlan(cplan, CurrentResourceOwner); +} + +/* + * This set returning function reads all the prepared statements and + * returns a set of (name, statement, prepare_time, param_types, from_sql, + * generic_plans, custom_plans). + */ +Datum +pg_prepared_statement(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + /* + * We put all the tuples into a tuplestore in one scan of the hashtable. + * This avoids any issue of the hashtable possibly changing between calls. 
+ */ + InitMaterializedSRF(fcinfo, 0); + + /* hash table might be uninitialized */ + if (prepared_queries) + { + HASH_SEQ_STATUS hash_seq; + PreparedStatement *prep_stmt; + + hash_seq_init(&hash_seq, prepared_queries); + while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL) + { + Datum values[7]; + bool nulls[7]; + + MemSet(nulls, 0, sizeof(nulls)); + + values[0] = CStringGetTextDatum(prep_stmt->stmt_name); + values[1] = CStringGetTextDatum(prep_stmt->plansource->query_string); + values[2] = TimestampTzGetDatum(prep_stmt->prepare_time); + values[3] = build_regtype_array(prep_stmt->plansource->param_types, + prep_stmt->plansource->num_params); + values[4] = BoolGetDatum(prep_stmt->from_sql); + values[5] = Int64GetDatumFast(prep_stmt->plansource->num_generic_plans); + values[6] = Int64GetDatumFast(prep_stmt->plansource->num_custom_plans); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + + return (Datum) 0; +} + +/* + * This utility function takes a C array of Oids, and returns a Datum + * pointing to a one-dimensional Postgres array of regtypes. An empty + * array is returned as a zero-element array, not NULL. + */ +static Datum +build_regtype_array(Oid *param_types, int num_params) +{ + Datum *tmp_ary; + ArrayType *result; + int i; + + tmp_ary = (Datum *) palloc(num_params * sizeof(Datum)); + + for (i = 0; i < num_params; i++) + tmp_ary[i] = ObjectIdGetDatum(param_types[i]); + + /* XXX: this hardcodes assumptions about the regtype type */ + result = construct_array(tmp_ary, num_params, REGTYPEOID, + 4, true, TYPALIGN_INT); + return PointerGetDatum(result); +} diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c new file mode 100644 index 0000000..4a093f4 --- /dev/null +++ b/src/backend/commands/proclang.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * proclang.c + * PostgreSQL LANGUAGE support code. 
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/proclang.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/proclang.h" +#include "miscadmin.h" +#include "parser/parse_func.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/* + * CREATE LANGUAGE + */ +ObjectAddress +CreateProceduralLanguage(CreatePLangStmt *stmt) +{ + const char *languageName = stmt->plname; + Oid languageOwner = GetUserId(); + Oid handlerOid, + inlineOid, + valOid; + Oid funcrettype; + Oid funcargtypes[1]; + Relation rel; + TupleDesc tupDesc; + Datum values[Natts_pg_language]; + bool nulls[Natts_pg_language]; + bool replaces[Natts_pg_language]; + NameData langname; + HeapTuple oldtup; + HeapTuple tup; + Oid langoid; + bool is_update; + ObjectAddress myself, + referenced; + ObjectAddresses *addrs; + + /* + * Check permission + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create custom procedural language"))); + + /* + * Lookup the PL handler function and check that it is of the expected + * return type + */ + Assert(stmt->plhandler); + handlerOid = LookupFuncName(stmt->plhandler, 0, NULL, false); + funcrettype = get_func_rettype(handlerOid); + if (funcrettype != LANGUAGE_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + NameListToString(stmt->plhandler), 
"language_handler"))); + + /* validate the inline function */ + if (stmt->plinline) + { + funcargtypes[0] = INTERNALOID; + inlineOid = LookupFuncName(stmt->plinline, 1, funcargtypes, false); + /* return value is ignored, so we don't check the type */ + } + else + inlineOid = InvalidOid; + + /* validate the validator function */ + if (stmt->plvalidator) + { + funcargtypes[0] = OIDOID; + valOid = LookupFuncName(stmt->plvalidator, 1, funcargtypes, false); + /* return value is ignored, so we don't check the type */ + } + else + valOid = InvalidOid; + + /* ok to create it */ + rel = table_open(LanguageRelationId, RowExclusiveLock); + tupDesc = RelationGetDescr(rel); + + /* Prepare data to be inserted */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, true, sizeof(replaces)); + + namestrcpy(&langname, languageName); + values[Anum_pg_language_lanname - 1] = NameGetDatum(&langname); + values[Anum_pg_language_lanowner - 1] = ObjectIdGetDatum(languageOwner); + values[Anum_pg_language_lanispl - 1] = BoolGetDatum(true); + values[Anum_pg_language_lanpltrusted - 1] = BoolGetDatum(stmt->pltrusted); + values[Anum_pg_language_lanplcallfoid - 1] = ObjectIdGetDatum(handlerOid); + values[Anum_pg_language_laninline - 1] = ObjectIdGetDatum(inlineOid); + values[Anum_pg_language_lanvalidator - 1] = ObjectIdGetDatum(valOid); + nulls[Anum_pg_language_lanacl - 1] = true; + + /* Check for pre-existing definition */ + oldtup = SearchSysCache1(LANGNAME, PointerGetDatum(languageName)); + + if (HeapTupleIsValid(oldtup)) + { + Form_pg_language oldform = (Form_pg_language) GETSTRUCT(oldtup); + + /* There is one; okay to replace it? 
*/ + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("language \"%s\" already exists", languageName))); + + /* This is currently pointless, since we already checked superuser */ +#ifdef NOT_USED + if (!pg_language_ownercheck(oldform->oid, languageOwner)) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_LANGUAGE, + languageName); +#endif + + /* + * Do not change existing oid, ownership or permissions. Note + * dependency-update code below has to agree with this decision. + */ + replaces[Anum_pg_language_oid - 1] = false; + replaces[Anum_pg_language_lanowner - 1] = false; + replaces[Anum_pg_language_lanacl - 1] = false; + + /* Okay, do it... */ + tup = heap_modify_tuple(oldtup, tupDesc, values, nulls, replaces); + CatalogTupleUpdate(rel, &tup->t_self, tup); + + langoid = oldform->oid; + ReleaseSysCache(oldtup); + is_update = true; + } + else + { + /* Creating a new language */ + langoid = GetNewOidWithIndex(rel, LanguageOidIndexId, + Anum_pg_language_oid); + values[Anum_pg_language_oid - 1] = ObjectIdGetDatum(langoid); + tup = heap_form_tuple(tupDesc, values, nulls); + CatalogTupleInsert(rel, tup); + is_update = false; + } + + /* + * Create dependencies for the new language. If we are updating an + * existing language, first delete any existing pg_depend entries. + * (However, since we are not changing ownership or permissions, the + * shared dependencies do *not* need to change, and we leave them alone.) 
+ */ + myself.classId = LanguageRelationId; + myself.objectId = langoid; + myself.objectSubId = 0; + + if (is_update) + deleteDependencyRecordsFor(myself.classId, myself.objectId, true); + + /* dependency on owner of language */ + if (!is_update) + recordDependencyOnOwner(myself.classId, myself.objectId, + languageOwner); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, is_update); + + addrs = new_object_addresses(); + + /* dependency on the PL handler function */ + ObjectAddressSet(referenced, ProcedureRelationId, handlerOid); + add_exact_object_address(&referenced, addrs); + + /* dependency on the inline handler function, if any */ + if (OidIsValid(inlineOid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, inlineOid); + add_exact_object_address(&referenced, addrs); + } + + /* dependency on the validator function, if any */ + if (OidIsValid(valOid)) + { + ObjectAddressSet(referenced, ProcedureRelationId, valOid); + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + /* Post creation hook for new procedural language */ + InvokeObjectPostCreateHook(LanguageRelationId, myself.objectId, 0); + + table_close(rel, RowExclusiveLock); + + return myself; +} + +/* + * get_language_oid - given a language name, look up the OID + * + * If missing_ok is false, throw an error if language name not found. If + * true, just return InvalidOid. 
+ */ +Oid +get_language_oid(const char *langname, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid1(LANGNAME, Anum_pg_language_oid, + CStringGetDatum(langname)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("language \"%s\" does not exist", langname))); + return oid; +} diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c new file mode 100644 index 0000000..473c72e --- /dev/null +++ b/src/backend/commands/publicationcmds.c @@ -0,0 +1,2006 @@ +/*------------------------------------------------------------------------- + * + * publicationcmds.c + * publication manipulation + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/publicationcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_namespace.h" +#include "catalog/pg_publication_rel.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/publicationcmds.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_relation.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/array.h" +#include 
"utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/varlena.h" + + +/* + * Information used to validate the columns in the row filter expression. See + * contain_invalid_rfcolumn_walker for details. + */ +typedef struct rf_context +{ + Bitmapset *bms_replident; /* bitset of replica identity columns */ + bool pubviaroot; /* true if we are validating the parent + * relation's row filter */ + Oid relid; /* relid of the relation */ + Oid parentid; /* relid of the parent relation */ +} rf_context; + +static List *OpenTableList(List *tables); +static void CloseTableList(List *rels); +static void LockSchemaList(List *schemalist); +static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists, + AlterPublicationStmt *stmt); +static void PublicationDropTables(Oid pubid, List *rels, bool missing_ok); +static void PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists, + AlterPublicationStmt *stmt); +static void PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok); + + +static void +parse_publication_options(ParseState *pstate, + List *options, + bool *publish_given, + PublicationActions *pubactions, + bool *publish_via_partition_root_given, + bool *publish_via_partition_root) +{ + ListCell *lc; + + *publish_given = false; + *publish_via_partition_root_given = false; + + /* defaults */ + pubactions->pubinsert = true; + pubactions->pubupdate = true; + pubactions->pubdelete = true; + pubactions->pubtruncate = true; + *publish_via_partition_root = false; + + /* Parse options */ + foreach(lc, options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "publish") == 0) + { + char *publish; + List *publish_list; + ListCell *lc; + + if (*publish_given) + errorConflictingDefElem(defel, pstate); + + /* + * If publish option was given only the explicitly listed actions + * 
should be published. + */ + pubactions->pubinsert = false; + pubactions->pubupdate = false; + pubactions->pubdelete = false; + pubactions->pubtruncate = false; + + *publish_given = true; + publish = defGetString(defel); + + if (!SplitIdentifierString(publish, ',', &publish_list)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid list syntax in parameter \"%s\"", + "publish"))); + + /* Process the option list. */ + foreach(lc, publish_list) + { + char *publish_opt = (char *) lfirst(lc); + + if (strcmp(publish_opt, "insert") == 0) + pubactions->pubinsert = true; + else if (strcmp(publish_opt, "update") == 0) + pubactions->pubupdate = true; + else if (strcmp(publish_opt, "delete") == 0) + pubactions->pubdelete = true; + else if (strcmp(publish_opt, "truncate") == 0) + pubactions->pubtruncate = true; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized value for publication option \"%s\": \"%s\"", + "publish", publish_opt))); + } + } + else if (strcmp(defel->defname, "publish_via_partition_root") == 0) + { + if (*publish_via_partition_root_given) + errorConflictingDefElem(defel, pstate); + *publish_via_partition_root_given = true; + *publish_via_partition_root = defGetBoolean(defel); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized publication parameter: \"%s\"", defel->defname))); + } +} + +/* + * Convert the PublicationObjSpecType list into schema oid list and + * PublicationTable list. 
 */
static void
ObjectsInPublicationToOids(List *pubobjspec_list, ParseState *pstate,
						   List **rels, List **schemas)
{
	ListCell   *cell;
	PublicationObjSpec *pubobj;

	/* Nothing to collect for an empty object list. */
	if (!pubobjspec_list)
		return;

	foreach(cell, pubobjspec_list)
	{
		Oid			schemaid;
		List	   *search_path;

		pubobj = (PublicationObjSpec *) lfirst(cell);

		switch (pubobj->pubobjtype)
		{
			case PUBLICATIONOBJ_TABLE:
				/* Tables are returned as-is for later OpenTableList(). */
				*rels = lappend(*rels, pubobj->pubtable);
				break;
			case PUBLICATIONOBJ_TABLES_IN_SCHEMA:
				schemaid = get_namespace_oid(pubobj->name, false);

				/* Filter out duplicates if user specifies "sch1, sch1" */
				*schemas = list_append_unique_oid(*schemas, schemaid);
				break;
			case PUBLICATIONOBJ_TABLES_IN_CUR_SCHEMA:
				search_path = fetch_search_path(false);
				if (search_path == NIL) /* nothing valid in search_path? */
					ereport(ERROR,
							errcode(ERRCODE_UNDEFINED_SCHEMA),
							errmsg("no schema has been selected for CURRENT_SCHEMA"));

				/* CURRENT_SCHEMA resolves to the first valid search_path entry. */
				schemaid = linitial_oid(search_path);
				list_free(search_path);

				/* Filter out duplicates if user specifies "sch1, sch1" */
				*schemas = list_append_unique_oid(*schemas, schemaid);
				break;
			default:
				/* shouldn't happen */
				elog(ERROR, "invalid publication object type %d", pubobj->pubobjtype);
				break;
		}
	}
}

/*
 * Returns true if any of the columns used in the row filter WHERE expression is
 * not part of REPLICA IDENTITY, false otherwise.
 *
 * Walker for expression_tree_walker(); "context" carries the replica identity
 * column bitmap plus parent/child relation OIDs for attnum translation.
 */
static bool
contain_invalid_rfcolumn_walker(Node *node, rf_context *context)
{
	if (node == NULL)
		return false;

	if (IsA(node, Var))
	{
		Var		   *var = (Var *) node;
		AttrNumber	attnum = var->varattno;

		/*
		 * If pubviaroot is true, we are validating the row filter of the
		 * parent table, but the bitmap contains the replica identity
		 * information of the child table. So, get the column number of the
		 * child table as parent and child column order could be different.
		 */
		if (context->pubviaroot)
		{
			char	   *colname = get_attname(context->parentid, attnum, false);

			attnum = get_attnum(context->relid, colname);
		}

		/*
		 * bms_replident stores attnums offset by
		 * FirstLowInvalidHeapAttributeNumber (system-column convention), so
		 * apply the same offset before the membership test.
		 */
		if (!bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
						   context->bms_replident))
			return true;
	}

	/* Recurse into the rest of the expression tree. */
	return expression_tree_walker(node, contain_invalid_rfcolumn_walker,
								  (void *) context);
}

/*
 * Check if all columns referenced in the filter expression are part of the
 * REPLICA IDENTITY index or not.
 *
 * Returns true if any invalid column is found.
 */
bool
pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
							   bool pubviaroot)
{
	HeapTuple	rftuple;
	Oid			relid = RelationGetRelid(relation);
	Oid			publish_as_relid = RelationGetRelid(relation);
	bool		result = false;
	Datum		rfdatum;
	bool		rfisnull;

	/*
	 * FULL means all columns are in the REPLICA IDENTITY, so all columns are
	 * allowed in the row filter and we can skip the validation.
	 */
	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
		return false;

	/*
	 * For a partition, if pubviaroot is true, find the topmost ancestor that
	 * is published via this publication as we need to use its row filter
	 * expression to filter the partition's changes.
	 *
	 * Note that even though the row filter used is for an ancestor, the
	 * REPLICA IDENTITY used will be for the actual child table.
	 */
	if (pubviaroot && relation->rd_rel->relispartition)
	{
		publish_as_relid
			= GetTopMostAncestorInPublication(pubid, ancestors, NULL);

		if (!OidIsValid(publish_as_relid))
			publish_as_relid = relid;
	}

	rftuple = SearchSysCache2(PUBLICATIONRELMAP,
							  ObjectIdGetDatum(publish_as_relid),
							  ObjectIdGetDatum(pubid));

	/* Relation is not directly in this publication: nothing to validate. */
	if (!HeapTupleIsValid(rftuple))
		return false;

	rfdatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
							  Anum_pg_publication_rel_prqual,
							  &rfisnull);

	if (!rfisnull)
	{
		rf_context	context = {0};
		Node	   *rfnode;
		Bitmapset  *bms = NULL;

		context.pubviaroot = pubviaroot;
		context.parentid = publish_as_relid;
		context.relid = relid;

		/* Remember columns that are part of the REPLICA IDENTITY */
		bms = RelationGetIndexAttrBitmap(relation,
										 INDEX_ATTR_BITMAP_IDENTITY_KEY);

		context.bms_replident = bms;
		rfnode = stringToNode(TextDatumGetCString(rfdatum));
		result = contain_invalid_rfcolumn_walker(rfnode, &context);
	}

	ReleaseSysCache(rftuple);

	return result;
}

/*
 * Check if all columns referenced in the REPLICA IDENTITY are covered by
 * the column list.
 *
 * Returns true if any replica identity column is not covered by column list.
 */
bool
pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
									bool pubviaroot)
{
	HeapTuple	tuple;
	Oid			relid = RelationGetRelid(relation);
	Oid			publish_as_relid = RelationGetRelid(relation);
	bool		result = false;
	Datum		datum;
	bool		isnull;

	/*
	 * For a partition, if pubviaroot is true, find the topmost ancestor that
	 * is published via this publication as we need to use its column list for
	 * the changes.
	 *
	 * Note that even though the column list used is for an ancestor, the
	 * REPLICA IDENTITY used will be for the actual child table.
	 */
	if (pubviaroot && relation->rd_rel->relispartition)
	{
		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);

		if (!OidIsValid(publish_as_relid))
			publish_as_relid = relid;
	}

	tuple = SearchSysCache2(PUBLICATIONRELMAP,
							ObjectIdGetDatum(publish_as_relid),
							ObjectIdGetDatum(pubid));

	/* Relation is not directly in this publication: nothing to validate. */
	if (!HeapTupleIsValid(tuple))
		return false;

	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
							Anum_pg_publication_rel_prattrs,
							&isnull);

	if (!isnull)
	{
		int			x;
		Bitmapset  *idattrs;
		Bitmapset  *columns = NULL;

		/* With REPLICA IDENTITY FULL, no column list is allowed. */
		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
			result = true;

		/* Transform the column list datum to a bitmapset. */
		columns = pub_collist_to_bitmapset(NULL, datum, NULL);

		/* Remember columns that are part of the REPLICA IDENTITY */
		idattrs = RelationGetIndexAttrBitmap(relation,
											 INDEX_ATTR_BITMAP_IDENTITY_KEY);

		/*
		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
		 * offset (to handle system columns the usual way), while column list
		 * does not use offset, so we can't do bms_is_subset(). Instead, we
		 * have to loop over the idattrs and check all of them are in the
		 * list.
		 */
		x = -1;
		while ((x = bms_next_member(idattrs, x)) >= 0)
		{
			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);

			/*
			 * If pubviaroot is true, we are validating the column list of the
			 * parent table, but the bitmap contains the replica identity
			 * information of the child table. The parent/child attnums may
			 * not match, so translate them to the parent - get the attname
			 * from the child, and look it up in the parent.
			 */
			if (pubviaroot)
			{
				/* attribute name in the child table */
				char	   *colname = get_attname(relid, attnum, false);

				/*
				 * Determine the attnum for the attribute name in parent (we
				 * are using the column list defined on the parent).
				 */
				attnum = get_attnum(publish_as_relid, colname);
			}

			/* replica identity column, not covered by the column list */
			if (!bms_is_member(attnum, columns))
			{
				result = true;
				break;
			}
		}

		bms_free(idattrs);
		bms_free(columns);
	}

	ReleaseSysCache(tuple);

	return result;
}

/*
 * check_functions_in_node callback: reject any function that is either not
 * immutable or is user-defined (OID at or above FirstNormalObjectId).
 */
static bool
contain_mutable_or_user_functions_checker(Oid func_id, void *context)
{
	return (func_volatile(func_id) != PROVOLATILE_IMMUTABLE ||
			func_id >= FirstNormalObjectId);
}

/*
 * The row filter walker checks if the row filter expression is a "simple
 * expression".
 *
 * It allows only simple or compound expressions such as:
 * - (Var Op Const)
 * - (Var Op Var)
 * - (Var Op Const) AND/OR (Var Op Const)
 * - etc
 * (where Var is a column of the table this filter belongs to)
 *
 * The simple expression has the following restrictions:
 * - User-defined operators are not allowed;
 * - User-defined functions are not allowed;
 * - User-defined types are not allowed;
 * - User-defined collations are not allowed;
 * - Non-immutable built-in functions are not allowed;
 * - System columns are not allowed.
 *
 * NOTES
 *
 * We don't allow user-defined functions/operators/types/collations because
 * (a) if a user drops a user-defined object used in a row filter expression or
 * if there is any other error while using it, the logical decoding
 * infrastructure won't be able to recover from such an error even if the
 * object is recreated again because a historic snapshot is used to evaluate
 * the row filter;
 * (b) a user-defined function can be used to access tables that could have
 * unpleasant results because a historic snapshot is used. That's why only
 * immutable built-in functions are allowed in row filter expressions.
 *
 * We don't allow system columns because currently, we don't have that
 * information in the tuple passed to downstream. Also, as we don't replicate
 * those to subscribers, there doesn't seem to be a need for a filter on those
 * columns.
 *
 * We can allow other node types after more analysis and testing.
 */
static bool
check_simple_rowfilter_expr_walker(Node *node, ParseState *pstate)
{
	char	   *errdetail_msg = NULL;

	if (node == NULL)
		return false;

	switch (nodeTag(node))
	{
		case T_Var:
			/* System columns are not allowed. */
			if (((Var *) node)->varattno < InvalidAttrNumber)
				errdetail_msg = _("System columns are not allowed.");
			break;
		case T_OpExpr:
		case T_DistinctExpr:
		case T_NullIfExpr:
			/* OK, except user-defined operators are not allowed. */
			if (((OpExpr *) node)->opno >= FirstNormalObjectId)
				errdetail_msg = _("User-defined operators are not allowed.");
			break;
		case T_ScalarArrayOpExpr:
			/* OK, except user-defined operators are not allowed. */
			if (((ScalarArrayOpExpr *) node)->opno >= FirstNormalObjectId)
				errdetail_msg = _("User-defined operators are not allowed.");

			/*
			 * We don't need to check the hashfuncid and negfuncid of
			 * ScalarArrayOpExpr as those functions are only built for a
			 * subquery.
			 */
			break;
		case T_RowCompareExpr:
			{
				ListCell   *opid;

				/* OK, except user-defined operators are not allowed. */
				foreach(opid, ((RowCompareExpr *) node)->opnos)
				{
					if (lfirst_oid(opid) >= FirstNormalObjectId)
					{
						errdetail_msg = _("User-defined operators are not allowed.");
						break;
					}
				}
			}
			break;
		case T_Const:
		case T_FuncExpr:
		case T_BoolExpr:
		case T_RelabelType:
		case T_CollateExpr:
		case T_CaseExpr:
		case T_CaseTestExpr:
		case T_ArrayExpr:
		case T_RowExpr:
		case T_CoalesceExpr:
		case T_MinMaxExpr:
		case T_XmlExpr:
		case T_NullTest:
		case T_BooleanTest:
		case T_List:
			/* OK, supported */
			break;
		default:
			errdetail_msg = _("Only columns, constants, built-in operators, built-in data types, built-in collations, and immutable built-in functions are allowed.");
			break;
	}

	/*
	 * For all the supported nodes, if we haven't already found a problem,
	 * check the types, functions, and collations used in it. We check List
	 * by walking through each element.
	 */
	if (!errdetail_msg && !IsA(node, List))
	{
		if (exprType(node) >= FirstNormalObjectId)
			errdetail_msg = _("User-defined types are not allowed.");
		else if (check_functions_in_node(node, contain_mutable_or_user_functions_checker,
										 (void *) pstate))
			errdetail_msg = _("User-defined or built-in mutable functions are not allowed.");
		else if (exprCollation(node) >= FirstNormalObjectId ||
				 exprInputCollation(node) >= FirstNormalObjectId)
			errdetail_msg = _("User-defined collations are not allowed.");
	}

	/*
	 * If we found a problem in this node, throw error now. Otherwise keep
	 * going.
	 */
	if (errdetail_msg)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("invalid publication WHERE expression"),
				 errdetail_internal("%s", errdetail_msg),
				 parser_errposition(pstate, exprLocation(node))));

	return expression_tree_walker(node, check_simple_rowfilter_expr_walker,
								  (void *) pstate);
}

/*
 * Check if the row filter expression is a "simple expression".
 *
 * See check_simple_rowfilter_expr_walker for details.
 */
static bool
check_simple_rowfilter_expr(Node *node, ParseState *pstate)
{
	/* Thin entry point; the walker does all validation and error reporting. */
	return check_simple_rowfilter_expr_walker(node, pstate);
}

/*
 * Transform the publication WHERE expression for all the relations in the list,
 * ensuring it is coerced to boolean and necessary collation information is
 * added if required, and add a new nsitem/RTE for the associated relation to
 * the ParseState's namespace list.
 *
 * Also check the publication row filter expression and throw an error if
 * anything not permitted or unexpected is encountered.
 *
 * On success, each entry's whereClause is replaced with its transformed form.
 */
static void
TransformPubWhereClauses(List *tables, const char *queryString,
						 bool pubviaroot)
{
	ListCell   *lc;

	foreach(lc, tables)
	{
		ParseNamespaceItem *nsitem;
		Node	   *whereclause = NULL;
		ParseState *pstate;
		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);

		/* Tables without a WHERE clause need no transformation. */
		if (pri->whereClause == NULL)
			continue;

		/*
		 * If the publication doesn't publish changes via the root partitioned
		 * table, the partition's row filter will be used. So disallow using
		 * WHERE clause on partitioned table in this case.
		 */
		if (!pubviaroot &&
			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot use publication WHERE clause for relation \"%s\"",
							RelationGetRelationName(pri->relation)),
					 errdetail("WHERE clause cannot be used for a partitioned table when %s is false.",
							   "publish_via_partition_root")));

		/*
		 * A fresh pstate is required so that we only have "this" table in its
		 * rangetable
		 */
		pstate = make_parsestate(NULL);
		pstate->p_sourcetext = queryString;
		nsitem = addRangeTableEntryForRelation(pstate, pri->relation,
											   AccessShareLock, NULL,
											   false, false);
		addNSItemToQuery(pstate, nsitem, false, true, true);

		/* Coerce to boolean; copyObject keeps the raw clause intact. */
		whereclause = transformWhereClause(pstate,
										   copyObject(pri->whereClause),
										   EXPR_KIND_WHERE,
										   "PUBLICATION WHERE");

		/* Fix up collation information */
		assign_expr_collations(pstate, whereclause);

		/*
		 * We allow only simple expressions in row filters. See
		 * check_simple_rowfilter_expr_walker.
		 */
		check_simple_rowfilter_expr(whereclause, pstate);

		free_parsestate(pstate);

		pri->whereClause = whereclause;
	}
}


/*
 * Given a list of tables that are going to be added to a publication,
 * verify that they fulfill the necessary preconditions, namely: no tables
 * have a column list if any schema is published; and partitioned tables do
 * not have column lists if publish_via_partition_root is not set.
 *
 * 'publish_schema' indicates that the publication contains any TABLES IN
 * SCHEMA elements (newly added in this command, or preexisting).
 * 'pubviaroot' is the value of publish_via_partition_root.
 */
static void
CheckPubRelationColumnList(char *pubname, List *tables,
						   bool publish_schema, bool pubviaroot)
{
	ListCell   *lc;

	foreach(lc, tables)
	{
		PublicationRelInfo *pri = (PublicationRelInfo *) lfirst(lc);

		/* Only tables with a column list need checking. */
		if (pri->columns == NIL)
			continue;

		/*
		 * Disallow specifying column list if any schema is in the
		 * publication.
		 *
		 * XXX We could instead just forbid the case when the publication
		 * tries to publish the table with a column list and a schema for that
		 * table. However, if we do that then we need a restriction during
		 * ALTER TABLE ... SET SCHEMA to prevent such a case which doesn't
		 * seem to be a good idea.
		 */
		if (publish_schema)
			ereport(ERROR,
					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
						   get_namespace_name(RelationGetNamespace(pri->relation)),
						   RelationGetRelationName(pri->relation), pubname),
					errdetail("Column lists cannot be specified in publications containing FOR TABLES IN SCHEMA elements."));

		/*
		 * If the publication doesn't publish changes via the root partitioned
		 * table, the partition's column list will be used. So disallow using
		 * a column list on the partitioned table in this case.
		 */
		if (!pubviaroot &&
			pri->relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot use column list for relation \"%s.%s\" in publication \"%s\"",
							get_namespace_name(RelationGetNamespace(pri->relation)),
							RelationGetRelationName(pri->relation), pubname),
					 errdetail("Column lists cannot be specified for partitioned tables when %s is false.",
							   "publish_via_partition_root")));
	}
}

/*
 * Create new publication.
 */
ObjectAddress
CreatePublication(ParseState *pstate, CreatePublicationStmt *stmt)
{
	Relation	rel;
	ObjectAddress myself;
	Oid			puboid;
	bool		nulls[Natts_pg_publication];
	Datum		values[Natts_pg_publication];
	HeapTuple	tup;
	bool		publish_given;
	PublicationActions pubactions;
	bool		publish_via_partition_root_given;
	bool		publish_via_partition_root;
	AclResult	aclresult;
	List	   *relations = NIL;
	List	   *schemaidlist = NIL;

	/* must have CREATE privilege on database */
	aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_DATABASE,
					   get_database_name(MyDatabaseId));

	/* FOR ALL TABLES requires superuser */
	if (stmt->for_all_tables && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to create FOR ALL TABLES publication")));

	rel = table_open(PublicationRelationId, RowExclusiveLock);

	/* Check if name is used */
	puboid = GetSysCacheOid1(PUBLICATIONNAME, Anum_pg_publication_oid,
							 CStringGetDatum(stmt->pubname));
	if (OidIsValid(puboid))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("publication \"%s\" already exists",
						stmt->pubname)));

	/* Form a tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));

	values[Anum_pg_publication_pubname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname));
	values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId());

	/* Parse WITH (...) options; errors out on bad/duplicate options. */
	parse_publication_options(pstate,
							  stmt->options,
							  &publish_given, &pubactions,
							  &publish_via_partition_root_given,
							  &publish_via_partition_root);

	puboid = GetNewOidWithIndex(rel, PublicationObjectIndexId,
								Anum_pg_publication_oid);
	values[Anum_pg_publication_oid - 1] = ObjectIdGetDatum(puboid);
	values[Anum_pg_publication_puballtables - 1] =
		BoolGetDatum(stmt->for_all_tables);
	values[Anum_pg_publication_pubinsert - 1] =
		BoolGetDatum(pubactions.pubinsert);
	values[Anum_pg_publication_pubupdate - 1] =
		BoolGetDatum(pubactions.pubupdate);
	values[Anum_pg_publication_pubdelete - 1] =
		BoolGetDatum(pubactions.pubdelete);
	values[Anum_pg_publication_pubtruncate - 1] =
		BoolGetDatum(pubactions.pubtruncate);
	values[Anum_pg_publication_pubviaroot - 1] =
		BoolGetDatum(publish_via_partition_root);

	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

	/* Insert tuple into catalog. */
	CatalogTupleInsert(rel, tup);
	heap_freetuple(tup);

	recordDependencyOnOwner(PublicationRelationId, puboid, GetUserId());

	ObjectAddressSet(myself, PublicationRelationId, puboid);

	/* Make the changes visible. */
	CommandCounterIncrement();

	/* Associate objects with the publication. */
	if (stmt->for_all_tables)
	{
		/* Invalidate relcache so that publication info is rebuilt. */
		CacheInvalidateRelcacheAll();
	}
	else
	{
		/* Split object list into explicit tables and schema OIDs. */
		ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
								   &schemaidlist);

		/* FOR TABLES IN SCHEMA requires superuser */
		if (schemaidlist != NIL && !superuser())
			ereport(ERROR,
					errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					errmsg("must be superuser to create FOR TABLES IN SCHEMA publication"));

		if (list_length(relations) > 0)
		{
			List	   *rels;

			rels = OpenTableList(relations);
			TransformPubWhereClauses(rels, pstate->p_sourcetext,
									 publish_via_partition_root);

			CheckPubRelationColumnList(stmt->pubname, rels,
									   schemaidlist != NIL,
									   publish_via_partition_root);

			PublicationAddTables(puboid, rels, true, NULL);
			CloseTableList(rels);
		}

		if (list_length(schemaidlist) > 0)
		{
			/*
			 * Schema lock is held until the publication is created to prevent
			 * concurrent schema deletion.
			 */
			LockSchemaList(schemaidlist);
			PublicationAddSchemas(puboid, schemaidlist, true, NULL);
		}
	}

	table_close(rel, RowExclusiveLock);

	InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);

	/* Publication is created regardless; just warn that it can't work yet. */
	if (wal_level != WAL_LEVEL_LOGICAL)
		ereport(WARNING,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("wal_level is insufficient to publish logical changes"),
				 errhint("Set wal_level to \"logical\" before creating subscriptions.")));

	return myself;
}

/*
 * Change options of a publication.
 */
static void
AlterPublicationOptions(ParseState *pstate, AlterPublicationStmt *stmt,
						Relation rel, HeapTuple tup)
{
	bool		nulls[Natts_pg_publication];
	bool		replaces[Natts_pg_publication];
	Datum		values[Natts_pg_publication];
	bool		publish_given;
	PublicationActions pubactions;
	bool		publish_via_partition_root_given;
	bool		publish_via_partition_root;
	ObjectAddress obj;
	Form_pg_publication pubform;
	List	   *root_relids = NIL;
	ListCell   *lc;

	parse_publication_options(pstate,
							  stmt->options,
							  &publish_given, &pubactions,
							  &publish_via_partition_root_given,
							  &publish_via_partition_root);

	pubform = (Form_pg_publication) GETSTRUCT(tup);

	/*
	 * If the publication doesn't publish changes via the root partitioned
	 * table, the partition's row filter and column list will be used. So
	 * disallow using WHERE clause and column lists on partitioned table in
	 * this case.
	 */
	if (!pubform->puballtables && publish_via_partition_root_given &&
		!publish_via_partition_root)
	{
		/*
		 * Lock the publication so nobody else can do anything with it. This
		 * prevents concurrent alter to add partitioned table(s) with WHERE
		 * clause(s) and/or column lists which we don't allow when not
		 * publishing via root.
		 */
		LockDatabaseObject(PublicationRelationId, pubform->oid, 0,
						   AccessShareLock);

		root_relids = GetPublicationRelations(pubform->oid,
											  PUBLICATION_PART_ROOT);

		foreach(lc, root_relids)
		{
			Oid			relid = lfirst_oid(lc);
			HeapTuple	rftuple;
			char		relkind;
			char	   *relname;
			bool		has_rowfilter;
			bool		has_collist;

			/*
			 * Beware: we don't have lock on the relations, so cope silently
			 * with the cache lookups returning NULL.
			 */

			rftuple = SearchSysCache2(PUBLICATIONRELMAP,
									  ObjectIdGetDatum(relid),
									  ObjectIdGetDatum(pubform->oid));
			if (!HeapTupleIsValid(rftuple))
				continue;
			has_rowfilter = !heap_attisnull(rftuple, Anum_pg_publication_rel_prqual, NULL);
			has_collist = !heap_attisnull(rftuple, Anum_pg_publication_rel_prattrs, NULL);
			if (!has_rowfilter && !has_collist)
			{
				ReleaseSysCache(rftuple);
				continue;
			}

			relkind = get_rel_relkind(relid);
			if (relkind != RELKIND_PARTITIONED_TABLE)
			{
				ReleaseSysCache(rftuple);
				continue;
			}
			relname = get_rel_name(relid);
			if (relname == NULL)	/* table concurrently dropped */
			{
				ReleaseSysCache(rftuple);
				continue;
			}

			if (has_rowfilter)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
								"publish_via_partition_root",
								stmt->pubname),
						 errdetail("The publication contains a WHERE clause for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
								   relname, "publish_via_partition_root")));
			Assert(has_collist);
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("cannot set parameter \"%s\" to false for publication \"%s\"",
							"publish_via_partition_root",
							stmt->pubname),
					 errdetail("The publication contains a column list for partitioned table \"%s\", which is not allowed when \"%s\" is false.",
							   relname, "publish_via_partition_root")));
		}
	}

	/* Everything ok, form a new tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));

	if (publish_given)
	{
		values[Anum_pg_publication_pubinsert - 1] = BoolGetDatum(pubactions.pubinsert);
		replaces[Anum_pg_publication_pubinsert - 1] = true;

		values[Anum_pg_publication_pubupdate - 1] = BoolGetDatum(pubactions.pubupdate);
		replaces[Anum_pg_publication_pubupdate - 1] = true;

		values[Anum_pg_publication_pubdelete - 1] = BoolGetDatum(pubactions.pubdelete);
		replaces[Anum_pg_publication_pubdelete - 1] = true;

		values[Anum_pg_publication_pubtruncate - 1] = BoolGetDatum(pubactions.pubtruncate);
		replaces[Anum_pg_publication_pubtruncate - 1] = true;
	}

	if (publish_via_partition_root_given)
	{
		values[Anum_pg_publication_pubviaroot - 1] = BoolGetDatum(publish_via_partition_root);
		replaces[Anum_pg_publication_pubviaroot - 1] = true;
	}

	tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
							replaces);

	/* Update the catalog. */
	CatalogTupleUpdate(rel, &tup->t_self, tup);

	CommandCounterIncrement();

	/* Re-fetch the form pointer: heap_modify_tuple returned a new tuple. */
	pubform = (Form_pg_publication) GETSTRUCT(tup);

	/* Invalidate the relcache. */
	if (pubform->puballtables)
	{
		CacheInvalidateRelcacheAll();
	}
	else
	{
		List	   *relids = NIL;
		List	   *schemarelids = NIL;

		/*
		 * For any partitioned tables contained in the publication, we must
		 * invalidate all partitions contained in the respective partition
		 * trees, not just those explicitly mentioned in the publication.
		 */
		if (root_relids == NIL)
			relids = GetPublicationRelations(pubform->oid,
											 PUBLICATION_PART_ALL);
		else
		{
			/*
			 * We already got tables explicitly mentioned in the publication.
			 * Now get all partitions for the partitioned table in the list.
			 */
			foreach(lc, root_relids)
				relids = GetPubPartitionOptionRelations(relids,
														PUBLICATION_PART_ALL,
														lfirst_oid(lc));
		}

		schemarelids = GetAllSchemaPublicationRelations(pubform->oid,
														PUBLICATION_PART_ALL);
		relids = list_concat_unique_oid(relids, schemarelids);

		InvalidatePublicationRels(relids);
	}

	ObjectAddressSet(obj, PublicationRelationId, pubform->oid);
	EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
									 (Node *) stmt);

	InvokeObjectPostAlterHook(PublicationRelationId, pubform->oid, 0);
}

/*
 * Invalidate the relations.
 */
void
InvalidatePublicationRels(List *relids)
{
	/*
	 * We don't want to send too many individual messages, at some point it's
	 * cheaper to just reset whole relcache.
	 */
	if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)
	{
		ListCell   *lc;

		foreach(lc, relids)
			CacheInvalidateRelcacheByRelid(lfirst_oid(lc));
	}
	else
		CacheInvalidateRelcacheAll();
}

/*
 * Add or remove table to/from publication.
 */
static void
AlterPublicationTables(AlterPublicationStmt *stmt, HeapTuple tup,
					   List *tables, const char *queryString,
					   bool publish_schema)
{
	List	   *rels = NIL;
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
	Oid			pubid = pubform->oid;

	/*
	 * Nothing to do if no objects, except in SET: for that it is quite
	 * possible that user has not specified any tables in which case we need
	 * to remove all the existing tables.
	 */
	if (!tables && stmt->action != AP_SetObjects)
		return;

	rels = OpenTableList(tables);

	if (stmt->action == AP_AddObjects)
	{
		TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);

		/* A preexisting TABLES IN SCHEMA element also forbids column lists. */
		publish_schema |= is_schema_publication(pubid);

		CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
								   pubform->pubviaroot);

		PublicationAddTables(pubid, rels, false, stmt);
	}
	else if (stmt->action == AP_DropObjects)
		PublicationDropTables(pubid, rels, false);
	else						/* AP_SetObjects */
	{
		List	   *oldrelids = GetPublicationRelations(pubid,
														PUBLICATION_PART_ROOT);
		List	   *delrels = NIL;
		ListCell   *oldlc;

		TransformPubWhereClauses(rels, queryString, pubform->pubviaroot);

		CheckPubRelationColumnList(stmt->pubname, rels, publish_schema,
								   pubform->pubviaroot);

		/*
		 * To recreate the relation list for the publication, look for
		 * existing relations that do not need to be dropped.
		 */
		foreach(oldlc, oldrelids)
		{
			Oid			oldrelid = lfirst_oid(oldlc);
			ListCell   *newlc;
			PublicationRelInfo *oldrel;
			bool		found = false;
			HeapTuple	rftuple;
			Node	   *oldrelwhereclause = NULL;
			Bitmapset  *oldcolumns = NULL;

			/* look up the cache for the old relmap */
			rftuple = SearchSysCache2(PUBLICATIONRELMAP,
									  ObjectIdGetDatum(oldrelid),
									  ObjectIdGetDatum(pubid));

			/*
			 * See if the existing relation currently has a WHERE clause or a
			 * column list. We need to compare those too.
			 */
			if (HeapTupleIsValid(rftuple))
			{
				bool		isnull = true;
				Datum		whereClauseDatum;
				Datum		columnListDatum;

				/* Load the WHERE clause for this table. */
				whereClauseDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
												   Anum_pg_publication_rel_prqual,
												   &isnull);
				if (!isnull)
					oldrelwhereclause = stringToNode(TextDatumGetCString(whereClauseDatum));

				/* Transform the int2vector column list to a bitmap. */
				columnListDatum = SysCacheGetAttr(PUBLICATIONRELMAP, rftuple,
												  Anum_pg_publication_rel_prattrs,
												  &isnull);

				if (!isnull)
					oldcolumns = pub_collist_to_bitmapset(NULL, columnListDatum, NULL);

				ReleaseSysCache(rftuple);
			}

			foreach(newlc, rels)
			{
				PublicationRelInfo *newpubrel;
				Oid			newrelid;
				Bitmapset  *newcolumns = NULL;

				newpubrel = (PublicationRelInfo *) lfirst(newlc);
				newrelid = RelationGetRelid(newpubrel->relation);

				/*
				 * If the new publication has column list, transform it to a
				 * bitmap too.
				 */
				if (newpubrel->columns)
				{
					ListCell   *lc;

					foreach(lc, newpubrel->columns)
					{
						char	   *colname = strVal(lfirst(lc));
						AttrNumber	attnum = get_attnum(newrelid, colname);

						newcolumns = bms_add_member(newcolumns, attnum);
					}
				}

				/*
				 * Check if any of the new set of relations matches with the
				 * existing relations in the publication. Additionally, if the
				 * relation has an associated WHERE clause, check the WHERE
				 * expressions also match. Same for the column list. Drop the
				 * rest.
				 */
				if (RelationGetRelid(newpubrel->relation) == oldrelid)
				{
					if (equal(oldrelwhereclause, newpubrel->whereClause) &&
						bms_equal(oldcolumns, newcolumns))
					{
						found = true;
						break;
					}
				}
			}

			/*
			 * Add the non-matched relations to a list so that they can be
			 * dropped.
			 */
			if (!found)
			{
				oldrel = palloc(sizeof(PublicationRelInfo));
				oldrel->whereClause = NULL;
				oldrel->columns = NIL;
				oldrel->relation = table_open(oldrelid,
											  ShareUpdateExclusiveLock);
				delrels = lappend(delrels, oldrel);
			}
		}

		/* And drop them. */
		PublicationDropTables(pubid, delrels, true);

		/*
		 * Don't bother calculating the difference for adding, we'll catch and
		 * skip existing ones when doing catalog update.
		 */
		PublicationAddTables(pubid, rels, true, stmt);

		CloseTableList(delrels);
	}

	CloseTableList(rels);
}

/*
 * Alter the publication schemas.
 *
 * Add or remove schemas to/from publication.
 */
static void
AlterPublicationSchemas(AlterPublicationStmt *stmt,
						HeapTuple tup, List *schemaidlist)
{
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);

	/*
	 * Nothing to do if no objects, except in SET: for that it is quite
	 * possible that user has not specified any schemas in which case we need
	 * to remove all the existing schemas.
	 */
	if (!schemaidlist && stmt->action != AP_SetObjects)
		return;

	/*
	 * Schema lock is held until the publication is altered to prevent
	 * concurrent schema deletion.
	 */
	LockSchemaList(schemaidlist);
	if (stmt->action == AP_AddObjects)
	{
		ListCell   *lc;
		List	   *reloids;

		reloids = GetPublicationRelations(pubform->oid, PUBLICATION_PART_ROOT);

		foreach(lc, reloids)
		{
			HeapTuple	coltuple;

			coltuple = SearchSysCache2(PUBLICATIONRELMAP,
									   ObjectIdGetDatum(lfirst_oid(lc)),
									   ObjectIdGetDatum(pubform->oid));

			if (!HeapTupleIsValid(coltuple))
				continue;

			/*
			 * Disallow adding schema if column list is already part of the
			 * publication. See CheckPubRelationColumnList.
			 */
			if (!heap_attisnull(coltuple, Anum_pg_publication_rel_prattrs, NULL))
				ereport(ERROR,
						errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						errmsg("cannot add schema to publication \"%s\"",
							   stmt->pubname),
						errdetail("Schemas cannot be added if any tables that specify a column list are already part of the publication."));

			ReleaseSysCache(coltuple);
		}

		PublicationAddSchemas(pubform->oid, schemaidlist, false, stmt);
	}
	else if (stmt->action == AP_DropObjects)
		PublicationDropSchemas(pubform->oid, schemaidlist, false);
	else						/* AP_SetObjects */
	{
		List	   *oldschemaids = GetPublicationSchemas(pubform->oid);
		List	   *delschemas = NIL;

		/* Identify which schemas should be dropped */
		delschemas = list_difference_oid(oldschemaids, schemaidlist);

		/*
		 * Schema lock is held until the publication is altered to prevent
		 * concurrent schema deletion.
		 */
		LockSchemaList(delschemas);

		/* And drop them */
		PublicationDropSchemas(pubform->oid, delschemas, true);

		/*
		 * Don't bother calculating the difference for adding, we'll catch and
		 * skip existing ones when doing catalog update.
		 */
		PublicationAddSchemas(pubform->oid, schemaidlist, true, stmt);
	}
}

/*
 * Check if relations and schemas can be in a given publication and throw
 * appropriate error if not.
 */
static void
CheckAlterPublication(AlterPublicationStmt *stmt, HeapTuple tup,
					  List *tables, List *schemaidlist)
{
	Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);

	/* Adding or setting schemas requires superuser. */
	if ((stmt->action == AP_AddObjects || stmt->action == AP_SetObjects) &&
		schemaidlist && !superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to add or set schemas")));

	/*
	 * Check that user is allowed to manipulate the publication tables in
	 * schema
	 */
	if (schemaidlist && pubform->puballtables)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("publication \"%s\" is defined as FOR ALL TABLES",
						NameStr(pubform->pubname)),
				 errdetail("Schemas cannot be added to or dropped from FOR ALL TABLES publications.")));

	/* Check that user is allowed to manipulate the publication tables. */
	if (tables && pubform->puballtables)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("publication \"%s\" is defined as FOR ALL TABLES",
						NameStr(pubform->pubname)),
				 errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications.")));
}

/*
 * Alter the existing publication.
 *
 * This is dispatcher function for AlterPublicationOptions,
 * AlterPublicationSchemas and AlterPublicationTables.
+ */ +void +AlterPublication(ParseState *pstate, AlterPublicationStmt *stmt) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONNAME, + CStringGetDatum(stmt->pubname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", + stmt->pubname))); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + /* must be owner */ + if (!pg_publication_ownercheck(pubform->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION, + stmt->pubname); + + if (stmt->options) + AlterPublicationOptions(pstate, stmt, rel, tup); + else + { + List *relations = NIL; + List *schemaidlist = NIL; + Oid pubid = pubform->oid; + + ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations, + &schemaidlist); + + CheckAlterPublication(stmt, tup, relations, schemaidlist); + + heap_freetuple(tup); + + /* Lock the publication so nobody else can do anything with it. */ + LockDatabaseObject(PublicationRelationId, pubid, 0, + AccessExclusiveLock); + + /* + * It is possible that by the time we acquire the lock on publication, + * concurrent DDL has removed it. We can test this by checking the + * existence of publication. We get the tuple again to avoid the risk + * of any publication option getting changed. + */ + tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", + stmt->pubname)); + + AlterPublicationTables(stmt, tup, relations, pstate->p_sourcetext, + schemaidlist != NIL); + AlterPublicationSchemas(stmt, tup, schemaidlist); + } + + /* Cleanup. */ + heap_freetuple(tup); + table_close(rel, RowExclusiveLock); +} + +/* + * Remove relation from publication by mapping OID. 
+ */ +void +RemovePublicationRelById(Oid proid) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication_rel pubrel; + List *relids = NIL; + + rel = table_open(PublicationRelRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication table %u", + proid); + + pubrel = (Form_pg_publication_rel) GETSTRUCT(tup); + + /* + * Invalidate relcache so that publication info is rebuilt. + * + * For the partitioned tables, we must invalidate all partitions contained + * in the respective partition hierarchies, not just the one explicitly + * mentioned in the publication. This is required because we implicitly + * publish the child tables when the parent table is published. + */ + relids = GetPubPartitionOptionRelations(relids, PUBLICATION_PART_ALL, + pubrel->prrelid); + + InvalidatePublicationRels(relids); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Remove the publication by mapping OID. + */ +void +RemovePublicationById(Oid pubid) +{ + Relation rel; + HeapTuple tup; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + /* Invalidate relcache so that publication info is rebuilt. */ + if (pubform->puballtables) + CacheInvalidateRelcacheAll(); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Remove schema from publication by mapping OID. 
+ */ +void +RemovePublicationSchemaById(Oid psoid) +{ + Relation rel; + HeapTuple tup; + List *schemaRels = NIL; + Form_pg_publication_namespace pubsch; + + rel = table_open(PublicationNamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONNAMESPACE, ObjectIdGetDatum(psoid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication schema %u", psoid); + + pubsch = (Form_pg_publication_namespace) GETSTRUCT(tup); + + /* + * Invalidate relcache so that publication info is rebuilt. See + * RemovePublicationRelById for why we need to consider all the + * partitions. + */ + schemaRels = GetSchemaPublicationRelations(pubsch->pnnspid, + PUBLICATION_PART_ALL); + InvalidatePublicationRels(schemaRels); + + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Open relations specified by a PublicationTable list. + * The returned tables are locked in ShareUpdateExclusiveLock mode in order to + * add them to a publication. + */ +static List * +OpenTableList(List *tables) +{ + List *relids = NIL; + List *rels = NIL; + ListCell *lc; + List *relids_with_rf = NIL; + List *relids_with_collist = NIL; + + /* + * Open, share-lock, and check all the explicitly-specified relations + */ + foreach(lc, tables) + { + PublicationTable *t = lfirst_node(PublicationTable, lc); + bool recurse = t->relation->inh; + Relation rel; + Oid myrelid; + PublicationRelInfo *pub_rel; + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + + rel = table_openrv(t->relation, ShareUpdateExclusiveLock); + myrelid = RelationGetRelid(rel); + + /* + * Filter out duplicates if user specifies "foo, foo". + * + * Note that this algorithm is known to not be very efficient (O(N^2)) + * but given that it only works on list of tables given to us by user + * it's deemed acceptable. 
+ */ + if (list_member_oid(relids, myrelid)) + { + /* Disallow duplicate tables if there are any with row filters. */ + if (t->whereClause || list_member_oid(relids_with_rf, myrelid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant WHERE clauses for table \"%s\"", + RelationGetRelationName(rel)))); + + /* Disallow duplicate tables if there are any with column lists. */ + if (t->columns || list_member_oid(relids_with_collist, myrelid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant column lists for table \"%s\"", + RelationGetRelationName(rel)))); + + table_close(rel, ShareUpdateExclusiveLock); + continue; + } + + pub_rel = palloc(sizeof(PublicationRelInfo)); + pub_rel->relation = rel; + pub_rel->whereClause = t->whereClause; + pub_rel->columns = t->columns; + rels = lappend(rels, pub_rel); + relids = lappend_oid(relids, myrelid); + + if (t->whereClause) + relids_with_rf = lappend_oid(relids_with_rf, myrelid); + + if (t->columns) + relids_with_collist = lappend_oid(relids_with_collist, myrelid); + + /* + * Add children of this rel, if requested, so that they too are added + * to the publication. A partitioned table can't have any inheritance + * children other than its partitions, which need not be explicitly + * added to the publication. + */ + if (recurse && rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + List *children; + ListCell *child; + + children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock, + NULL); + + foreach(child, children) + { + Oid childrelid = lfirst_oid(child); + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + + /* + * Skip duplicates if user specified both parent and child + * tables. + */ + if (list_member_oid(relids, childrelid)) + { + /* + * We don't allow to specify row filter for both parent + * and child table at the same time as it is not very + * clear which one should be given preference. 
+ */ + if (childrelid != myrelid && + (t->whereClause || list_member_oid(relids_with_rf, childrelid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant WHERE clauses for table \"%s\"", + RelationGetRelationName(rel)))); + + /* + * We don't allow to specify column list for both parent + * and child table at the same time as it is not very + * clear which one should be given preference. + */ + if (childrelid != myrelid && + (t->columns || list_member_oid(relids_with_collist, childrelid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("conflicting or redundant column lists for table \"%s\"", + RelationGetRelationName(rel)))); + + continue; + } + + /* find_all_inheritors already got lock */ + rel = table_open(childrelid, NoLock); + pub_rel = palloc(sizeof(PublicationRelInfo)); + pub_rel->relation = rel; + /* child inherits WHERE clause from parent */ + pub_rel->whereClause = t->whereClause; + + /* child inherits column list from parent */ + pub_rel->columns = t->columns; + rels = lappend(rels, pub_rel); + relids = lappend_oid(relids, childrelid); + + if (t->whereClause) + relids_with_rf = lappend_oid(relids_with_rf, childrelid); + + if (t->columns) + relids_with_collist = lappend_oid(relids_with_collist, childrelid); + } + } + } + + list_free(relids); + list_free(relids_with_rf); + + return rels; +} + +/* + * Close all relations in the list. + */ +static void +CloseTableList(List *rels) +{ + ListCell *lc; + + foreach(lc, rels) + { + PublicationRelInfo *pub_rel; + + pub_rel = (PublicationRelInfo *) lfirst(lc); + table_close(pub_rel->relation, NoLock); + } + + list_free_deep(rels); +} + +/* + * Lock the schemas specified in the schema list in AccessShareLock mode in + * order to prevent concurrent schema deletion. 
+ */ +static void +LockSchemaList(List *schemalist) +{ + ListCell *lc; + + foreach(lc, schemalist) + { + Oid schemaid = lfirst_oid(lc); + + /* Allow query cancel in case this takes a long time */ + CHECK_FOR_INTERRUPTS(); + LockDatabaseObject(NamespaceRelationId, schemaid, 0, AccessShareLock); + + /* + * It is possible that by the time we acquire the lock on schema, + * concurrent DDL has removed it. We can test this by checking the + * existence of schema. + */ + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaid))) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", schemaid)); + } +} + +/* + * Add listed tables to the publication. + */ +static void +PublicationAddTables(Oid pubid, List *rels, bool if_not_exists, + AlterPublicationStmt *stmt) +{ + ListCell *lc; + + Assert(!stmt || !stmt->for_all_tables); + + foreach(lc, rels) + { + PublicationRelInfo *pub_rel = (PublicationRelInfo *) lfirst(lc); + Relation rel = pub_rel->relation; + ObjectAddress obj; + + /* Must be owner of the table or superuser. */ + if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + obj = publication_add_relation(pubid, pub_rel, if_not_exists); + if (stmt) + { + EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress, + (Node *) stmt); + + InvokeObjectPostCreateHook(PublicationRelRelationId, + obj.objectId, 0); + } + } +} + +/* + * Remove listed tables from the publication. 
+ */ +static void +PublicationDropTables(Oid pubid, List *rels, bool missing_ok) +{ + ObjectAddress obj; + ListCell *lc; + Oid prid; + + foreach(lc, rels) + { + PublicationRelInfo *pubrel = (PublicationRelInfo *) lfirst(lc); + Relation rel = pubrel->relation; + Oid relid = RelationGetRelid(rel); + + if (pubrel->columns) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("column list must not be specified in ALTER PUBLICATION ... DROP")); + + prid = GetSysCacheOid2(PUBLICATIONRELMAP, Anum_pg_publication_rel_oid, + ObjectIdGetDatum(relid), + ObjectIdGetDatum(pubid)); + if (!OidIsValid(prid)) + { + if (missing_ok) + continue; + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("relation \"%s\" is not part of the publication", + RelationGetRelationName(rel)))); + } + + if (pubrel->whereClause) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use a WHERE clause when removing a table from a publication"))); + + ObjectAddressSet(obj, PublicationRelRelationId, prid); + performDeletion(&obj, DROP_CASCADE, 0); + } +} + +/* + * Add listed schemas to the publication. + */ +static void +PublicationAddSchemas(Oid pubid, List *schemas, bool if_not_exists, + AlterPublicationStmt *stmt) +{ + ListCell *lc; + + Assert(!stmt || !stmt->for_all_tables); + + foreach(lc, schemas) + { + Oid schemaid = lfirst_oid(lc); + ObjectAddress obj; + + obj = publication_add_schema(pubid, schemaid, if_not_exists); + if (stmt) + { + EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress, + (Node *) stmt); + + InvokeObjectPostCreateHook(PublicationNamespaceRelationId, + obj.objectId, 0); + } + } +} + +/* + * Remove listed schemas from the publication. 
+ */ +static void +PublicationDropSchemas(Oid pubid, List *schemas, bool missing_ok) +{ + ObjectAddress obj; + ListCell *lc; + Oid psid; + + foreach(lc, schemas) + { + Oid schemaid = lfirst_oid(lc); + + psid = GetSysCacheOid2(PUBLICATIONNAMESPACEMAP, + Anum_pg_publication_namespace_oid, + ObjectIdGetDatum(schemaid), + ObjectIdGetDatum(pubid)); + if (!OidIsValid(psid)) + { + if (missing_ok) + continue; + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tables from schema \"%s\" are not part of the publication", + get_namespace_name(schemaid)))); + } + + ObjectAddressSet(obj, PublicationNamespaceRelationId, psid); + performDeletion(&obj, DROP_CASCADE, 0); + } +} + +/* + * Internal workhorse for changing a publication owner + */ +static void +AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_publication form; + + form = (Form_pg_publication) GETSTRUCT(tup); + + if (form->pubowner == newOwnerId) + return; + + if (!superuser()) + { + AclResult aclresult; + + /* Must be owner */ + if (!pg_publication_ownercheck(form->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION, + NameStr(form->pubname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on database */ + aclresult = pg_database_aclcheck(MyDatabaseId, newOwnerId, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + if (form->puballtables && !superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of publication \"%s\"", + NameStr(form->pubname)), + errhint("The owner of a FOR ALL TABLES publication must be a superuser."))); + + if (!superuser_arg(newOwnerId) && is_schema_publication(form->oid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of 
publication \"%s\"", + NameStr(form->pubname)), + errhint("The owner of a FOR TABLES IN SCHEMA publication must be a superuser."))); + } + + form->pubowner = newOwnerId; + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(PublicationRelationId, + form->oid, + newOwnerId); + + InvokeObjectPostAlterHook(PublicationRelationId, + form->oid, 0); +} + +/* + * Change publication owner -- by name + */ +ObjectAddress +AlterPublicationOwner(const char *name, Oid newOwnerId) +{ + Oid subid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_publication pubform; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", name))); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + subid = pubform->oid; + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, PublicationRelationId, subid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change publication owner -- by OID + */ +void +AlterPublicationOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication with OID %u does not exist", subid))); + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c new file mode 100644 index 0000000..1a9132c --- /dev/null +++ b/src/backend/commands/schemacmds.c @@ -0,0 +1,441 @@ 
+/*------------------------------------------------------------------------- + * + * schemacmds.c + * schema creation/manipulation commands + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/schemacmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_namespace.h" +#include "commands/dbcommands.h" +#include "commands/event_trigger.h" +#include "commands/schemacmds.h" +#include "miscadmin.h" +#include "parser/parse_utilcmd.h" +#include "parser/scansup.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void AlterSchemaOwner_internal(HeapTuple tup, Relation rel, Oid newOwnerId); + +/* + * CREATE SCHEMA + * + * Note: caller should pass in location information for the whole + * CREATE SCHEMA statement, which in turn we pass down as the location + * of the component commands. This comports with our general plan of + * reporting location/len for the whole command even when executing + * a subquery. 
+ */ +Oid +CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString, + int stmt_location, int stmt_len) +{ + const char *schemaName = stmt->schemaname; + Oid namespaceId; + List *parsetree_list; + ListCell *parsetree_item; + Oid owner_uid; + Oid saved_uid; + int save_sec_context; + int save_nestlevel; + char *nsp = namespace_search_path; + AclResult aclresult; + ObjectAddress address; + StringInfoData pathbuf; + + GetUserIdAndSecContext(&saved_uid, &save_sec_context); + + /* + * Who is supposed to own the new schema? + */ + if (stmt->authrole) + owner_uid = get_rolespec_oid(stmt->authrole, false); + else + owner_uid = saved_uid; + + /* fill schema name with the user name if not specified */ + if (!schemaName) + { + HeapTuple tuple; + + tuple = SearchSysCache1(AUTHOID, ObjectIdGetDatum(owner_uid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for role %u", owner_uid); + schemaName = + pstrdup(NameStr(((Form_pg_authid) GETSTRUCT(tuple))->rolname)); + ReleaseSysCache(tuple); + } + + /* + * To create a schema, must have schema-create privilege on the current + * database and must be able to become the target role (this does not + * imply that the target role itself must have create-schema privilege). + * The latter provision guards against "giveaway" attacks. Note that a + * superuser will always have both of these privileges a fortiori. 
+ */ + aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + check_is_member_of_role(saved_uid, owner_uid); + + /* Additional check to protect reserved schema names */ + if (!allowSystemTableMods && IsReservedName(schemaName)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("unacceptable schema name \"%s\"", schemaName), + errdetail("The prefix \"pg_\" is reserved for system schemas."))); + + /* + * If if_not_exists was given and the schema already exists, bail out. + * (Note: we needn't check this when not if_not_exists, because + * NamespaceCreate will complain anyway.) We could do this before making + * the permissions checks, but since CREATE TABLE IF NOT EXISTS makes its + * creation-permission check first, we do likewise. + */ + if (stmt->if_not_exists) + { + namespaceId = get_namespace_oid(schemaName, true); + if (OidIsValid(namespaceId)) + { + /* + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. + */ + ObjectAddressSet(address, NamespaceRelationId, namespaceId); + checkMembershipInCurrentExtension(&address); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_SCHEMA), + errmsg("schema \"%s\" already exists, skipping", + schemaName))); + return InvalidOid; + } + } + + /* + * If the requested authorization is different from the current user, + * temporarily set the current user so that the object(s) will be created + * with the correct ownership. + * + * (The setting will be restored at the end of this routine, or in case of + * error, transaction abort will clean things up.) 
+ */ + if (saved_uid != owner_uid) + SetUserIdAndSecContext(owner_uid, + save_sec_context | SECURITY_LOCAL_USERID_CHANGE); + + /* Create the schema's namespace */ + namespaceId = NamespaceCreate(schemaName, owner_uid, false); + + /* Advance cmd counter to make the namespace visible */ + CommandCounterIncrement(); + + /* + * Prepend the new schema to the current search path. + * + * We use the equivalent of a function SET option to allow the setting to + * persist for exactly the duration of the schema creation. guc.c also + * takes care of undoing the setting on error. + */ + save_nestlevel = NewGUCNestLevel(); + + initStringInfo(&pathbuf); + appendStringInfoString(&pathbuf, quote_identifier(schemaName)); + + while (scanner_isspace(*nsp)) + nsp++; + + if (*nsp != '\0') + appendStringInfo(&pathbuf, ", %s", nsp); + + (void) set_config_option("search_path", pathbuf.data, + PGC_USERSET, PGC_S_SESSION, + GUC_ACTION_SAVE, true, 0, false); + + /* + * Report the new schema to possibly interested event triggers. Note we + * must do this here and not in ProcessUtilitySlow because otherwise the + * objects created below are reported before the schema, which would be + * wrong. + */ + ObjectAddressSet(address, NamespaceRelationId, namespaceId); + EventTriggerCollectSimpleCommand(address, InvalidObjectAddress, + (Node *) stmt); + + /* + * Examine the list of commands embedded in the CREATE SCHEMA command, and + * reorganize them into a sequentially executable order with no forward + * references. Note that the result is still a list of raw parsetrees --- + * we cannot, in general, run parse analysis on one statement until we + * have actually executed the prior ones. + */ + parsetree_list = transformCreateSchemaStmtElements(stmt->schemaElts, + schemaName); + + /* + * Execute each command contained in the CREATE SCHEMA. 
Since the grammar + * allows only utility commands in CREATE SCHEMA, there is no need to pass + * them through parse_analyze_*() or the rewriter; we can just hand them + * straight to ProcessUtility. + */ + foreach(parsetree_item, parsetree_list) + { + Node *stmt = (Node *) lfirst(parsetree_item); + PlannedStmt *wrapper; + + /* need to make a wrapper PlannedStmt */ + wrapper = makeNode(PlannedStmt); + wrapper->commandType = CMD_UTILITY; + wrapper->canSetTag = false; + wrapper->utilityStmt = stmt; + wrapper->stmt_location = stmt_location; + wrapper->stmt_len = stmt_len; + + /* do this step */ + ProcessUtility(wrapper, + queryString, + false, + PROCESS_UTILITY_SUBCOMMAND, + NULL, + NULL, + None_Receiver, + NULL); + + /* make sure later steps can see the object created here */ + CommandCounterIncrement(); + } + + /* + * Restore the GUC variable search_path we set above. + */ + AtEOXact_GUC(true, save_nestlevel); + + /* Reset current user and security context */ + SetUserIdAndSecContext(saved_uid, save_sec_context); + + return namespaceId; +} + + +/* + * Rename schema + */ +ObjectAddress +RenameSchema(const char *oldname, const char *newname) +{ + Oid nspOid; + HeapTuple tup; + Relation rel; + AclResult aclresult; + ObjectAddress address; + Form_pg_namespace nspform; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(NAMESPACENAME, CStringGetDatum(oldname)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", oldname))); + + nspform = (Form_pg_namespace) GETSTRUCT(tup); + nspOid = nspform->oid; + + /* make sure the new name doesn't exist */ + if (OidIsValid(get_namespace_oid(newname, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_SCHEMA), + errmsg("schema \"%s\" already exists", newname))); + + /* must be owner */ + if (!pg_namespace_ownercheck(nspOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, + oldname); + + /* must have 
CREATE privilege on database */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + if (!allowSystemTableMods && IsReservedName(newname)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("unacceptable schema name \"%s\"", newname), + errdetail("The prefix \"pg_\" is reserved for system schemas."))); + + /* rename */ + namestrcpy(&nspform->nspname, newname); + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(NamespaceRelationId, nspOid, 0); + + ObjectAddressSet(address, NamespaceRelationId, nspOid); + + table_close(rel, NoLock); + heap_freetuple(tup); + + return address; +} + +void +AlterSchemaOwner_oid(Oid oid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(oid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for schema %u", oid); + + AlterSchemaOwner_internal(tup, rel, newOwnerId); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); +} + + +/* + * Change schema owner + */ +ObjectAddress +AlterSchemaOwner(const char *name, Oid newOwnerId) +{ + Oid nspOid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_namespace nspform; + + rel = table_open(NamespaceRelationId, RowExclusiveLock); + + tup = SearchSysCache1(NAMESPACENAME, CStringGetDatum(name)); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", name))); + + nspform = (Form_pg_namespace) GETSTRUCT(tup); + nspOid = nspform->oid; + + AlterSchemaOwner_internal(tup, rel, newOwnerId); + + ObjectAddressSet(address, NamespaceRelationId, nspOid); + + ReleaseSysCache(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +static void +AlterSchemaOwner_internal(HeapTuple tup, Relation rel, 
Oid newOwnerId) +{ + Form_pg_namespace nspForm; + + Assert(tup->t_tableOid == NamespaceRelationId); + Assert(RelationGetRelid(rel) == NamespaceRelationId); + + nspForm = (Form_pg_namespace) GETSTRUCT(tup); + + /* + * If the new owner is the same as the existing owner, consider the + * command to have succeeded. This is for dump restoration purposes. + */ + if (nspForm->nspowner != newOwnerId) + { + Datum repl_val[Natts_pg_namespace]; + bool repl_null[Natts_pg_namespace]; + bool repl_repl[Natts_pg_namespace]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + HeapTuple newtuple; + AclResult aclresult; + + /* Otherwise, must be owner of the existing object */ + if (!pg_namespace_ownercheck(nspForm->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA, + NameStr(nspForm->nspname)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* + * must have create-schema rights + * + * NOTE: This is different from other alter-owner checks in that the + * current user is checked for create privileges instead of the + * destination owner. This is consistent with the CREATE case for + * schemas. Because superusers will always have this right, we need + * no special case for them. + */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_DATABASE, + get_database_name(MyDatabaseId)); + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_namespace_nspowner - 1] = true; + repl_val[Anum_pg_namespace_nspowner - 1] = ObjectIdGetDatum(newOwnerId); + + /* + * Determine the modified ACL for the new owner. This is only + * necessary when the ACL is non-null. 
+ */ + aclDatum = SysCacheGetAttr(NAMESPACENAME, tup, + Anum_pg_namespace_nspacl, + &isNull); + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + nspForm->nspowner, newOwnerId); + repl_repl[Anum_pg_namespace_nspacl - 1] = true; + repl_val[Anum_pg_namespace_nspacl - 1] = PointerGetDatum(newAcl); + } + + newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(rel, &newtuple->t_self, newtuple); + + heap_freetuple(newtuple); + + /* Update owner dependency reference */ + changeDependencyOnOwner(NamespaceRelationId, nspForm->oid, + newOwnerId); + } + + InvokeObjectPostAlterHook(NamespaceRelationId, + nspForm->oid, 0); +} diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c new file mode 100644 index 0000000..7ae19b9 --- /dev/null +++ b/src/backend/commands/seclabel.c @@ -0,0 +1,581 @@ +/* ------------------------------------------------------------------------- + * + * seclabel.c + * routines to support security label feature. 
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "access/table.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/pg_seclabel.h"
#include "catalog/pg_shseclabel.h"
#include "commands/seclabel.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/*
 * One entry per loaded security label provider; see
 * register_label_provider().
 */
typedef struct
{
	const char *provider_name;	/* name given in SECURITY LABEL FOR ... */
	check_object_relabel_type hook; /* validation callback; may ERROR */
} LabelProvider;

/* All registered providers (LabelProvider *), kept in TopMemoryContext */
static List *label_provider_list = NIL;

/*
 * SecLabelSupportsObjectType
 *
 * Report whether the SECURITY LABEL command supports objects of the
 * given type.
 */
static bool
SecLabelSupportsObjectType(ObjectType objtype)
{
	switch (objtype)
	{
		case OBJECT_AGGREGATE:
		case OBJECT_COLUMN:
		case OBJECT_DATABASE:
		case OBJECT_DOMAIN:
		case OBJECT_EVENT_TRIGGER:
		case OBJECT_FOREIGN_TABLE:
		case OBJECT_FUNCTION:
		case OBJECT_LANGUAGE:
		case OBJECT_LARGEOBJECT:
		case OBJECT_MATVIEW:
		case OBJECT_PROCEDURE:
		case OBJECT_PUBLICATION:
		case OBJECT_ROLE:
		case OBJECT_ROUTINE:
		case OBJECT_SCHEMA:
		case OBJECT_SEQUENCE:
		case OBJECT_SUBSCRIPTION:
		case OBJECT_TABLE:
		case OBJECT_TABLESPACE:
		case OBJECT_TYPE:
		case OBJECT_VIEW:
			return true;

		case OBJECT_ACCESS_METHOD:
		case OBJECT_AMOP:
		case OBJECT_AMPROC:
		case OBJECT_ATTRIBUTE:
		case OBJECT_CAST:
		case OBJECT_COLLATION:
		case OBJECT_CONVERSION:
		case OBJECT_DEFAULT:
		case OBJECT_DEFACL:
		case OBJECT_DOMCONSTRAINT:
		case OBJECT_EXTENSION:
		case OBJECT_FDW:
		case OBJECT_FOREIGN_SERVER:
		case OBJECT_INDEX:
		case OBJECT_OPCLASS:
		case OBJECT_OPERATOR:
		case OBJECT_OPFAMILY:
		case OBJECT_PARAMETER_ACL:
		case OBJECT_POLICY:
		case OBJECT_PUBLICATION_NAMESPACE:
		case OBJECT_PUBLICATION_REL:
		case OBJECT_RULE:
		case OBJECT_STATISTIC_EXT:
		case OBJECT_TABCONSTRAINT:
		case OBJECT_TRANSFORM:
		case OBJECT_TRIGGER:
		case OBJECT_TSCONFIGURATION:
		case OBJECT_TSDICTIONARY:
		case OBJECT_TSPARSER:
		case OBJECT_TSTEMPLATE:
		case OBJECT_USER_MAPPING:
			return false;

			/*
			 * There's intentionally no default: case here; we want the
			 * compiler to warn if a new ObjectType hasn't been handled above.
			 */
	}

	/* Shouldn't get here, but if we do, say "no support" */
	return false;
}

/*
 * ExecSecLabelStmt --
 *
 * Apply a security label to a database object.
 *
 * Returns the ObjectAddress of the object to which the policy was applied.
 */
ObjectAddress
ExecSecLabelStmt(SecLabelStmt *stmt)
{
	LabelProvider *provider = NULL;
	ObjectAddress address;
	Relation	relation;
	ListCell   *lc;

	/*
	 * Find the named label provider, or if none specified, check whether
	 * there's exactly one, and if so use it.
	 */
	if (stmt->provider == NULL)
	{
		if (label_provider_list == NIL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("no security label providers have been loaded")));
		if (list_length(label_provider_list) != 1)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("must specify provider when multiple security label providers have been loaded")));
		provider = (LabelProvider *) linitial(label_provider_list);
	}
	else
	{
		foreach(lc, label_provider_list)
		{
			LabelProvider *lp = lfirst(lc);

			if (strcmp(stmt->provider, lp->provider_name) == 0)
			{
				provider = lp;
				break;
			}
		}
		if (provider == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("security label provider \"%s\" is not loaded",
							stmt->provider)));
	}

	if (!SecLabelSupportsObjectType(stmt->objtype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("security labels are not supported for this type of object")));

	/*
	 * Translate the parser representation which identifies this object into
	 * an ObjectAddress.  get_object_address() will throw an error if the
	 * object does not exist, and will also acquire a lock on the target to
	 * guard against concurrent modifications.
	 */
	address = get_object_address(stmt->objtype, stmt->object,
								 &relation, ShareUpdateExclusiveLock, false);

	/* Require ownership of the target object. */
	check_object_ownership(GetUserId(), stmt->objtype, address,
						   stmt->object, relation);

	/* Perform other integrity checks as needed. */
	switch (stmt->objtype)
	{
		case OBJECT_COLUMN:

			/*
			 * Allow security labels only on columns of tables, views,
			 * materialized views, composite types, and foreign tables (which
			 * are the only relkinds for which pg_dump will dump labels).
			 */
			if (relation->rd_rel->relkind != RELKIND_RELATION &&
				relation->rd_rel->relkind != RELKIND_VIEW &&
				relation->rd_rel->relkind != RELKIND_MATVIEW &&
				relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
				relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
				relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot set security label on relation \"%s\"",
								RelationGetRelationName(relation)),
						 errdetail_relkind_not_supported(relation->rd_rel->relkind)));
			break;
		default:
			break;
	}

	/* Provider gets control here, may throw ERROR to veto new label. */
	provider->hook(&address, stmt->label);

	/* Apply new label. */
	SetSecurityLabel(&address, provider->provider_name, stmt->label);

	/*
	 * If get_object_address() opened the relation for us, we close it to keep
	 * the reference count correct - but we retain any locks acquired by
	 * get_object_address() until commit time, to guard against concurrent
	 * activity.
	 */
	if (relation != NULL)
		relation_close(relation, NoLock);

	return address;
}

/*
 * GetSharedSecurityLabel returns the security label for a shared object for
 * a given provider, or NULL if there is no such label.
 */
static char *
GetSharedSecurityLabel(const ObjectAddress *object, const char *provider)
{
	Relation	pg_shseclabel;
	ScanKeyData keys[3];
	SysScanDesc scan;
	HeapTuple	tuple;
	Datum		datum;
	bool		isnull;
	char	   *seclabel = NULL;

	ScanKeyInit(&keys[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_shseclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_shseclabel = table_open(SharedSecLabelRelationId, AccessShareLock);

	/*
	 * Use the index only once the critical shared relcaches have been built;
	 * before that, systable_beginscan falls back to a sequential scan.
	 */
	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId,
							  criticalSharedRelcachesBuilt, NULL, 3, keys);

	tuple = systable_getnext(scan);
	if (HeapTupleIsValid(tuple))
	{
		datum = heap_getattr(tuple, Anum_pg_shseclabel_label,
							 RelationGetDescr(pg_shseclabel), &isnull);
		if (!isnull)
			seclabel = TextDatumGetCString(datum);
	}
	systable_endscan(scan);

	table_close(pg_shseclabel, AccessShareLock);

	return seclabel;
}

/*
 * GetSecurityLabel returns the security label for a shared or database object
 * for a given provider, or NULL if there is no such label.
 */
char *
GetSecurityLabel(const ObjectAddress *object, const char *provider)
{
	Relation	pg_seclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	tuple;
	Datum		datum;
	bool		isnull;
	char	   *seclabel = NULL;

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
		return GetSharedSecurityLabel(object, provider);

	/* Must be an unshared object, so examine pg_seclabel. */
	ScanKeyInit(&keys[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_seclabel_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(object->objectSubId));
	ScanKeyInit(&keys[3],
				Anum_pg_seclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_seclabel = table_open(SecLabelRelationId, AccessShareLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, 4, keys);

	tuple = systable_getnext(scan);
	if (HeapTupleIsValid(tuple))
	{
		datum = heap_getattr(tuple, Anum_pg_seclabel_label,
							 RelationGetDescr(pg_seclabel), &isnull);
		if (!isnull)
			seclabel = TextDatumGetCString(datum);
	}
	systable_endscan(scan);

	table_close(pg_seclabel, AccessShareLock);

	return seclabel;
}

/*
 * SetSharedSecurityLabel is a helper function of SetSecurityLabel to
 * handle shared database objects.  A NULL label deletes any existing entry.
 */
static void
SetSharedSecurityLabel(const ObjectAddress *object,
					   const char *provider, const char *label)
{
	Relation	pg_shseclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	oldtup;
	HeapTuple	newtup = NULL;
	Datum		values[Natts_pg_shseclabel];
	bool		nulls[Natts_pg_shseclabel];
	bool		replaces[Natts_pg_shseclabel];

	/* Prepare to form or update a tuple, if necessary. */
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));
	values[Anum_pg_shseclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
	values[Anum_pg_shseclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
	values[Anum_pg_shseclabel_provider - 1] = CStringGetTextDatum(provider);
	if (label != NULL)
		values[Anum_pg_shseclabel_label - 1] = CStringGetTextDatum(label);

	/* Use the index to search for a matching old tuple */
	ScanKeyInit(&keys[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_shseclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
							  NULL, 3, keys);

	oldtup = systable_getnext(scan);
	if (HeapTupleIsValid(oldtup))
	{
		if (label == NULL)
			CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
		else
		{
			replaces[Anum_pg_shseclabel_label - 1] = true;
			newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_shseclabel),
									   values, nulls, replaces);
			CatalogTupleUpdate(pg_shseclabel, &oldtup->t_self, newtup);
		}
	}
	systable_endscan(scan);

	/* If we didn't find an old tuple, insert a new one */
	if (newtup == NULL && label != NULL)
	{
		newtup = heap_form_tuple(RelationGetDescr(pg_shseclabel),
								 values, nulls);
		CatalogTupleInsert(pg_shseclabel, newtup);
	}

	if (newtup != NULL)
		heap_freetuple(newtup);

	table_close(pg_shseclabel, RowExclusiveLock);
}

/*
 * SetSecurityLabel attempts to set the security label for the specified
 * provider on the specified object to the given value.  NULL means that any
 * existing label should be deleted.
 */
void
SetSecurityLabel(const ObjectAddress *object,
				 const char *provider, const char *label)
{
	Relation	pg_seclabel;
	ScanKeyData keys[4];
	SysScanDesc scan;
	HeapTuple	oldtup;
	HeapTuple	newtup = NULL;
	Datum		values[Natts_pg_seclabel];
	bool		nulls[Natts_pg_seclabel];
	bool		replaces[Natts_pg_seclabel];

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
	{
		SetSharedSecurityLabel(object, provider, label);
		return;
	}

	/* Prepare to form or update a tuple, if necessary. */
	memset(nulls, false, sizeof(nulls));
	memset(replaces, false, sizeof(replaces));
	values[Anum_pg_seclabel_objoid - 1] = ObjectIdGetDatum(object->objectId);
	values[Anum_pg_seclabel_classoid - 1] = ObjectIdGetDatum(object->classId);
	values[Anum_pg_seclabel_objsubid - 1] = Int32GetDatum(object->objectSubId);
	values[Anum_pg_seclabel_provider - 1] = CStringGetTextDatum(provider);
	if (label != NULL)
		values[Anum_pg_seclabel_label - 1] = CStringGetTextDatum(label);

	/* Use the index to search for a matching old tuple */
	ScanKeyInit(&keys[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&keys[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	ScanKeyInit(&keys[2],
				Anum_pg_seclabel_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(object->objectSubId));
	ScanKeyInit(&keys[3],
				Anum_pg_seclabel_provider,
				BTEqualStrategyNumber, F_TEXTEQ,
				CStringGetTextDatum(provider));

	pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, 4, keys);

	oldtup = systable_getnext(scan);
	if (HeapTupleIsValid(oldtup))
	{
		if (label == NULL)
			CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
		else
		{
			replaces[Anum_pg_seclabel_label - 1] = true;
			newtup = heap_modify_tuple(oldtup, RelationGetDescr(pg_seclabel),
									   values, nulls, replaces);
			CatalogTupleUpdate(pg_seclabel, &oldtup->t_self, newtup);
		}
	}
	systable_endscan(scan);

	/* If we didn't find an old tuple, insert a new one */
	if (newtup == NULL && label != NULL)
	{
		newtup = heap_form_tuple(RelationGetDescr(pg_seclabel),
								 values, nulls);
		CatalogTupleInsert(pg_seclabel, newtup);
	}

	/* Update indexes, if necessary */
	if (newtup != NULL)
		heap_freetuple(newtup);

	table_close(pg_seclabel, RowExclusiveLock);
}

/*
 * DeleteSharedSecurityLabel is a helper function of DeleteSecurityLabel
 * to handle shared database objects.
 */
void
DeleteSharedSecurityLabel(Oid objectId, Oid classId)
{
	Relation	pg_shseclabel;
	ScanKeyData skey[2];
	SysScanDesc scan;
	HeapTuple	oldtup;

	ScanKeyInit(&skey[0],
				Anum_pg_shseclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(objectId));
	ScanKeyInit(&skey[1],
				Anum_pg_shseclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(classId));

	pg_shseclabel = table_open(SharedSecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true,
							  NULL, 2, skey);
	while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
		CatalogTupleDelete(pg_shseclabel, &oldtup->t_self);
	systable_endscan(scan);

	table_close(pg_shseclabel, RowExclusiveLock);
}

/*
 * DeleteSecurityLabel removes all security labels for an object (and any
 * sub-objects, if applicable).
 */
void
DeleteSecurityLabel(const ObjectAddress *object)
{
	Relation	pg_seclabel;
	ScanKeyData skey[3];
	SysScanDesc scan;
	HeapTuple	oldtup;
	int			nkeys;

	/* Shared objects have their own security label catalog. */
	if (IsSharedRelation(object->classId))
	{
		Assert(object->objectSubId == 0);
		DeleteSharedSecurityLabel(object->objectId, object->classId);
		return;
	}

	ScanKeyInit(&skey[0],
				Anum_pg_seclabel_objoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->objectId));
	ScanKeyInit(&skey[1],
				Anum_pg_seclabel_classoid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(object->classId));
	if (object->objectSubId != 0)
	{
		ScanKeyInit(&skey[2],
					Anum_pg_seclabel_objsubid,
					BTEqualStrategyNumber, F_INT4EQ,
					Int32GetDatum(object->objectSubId));
		nkeys = 3;
	}
	else
		nkeys = 2;

	pg_seclabel = table_open(SecLabelRelationId, RowExclusiveLock);

	scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true,
							  NULL, nkeys, skey);
	while (HeapTupleIsValid(oldtup = systable_getnext(scan)))
		CatalogTupleDelete(pg_seclabel, &oldtup->t_self);
	systable_endscan(scan);

	table_close(pg_seclabel, RowExclusiveLock);
}

/*
 * register_label_provider
 *
 * Register a security label provider under the given name.  The entry is
 * copied into TopMemoryContext so it survives for the life of the backend.
 */
void
register_label_provider(const char *provider_name, check_object_relabel_type hook)
{
	LabelProvider *provider;
	MemoryContext oldcxt;

	oldcxt = MemoryContextSwitchTo(TopMemoryContext);
	provider = palloc(sizeof(LabelProvider));
	provider->provider_name = pstrdup(provider_name);
	provider->hook = hook;
	label_provider_list = lappend(label_provider_list, provider);
	MemoryContextSwitchTo(oldcxt);
}
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
new file mode 100644
index 0000000..acaf660
--- /dev/null
+++ b/src/backend/commands/sequence.c
@@ -0,0 +1,1917 @@
/*-------------------------------------------------------------------------
 *
 * sequence.c
 *	  PostgreSQL sequences support code.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/sequence.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/table.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_sequence.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "commands/defrem.h"
#include "commands/sequence.h"
#include "commands/tablecmds.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_type.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/resowner.h"
#include "utils/syscache.h"
#include "utils/varlena.h"


/*
 * We don't want to log each fetching of a value from a sequence,
 * so we pre-log a few fetches in advance.  In the event of
 * crash we can lose (skip over) as many values as we pre-logged.
 */
#define SEQ_LOG_VALS	32

/*
 * The "special area" of a sequence's buffer page looks like this.
 */
#define SEQ_MAGIC	  0x1717

typedef struct sequence_magic
{
	uint32		magic;			/* always SEQ_MAGIC for a valid page */
} sequence_magic;

/*
 * We store a SeqTable item for every sequence we have touched in the current
 * session.  This is needed to hold onto nextval/currval state.  (We can't
 * rely on the relcache, since it's only, well, a cache, and may decide to
 * discard entries.)
 */
typedef struct SeqTableData
{
	Oid			relid;			/* pg_class OID of this sequence (hash key) */
	Oid			filenode;		/* last seen relfilenode of this sequence */
	LocalTransactionId lxid;	/* xact in which we last did a seq op */
	bool		last_valid;		/* do we have a valid "last" value? */
	int64		last;			/* value last returned by nextval */
	int64		cached;			/* last value already cached for nextval */
	/* if last != cached, we have not used up all the cached values */
	int64		increment;		/* copy of sequence's increment field */
	/* note that increment is zero until we first do nextval_internal() */
} SeqTableData;

typedef SeqTableData *SeqTable;

static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */

/*
 * last_used_seq is updated by nextval() to point to the last used
 * sequence.
 */
static SeqTableData *last_used_seq = NULL;

static void fill_seq_with_data(Relation rel, HeapTuple tuple);
static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum);
static Relation lock_and_open_sequence(SeqTable seq);
static void create_seq_hashtable(void);
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
static Form_pg_sequence_data read_seq_tuple(Relation rel,
											Buffer *buf, HeapTuple seqdatatuple);
static void init_params(ParseState *pstate, List *options, bool for_identity,
						bool isInit,
						Form_pg_sequence seqform,
						Form_pg_sequence_data seqdataform,
						bool *need_seq_rewrite,
						List **owned_by);
static void do_setval(Oid relid, int64 next, bool iscalled);
static void process_owned_by(Relation seqrel, List *owned_by, bool for_identity);


/*
 * DefineSequence
 *				Creates a new sequence relation
 */
ObjectAddress
DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
{
	FormData_pg_sequence seqform;
	FormData_pg_sequence_data seqdataform;
	bool		need_seq_rewrite;
	List	   *owned_by;
	CreateStmt *stmt = makeNode(CreateStmt);
	Oid			seqoid;
	ObjectAddress address;
	Relation	rel;
	HeapTuple	tuple;
	TupleDesc	tupDesc;
	Datum		value[SEQ_COL_LASTCOL];
	bool		null[SEQ_COL_LASTCOL];
	Datum		pgs_values[Natts_pg_sequence];
	bool		pgs_nulls[Natts_pg_sequence];
	int			i;

	/*
	 * If if_not_exists was given and a relation with the same name already
	 * exists, bail out.  (Note: we needn't check this when not if_not_exists,
	 * because DefineRelation will complain anyway.)
	 */
	if (seq->if_not_exists)
	{
		RangeVarGetAndCheckCreationNamespace(seq->sequence, NoLock, &seqoid);
		if (OidIsValid(seqoid))
		{
			/*
			 * If we are in an extension script, insist that the pre-existing
			 * object be a member of the extension, to avoid security risks.
			 */
			ObjectAddressSet(address, RelationRelationId, seqoid);
			checkMembershipInCurrentExtension(&address);

			/* OK to skip */
			ereport(NOTICE,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" already exists, skipping",
							seq->sequence->relname)));
			return InvalidObjectAddress;
		}
	}

	/* Check and set all option values */
	init_params(pstate, seq->options, seq->for_identity, true,
				&seqform, &seqdataform,
				&need_seq_rewrite, &owned_by);

	/*
	 * Create relation (and fill value[] and null[] for the tuple)
	 */
	stmt->tableElts = NIL;
	for (i = SEQ_COL_FIRSTCOL; i <= SEQ_COL_LASTCOL; i++)
	{
		ColumnDef  *coldef = makeNode(ColumnDef);

		coldef->inhcount = 0;
		coldef->is_local = true;
		coldef->is_not_null = true;
		coldef->is_from_type = false;
		coldef->storage = 0;
		coldef->raw_default = NULL;
		coldef->cooked_default = NULL;
		coldef->collClause = NULL;
		coldef->collOid = InvalidOid;
		coldef->constraints = NIL;
		coldef->location = -1;

		null[i - 1] = false;

		switch (i)
		{
			case SEQ_COL_LASTVAL:
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
				coldef->colname = "last_value";
				value[i - 1] = Int64GetDatumFast(seqdataform.last_value);
				break;
			case SEQ_COL_LOG:
				coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
				coldef->colname = "log_cnt";
				value[i - 1] = Int64GetDatum((int64) 0);
				break;
			case SEQ_COL_CALLED:
				coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
				coldef->colname = "is_called";
				value[i - 1] = BoolGetDatum(false);
				break;
		}
		stmt->tableElts = lappend(stmt->tableElts, coldef);
	}

	stmt->relation = seq->sequence;
	stmt->inhRelations = NIL;
	stmt->constraints = NIL;
	stmt->options = NIL;
	stmt->oncommit = ONCOMMIT_NOOP;
	stmt->tablespacename = NULL;
	stmt->if_not_exists = seq->if_not_exists;

	address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
	seqoid = address.objectId;
	Assert(seqoid != InvalidOid);

	rel = table_open(seqoid, AccessExclusiveLock);
	tupDesc = RelationGetDescr(rel);

	/* now initialize the sequence's data */
	tuple = heap_form_tuple(tupDesc, value, null);
	fill_seq_with_data(rel, tuple);

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(rel, owned_by, seq->for_identity);

	table_close(rel, NoLock);

	/* fill in pg_sequence */
	rel = table_open(SequenceRelationId, RowExclusiveLock);
	tupDesc = RelationGetDescr(rel);

	memset(pgs_nulls, 0, sizeof(pgs_nulls));

	pgs_values[Anum_pg_sequence_seqrelid - 1] = ObjectIdGetDatum(seqoid);
	pgs_values[Anum_pg_sequence_seqtypid - 1] = ObjectIdGetDatum(seqform.seqtypid);
	pgs_values[Anum_pg_sequence_seqstart - 1] = Int64GetDatumFast(seqform.seqstart);
	pgs_values[Anum_pg_sequence_seqincrement - 1] = Int64GetDatumFast(seqform.seqincrement);
	pgs_values[Anum_pg_sequence_seqmax - 1] = Int64GetDatumFast(seqform.seqmax);
	pgs_values[Anum_pg_sequence_seqmin - 1] = Int64GetDatumFast(seqform.seqmin);
	pgs_values[Anum_pg_sequence_seqcache - 1] = Int64GetDatumFast(seqform.seqcache);
	pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);

	tuple = heap_form_tuple(tupDesc, pgs_values, pgs_nulls);
	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);
	table_close(rel, RowExclusiveLock);

	return address;
}

/*
 * Reset a sequence to
 its initial value.
 *
 * The change is made transactionally, so that on failure of the current
 * transaction, the sequence will be restored to its previous state.
 * We do that by creating a whole new relfilenode for the sequence; so this
 * works much like the rewriting forms of ALTER TABLE.
 *
 * Caller is assumed to have acquired AccessExclusiveLock on the sequence,
 * which must not be released until end of transaction.  Caller is also
 * responsible for permissions checking.
 */
void
ResetSequence(Oid seq_relid)
{
	Relation	seq_rel;
	SeqTable	elm;
	Form_pg_sequence_data seq;
	Buffer		buf;
	HeapTupleData seqdatatuple;
	HeapTuple	tuple;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	int64		startv;

	/*
	 * Read the old sequence.  This does a bit more work than really
	 * necessary, but it's simple, and we do want to double-check that it's
	 * indeed a sequence.
	 */
	init_sequence(seq_relid, &elm, &seq_rel);
	(void) read_seq_tuple(seq_rel, &buf, &seqdatatuple);

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", seq_relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	startv = pgsform->seqstart;
	ReleaseSysCache(pgstuple);

	/*
	 * Copy the existing sequence tuple.
	 */
	tuple = heap_copytuple(&seqdatatuple);

	/* Now we're done with the old page */
	UnlockReleaseBuffer(buf);

	/*
	 * Modify the copied tuple to execute the restart (compare the RESTART
	 * action in AlterSequence)
	 */
	seq = (Form_pg_sequence_data) GETSTRUCT(tuple);
	seq->last_value = startv;
	seq->is_called = false;
	seq->log_cnt = 0;

	/*
	 * Create a new storage file for the sequence.
	 */
	RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence);

	/*
	 * Ensure sequence's relfrozenxid is at 0, since it won't contain any
	 * unfrozen XIDs.  Same with relminmxid, since a sequence will never
	 * contain multixacts.
	 */
	Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId);
	Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId);

	/*
	 * Insert the modified tuple into the new storage file.
	 */
	fill_seq_with_data(seq_rel, tuple);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	relation_close(seq_rel, NoLock);
}

/*
 * Initialize a sequence's relation with the specified tuple as content
 *
 * This handles unlogged sequences by writing to both the main and the init
 * fork as necessary.
 */
static void
fill_seq_with_data(Relation rel, HeapTuple tuple)
{
	fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM);

	if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
	{
		SMgrRelation srel;

		srel = smgropen(rel->rd_node, InvalidBackendId);
		smgrcreate(srel, INIT_FORKNUM, false);
		log_smgrcreate(&rel->rd_node, INIT_FORKNUM);
		fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM);
		FlushRelationBuffers(rel);
		smgrclose(srel);
	}
}

/*
 * Initialize a sequence's relation fork with the specified tuple as content
 */
static void
fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
{
	Buffer		buf;
	Page		page;
	sequence_magic *sm;
	OffsetNumber offnum;

	/* Initialize first page of relation with special magic number */

	buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
	Assert(BufferGetBlockNumber(buf) == 0);

	page = BufferGetPage(buf);

	PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(page);
	sm->magic = SEQ_MAGIC;

	/* Now insert sequence tuple */

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * Since VACUUM does not process sequences, we have to force the tuple to
	 * have xmin = FrozenTransactionId now.  Otherwise it would become
	 * invisible to SELECTs after 2G transactions.  It is okay to do this
	 * because if the current transaction aborts, no other xact will ever
	 * examine the sequence tuple anyway.
	 */
	HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
	HeapTupleHeaderSetXminFrozen(tuple->t_data);
	HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
	HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
	tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
	ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(rel))
		GetTopTransactionId();

	START_CRIT_SECTION();

	MarkBufferDirty(buf);

	offnum = PageAddItem(page, (Item) tuple->t_data, tuple->t_len,
						 InvalidOffsetNumber, false, false);
	if (offnum != FirstOffsetNumber)
		elog(ERROR, "failed to add sequence tuple to page");

	/* XLOG stuff */
	if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM)
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = rel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) tuple->t_data, tuple->t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);
}

/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation
 */
ObjectAddress
AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
{
	Oid			relid;
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData datatuple;
	Form_pg_sequence seqform;
	Form_pg_sequence_data newdataform;
	bool		need_seq_rewrite;
	List	   *owned_by;
	ObjectAddress address;
	Relation	rel;
	HeapTuple	seqtuple;
	HeapTuple	newdatatuple;

	/* Open and lock sequence, and check for ownership along the way. */
	relid = RangeVarGetRelidExtended(stmt->sequence,
									 ShareRowExclusiveLock,
									 stmt->missing_ok ? RVR_MISSING_OK : 0,
									 RangeVarCallbackOwnsRelation,
									 NULL);
	if (relid == InvalidOid)
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->sequence->relname)));
		return InvalidObjectAddress;
	}

	init_sequence(relid, &elm, &seqrel);

	rel = table_open(SequenceRelationId, RowExclusiveLock);
	seqtuple = SearchSysCacheCopy1(SEQRELID,
								   ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(seqtuple))
		elog(ERROR, "cache lookup failed for sequence %u",
			 relid);

	seqform = (Form_pg_sequence) GETSTRUCT(seqtuple);

	/* lock page's buffer and read tuple into new sequence structure */
	(void) read_seq_tuple(seqrel, &buf, &datatuple);

	/* copy the existing sequence data tuple, so it can be modified locally */
	newdatatuple = heap_copytuple(&datatuple);
	newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple);

	UnlockReleaseBuffer(buf);

	/* Check and set new values */
	init_params(pstate, stmt->options, stmt->for_identity, false,
				seqform, newdataform,
				&need_seq_rewrite, &owned_by);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	/* If needed, rewrite the sequence relation itself */
	if (need_seq_rewrite)
	{
		/* check the comment above nextval_internal()'s equivalent call. */
		if (RelationNeedsWAL(seqrel))
			GetTopTransactionId();

		/*
		 * Create a new storage file for the sequence, making the state
		 * changes transactional.
		 */
		RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence);

		/*
		 * Ensure sequence's relfrozenxid is at 0, since it won't contain any
		 * unfrozen XIDs.  Same with relminmxid, since a sequence will never
		 * contain multixacts.
		 */
		Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId);
		Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId);

		/*
		 * Insert the modified tuple into the new storage file.
		 */
		fill_seq_with_data(seqrel, newdatatuple);
	}

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by, stmt->for_identity);

	/* update the pg_sequence tuple (we could skip this in some cases...) */
	CatalogTupleUpdate(rel, &seqtuple->t_self, seqtuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	ObjectAddressSet(address, RelationRelationId, relid);

	table_close(rel, RowExclusiveLock);
	relation_close(seqrel, NoLock);

	return address;
}

/*
 * SequenceChangePersistence
 *
 * Rewrite a sequence's storage with a new relfilenode of the given
 * persistence, carrying the current sequence tuple over to the new file.
 */
void
SequenceChangePersistence(Oid relid, char newrelpersistence)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqdatatuple;

	init_sequence(relid, &elm, &seqrel);

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	(void) read_seq_tuple(seqrel, &buf, &seqdatatuple);
	RelationSetNewRelfilenode(seqrel, newrelpersistence);
	fill_seq_with_data(seqrel, &seqdatatuple);
	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}

/*
 * DeleteSequenceTuple
 *
 * Remove the pg_sequence catalog row for the given sequence relation.
 */
void
DeleteSequenceTuple(Oid relid)
{
	Relation	rel;
	HeapTuple	tuple;

	rel = table_open(SequenceRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);

	CatalogTupleDelete(rel, &tuple->t_self);

	ReleaseSysCache(tuple);
	table_close(rel, RowExclusiveLock);
}

/*
 * Note: nextval with a text argument is no longer exported as a pg_proc
 * entry, but we keep it around to ease porting of C code that may have
 * called the function directly.
 */
Datum
nextval(PG_FUNCTION_ARGS)
{
	text	   *seqin = PG_GETARG_TEXT_PP(0);
	RangeVar   *sequence;
	Oid			relid;

	sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));

	/*
	 * XXX: This is not safe in the presence of concurrent DDL, but acquiring
	 * a lock here is more expensive than letting nextval_internal do it,
	 * since the latter maintains a cache that keeps us from hitting the lock
	 * manager more than once per transaction.  It's not clear whether the
	 * performance penalty is material in practice, but for now, we do it this
	 * way.
	 */
	relid = RangeVarGetRelid(sequence, NoLock, false);

	PG_RETURN_INT64(nextval_internal(relid, true));
}

/*
 * As above, but the sequence is identified directly by OID (regclass
 * argument) instead of by a textual name.
 */
Datum
nextval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);

	PG_RETURN_INT64(nextval_internal(relid, true));
}

/*
 * Workhorse shared by nextval() and nextval_oid(): allocate and return the
 * next value of the sequence, fetching (and possibly WAL-logging) a batch
 * of values when the local cache is exhausted.
 *
 * If check_permissions is true, the caller must hold USAGE or UPDATE
 * privilege on the sequence.
 */
int64
nextval_internal(Oid relid, bool check_permissions)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	Page		page;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	HeapTupleData seqdatatuple;
	Form_pg_sequence_data seq;
	int64		incby,
				maxv,
				minv,
				cache,
				log,
				fetch,
				last;
	int64		result,
				next,
				rescnt = 0;
	bool		cycle;
	bool		logit = false;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (check_permissions &&
		pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_USAGE | ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("nextval()");

	/*
	 * Forbid this during parallel operation because, to make it work, the
	 * cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("nextval()");

	if (elm->last != elm->cached)	/* some numbers were cached */
	{
		Assert(elm->last_valid);
		Assert(elm->increment != 0);
		elm->last += elm->increment;
		relation_close(seqrel, NoLock);
		last_used_seq = elm;
		return elm->last;
	}

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	incby = pgsform->seqincrement;
	maxv = pgsform->seqmax;
	minv = pgsform->seqmin;
	cache = pgsform->seqcache;
	cycle = pgsform->seqcycle;
	ReleaseSysCache(pgstuple);

	/* lock page's buffer and read tuple */
	seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
	page = BufferGetPage(buf);

	elm->increment = incby;
	last = next = result = seq->last_value;
	fetch = cache;
	log = seq->log_cnt;

	if (!seq->is_called)
	{
		rescnt++;				/* return last_value if not is_called */
		fetch--;
	}

	/*
	 * Decide whether we should emit a WAL log record.  If so, force up the
	 * fetch count to grab SEQ_LOG_VALS more values than we actually need to
	 * cache.  (These will then be usable without logging.)
	 *
	 * If this is the first nextval after a checkpoint, we must force a new
	 * WAL record to be written anyway, else replay starting from the
	 * checkpoint would fail to advance the sequence past the logged values.
	 * In this case we may as well fetch extra values.
	 */
	if (log < fetch || !seq->is_called)
	{
		/* forced log to satisfy local demand for values */
		fetch = log = fetch + SEQ_LOG_VALS;
		logit = true;
	}
	else
	{
		XLogRecPtr	redoptr = GetRedoRecPtr();

		if (PageGetLSN(page) <= redoptr)
		{
			/* last update of seq was before checkpoint */
			fetch = log = fetch + SEQ_LOG_VALS;
			logit = true;
		}
	}

	while (fetch)				/* try to fetch cache [+ log ] numbers */
	{
		/*
		 * Check MAXVALUE for ascending sequences and MINVALUE for descending
		 * sequences
		 */
		if (incby > 0)
		{
			/* ascending sequence */
			if ((maxv >= 0 && next > maxv - incby) ||
				(maxv < 0 && next + incby > maxv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!cycle)
					ereport(ERROR,
							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
							 errmsg("nextval: reached maximum value of sequence \"%s\" (%lld)",
									RelationGetRelationName(seqrel),
									(long long) maxv)));
				next = minv;
			}
			else
				next += incby;
		}
		else
		{
			/* descending sequence */
			if ((minv < 0 && next < minv - incby) ||
				(minv >= 0 && next + incby < minv))
			{
				if (rescnt > 0)
					break;		/* stop fetching */
				if (!cycle)
					ereport(ERROR,
							(errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED),
							 errmsg("nextval: reached minimum value of sequence \"%s\" (%lld)",
									RelationGetRelationName(seqrel),
									(long long) minv)));
				next = maxv;
			}
			else
				next += incby;
		}
		fetch--;
		if (rescnt < cache)
		{
			log--;
			rescnt++;
			last = next;
			if (rescnt == 1)	/* if it's first result - */
				result = next;	/* it's what to return */
		}
	}

	log -= fetch;				/* adjust for any unfetched numbers */
	Assert(log >= 0);

	/* save info in local cache */
	elm->last = result;			/* last returned number */
	elm->cached = last;			/* last fetched number */
	elm->last_valid = true;

	last_used_seq = elm;

	/*
	 * If something needs to be WAL logged, acquire an xid, so this
	 * transaction's commit will trigger a WAL flush and wait for syncrep.
	 * It's sufficient to ensure the toplevel transaction has an xid, no need
	 * to assign xids subxacts, that'll already trigger an appropriate wait.
	 * (Have to do that here, so we're outside the critical section)
	 */
	if (logit && RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	/*
	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
	 * This looks like a violation of the buffer update protocol, but it is in
	 * fact safe because we hold exclusive lock on the buffer.  Any other
	 * process, including a checkpoint, that tries to examine the buffer
	 * contents will block until we release the lock, and then will see the
	 * final state that we install below.
	 */
	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (logit && RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;

		/*
		 * We don't log the current state of the tuple, but rather the state
		 * as it would appear after "log" more fetches.  This lets us skip
		 * that many future WAL records, at the cost that we lose those
		 * sequence values if we crash.
		 */
		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		/* set values that will be saved in xlog */
		seq->last_value = next;
		seq->is_called = true;
		seq->log_cnt = 0;

		xlrec.node = seqrel->rd_node;

		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	/* Now update sequence tuple to the intended final state */
	seq->last_value = last;		/* last fetched number */
	seq->is_called = true;
	seq->log_cnt = log;			/* how much is logged */

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);

	return result;
}

/*
 * Implement currval(regclass): return the value most recently obtained by
 * nextval for this sequence in the current session.  Errors out if nextval
 * has not yet been called for it in this session.
 */
Datum
currval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		result;
	SeqTable	elm;
	Relation	seqrel;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(),
						  ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	if (!elm->last_valid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("currval of sequence \"%s\" is not yet defined in this session",
						RelationGetRelationName(seqrel))));

	result = elm->last;

	relation_close(seqrel, NoLock);

	PG_RETURN_INT64(result);
}

/*
 * Implement lastval(): return the most recent value obtained by nextval in
 * this session, for whichever sequence was touched last (tracked via the
 * backend-local last_used_seq pointer).
 */
Datum
lastval(PG_FUNCTION_ARGS)
{
	Relation	seqrel;
	int64		result;

	if (last_used_seq == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

	/* Someone may have dropped the sequence since the last nextval() */
	if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(last_used_seq->relid)))
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("lastval is not yet defined in this session")));

	seqrel = lock_and_open_sequence(last_used_seq);

	/* nextval() must have already been called for this sequence */
	Assert(last_used_seq->last_valid);

	if (pg_class_aclcheck(last_used_seq->relid, GetUserId(),
						  ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	result = last_used_seq->last;
	relation_close(seqrel, NoLock);

	PG_RETURN_INT64(result);
}

/*
 * Main internal procedure that handles 2 & 3 arg forms of SETVAL.
 *
 * Note that the 3 arg version (which sets the is_called flag) is
 * only for use in pg_dump, and setting the is_called flag may not
 * work if multiple users are attached to the database and referencing
 * the sequence (unlikely if pg_dump is restoring it).
 *
 * It is necessary to have the 3 arg version so that pg_dump can
 * restore the state of a sequence exactly during data-only restores -
 * it is the only way to clear the is_called flag in an existing
 * sequence.
 */
static void
do_setval(Oid relid, int64 next, bool iscalled)
{
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqdatatuple;
	Form_pg_sequence_data seq;
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	int64		maxv,
				minv;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_UPDATE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
	maxv = pgsform->seqmax;
	minv = pgsform->seqmin;
	ReleaseSysCache(pgstuple);

	/* read-only transactions may only modify temp sequences */
	if (!seqrel->rd_islocaltemp)
		PreventCommandIfReadOnly("setval()");

	/*
	 * Forbid this during parallel operation because, to make it work, the
	 * cooperating backends would need to share the backend-local cached
	 * sequence information.  Currently, we don't support that.
	 */
	PreventCommandIfParallelMode("setval()");

	/* lock page's buffer and read tuple */
	seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);

	if ((next < minv) || (next > maxv))
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("setval: value %lld is out of bounds for sequence \"%s\" (%lld..%lld)",
						(long long) next, RelationGetRelationName(seqrel),
						(long long) minv, (long long) maxv)));

	/* Set the currval() state only if iscalled = true */
	if (iscalled)
	{
		elm->last = next;		/* last returned number */
		elm->last_valid = true;
	}

	/* In any case, forget any future cached numbers */
	elm->cached = elm->last;

	/* check the comment above nextval_internal()'s equivalent call. */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* ready to change the on-disk (or really, in-buffer) tuple */
	START_CRIT_SECTION();

	seq->last_value = next;		/* last fetched number */
	seq->is_called = iscalled;
	/* log_cnt = 0 forces the next nextval() to emit a fresh WAL record */
	seq->log_cnt = 0;

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	relation_close(seqrel, NoLock);
}

/*
 * Implement the 2 arg setval procedure.
 * See do_setval for discussion.
 */
Datum
setval_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		next = PG_GETARG_INT64(1);

	do_setval(relid, next, true);

	PG_RETURN_INT64(next);
}

/*
 * Implement the 3 arg setval procedure.
 * See do_setval for discussion.
 */
Datum
setval3_oid(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	int64		next = PG_GETARG_INT64(1);
	bool		iscalled = PG_GETARG_BOOL(2);

	do_setval(relid, next, iscalled);

	PG_RETURN_INT64(next);
}


/*
 * Open the sequence and acquire lock if needed
 *
 * If we haven't touched the sequence already in this transaction,
 * we need to acquire a lock.  We arrange for the lock to
 * be owned by the top transaction, so that we don't need to do it
 * more than once per xact.
 */
static Relation
lock_and_open_sequence(SeqTable seq)
{
	LocalTransactionId thislxid = MyProc->lxid;

	/* Get the lock if not already held in this xact */
	if (seq->lxid != thislxid)
	{
		ResourceOwner currentOwner;

		/*
		 * Acquire the lock under the top transaction's resource owner, so
		 * that it is held until commit rather than being released when the
		 * current (possibly shorter-lived) owner is cleaned up.
		 */
		currentOwner = CurrentResourceOwner;
		CurrentResourceOwner = TopTransactionResourceOwner;

		LockRelationOid(seq->relid, RowExclusiveLock);

		CurrentResourceOwner = currentOwner;

		/* Flag that we have a lock in the current xact */
		seq->lxid = thislxid;
	}

	/* We now know we have the lock, and can safely open the rel */
	return relation_open(seq->relid, NoLock);
}

/*
 * Creates the hash table for storing sequence data
 */
static void
create_seq_hashtable(void)
{
	HASHCTL		ctl;

	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(SeqTableData);

	seqhashtab = hash_create("Sequence values", 16, &ctl,
							 HASH_ELEM | HASH_BLOBS);
}

/*
 * Given a relation OID, open and lock the sequence.  p_elm and p_rel are
 * output parameters.
 */
static void
init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
{
	SeqTable	elm;
	Relation	seqrel;
	bool		found;

	/* Find or create a hash table entry for this sequence */
	if (seqhashtab == NULL)
		create_seq_hashtable();

	elm = (SeqTable) hash_search(seqhashtab, &relid, HASH_ENTER, &found);

	/*
	 * Initialize the new hash table entry if it did not exist already.
	 *
	 * NOTE: seqhashtab entries are stored for the life of a backend (unless
	 * explicitly discarded with DISCARD).  If the sequence itself is deleted
	 * then the entry becomes wasted memory, but it's small enough that this
	 * should not matter.
	 */
	if (!found)
	{
		/* relid already filled in */
		elm->filenode = InvalidOid;
		elm->lxid = InvalidLocalTransactionId;
		elm->last_valid = false;
		elm->last = elm->cached = 0;
	}

	/*
	 * Open the sequence relation.
	 */
	seqrel = lock_and_open_sequence(elm);

	if (seqrel->rd_rel->relkind != RELKIND_SEQUENCE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a sequence",
						RelationGetRelationName(seqrel))));

	/*
	 * If the sequence has been transactionally replaced since we last saw it,
	 * discard any cached-but-unissued values.  We do not touch the currval()
	 * state, however.
	 */
	if (seqrel->rd_rel->relfilenode != elm->filenode)
	{
		elm->filenode = seqrel->rd_rel->relfilenode;
		elm->cached = elm->last;
	}

	/* Return results */
	*p_elm = elm;
	*p_rel = seqrel;
}


/*
 * Given an opened sequence relation, lock the page buffer and find the tuple
 *
 * *buf receives the reference to the pinned-and-ex-locked buffer
 * *seqdatatuple receives the reference to the sequence tuple proper
 *		(this arg should point to a local variable of type HeapTupleData)
 *
 * Function's return value points to the data payload of the tuple
 */
static Form_pg_sequence_data
read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
{
	Page		page;
	ItemId		lp;
	sequence_magic *sm;
	Form_pg_sequence_data seq;

	*buf = ReadBuffer(rel, 0);
	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

	page = BufferGetPage(*buf);
	sm = (sequence_magic *) PageGetSpecialPointer(page);

	if (sm->magic != SEQ_MAGIC)
		elog(ERROR, "bad magic number in sequence \"%s\": %08X",
			 RelationGetRelationName(rel), sm->magic);

	lp = PageGetItemId(page, FirstOffsetNumber);
	Assert(ItemIdIsNormal(lp));

	/* Note we currently only bother to set these two fields of *seqdatatuple */
	seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
	seqdatatuple->t_len = ItemIdGetLength(lp);

	/*
	 * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
	 * a sequence, which would leave a non-frozen XID in the sequence tuple's
	 * xmax, which eventually leads to clog access failures or worse.  If we
	 * see this has happened, clean up after it.  We treat this like a hint
	 * bit update, ie, don't bother to WAL-log it, since we can certainly do
	 * this again if the update gets lost.
	 */
	Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
	if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
	{
		HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
		seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
		seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
		MarkBufferDirtyHint(*buf, true);
	}

	seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple);

	return seq;
}

/*
 * init_params: process the options list of CREATE or ALTER SEQUENCE, and
 * store the values into appropriate fields of seqform, for changes that go
 * into the pg_sequence catalog, and fields of seqdataform for changes to the
 * sequence relation itself.  Set *need_seq_rewrite to true if we changed any
 * parameters that require rewriting the sequence's relation (interesting for
 * ALTER SEQUENCE).  Also set *owned_by to any OWNED BY option, or to NIL if
 * there is none.
 *
 * If isInit is true, fill any unspecified options with default values;
 * otherwise, do not change existing options that aren't explicitly overridden.
 *
 * Note: we force a sequence rewrite whenever we change parameters that affect
 * generation of future sequence values, even if the seqdataform per se is not
 * changed.  This allows ALTER SEQUENCE to behave transactionally.  Currently,
 * the only option that doesn't cause that is OWNED BY.  It's *necessary* for
 * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would
 * break pg_upgrade by causing unwanted changes in the sequence's relfilenode.
 */
static void
init_params(ParseState *pstate, List *options, bool for_identity,
			bool isInit,
			Form_pg_sequence seqform,
			Form_pg_sequence_data seqdataform,
			bool *need_seq_rewrite,
			List **owned_by)
{
	DefElem    *as_type = NULL;
	DefElem    *start_value = NULL;
	DefElem    *restart_value = NULL;
	DefElem    *increment_by = NULL;
	DefElem    *max_value = NULL;
	DefElem    *min_value = NULL;
	DefElem    *cache_value = NULL;
	DefElem    *is_cycled = NULL;
	ListCell   *option;
	bool		reset_max_value = false;
	bool		reset_min_value = false;

	*need_seq_rewrite = false;
	*owned_by = NIL;

	/* Collect the options, rejecting duplicates of the same option */
	foreach(option, options)
	{
		DefElem    *defel = (DefElem *) lfirst(option);

		if (strcmp(defel->defname, "as") == 0)
		{
			if (as_type)
				errorConflictingDefElem(defel, pstate);
			as_type = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "increment") == 0)
		{
			if (increment_by)
				errorConflictingDefElem(defel, pstate);
			increment_by = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "start") == 0)
		{
			if (start_value)
				errorConflictingDefElem(defel, pstate);
			start_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "restart") == 0)
		{
			if (restart_value)
				errorConflictingDefElem(defel, pstate);
			restart_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "maxvalue") == 0)
		{
			if (max_value)
				errorConflictingDefElem(defel, pstate);
			max_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "minvalue") == 0)
		{
			if (min_value)
				errorConflictingDefElem(defel, pstate);
			min_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cache") == 0)
		{
			if (cache_value)
				errorConflictingDefElem(defel, pstate);
			cache_value = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "cycle") == 0)
		{
			if (is_cycled)
				errorConflictingDefElem(defel, pstate);
			is_cycled = defel;
			*need_seq_rewrite = true;
		}
		else if (strcmp(defel->defname, "owned_by") == 0)
		{
			if (*owned_by)
				errorConflictingDefElem(defel, pstate);
			*owned_by = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "sequence_name") == 0)
		{
			/*
			 * The parser allows this, but it is only for identity columns, in
			 * which case it is filtered out in parse_utilcmd.c.  We only get
			 * here if someone puts it into a CREATE SEQUENCE.
			 */
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid sequence option SEQUENCE NAME"),
					 parser_errposition(pstate, defel->location)));
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/*
	 * We must reset log_cnt when isInit or when changing any parameters that
	 * would affect future nextval allocations.
	 */
	if (isInit)
		seqdataform->log_cnt = 0;

	/* AS type */
	if (as_type != NULL)
	{
		Oid			newtypid = typenameTypeId(pstate, defGetTypeName(as_type));

		if (newtypid != INT2OID &&
			newtypid != INT4OID &&
			newtypid != INT8OID)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 for_identity
					 ? errmsg("identity column type must be smallint, integer, or bigint")
					 : errmsg("sequence type must be smallint, integer, or bigint")));

		if (!isInit)
		{
			/*
			 * When changing type and the old sequence min/max values were the
			 * min/max of the old type, adjust sequence min/max values to
			 * min/max of new type.  (Otherwise, the user chose explicit
			 * min/max values, which we'll leave alone.)
			 */
			if ((seqform->seqtypid == INT2OID && seqform->seqmax == PG_INT16_MAX) ||
				(seqform->seqtypid == INT4OID && seqform->seqmax == PG_INT32_MAX) ||
				(seqform->seqtypid == INT8OID && seqform->seqmax == PG_INT64_MAX))
				reset_max_value = true;
			if ((seqform->seqtypid == INT2OID && seqform->seqmin == PG_INT16_MIN) ||
				(seqform->seqtypid == INT4OID && seqform->seqmin == PG_INT32_MIN) ||
				(seqform->seqtypid == INT8OID && seqform->seqmin == PG_INT64_MIN))
				reset_min_value = true;
		}

		seqform->seqtypid = newtypid;
	}
	else if (isInit)
	{
		seqform->seqtypid = INT8OID;
	}

	/* INCREMENT BY */
	if (increment_by != NULL)
	{
		seqform->seqincrement = defGetInt64(increment_by);
		if (seqform->seqincrement == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("INCREMENT must not be zero")));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqincrement = 1;
	}

	/* CYCLE */
	if (is_cycled != NULL)
	{
		seqform->seqcycle = boolVal(is_cycled->arg);
		Assert(BoolIsValid(seqform->seqcycle));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcycle = false;
	}

	/* MAXVALUE (null arg means NO MAXVALUE) */
	if (max_value != NULL && max_value->arg)
	{
		seqform->seqmax = defGetInt64(max_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || max_value != NULL || reset_max_value)
	{
		if (seqform->seqincrement > 0 || reset_max_value)
		{
			/* ascending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmax = PG_INT16_MAX;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmax = PG_INT32_MAX;
			else
				seqform->seqmax = PG_INT64_MAX;
		}
		else
			seqform->seqmax = -1;	/* descending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate maximum value.  No need to check INT8 as seqmax is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmax < PG_INT16_MIN || seqform->seqmax > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmax < PG_INT32_MIN || seqform->seqmax > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MAXVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmax,
						format_type_be(seqform->seqtypid))));

	/* MINVALUE (null arg means NO MINVALUE) */
	if (min_value != NULL && min_value->arg)
	{
		seqform->seqmin = defGetInt64(min_value);
		seqdataform->log_cnt = 0;
	}
	else if (isInit || min_value != NULL || reset_min_value)
	{
		if (seqform->seqincrement < 0 || reset_min_value)
		{
			/* descending seq */
			if (seqform->seqtypid == INT2OID)
				seqform->seqmin = PG_INT16_MIN;
			else if (seqform->seqtypid == INT4OID)
				seqform->seqmin = PG_INT32_MIN;
			else
				seqform->seqmin = PG_INT64_MIN;
		}
		else
			seqform->seqmin = 1;	/* ascending seq */
		seqdataform->log_cnt = 0;
	}

	/* Validate minimum value.  No need to check INT8 as seqmin is an int64 */
	if ((seqform->seqtypid == INT2OID && (seqform->seqmin < PG_INT16_MIN || seqform->seqmin > PG_INT16_MAX))
		|| (seqform->seqtypid == INT4OID && (seqform->seqmin < PG_INT32_MIN || seqform->seqmin > PG_INT32_MAX)))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) is out of range for sequence data type %s",
						(long long) seqform->seqmin,
						format_type_be(seqform->seqtypid))));

	/* crosscheck min/max */
	if (seqform->seqmin >= seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("MINVALUE (%lld) must be less than MAXVALUE (%lld)",
						(long long) seqform->seqmin,
						(long long) seqform->seqmax)));

	/* START WITH */
	if (start_value != NULL)
	{
		seqform->seqstart = defGetInt64(start_value);
	}
	else if (isInit)
	{
		if (seqform->seqincrement > 0)
			seqform->seqstart = seqform->seqmin;	/* ascending seq */
		else
			seqform->seqstart = seqform->seqmax;	/* descending seq */
	}

	/* crosscheck START */
	if (seqform->seqstart < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmin)));
	if (seqform->seqstart > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("START value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqform->seqstart,
						(long long) seqform->seqmax)));

	/* RESTART [WITH] */
	if (restart_value != NULL)
	{
		if (restart_value->arg != NULL)
			seqdataform->last_value = defGetInt64(restart_value);
		else
			seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqdataform->last_value = seqform->seqstart;
		seqdataform->is_called = false;
	}

	/* crosscheck RESTART (or current value, if changing MIN/MAX) */
	if (seqdataform->last_value < seqform->seqmin)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be less than MINVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmin)));
	if (seqdataform->last_value > seqform->seqmax)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("RESTART value (%lld) cannot be greater than MAXVALUE (%lld)",
						(long long) seqdataform->last_value,
						(long long) seqform->seqmax)));

	/* CACHE */
	if (cache_value != NULL)
	{
		seqform->seqcache = defGetInt64(cache_value);
		if (seqform->seqcache <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("CACHE (%lld) must be greater than zero",
							(long long) seqform->seqcache)));
		seqdataform->log_cnt = 0;
	}
	else if (isInit)
	{
		seqform->seqcache = 1;
	}
}

/*
 * Process an OWNED BY option for CREATE/ALTER SEQUENCE
 *
 * Ownership permissions on the sequence are already checked,
 * but if we are establishing a new owned-by dependency, we must
 * enforce that the referenced table has the same owner and namespace
 * as the sequence.
 */
static void
process_owned_by(Relation seqrel, List *owned_by, bool for_identity)
{
	DependencyType deptype;
	int			nnames;
	Relation	tablerel;
	AttrNumber	attnum;

	deptype = for_identity ? DEPENDENCY_INTERNAL : DEPENDENCY_AUTO;

	nnames = list_length(owned_by);
	Assert(nnames > 0);
	if (nnames == 1)
	{
		/* Must be OWNED BY NONE */
		if (strcmp(strVal(linitial(owned_by)), "none") != 0)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("invalid OWNED BY option"),
					 errhint("Specify OWNED BY table.column or OWNED BY NONE.")));
		tablerel = NULL;
		attnum = 0;
	}
	else
	{
		List	   *relname;
		char	   *attrname;
		RangeVar   *rel;

		/* Separate relname and attr name */
		relname = list_truncate(list_copy(owned_by), nnames - 1);
		attrname = strVal(llast(owned_by));

		/* Open and lock rel to ensure it won't go away meanwhile */
		rel = makeRangeVarFromNameList(relname);
		tablerel = relation_openrv(rel, AccessShareLock);

		/* Must be a regular or foreign table */
		if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
			  tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
			  tablerel->rd_rel->relkind == RELKIND_VIEW ||
			  tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("sequence cannot be owned by relation \"%s\"",
							RelationGetRelationName(tablerel)),
					 errdetail_relkind_not_supported(tablerel->rd_rel->relkind)));

		/* We insist on same owner and schema */
		if (seqrel->rd_rel->relowner != tablerel->rd_rel->relowner)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must have same owner as table it is linked to")));
		if (RelationGetNamespace(seqrel) != RelationGetNamespace(tablerel))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("sequence must be in same schema as table it is linked to")));

		/* Now, fetch the attribute number from the system cache */
		attnum = get_attnum(RelationGetRelid(tablerel), attrname);
		if (attnum == InvalidAttrNumber)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							attrname, RelationGetRelationName(tablerel))));
	}

	/*
	 * Catch user explicitly running OWNED BY on identity sequence.
	 */
	if (deptype == DEPENDENCY_AUTO)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(RelationGetRelid(seqrel), DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot change ownership of identity sequence"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(seqrel),
							   get_rel_name(tableId))));
	}

	/*
	 * OK, we are ready to update pg_depend.  First remove any existing
	 * dependencies for the sequence, then optionally add a new one.
	 */
	deleteDependencyRecordsForClass(RelationRelationId, RelationGetRelid(seqrel),
									RelationRelationId, deptype);

	if (tablerel)
	{
		ObjectAddress refobject,
					depobject;

		refobject.classId = RelationRelationId;
		refobject.objectId = RelationGetRelid(tablerel);
		refobject.objectSubId = attnum;
		depobject.classId = RelationRelationId;
		depobject.objectId = RelationGetRelid(seqrel);
		depobject.objectSubId = 0;
		recordDependencyOn(&depobject, &refobject, deptype);
	}

	/* Done, but hold lock until commit */
	if (tablerel)
		relation_close(tablerel, NoLock);
}


/*
 * Return sequence parameters in a list of the form created by the parser.
 */
List *
sequence_options(Oid relid)
{
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;
	List	   *options = NIL;

	/*
	 * NOTE(review): other SEQRELID lookups in this file wrap relid in
	 * ObjectIdGetDatum(); consider doing the same here for consistency.
	 */
	pgstuple = SearchSysCache1(SEQRELID, relid);
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);

	/* Use makeFloat() for 64-bit integers, like gram.y does.
 */
	options = lappend(options,
					  makeDefElem("cache", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqcache)), -1));
	options = lappend(options,
					  makeDefElem("cycle", (Node *) makeBoolean(pgsform->seqcycle), -1));
	options = lappend(options,
					  makeDefElem("increment", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqincrement)), -1));
	options = lappend(options,
					  makeDefElem("maxvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmax)), -1));
	options = lappend(options,
					  makeDefElem("minvalue", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqmin)), -1));
	options = lappend(options,
					  makeDefElem("start", (Node *) makeFloat(psprintf(INT64_FORMAT, pgsform->seqstart)), -1));

	ReleaseSysCache(pgstuple);

	return options;
}

/*
 * Return sequence parameters (formerly for use by information schema)
 */
Datum
pg_sequence_parameters(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	TupleDesc	tupdesc;
	Datum		values[7];
	bool		isnull[7];
	HeapTuple	pgstuple;
	Form_pg_sequence pgsform;

	if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_UPDATE | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						get_rel_name(relid))));

	tupdesc = CreateTemplateTupleDesc(7);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "minimum_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "maximum_value",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "increment",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "cycle_option",
					   BOOLOID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "cache_size",
					   INT8OID, -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "data_type",
					   OIDOID, -1, 0);

	BlessTupleDesc(tupdesc);

	memset(isnull, 0, sizeof(isnull));

	/*
	 * NOTE(review): other SEQRELID lookups in this file wrap relid in
	 * ObjectIdGetDatum(); consider doing the same here for consistency.
	 */
	pgstuple = SearchSysCache1(SEQRELID, relid);
	if (!HeapTupleIsValid(pgstuple))
		elog(ERROR, "cache lookup failed for sequence %u", relid);
	pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);

	values[0] = Int64GetDatum(pgsform->seqstart);
	values[1] = Int64GetDatum(pgsform->seqmin);
	values[2] = Int64GetDatum(pgsform->seqmax);
	values[3] = Int64GetDatum(pgsform->seqincrement);
	values[4] = BoolGetDatum(pgsform->seqcycle);
	values[5] = Int64GetDatum(pgsform->seqcache);
	values[6] = ObjectIdGetDatum(pgsform->seqtypid);

	ReleaseSysCache(pgstuple);

	return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull));
}

/*
 * Return the last value from the sequence
 *
 * Note: This has a completely different meaning than lastval().
 */
Datum
pg_sequence_last_value(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	SeqTable	elm;
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence_data seq;
	bool		is_called;
	int64		result;

	/* open and lock sequence */
	init_sequence(relid, &elm, &seqrel);

	if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied for sequence %s",
						RelationGetRelationName(seqrel))));

	seq = read_seq_tuple(seqrel, &buf, &seqtuple);

	is_called = seq->is_called;
	result = seq->last_value;

	UnlockReleaseBuffer(buf);
	relation_close(seqrel, NoLock);

	/* Return NULL until the sequence has actually issued a value */
	if (is_called)
		PG_RETURN_INT64(result);
	else
		PG_RETURN_NULL();
}


/*
 * Redo routine for sequence WAL records (XLOG_SEQ_LOG).
 */
void
seq_redo(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
	Buffer		buffer;
	Page		page;
	Page		localpage;
	char	   *item;
	Size		itemsz;
	xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
	sequence_magic *sm;

	if (info != XLOG_SEQ_LOG)
		elog(PANIC, "seq_redo: unknown op code %u", info);

	buffer = XLogInitBufferForRedo(record, 0);
	page = (Page) BufferGetPage(buffer);

	/*
	 * We always reinit the page.  However, since this WAL record type is also
	 * used for updating sequences, it's possible that a hot-standby backend
	 * is examining the page concurrently; so we mustn't transiently trash the
	 * buffer.  The solution is to build the correct new page contents in
	 * local workspace and then memcpy into the buffer.  Then only bytes that
	 * are supposed to change will change, even transiently. We must palloc
	 * the local page for alignment reasons.
	 */
	localpage = (Page) palloc(BufferGetPageSize(buffer));

	PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic));
	sm = (sequence_magic *) PageGetSpecialPointer(localpage);
	sm->magic = SEQ_MAGIC;

	item = (char *) xlrec + sizeof(xl_seq_rec);
	itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);

	if (PageAddItem(localpage, (Item) item, itemsz,
					FirstOffsetNumber, false, false) == InvalidOffsetNumber)
		elog(PANIC, "seq_redo: failed to add item to page");

	PageSetLSN(localpage, lsn);

	memcpy(page, localpage, BufferGetPageSize(buffer));
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);

	pfree(localpage);
}

/*
 * Flush cached sequence information.
 */
void
ResetSequenceCaches(void)
{
	if (seqhashtab)
	{
		hash_destroy(seqhashtab);
		seqhashtab = NULL;
	}

	last_used_seq = NULL;
}

/*
 * Mask a Sequence page before performing consistency checks on it.
+ */ +void +seq_mask(char *page, BlockNumber blkno) +{ + mask_page_lsn_and_checksum(page); + + mask_unused_space(page); +} diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 0000000..f442d85 --- /dev/null +++ b/src/backend/commands/statscmds.c @@ -0,0 +1,898 @@ +/*------------------------------------------------------------------------- + * + * statscmds.c + * Commands for creating and altering extended statistics objects + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/statscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/relation.h" +#include "access/relscan.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +static char *ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid); +static char *ChooseExtendedStatisticNameAddition(List *exprs); + + +/* qsort comparator for the attnums in CreateStatistics */ +static int +compare_int16(const void *a, const void *b) +{ + int av = *(const int16 *) a; + int bv = *(const int16 *) b; + + /* this 
can't overflow if int is wider than int16 */ + return (av - bv); +} + +/* + * CREATE STATISTICS + */ +ObjectAddress +CreateStatistics(CreateStatsStmt *stmt) +{ + int16 attnums[STATS_MAX_DIMENSIONS]; + int nattnums = 0; + int numcols; + char *namestr; + NameData stxname; + Oid statoid; + Oid namespaceId; + Oid stxowner = GetUserId(); + HeapTuple htup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + int2vector *stxkeys; + List *stxexprs = NIL; + Datum exprsDatum; + Relation statrel; + Relation rel = NULL; + Oid relid; + ObjectAddress parentobject, + myself; + Datum types[4]; /* one for each possible type of statistic */ + int ntypes; + ArrayType *stxkind; + bool build_ndistinct; + bool build_dependencies; + bool build_mcv; + bool build_expressions; + bool requested_type = false; + int i; + ListCell *cell; + ListCell *cell2; + + Assert(IsA(stmt, CreateStatsStmt)); + + /* + * Examine the FROM clause. Currently, we only allow it to be a single + * simple table, but later we'll probably allow multiple tables and JOIN + * syntax. The grammar is already prepared for that, so we have to check + * here that what we got is what we can support. + */ + if (list_length(stmt->relations) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only a single relation is allowed in CREATE STATISTICS"))); + + foreach(cell, stmt->relations) + { + Node *rln = (Node *) lfirst(cell); + + if (!IsA(rln, RangeVar)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only a single relation is allowed in CREATE STATISTICS"))); + + /* + * CREATE STATISTICS will influence future execution plans but does + * not interfere with currently executing plans. So it should be + * enough to take only ShareUpdateExclusiveLock on relation, + * conflicting with ANALYZE and other DDL that sets statistical + * information, but not with normal queries. 
+ */ + rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock); + + /* Restrict to allowed relation types */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot define statistics for relation \"%s\"", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + /* You must own the relation to create stats on it */ + if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner)) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + /* Creating statistics on system catalogs is not allowed */ + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + } + + Assert(rel); + relid = RelationGetRelid(rel); + + /* + * If the node has a name, split it up and determine creation namespace. + * If not (a possibility not considered by the grammar, but one which can + * occur via the "CREATE TABLE ... (LIKE)" command), then we put the + * object in the same namespace as the relation, and cons up a name for + * it. + */ + if (stmt->defnames) + namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, + &namestr); + else + { + namespaceId = RelationGetNamespace(rel); + namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel), + ChooseExtendedStatisticNameAddition(stmt->exprs), + "stat", + namespaceId); + } + namestrcpy(&stxname, namestr); + + /* + * Deal with the possibility that the statistics object already exists. 
+ */ + if (SearchSysCacheExists2(STATEXTNAMENSP, + CStringGetDatum(namestr), + ObjectIdGetDatum(namespaceId))) + { + if (stmt->if_not_exists) + { + /* + * Since stats objects aren't members of extensions (see comments + * below), no need for checkMembershipInCurrentExtension here. + */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics object \"%s\" already exists, skipping", + namestr))); + relation_close(rel, NoLock); + return InvalidObjectAddress; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics object \"%s\" already exists", namestr))); + } + + /* + * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There + * might be duplicates and so on, but we'll deal with those later. + */ + numcols = list_length(stmt->exprs); + if (numcols > STATS_MAX_DIMENSIONS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d columns in statistics", + STATS_MAX_DIMENSIONS))); + + /* + * Convert the expression list to a simple array of attnums, but also keep + * a list of more complex expressions. While at it, enforce some + * constraints - we don't allow extended statistics on system attributes, + * and we require the data type to have a less-than operator. + * + * There are many ways to "mask" a simple attribute reference as an + * expression, for example "(a+0)" etc. We can't possibly detect all of + * them, but we handle at least the simple case with the attribute in + * parens. There'll always be a way around this, if the user is determined + * (like the "(a+0)" example), but this makes it somewhat consistent with + * how indexes treat attributes/expressions. 
+ */ + foreach(cell, stmt->exprs) + { + StatsElem *selem = lfirst_node(StatsElem, cell); + + if (selem->name) /* column reference */ + { + char *attname; + HeapTuple atttuple; + Form_pg_attribute attForm; + TypeCacheEntry *type; + + attname = selem->name; + + atttuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + attname))); + attForm = (Form_pg_attribute) GETSTRUCT(atttuple); + + /* Disallow use of system attributes in extended stats */ + if (attForm->attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", + attname, format_type_be(attForm->atttypid)))); + + attnums[nattnums] = attForm->attnum; + nattnums++; + ReleaseSysCache(atttuple); + } + else if (IsA(selem->expr, Var)) /* column reference in parens */ + { + Var *var = (Var *) selem->expr; + TypeCacheEntry *type; + + /* Disallow use of system attributes in extended stats */ + if (var->varattno <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", + get_attname(relid, var->varattno, false), format_type_be(var->vartype)))); + + attnums[nattnums] = var->varattno; + nattnums++; + } 
+ else /* expression */ + { + Node *expr = selem->expr; + Oid atttype; + TypeCacheEntry *type; + Bitmapset *attnums = NULL; + int k; + + Assert(expr != NULL); + + /* Disallow expressions referencing system attributes. */ + pull_varattnos(expr, 1, &attnums); + + k = -1; + while ((k = bms_next_member(attnums, k)) >= 0) + { + AttrNumber attnum = k + FirstLowInvalidHeapAttributeNumber; + + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistics creation on system columns is not supported"))); + } + + /* + * Disallow data types without a less-than operator. + * + * We ignore this for statistics on a single expression, in which + * case we'll build the regular statistics only (and that code can + * deal with such data types). + */ + if (list_length(stmt->exprs) > 1) + { + atttype = exprType(expr); + type = lookup_type_cache(atttype, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("expression cannot be used in multivariate statistics because its type %s has no default btree operator class", + format_type_be(atttype)))); + } + + stxexprs = lappend(stxexprs, expr); + } + } + + /* + * Parse the statistics kinds. + * + * First check that if this is the case with a single expression, there + * are no statistics kinds specified (we don't allow that for the simple + * CREATE STATISTICS form). + */ + if ((list_length(stmt->exprs) == 1) && (list_length(stxexprs) == 1)) + { + /* statistics kinds not specified */ + if (list_length(stmt->stat_types) > 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("when building statistics on a single expression, statistics kinds may not be specified"))); + } + + /* OK, let's check that we recognize the statistics kinds. 
*/ + build_ndistinct = false; + build_dependencies = false; + build_mcv = false; + foreach(cell, stmt->stat_types) + { + char *type = strVal(lfirst(cell)); + + if (strcmp(type, "ndistinct") == 0) + { + build_ndistinct = true; + requested_type = true; + } + else if (strcmp(type, "dependencies") == 0) + { + build_dependencies = true; + requested_type = true; + } + else if (strcmp(type, "mcv") == 0) + { + build_mcv = true; + requested_type = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized statistics kind \"%s\"", + type))); + } + + /* + * If no statistic type was specified, build them all (but only when the + * statistics is defined on more than one column/expression). + */ + if ((!requested_type) && (numcols >= 2)) + { + build_ndistinct = true; + build_dependencies = true; + build_mcv = true; + } + + /* + * When there are non-trivial expressions, build the expression stats + * automatically. This allows calculating good estimates for stats that + * consider per-clause estimates (e.g. functional dependencies). + */ + build_expressions = (list_length(stxexprs) > 0); + + /* + * Check that at least two columns were specified in the statement, or + * that we're building statistics on a single expression. + */ + if ((numcols < 2) && (list_length(stxexprs) != 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("extended statistics require at least 2 columns"))); + + /* + * Sort the attnums, which makes detecting duplicates somewhat easier, and + * it does not hurt (it does not matter for the contents, unlike for + * indexes, for example). + */ + qsort(attnums, nattnums, sizeof(int16), compare_int16); + + /* + * Check for duplicates in the list of columns. The attnums are sorted so + * just check consecutive elements. 
+ */ + for (i = 1; i < nattnums; i++) + { + if (attnums[i] == attnums[i - 1]) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("duplicate column name in statistics definition"))); + } + + /* + * Check for duplicate expressions. We do two loops, counting the + * occurrences of each expression. This is O(N^2) but we only allow small + * number of expressions and it's not executed often. + * + * XXX We don't cross-check attributes and expressions, because it does + * not seem worth it. In principle we could check that expressions don't + * contain trivial attribute references like "(a)", but the reasoning is + * similar to why we don't bother with extracting columns from + * expressions. It's either expensive or very easy to defeat for + * determined user, and there's no risk if we allow such statistics (the + * statistics is useless, but harmless). + */ + foreach(cell, stxexprs) + { + Node *expr1 = (Node *) lfirst(cell); + int cnt = 0; + + foreach(cell2, stxexprs) + { + Node *expr2 = (Node *) lfirst(cell2); + + if (equal(expr1, expr2)) + cnt += 1; + } + + /* every expression should find at least itself */ + Assert(cnt >= 1); + + if (cnt > 1) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("duplicate expression in statistics definition"))); + } + + /* Form an int2vector representation of the sorted column list */ + stxkeys = buildint2vector(attnums, nattnums); + + /* construct the char array of enabled statistic types */ + ntypes = 0; + if (build_ndistinct) + types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); + if (build_dependencies) + types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES); + if (build_mcv) + types[ntypes++] = CharGetDatum(STATS_EXT_MCV); + if (build_expressions) + types[ntypes++] = CharGetDatum(STATS_EXT_EXPRESSIONS); + Assert(ntypes > 0 && ntypes <= lengthof(types)); + stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR); + + /* convert the expressions (if any) to a text datum */ + if (stxexprs 
!= NIL) + { + char *exprsString; + + exprsString = nodeToString(stxexprs); + exprsDatum = CStringGetTextDatum(exprsString); + pfree(exprsString); + } + else + exprsDatum = (Datum) 0; + + statrel = table_open(StatisticExtRelationId, RowExclusiveLock); + + /* + * Everything seems fine, so let's build the pg_statistic_ext tuple. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId, + Anum_pg_statistic_ext_oid); + values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid); + values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname); + values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId); + values[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(-1); + values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner); + values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys); + values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind); + + values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum; + if (exprsDatum == (Datum) 0) + nulls[Anum_pg_statistic_ext_stxexprs - 1] = true; + + /* insert it into pg_statistic_ext */ + htup = heap_form_tuple(statrel->rd_att, values, nulls); + CatalogTupleInsert(statrel, htup); + heap_freetuple(htup); + + relation_close(statrel, RowExclusiveLock); + + /* + * We used to create the pg_statistic_ext_data tuple too, but it's not + * clear what value should the stxdinherit flag have (it depends on + * whether the rel is partitioned, contains data, etc.) + */ + + InvokeObjectPostCreateHook(StatisticExtRelationId, statoid, 0); + + /* + * Invalidate relcache so that others see the new statistics object. 
+ */ + CacheInvalidateRelcache(rel); + + relation_close(rel, NoLock); + + /* + * Add an AUTO dependency on each column used in the stats, so that the + * stats object goes away if any or all of them get dropped. + */ + ObjectAddressSet(myself, StatisticExtRelationId, statoid); + + /* add dependencies for plain column references */ + for (i = 0; i < nattnums; i++) + { + ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); + } + + /* + * If there are no dependencies on a column, give the statistics object an + * auto dependency on the whole table. In most cases, this will be + * redundant, but it might not be if the statistics expressions contain no + * Vars (which might seem strange but possible). This is consistent with + * what we do for indexes in index_create. + * + * XXX We intentionally don't consider the expressions before adding this + * dependency, because recordDependencyOnSingleRelExpr may not create any + * dependencies for whole-row Vars. + */ + if (!nattnums) + { + ObjectAddressSet(parentobject, RelationRelationId, relid); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); + } + + /* + * Store dependencies on anything mentioned in statistics expressions, + * just like we do for index expressions. + */ + if (stxexprs) + recordDependencyOnSingleRelExpr(&myself, + (Node *) stxexprs, + relid, + DEPENDENCY_NORMAL, + DEPENDENCY_AUTO, false); + + /* + * Also add dependencies on namespace and owner. These are required + * because the stats object might have a different namespace and/or owner + * than the underlying table(s). 
+ */ + ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); + recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL); + + recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner); + + /* + * XXX probably there should be a recordDependencyOnCurrentExtension call + * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP + * STATISTICS, which is more work than it seems worth. + */ + + /* Add any requested comment */ + if (stmt->stxcomment != NULL) + CreateComments(statoid, StatisticExtRelationId, 0, + stmt->stxcomment); + + /* Return stats object's address */ + return myself; +} + +/* + * ALTER STATISTICS + */ +ObjectAddress +AlterStatistics(AlterStatsStmt *stmt) +{ + Relation rel; + Oid stxoid; + HeapTuple oldtup; + HeapTuple newtup; + Datum repl_val[Natts_pg_statistic_ext]; + bool repl_null[Natts_pg_statistic_ext]; + bool repl_repl[Natts_pg_statistic_ext]; + ObjectAddress address; + int newtarget = stmt->stxstattarget; + + /* Limit statistics target to a sane range */ + if (newtarget < -1) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistics target %d is too low", + newtarget))); + } + else if (newtarget > 10000) + { + newtarget = 10000; + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lowering statistics target to %d", + newtarget))); + } + + /* lookup OID of the statistics object */ + stxoid = get_statistics_object_oid(stmt->defnames, stmt->missing_ok); + + /* + * If we got here and the OID is not valid, it means the statistics object + * does not exist, but the command specified IF EXISTS. So report this as + * a simple NOTICE and we're done. 
+ */ + if (!OidIsValid(stxoid)) + { + char *schemaname; + char *statname; + + Assert(stmt->missing_ok); + + DeconstructQualifiedName(stmt->defnames, &schemaname, &statname); + + if (schemaname) + ereport(NOTICE, + (errmsg("statistics object \"%s.%s\" does not exist, skipping", + schemaname, statname))); + else + ereport(NOTICE, + (errmsg("statistics object \"%s\" does not exist, skipping", + statname))); + + return InvalidObjectAddress; + } + + /* Search pg_statistic_ext */ + rel = table_open(StatisticExtRelationId, RowExclusiveLock); + + oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxoid)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for extended statistics object %u", stxoid); + + /* Must be owner of the existing statistics object */ + if (!pg_statistics_object_ownercheck(stxoid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_STATISTIC_EXT, + NameListToString(stmt->defnames)); + + /* Build new tuple. */ + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + /* replace the stxstattarget column */ + repl_repl[Anum_pg_statistic_ext_stxstattarget - 1] = true; + repl_val[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(newtarget); + + newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + + /* Update system catalog. */ + CatalogTupleUpdate(rel, &newtup->t_self, newtup); + + InvokeObjectPostAlterHook(StatisticExtRelationId, stxoid, 0); + + ObjectAddressSet(address, StatisticExtRelationId, stxoid); + + /* + * NOTE: because we only support altering the statistics target, not the + * other fields, there is no need to update dependencies. + */ + + heap_freetuple(newtup); + ReleaseSysCache(oldtup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Delete entry in pg_statistic_ext_data catalog. We don't know if the row + * exists, so don't error out. 
+ */ +void +RemoveStatisticsDataById(Oid statsOid, bool inh) +{ + Relation relation; + HeapTuple tup; + + relation = table_open(StatisticExtDataRelationId, RowExclusiveLock); + + tup = SearchSysCache2(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid), + BoolGetDatum(inh)); + + /* We don't know if the data row for inh value exists. */ + if (HeapTupleIsValid(tup)) + { + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + } + + table_close(relation, RowExclusiveLock); +} + +/* + * Guts of statistics object deletion. + */ +void +RemoveStatisticsById(Oid statsOid) +{ + Relation relation; + HeapTuple tup; + Form_pg_statistic_ext statext; + Oid relid; + + /* + * First delete the pg_statistic_ext_data tuples holding the actual + * statistical data. There might be data with/without inheritance, so + * attempt deleting both. + */ + RemoveStatisticsDataById(statsOid, true); + RemoveStatisticsDataById(statsOid, false); + + /* + * Delete the pg_statistic_ext tuple. Also send out a cache inval on the + * associated table, so that dependent plans will be rebuilt. + */ + relation = table_open(StatisticExtRelationId, RowExclusiveLock); + + tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for statistics object %u", statsOid); + + statext = (Form_pg_statistic_ext) GETSTRUCT(tup); + relid = statext->stxrelid; + + CacheInvalidateRelcacheByRelid(relid); + + CatalogTupleDelete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + +/* + * Select a nonconflicting name for a new statistics object. + * + * name1, name2, and label are used the same way as for makeObjectName(), + * except that the label can't be NULL; digits will be appended to the label + * if needed to create a name that is unique within the specified namespace. + * + * Returns a palloc'd string. 
+ * + * Note: it is theoretically possible to get a collision anyway, if someone + * else chooses the same name concurrently. This is fairly unlikely to be + * a problem in practice, especially if one is holding a share update + * exclusive lock on the relation identified by name1. However, if choosing + * multiple names within a single command, you'd better create the new object + * and do CommandCounterIncrement before choosing the next one! + */ +static char * +ChooseExtendedStatisticName(const char *name1, const char *name2, + const char *label, Oid namespaceid) +{ + int pass = 0; + char *stxname = NULL; + char modlabel[NAMEDATALEN]; + + /* try the unmodified label first */ + strlcpy(modlabel, label, sizeof(modlabel)); + + for (;;) + { + Oid existingstats; + + stxname = makeObjectName(name1, name2, modlabel); + + existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid, + PointerGetDatum(stxname), + ObjectIdGetDatum(namespaceid)); + if (!OidIsValid(existingstats)) + break; + + /* found a conflict, so try a new name component */ + pfree(stxname); + snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass); + } + + return stxname; +} + +/* + * Generate "name2" for a new statistics object given the list of column + * names for it. This will be passed to ChooseExtendedStatisticName along + * with the parent table name and a suitable label. + * + * We know that less than NAMEDATALEN characters will actually be used, + * so we can truncate the result once we've generated that many. + * + * XXX see also ChooseForeignKeyConstraintNameAddition and + * ChooseIndexNameAddition. 
+ */ +static char * +ChooseExtendedStatisticNameAddition(List *exprs) +{ + char buf[NAMEDATALEN * 2]; + int buflen = 0; + ListCell *lc; + + buf[0] = '\0'; + foreach(lc, exprs) + { + StatsElem *selem = (StatsElem *) lfirst(lc); + const char *name; + + /* It should be one of these, but just skip if it happens not to be */ + if (!IsA(selem, StatsElem)) + continue; + + name = selem->name; + + if (buflen > 0) + buf[buflen++] = '_'; /* insert _ between names */ + + /* + * We use fixed 'expr' for expressions, which have empty column names. + * For indexes this is handled in ChooseIndexColumnNames, but we have + * no such function for stats and it does not seem worth adding. If a + * better name is needed, the user can specify it explicitly. + */ + if (!name) + name = "expr"; + + /* + * At this point we have buflen <= NAMEDATALEN. name should be less + * than NAMEDATALEN already, but use strlcpy for paranoia. + */ + strlcpy(buf + buflen, name, NAMEDATALEN); + buflen += strlen(buf + buflen); + if (buflen >= NAMEDATALEN) + break; + } + return pstrdup(buf); +} + +/* + * StatisticsGetRelation: given a statistics object's OID, get the OID of + * the relation it is defined on. Uses the system cache. 
+ */ +Oid +StatisticsGetRelation(Oid statId, bool missing_ok) +{ + HeapTuple tuple; + Form_pg_statistic_ext stx; + Oid result; + + tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statId)); + if (!HeapTupleIsValid(tuple)) + { + if (missing_ok) + return InvalidOid; + elog(ERROR, "cache lookup failed for statistics object %u", statId); + } + stx = (Form_pg_statistic_ext) GETSTRUCT(tuple); + Assert(stx->oid == statId); + + result = stx->stxrelid; + ReleaseSysCache(tuple); + return result; +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c new file mode 100644 index 0000000..334717c --- /dev/null +++ b/src/backend/commands/subscriptioncmds.c @@ -0,0 +1,1966 @@ +/*------------------------------------------------------------------------- + * + * subscriptioncmds.c + * subscription catalog manipulation functions + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/subscriptioncmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/subscriptioncmds.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "pgstat.h" +#include "replication/logicallauncher.h" +#include "replication/origin.h" +#include "replication/slot.h" +#include "replication/walreceiver.h" +#include "replication/walsender.h" +#include 
"replication/worker_internal.h" +#include "storage/lmgr.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_lsn.h" +#include "utils/syscache.h" + +/* + * Options that can be specified by the user in CREATE/ALTER SUBSCRIPTION + * command. + */ +#define SUBOPT_CONNECT 0x00000001 +#define SUBOPT_ENABLED 0x00000002 +#define SUBOPT_CREATE_SLOT 0x00000004 +#define SUBOPT_SLOT_NAME 0x00000008 +#define SUBOPT_COPY_DATA 0x00000010 +#define SUBOPT_SYNCHRONOUS_COMMIT 0x00000020 +#define SUBOPT_REFRESH 0x00000040 +#define SUBOPT_BINARY 0x00000080 +#define SUBOPT_STREAMING 0x00000100 +#define SUBOPT_TWOPHASE_COMMIT 0x00000200 +#define SUBOPT_DISABLE_ON_ERR 0x00000400 +#define SUBOPT_LSN 0x00000800 + +/* check if the 'val' has 'bits' set */ +#define IsSet(val, bits) (((val) & (bits)) == (bits)) + +/* + * Structure to hold a bitmap representing the user-provided CREATE/ALTER + * SUBSCRIPTION command options and the parsed/default values of each of them. + */ +typedef struct SubOpts +{ + bits32 specified_opts; + char *slot_name; + char *synchronous_commit; + bool connect; + bool enabled; + bool create_slot; + bool copy_data; + bool refresh; + bool binary; + bool streaming; + bool twophase; + bool disableonerr; + XLogRecPtr lsn; +} SubOpts; + +static List *fetch_table_list(WalReceiverConn *wrconn, List *publications); +static void check_duplicates_in_publist(List *publist, Datum *datums); +static List *merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname); +static void ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err); + + +/* + * Common option parsing function for CREATE and ALTER SUBSCRIPTION commands. + * + * Since not all options can be specified in both commands, this function + * will report an error if mutually exclusive options are specified. 
 */
static void
parse_subscription_options(ParseState *pstate, List *stmt_options,
						   bits32 supported_opts, SubOpts *opts)
{
	ListCell   *lc;

	/* Start out with cleared opts. */
	memset(opts, 0, sizeof(SubOpts));

	/* caller must expect some option */
	Assert(supported_opts != 0);

	/* If connect option is supported, these others also need to be. */
	Assert(!IsSet(supported_opts, SUBOPT_CONNECT) ||
		   IsSet(supported_opts, SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
				 SUBOPT_COPY_DATA));

	/*
	 * Set default values for the boolean supported options.  These are only
	 * the defaults; the loop below overwrites them with any value the user
	 * actually specified, and the post-processing at the bottom may change
	 * them again for the connect = false case.
	 */
	if (IsSet(supported_opts, SUBOPT_CONNECT))
		opts->connect = true;
	if (IsSet(supported_opts, SUBOPT_ENABLED))
		opts->enabled = true;
	if (IsSet(supported_opts, SUBOPT_CREATE_SLOT))
		opts->create_slot = true;
	if (IsSet(supported_opts, SUBOPT_COPY_DATA))
		opts->copy_data = true;
	if (IsSet(supported_opts, SUBOPT_REFRESH))
		opts->refresh = true;
	if (IsSet(supported_opts, SUBOPT_BINARY))
		opts->binary = false;
	if (IsSet(supported_opts, SUBOPT_STREAMING))
		opts->streaming = false;
	if (IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
		opts->twophase = false;
	if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR))
		opts->disableonerr = false;

	/*
	 * Parse options.  Each recognized option records itself in
	 * opts->specified_opts so that duplicates can be detected and so that
	 * callers can distinguish "user said X" from "X defaulted".
	 */
	foreach(lc, stmt_options)
	{
		DefElem    *defel = (DefElem *) lfirst(lc);

		if (IsSet(supported_opts, SUBOPT_CONNECT) &&
			strcmp(defel->defname, "connect") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CONNECT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_CONNECT;
			opts->connect = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_ENABLED) &&
				 strcmp(defel->defname, "enabled") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_ENABLED;
			opts->enabled = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_CREATE_SLOT) &&
				 strcmp(defel->defname, "create_slot") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_CREATE_SLOT;
			opts->create_slot = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_SLOT_NAME) &&
				 strcmp(defel->defname, "slot_name") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_SLOT_NAME;
			opts->slot_name = defGetString(defel);

			/* Setting slot_name = NONE is treated as no slot name. */
			if (strcmp(opts->slot_name, "none") == 0)
				opts->slot_name = NULL;
			else
				ReplicationSlotValidateName(opts->slot_name, ERROR);
		}
		else if (IsSet(supported_opts, SUBOPT_COPY_DATA) &&
				 strcmp(defel->defname, "copy_data") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_COPY_DATA;
			opts->copy_data = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_SYNCHRONOUS_COMMIT) &&
				 strcmp(defel->defname, "synchronous_commit") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_SYNCHRONOUS_COMMIT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_SYNCHRONOUS_COMMIT;
			opts->synchronous_commit = defGetString(defel);

			/* Test if the given value is valid for synchronous_commit GUC. */
			(void) set_config_option("synchronous_commit", opts->synchronous_commit,
									 PGC_BACKEND, PGC_S_TEST, GUC_ACTION_SET,
									 false, 0, false);
		}
		else if (IsSet(supported_opts, SUBOPT_REFRESH) &&
				 strcmp(defel->defname, "refresh") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_REFRESH))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_REFRESH;
			opts->refresh = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_BINARY) &&
				 strcmp(defel->defname, "binary") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_BINARY))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_BINARY;
			opts->binary = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_STREAMING) &&
				 strcmp(defel->defname, "streaming") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_STREAMING))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_STREAMING;
			opts->streaming = defGetBoolean(defel);
		}
		else if (strcmp(defel->defname, "two_phase") == 0)
		{
			/*
			 * Do not allow toggling of two_phase option. Doing so could cause
			 * missing of transactions and lead to an inconsistent replica.
			 * See comments atop worker.c
			 *
			 * Note: Unsupported twophase indicates that this call originated
			 * from AlterSubscription.
			 *
			 * Unlike the other branches, this one is entered even when the
			 * option is not in supported_opts, so that an unsupported
			 * two_phase reports "unrecognized parameter" here rather than
			 * falling through to the generic else below.
			 */
			if (!IsSet(supported_opts, SUBOPT_TWOPHASE_COMMIT))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));

			if (IsSet(opts->specified_opts, SUBOPT_TWOPHASE_COMMIT))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_TWOPHASE_COMMIT;
			opts->twophase = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_DISABLE_ON_ERR) &&
				 strcmp(defel->defname, "disable_on_error") == 0)
		{
			if (IsSet(opts->specified_opts, SUBOPT_DISABLE_ON_ERR))
				errorConflictingDefElem(defel, pstate);

			opts->specified_opts |= SUBOPT_DISABLE_ON_ERR;
			opts->disableonerr = defGetBoolean(defel);
		}
		else if (IsSet(supported_opts, SUBOPT_LSN) &&
				 strcmp(defel->defname, "lsn") == 0)
		{
			char	   *lsn_str = defGetString(defel);
			XLogRecPtr	lsn;

			if (IsSet(opts->specified_opts, SUBOPT_LSN))
				errorConflictingDefElem(defel, pstate);

			/* Setting lsn = NONE is treated as resetting LSN */
			if (strcmp(lsn_str, "none") == 0)
				lsn = InvalidXLogRecPtr;
			else
			{
				/* Parse the argument as LSN */
				lsn = DatumGetLSN(DirectFunctionCall1(pg_lsn_in,
													  CStringGetDatum(lsn_str)));

				if (XLogRecPtrIsInvalid(lsn))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
							 errmsg("invalid WAL location (LSN): %s", lsn_str)));
			}

			opts->specified_opts |= SUBOPT_LSN;
			opts->lsn = lsn;
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unrecognized subscription parameter: \"%s\"", defel->defname)));
	}

	/*
	 * We've been explicitly asked to not connect, that requires some
	 * additional processing.
	 */
	if (!opts->connect && IsSet(supported_opts, SUBOPT_CONNECT))
	{
		/* Check for incompatible options from the user. */
		if (opts->enabled &&
			IsSet(opts->specified_opts, SUBOPT_ENABLED))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
			/*- translator: both %s are strings of the form "option = value" */
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "enabled = true")));

		if (opts->create_slot &&
			IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "create_slot = true")));

		if (opts->copy_data &&
			IsSet(opts->specified_opts, SUBOPT_COPY_DATA))
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("%s and %s are mutually exclusive options",
							"connect = false", "copy_data = true")));

		/* Change the defaults of other options. */
		opts->enabled = false;
		opts->create_slot = false;
		opts->copy_data = false;
	}

	/*
	 * Do additional checking for disallowed combination when slot_name = NONE
	 * was used.
	 *
	 * The error message differs depending on whether the conflicting option
	 * was given explicitly (mutually exclusive) or merely defaulted (the user
	 * must additionally set it to false).
	 */
	if (!opts->slot_name &&
		IsSet(opts->specified_opts, SUBOPT_SLOT_NAME))
	{
		if (opts->enabled)
		{
			if (IsSet(opts->specified_opts, SUBOPT_ENABLED))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("%s and %s are mutually exclusive options",
								"slot_name = NONE", "enabled = true")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("subscription with %s must also set %s",
								"slot_name = NONE", "enabled = false")));
		}

		if (opts->create_slot)
		{
			if (IsSet(opts->specified_opts, SUBOPT_CREATE_SLOT))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("%s and %s are mutually exclusive options",
								"slot_name = NONE", "create_slot = true")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
				/*- translator: both %s are strings of the form "option = value" */
						 errmsg("subscription with %s must also set %s",
								"slot_name = NONE", "create_slot = false")));
		}
	}
}
= value" */ + errmsg("subscription with %s must also set %s", + "slot_name = NONE", "create_slot = false"))); + } + } +} + +/* + * Add publication names from the list to a string. + */ +static void +get_publications_str(List *publications, StringInfo dest, bool quote_literal) +{ + ListCell *lc; + bool first = true; + + Assert(list_length(publications) > 0); + + foreach(lc, publications) + { + char *pubname = strVal(lfirst(lc)); + + if (first) + first = false; + else + appendStringInfoString(dest, ", "); + + if (quote_literal) + appendStringInfoString(dest, quote_literal_cstr(pubname)); + else + { + appendStringInfoChar(dest, '"'); + appendStringInfoString(dest, pubname); + appendStringInfoChar(dest, '"'); + } + } +} + +/* + * Check that the specified publications are present on the publisher. + */ +static void +check_publications(WalReceiverConn *wrconn, List *publications) +{ + WalRcvExecResult *res; + StringInfo cmd; + TupleTableSlot *slot; + List *publicationsCopy = NIL; + Oid tableRow[1] = {TEXTOID}; + + cmd = makeStringInfo(); + appendStringInfoString(cmd, "SELECT t.pubname FROM\n" + " pg_catalog.pg_publication t WHERE\n" + " t.pubname IN ("); + get_publications_str(publications, cmd, true); + appendStringInfoChar(cmd, ')'); + + res = walrcv_exec(wrconn, cmd->data, 1, tableRow); + pfree(cmd->data); + pfree(cmd); + + if (res->status != WALRCV_OK_TUPLES) + ereport(ERROR, + errmsg("could not receive list of publications from the publisher: %s", + res->err)); + + publicationsCopy = list_copy(publications); + + /* Process publication(s). */ + slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple); + while (tuplestore_gettupleslot(res->tuplestore, true, false, slot)) + { + char *pubname; + bool isnull; + + pubname = TextDatumGetCString(slot_getattr(slot, 1, &isnull)); + Assert(!isnull); + + /* Delete the publication present in publisher from the list. 
*/ + publicationsCopy = list_delete(publicationsCopy, makeString(pubname)); + ExecClearTuple(slot); + } + + ExecDropSingleTupleTableSlot(slot); + + walrcv_clear_result(res); + + if (list_length(publicationsCopy)) + { + /* Prepare the list of non-existent publication(s) for error message. */ + StringInfo pubnames = makeStringInfo(); + + get_publications_str(publicationsCopy, pubnames, false); + ereport(WARNING, + errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg_plural("publication %s does not exist on the publisher", + "publications %s do not exist on the publisher", + list_length(publicationsCopy), + pubnames->data)); + } +} + +/* + * Auxiliary function to build a text array out of a list of String nodes. + */ +static Datum +publicationListToArray(List *publist) +{ + ArrayType *arr; + Datum *datums; + MemoryContext memcxt; + MemoryContext oldcxt; + + /* Create memory context for temporary allocations. */ + memcxt = AllocSetContextCreate(CurrentMemoryContext, + "publicationListToArray to array", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(memcxt); + + datums = (Datum *) palloc(sizeof(Datum) * list_length(publist)); + + check_duplicates_in_publist(publist, datums); + + MemoryContextSwitchTo(oldcxt); + + arr = construct_array(datums, list_length(publist), + TEXTOID, -1, false, TYPALIGN_INT); + + MemoryContextDelete(memcxt); + + return PointerGetDatum(arr); +} + +/* + * Create new subscription. + */ +ObjectAddress +CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, + bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + Oid subid; + bool nulls[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + Oid owner = GetUserId(); + HeapTuple tup; + char *conninfo; + char originname[NAMEDATALEN]; + List *publications; + bits32 supported_opts; + SubOpts opts = {0}; + + /* + * Parse and check options. + * + * Connection and publication should not be specified here. 
/*
 * Create new subscription.
 *
 * Parses the WITH (...) options, inserts the pg_subscription row, creates
 * the subscription's replication origin, and (unless connect = false)
 * contacts the publisher to record per-table sync state and optionally
 * create the remote replication slot.  Returns the ObjectAddress of the
 * new subscription.  Requires superuser.
 */
ObjectAddress
CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
				   bool isTopLevel)
{
	Relation	rel;
	ObjectAddress myself;
	Oid			subid;
	bool		nulls[Natts_pg_subscription];
	Datum		values[Natts_pg_subscription];
	Oid			owner = GetUserId();
	HeapTuple	tup;
	char	   *conninfo;
	char		originname[NAMEDATALEN];
	List	   *publications;
	bits32		supported_opts;
	SubOpts		opts = {0};

	/*
	 * Parse and check options.
	 *
	 * Connection and publication should not be specified here.
	 */
	supported_opts = (SUBOPT_CONNECT | SUBOPT_ENABLED | SUBOPT_CREATE_SLOT |
					  SUBOPT_SLOT_NAME | SUBOPT_COPY_DATA |
					  SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY |
					  SUBOPT_STREAMING | SUBOPT_TWOPHASE_COMMIT |
					  SUBOPT_DISABLE_ON_ERR);
	parse_subscription_options(pstate, stmt->options, supported_opts, &opts);

	/*
	 * Since creating a replication slot is not transactional, rolling back
	 * the transaction leaves the created replication slot.  So we cannot run
	 * CREATE SUBSCRIPTION inside a transaction block if creating a
	 * replication slot.
	 */
	if (opts.create_slot)
		PreventInTransactionBlock(isTopLevel, "CREATE SUBSCRIPTION ... WITH (create_slot = true)");

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to create subscriptions")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for subscription names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(stmt->subname, "regress_", 8) != 0)
		elog(WARNING, "subscriptions created by regression test cases should have names starting with \"regress_\"");
#endif

	rel = table_open(SubscriptionRelationId, RowExclusiveLock);

	/* Check if name is used */
	subid = GetSysCacheOid2(SUBSCRIPTIONNAME, Anum_pg_subscription_oid,
							MyDatabaseId, CStringGetDatum(stmt->subname));
	if (OidIsValid(subid))
	{
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("subscription \"%s\" already exists",
						stmt->subname)));
	}

	/* If no slot name was given (and not set to NONE), default to subname. */
	if (!IsSet(opts.specified_opts, SUBOPT_SLOT_NAME) &&
		opts.slot_name == NULL)
		opts.slot_name = stmt->subname;

	/* The default for synchronous_commit of subscriptions is off. */
	if (opts.synchronous_commit == NULL)
		opts.synchronous_commit = "off";

	conninfo = stmt->conninfo;
	publications = stmt->publication;

	/* Load the library providing us libpq calls. */
	load_file("libpqwalreceiver", false);

	/* Check the connection info string. */
	walrcv_check_conninfo(conninfo);

	/* Everything ok, form a new tuple. */
	memset(values, 0, sizeof(values));
	memset(nulls, false, sizeof(nulls));

	subid = GetNewOidWithIndex(rel, SubscriptionObjectIndexId,
							   Anum_pg_subscription_oid);
	values[Anum_pg_subscription_oid - 1] = ObjectIdGetDatum(subid);
	values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
	values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
	values[Anum_pg_subscription_subname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
	values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(owner);
	values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(opts.enabled);
	values[Anum_pg_subscription_subbinary - 1] = BoolGetDatum(opts.binary);
	values[Anum_pg_subscription_substream - 1] = BoolGetDatum(opts.streaming);
	/* two_phase starts PENDING, not ENABLED; see slot creation below. */
	values[Anum_pg_subscription_subtwophasestate - 1] =
		CharGetDatum(opts.twophase ?
					 LOGICALREP_TWOPHASE_STATE_PENDING :
					 LOGICALREP_TWOPHASE_STATE_DISABLED);
	values[Anum_pg_subscription_subdisableonerr - 1] = BoolGetDatum(opts.disableonerr);
	values[Anum_pg_subscription_subconninfo - 1] =
		CStringGetTextDatum(conninfo);
	if (opts.slot_name)
		values[Anum_pg_subscription_subslotname - 1] =
			DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name));
	else
		nulls[Anum_pg_subscription_subslotname - 1] = true;
	values[Anum_pg_subscription_subsynccommit - 1] =
		CStringGetTextDatum(opts.synchronous_commit);
	values[Anum_pg_subscription_subpublications - 1] =
		publicationListToArray(publications);

	tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

	/* Insert tuple into catalog. */
	CatalogTupleInsert(rel, tup);
	heap_freetuple(tup);

	recordDependencyOnOwner(SubscriptionRelationId, subid, owner);

	/* Create the replication origin "pg_<subid>" for this subscription. */
	snprintf(originname, sizeof(originname), "pg_%u", subid);
	replorigin_create(originname);

	/*
	 * Connect to remote side to execute requested commands and fetch table
	 * info.
	 */
	if (opts.connect)
	{
		char	   *err;
		WalReceiverConn *wrconn;
		List	   *tables;
		ListCell   *lc;
		char		table_state;

		/* Try to connect to the publisher. */
		wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
		if (!wrconn)
			ereport(ERROR,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("could not connect to the publisher: %s", err)));

		/* Ensure the connection is closed even on error. */
		PG_TRY();
		{
			check_publications(wrconn, publications);

			/*
			 * Set sync state based on if we were asked to do data copy or
			 * not.
			 */
			table_state = opts.copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY;

			/*
			 * Get the table list from publisher and build local table status
			 * info.
			 */
			tables = fetch_table_list(wrconn, publications);
			foreach(lc, tables)
			{
				RangeVar   *rv = (RangeVar *) lfirst(lc);
				Oid			relid;

				relid = RangeVarGetRelid(rv, AccessShareLock, false);

				/* Check for supported relkind. */
				CheckSubscriptionRelkind(get_rel_relkind(relid),
										 rv->schemaname, rv->relname);

				AddSubscriptionRelState(subid, relid, table_state,
										InvalidXLogRecPtr);
			}

			/*
			 * If requested, create permanent slot for the subscription. We
			 * won't use the initial snapshot for anything, so no need to
			 * export it.
			 */
			if (opts.create_slot)
			{
				bool		twophase_enabled = false;

				Assert(opts.slot_name);

				/*
				 * Even if two_phase is set, don't create the slot with
				 * two-phase enabled. Will enable it once all the tables are
				 * synced and ready. This avoids race-conditions like prepared
				 * transactions being skipped due to changes not being applied
				 * due to checks in should_apply_changes_for_rel() when
				 * tablesync for the corresponding tables are in progress. See
				 * comments atop worker.c.
				 *
				 * Note that if tables were specified but copy_data is false
				 * then it is safe to enable two_phase up-front because those
				 * tables are already initially in READY state. When the
				 * subscription has no tables, we leave the twophase state as
				 * PENDING, to allow ALTER SUBSCRIPTION ... REFRESH
				 * PUBLICATION to work.
				 */
				if (opts.twophase && !opts.copy_data && tables != NIL)
					twophase_enabled = true;

				walrcv_create_slot(wrconn, opts.slot_name, false, twophase_enabled,
								   CRS_NOEXPORT_SNAPSHOT, NULL);

				if (twophase_enabled)
					UpdateTwoPhaseState(subid, LOGICALREP_TWOPHASE_STATE_ENABLED);

				ereport(NOTICE,
						(errmsg("created replication slot \"%s\" on publisher",
								opts.slot_name)));
			}
		}
		PG_FINALLY();
		{
			walrcv_disconnect(wrconn);
		}
		PG_END_TRY();
	}
	else
		ereport(WARNING,
		/* translator: %s is an SQL ALTER statement */
				(errmsg("tables were not subscribed, you will have to run %s to subscribe the tables",
						"ALTER SUBSCRIPTION ... REFRESH PUBLICATION")));

	table_close(rel, RowExclusiveLock);

	pgstat_create_subscription(subid);

	if (opts.enabled)
		ApplyLauncherWakeupAtCommit();

	ObjectAddressSet(myself, SubscriptionRelationId, subid);

	InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);

	return myself;
}
/*
 * Refresh the subscription's table membership against the publisher.
 *
 * Fetches the current table list for sub->publications from the publisher,
 * adds local pg_subscription_rel state for newly published tables (INIT or
 * READY depending on copy_data), and removes state, stops workers, and
 * drops tablesync origins/slots for tables no longer published.  If
 * validate_publications is non-NIL those publications are first checked for
 * existence on the publisher.
 */
static void
AlterSubscription_refresh(Subscription *sub, bool copy_data,
						  List *validate_publications)
{
	char	   *err;
	List	   *pubrel_names;
	List	   *subrel_states;
	Oid		   *subrel_local_oids;
	Oid		   *pubrel_local_oids;
	ListCell   *lc;
	int			off;
	int			remove_rel_len;
	Relation	rel = NULL;
	typedef struct SubRemoveRels
	{
		Oid			relid;
		char		state;
	} SubRemoveRels;
	SubRemoveRels *sub_remove_rels;
	WalReceiverConn *wrconn;

	/* Load the library providing us libpq calls. */
	load_file("libpqwalreceiver", false);

	/* Try to connect to the publisher. */
	wrconn = walrcv_connect(sub->conninfo, true, sub->name, &err);
	if (!wrconn)
		ereport(ERROR,
				(errcode(ERRCODE_CONNECTION_FAILURE),
				 errmsg("could not connect to the publisher: %s", err)));

	/* Ensure the connection is closed even on error. */
	PG_TRY();
	{
		if (validate_publications)
			check_publications(wrconn, validate_publications);

		/* Get the table list from publisher. */
		pubrel_names = fetch_table_list(wrconn, sub->publications);

		/* Get local table list. */
		subrel_states = GetSubscriptionRelations(sub->oid);

		/*
		 * Build qsorted array of local table oids for faster lookup. This can
		 * potentially contain all tables in the database so speed of lookup
		 * is important.
		 */
		subrel_local_oids = palloc(list_length(subrel_states) * sizeof(Oid));
		off = 0;
		foreach(lc, subrel_states)
		{
			SubscriptionRelState *relstate = (SubscriptionRelState *) lfirst(lc);

			subrel_local_oids[off++] = relstate->relid;
		}
		qsort(subrel_local_oids, list_length(subrel_states),
			  sizeof(Oid), oid_cmp);

		/*
		 * Rels that we want to remove from subscription and drop any slots
		 * and origins corresponding to them.
		 */
		sub_remove_rels = palloc(list_length(subrel_states) * sizeof(SubRemoveRels));

		/*
		 * Walk over the remote tables and try to match them to locally known
		 * tables. If the table is not known locally create a new state for
		 * it.
		 *
		 * Also builds array of local oids of remote tables for the next step.
		 */
		off = 0;
		pubrel_local_oids = palloc(list_length(pubrel_names) * sizeof(Oid));

		foreach(lc, pubrel_names)
		{
			RangeVar   *rv = (RangeVar *) lfirst(lc);
			Oid			relid;

			relid = RangeVarGetRelid(rv, AccessShareLock, false);

			/* Check for supported relkind. */
			CheckSubscriptionRelkind(get_rel_relkind(relid),
									 rv->schemaname, rv->relname);

			pubrel_local_oids[off++] = relid;

			if (!bsearch(&relid, subrel_local_oids,
						 list_length(subrel_states), sizeof(Oid), oid_cmp))
			{
				AddSubscriptionRelState(sub->oid, relid,
										copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY,
										InvalidXLogRecPtr);
				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" added to subscription \"%s\"",
										 rv->schemaname, rv->relname, sub->name)));
			}
		}

		/*
		 * Next remove state for tables we should not care about anymore using
		 * the data we collected above
		 */
		qsort(pubrel_local_oids, list_length(pubrel_names),
			  sizeof(Oid), oid_cmp);

		remove_rel_len = 0;
		for (off = 0; off < list_length(subrel_states); off++)
		{
			Oid			relid = subrel_local_oids[off];

			if (!bsearch(&relid, pubrel_local_oids,
						 list_length(pubrel_names), sizeof(Oid), oid_cmp))
			{
				char		state;
				XLogRecPtr	statelsn;

				/*
				 * Lock pg_subscription_rel with AccessExclusiveLock to
				 * prevent any race conditions with the apply worker
				 * re-launching workers at the same time this code is trying
				 * to remove those tables.
				 *
				 * Even if new worker for this particular rel is restarted it
				 * won't be able to make any progress as we hold exclusive
				 * lock on subscription_rel till the transaction end. It will
				 * simply exit as there is no corresponding rel entry.
				 *
				 * This locking also ensures that the state of rels won't
				 * change till we are done with this refresh operation.
				 */
				if (!rel)
					rel = table_open(SubscriptionRelRelationId, AccessExclusiveLock);

				/* Last known rel state. */
				state = GetSubscriptionRelState(sub->oid, relid, &statelsn);

				sub_remove_rels[remove_rel_len].relid = relid;
				sub_remove_rels[remove_rel_len++].state = state;

				RemoveSubscriptionRel(sub->oid, relid);

				logicalrep_worker_stop(sub->oid, relid);

				/*
				 * For READY state, we would have already dropped the
				 * tablesync origin.
				 */
				if (state != SUBREL_STATE_READY)
				{
					char		originname[NAMEDATALEN];

					/*
					 * Drop the tablesync's origin tracking if exists.
					 *
					 * It is possible that the origin is not yet created for
					 * tablesync worker, this can happen for the states before
					 * SUBREL_STATE_FINISHEDCOPY. The apply worker can also
					 * concurrently try to drop the origin and by this time
					 * the origin might be already removed. For these reasons,
					 * passing missing_ok = true.
					 */
					ReplicationOriginNameForTablesync(sub->oid, relid, originname,
													  sizeof(originname));
					replorigin_drop_by_name(originname, true, false);
				}

				ereport(DEBUG1,
						(errmsg_internal("table \"%s.%s\" removed from subscription \"%s\"",
										 get_namespace_name(get_rel_namespace(relid)),
										 get_rel_name(relid),
										 sub->name)));
			}
		}

		/*
		 * Drop the tablesync slots associated with removed tables. This has
		 * to be at the end because otherwise if there is an error while doing
		 * the database operations we won't be able to rollback dropped slots.
		 */
		for (off = 0; off < remove_rel_len; off++)
		{
			if (sub_remove_rels[off].state != SUBREL_STATE_READY &&
				sub_remove_rels[off].state != SUBREL_STATE_SYNCDONE)
			{
				char		syncslotname[NAMEDATALEN] = {0};

				/*
				 * For READY/SYNCDONE states we know the tablesync slot has
				 * already been dropped by the tablesync worker.
				 *
				 * For other states, there is no certainty, maybe the slot
				 * does not exist yet. Also, if we fail after removing some of
				 * the slots, next time, it will again try to drop already
				 * dropped slots and fail. For these reasons, we allow
				 * missing_ok = true for the drop.
				 */
				ReplicationSlotNameForTablesync(sub->oid, sub_remove_rels[off].relid,
												syncslotname, sizeof(syncslotname));
				ReplicationSlotDropAtPubNode(wrconn, syncslotname, true);
			}
		}
	}
	PG_FINALLY();
	{
		walrcv_disconnect(wrconn);
	}
	PG_END_TRY();

	if (rel)
		table_close(rel, NoLock);
}

/*
 * Alter the existing subscription.
+ */ +ObjectAddress +AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, + bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + bool nulls[Natts_pg_subscription]; + bool replaces[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + HeapTuple tup; + Oid subid; + bool update_tuple = false; + Subscription *sub; + Form_pg_subscription form; + bits32 supported_opts; + SubOpts opts = {0}; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + /* Fetch the existing tuple. */ + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + /* must be owner */ + if (!pg_subscription_ownercheck(subid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + stmt->subname); + + sub = GetSubscription(subid, false); + + /* Lock the subscription so nobody else can do anything with it. */ + LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock); + + /* Form a new tuple. */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + switch (stmt->kind) + { + case ALTER_SUBSCRIPTION_OPTIONS: + { + supported_opts = (SUBOPT_SLOT_NAME | + SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY | + SUBOPT_STREAMING | SUBOPT_DISABLE_ON_ERR); + + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + if (IsSet(opts.specified_opts, SUBOPT_SLOT_NAME)) + { + /* + * The subscription must be disabled to allow slot_name as + * 'none', otherwise, the apply worker will repeatedly try + * to stream the data using that slot_name which neither + * exists on the publisher nor the user will be allowed to + * create it. 
+ */ + if (sub->enabled && !opts.slot_name) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot set %s for enabled subscription", + "slot_name = NONE"))); + + if (opts.slot_name) + values[Anum_pg_subscription_subslotname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(opts.slot_name)); + else + nulls[Anum_pg_subscription_subslotname - 1] = true; + replaces[Anum_pg_subscription_subslotname - 1] = true; + } + + if (opts.synchronous_commit) + { + values[Anum_pg_subscription_subsynccommit - 1] = + CStringGetTextDatum(opts.synchronous_commit); + replaces[Anum_pg_subscription_subsynccommit - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_BINARY)) + { + values[Anum_pg_subscription_subbinary - 1] = + BoolGetDatum(opts.binary); + replaces[Anum_pg_subscription_subbinary - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_STREAMING)) + { + values[Anum_pg_subscription_substream - 1] = + BoolGetDatum(opts.streaming); + replaces[Anum_pg_subscription_substream - 1] = true; + } + + if (IsSet(opts.specified_opts, SUBOPT_DISABLE_ON_ERR)) + { + values[Anum_pg_subscription_subdisableonerr - 1] + = BoolGetDatum(opts.disableonerr); + replaces[Anum_pg_subscription_subdisableonerr - 1] + = true; + } + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_ENABLED: + { + parse_subscription_options(pstate, stmt->options, + SUBOPT_ENABLED, &opts); + Assert(IsSet(opts.specified_opts, SUBOPT_ENABLED)); + + if (!sub->slotname && opts.enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot enable subscription that does not have a slot name"))); + + values[Anum_pg_subscription_subenabled - 1] = + BoolGetDatum(opts.enabled); + replaces[Anum_pg_subscription_subenabled - 1] = true; + + if (opts.enabled) + ApplyLauncherWakeupAtCommit(); + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_CONNECTION: + /* Load the library providing us libpq calls. 
*/ + load_file("libpqwalreceiver", false); + /* Check the connection info string. */ + walrcv_check_conninfo(stmt->conninfo); + + values[Anum_pg_subscription_subconninfo - 1] = + CStringGetTextDatum(stmt->conninfo); + replaces[Anum_pg_subscription_subconninfo - 1] = true; + update_tuple = true; + break; + + case ALTER_SUBSCRIPTION_SET_PUBLICATION: + { + supported_opts = SUBOPT_COPY_DATA | SUBOPT_REFRESH; + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(stmt->publication); + replaces[Anum_pg_subscription_subpublications - 1] = true; + + update_tuple = true; + + /* Refresh if user asked us to. */ + if (opts.refresh) + { + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"), + errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION ... WITH (refresh = false)."))); + + /* + * See ALTER_SUBSCRIPTION_REFRESH for details why this is + * not allowed. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"), + errhint("Use ALTER SUBSCRIPTION ... SET PUBLICATION with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION."))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh"); + + /* Make sure refresh sees the new list of publications. 
*/ + sub->publications = stmt->publication; + + AlterSubscription_refresh(sub, opts.copy_data, + stmt->publication); + } + + break; + } + + case ALTER_SUBSCRIPTION_ADD_PUBLICATION: + case ALTER_SUBSCRIPTION_DROP_PUBLICATION: + { + List *publist; + bool isadd = stmt->kind == ALTER_SUBSCRIPTION_ADD_PUBLICATION; + + supported_opts = SUBOPT_REFRESH | SUBOPT_COPY_DATA; + parse_subscription_options(pstate, stmt->options, + supported_opts, &opts); + + publist = merge_publications(sub->publications, stmt->publication, isadd, stmt->subname); + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(publist); + replaces[Anum_pg_subscription_subpublications - 1] = true; + + update_tuple = true; + + /* Refresh if user asked us to. */ + if (opts.refresh) + { + /* We only need to validate user specified publications. */ + List *validate_publications = (isadd) ? stmt->publication : NULL; + + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh is not allowed for disabled subscriptions"), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s instead.", + isadd ? + "ALTER SUBSCRIPTION ... ADD PUBLICATION ... WITH (refresh = false)" : + "ALTER SUBSCRIPTION ... DROP PUBLICATION ... WITH (refresh = false)"))); + + /* + * See ALTER_SUBSCRIPTION_REFRESH for details why this is + * not allowed. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION with refresh and copy_data is not allowed when two_phase is enabled"), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s with refresh = false, or with copy_data = false, or use DROP/CREATE SUBSCRIPTION.", + isadd ? + "ALTER SUBSCRIPTION ... ADD PUBLICATION" : + "ALTER SUBSCRIPTION ... 
DROP PUBLICATION"))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION with refresh"); + + /* Refresh the new list of publications. */ + sub->publications = publist; + + AlterSubscription_refresh(sub, opts.copy_data, + validate_publications); + } + + break; + } + + case ALTER_SUBSCRIPTION_REFRESH: + { + if (!sub->enabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ALTER SUBSCRIPTION ... REFRESH is not allowed for disabled subscriptions"))); + + parse_subscription_options(pstate, stmt->options, + SUBOPT_COPY_DATA, &opts); + + /* + * The subscription option "two_phase" requires that + * replication has passed the initial table synchronization + * phase before the two_phase becomes properly enabled. + * + * But, having reached this two-phase commit "enabled" state + * we must not allow any subsequent table initialization to + * occur. So the ALTER SUBSCRIPTION ... REFRESH is disallowed + * when the user had requested two_phase = on mode. + * + * The exception to this restriction is when copy_data = + * false, because when copy_data is false the tablesync will + * start already in READY state and will exit directly without + * doing anything. + * + * For more details see comments atop worker.c. + */ + if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && opts.copy_data) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ALTER SUBSCRIPTION ... REFRESH with copy_data is not allowed when two_phase is enabled"), + errhint("Use ALTER SUBSCRIPTION ... REFRESH with copy_data = false, or use DROP/CREATE SUBSCRIPTION."))); + + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION ... REFRESH"); + + AlterSubscription_refresh(sub, opts.copy_data, NULL); + + break; + } + + case ALTER_SUBSCRIPTION_SKIP: + { + parse_subscription_options(pstate, stmt->options, SUBOPT_LSN, &opts); + + /* ALTER SUBSCRIPTION ... 
SKIP supports only LSN option */ + Assert(IsSet(opts.specified_opts, SUBOPT_LSN)); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to skip transaction"))); + + /* + * If the user sets subskiplsn, we do a sanity check to make + * sure that the specified LSN is a probable value. + */ + if (!XLogRecPtrIsInvalid(opts.lsn)) + { + RepOriginId originid; + char originname[NAMEDATALEN]; + XLogRecPtr remote_lsn; + + snprintf(originname, sizeof(originname), "pg_%u", subid); + originid = replorigin_by_name(originname, false); + remote_lsn = replorigin_get_progress(originid, false); + + /* Check the given LSN is at least a future LSN */ + if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X", + LSN_FORMAT_ARGS(opts.lsn), + LSN_FORMAT_ARGS(remote_lsn)))); + } + + values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(opts.lsn); + replaces[Anum_pg_subscription_subskiplsn - 1] = true; + + update_tuple = true; + break; + } + + default: + elog(ERROR, "unrecognized ALTER SUBSCRIPTION kind %d", + stmt->kind); + } + + /* Update the catalog if needed. 
*/ + if (update_tuple) + { + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + heap_freetuple(tup); + } + + table_close(rel, RowExclusiveLock); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + + InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0); + + return myself; +} + +/* + * Drop a subscription + */ +void +DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) +{ + Relation rel; + ObjectAddress myself; + HeapTuple tup; + Oid subid; + Datum datum; + bool isnull; + char *subname; + char *conninfo; + char *slotname; + List *subworkers; + ListCell *lc; + char originname[NAMEDATALEN]; + char *err = NULL; + WalReceiverConn *wrconn; + Form_pg_subscription form; + List *rstates; + + /* + * Lock pg_subscription with AccessExclusiveLock to ensure that the + * launcher doesn't restart new worker during dropping the subscription + */ + rel = table_open(SubscriptionRelationId, AccessExclusiveLock); + + tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + { + table_close(rel, NoLock); + + if (!stmt->missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + else + ereport(NOTICE, + (errmsg("subscription \"%s\" does not exist, skipping", + stmt->subname))); + + return; + } + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + /* must be owner */ + if (!pg_subscription_ownercheck(subid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + stmt->subname); + + /* DROP hook for the subscription being removed */ + InvokeObjectDropHook(SubscriptionRelationId, subid, 0); + + /* + * Lock the subscription so nobody else can do anything with it (including + * the replication workers). 
+ */ + LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock); + + /* Get subname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subname, &isnull); + Assert(!isnull); + subname = pstrdup(NameStr(*DatumGetName(datum))); + + /* Get conninfo */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subconninfo, &isnull); + Assert(!isnull); + conninfo = TextDatumGetCString(datum); + + /* Get slotname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subslotname, &isnull); + if (!isnull) + slotname = pstrdup(NameStr(*DatumGetName(datum))); + else + slotname = NULL; + + /* + * Since dropping a replication slot is not transactional, the replication + * slot stays dropped even if the transaction rolls back. So we cannot + * run DROP SUBSCRIPTION inside a transaction block if dropping the + * replication slot. Also, in this case, we report a message for dropping + * the subscription to the cumulative stats system. + * + * XXX The command name should really be something like "DROP SUBSCRIPTION + * of a subscription that is associated with a replication slot", but we + * don't have the proper facilities for that. + */ + if (slotname) + PreventInTransactionBlock(isTopLevel, "DROP SUBSCRIPTION"); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + EventTriggerSQLDropAddObject(&myself, true, true); + + /* Remove the tuple from catalog. */ + CatalogTupleDelete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + /* + * Stop all the subscription workers immediately. + * + * This is necessary if we are dropping the replication slot, so that the + * slot becomes accessible. + * + * It is also necessary if the subscription is disabled and was disabled + * in the same transaction. Then the workers haven't seen the disabling + * yet and will still be running, leading to hangs later when we want to + * drop the replication origin. 
If the subscription was disabled before + * this transaction, then there shouldn't be any workers left, so this + * won't make a difference. + * + * New workers won't be started because we hold an exclusive lock on the + * subscription till the end of the transaction. + */ + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + subworkers = logicalrep_workers_find(subid, false); + LWLockRelease(LogicalRepWorkerLock); + foreach(lc, subworkers) + { + LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc); + + logicalrep_worker_stop(w->subid, w->relid); + } + list_free(subworkers); + + /* + * Cleanup of tablesync replication origins. + * + * Any READY-state relations would already have dealt with clean-ups. + * + * Note that the state can't change because we have already stopped both + * the apply and tablesync workers and they can't restart because of + * exclusive lock on the subscription. + */ + rstates = GetSubscriptionNotReadyRelations(subid); + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Drop the tablesync's origin tracking if exists. + * + * It is possible that the origin is not yet created for tablesync + * worker so passing missing_ok = true. This can happen for the states + * before SUBREL_STATE_FINISHEDCOPY. + */ + ReplicationOriginNameForTablesync(subid, relid, originname, + sizeof(originname)); + replorigin_drop_by_name(originname, true, false); + } + + /* Clean up dependencies */ + deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid, 0); + + /* Remove any associated relation synchronization states. */ + RemoveSubscriptionRel(subid, InvalidOid); + + /* Remove the origin tracking if exists. 
*/ + snprintf(originname, sizeof(originname), "pg_%u", subid); + replorigin_drop_by_name(originname, true, false); + + /* + * Tell the cumulative stats system that the subscription is getting + * dropped. + */ + pgstat_drop_subscription(subid); + + /* + * If there is no slot associated with the subscription, we can finish + * here. + */ + if (!slotname && rstates == NIL) + { + table_close(rel, NoLock); + return; + } + + /* + * Try to acquire the connection necessary for dropping slots. + * + * Note: If the slotname is NONE/NULL then we allow the command to finish + * and users need to manually cleanup the apply and tablesync worker slots + * later. + * + * This has to be at the end because otherwise if there is an error while + * doing the database operations we won't be able to rollback dropped + * slot. + */ + load_file("libpqwalreceiver", false); + + wrconn = walrcv_connect(conninfo, true, subname, &err); + if (wrconn == NULL) + { + if (!slotname) + { + /* be tidy */ + list_free(rstates); + table_close(rel, NoLock); + return; + } + else + { + ReportSlotConnectionError(rstates, subid, slotname, err); + } + } + + PG_TRY(); + { + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Drop the tablesync slots associated with removed tables. + * + * For SYNCDONE/READY states, the tablesync slot is known to have + * already been dropped by the tablesync worker. + * + * For other states, there is no certainty, maybe the slot does + * not exist yet. Also, if we fail after removing some of the + * slots, next time, it will again try to drop already dropped + * slots and fail. For these reasons, we allow missing_ok = true + * for the drop. 
+ */ + if (rstate->state != SUBREL_STATE_SYNCDONE) + { + char syncslotname[NAMEDATALEN] = {0}; + + ReplicationSlotNameForTablesync(subid, relid, syncslotname, + sizeof(syncslotname)); + ReplicationSlotDropAtPubNode(wrconn, syncslotname, true); + } + } + + list_free(rstates); + + /* + * If there is a slot associated with the subscription, then drop the + * replication slot at the publisher. + */ + if (slotname) + ReplicationSlotDropAtPubNode(wrconn, slotname, false); + } + PG_FINALLY(); + { + walrcv_disconnect(wrconn); + } + PG_END_TRY(); + + table_close(rel, NoLock); +} + +/* + * Drop the replication slot at the publisher node using the replication + * connection. + * + * missing_ok - if true then only issue a LOG message if the slot doesn't + * exist. + */ +void +ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok) +{ + StringInfoData cmd; + + Assert(wrconn); + + load_file("libpqwalreceiver", false); + + initStringInfo(&cmd); + appendStringInfo(&cmd, "DROP_REPLICATION_SLOT %s WAIT", quote_identifier(slotname)); + + PG_TRY(); + { + WalRcvExecResult *res; + + res = walrcv_exec(wrconn, cmd.data, 0, NULL); + + if (res->status == WALRCV_OK_COMMAND) + { + /* NOTICE. Success. */ + ereport(NOTICE, + (errmsg("dropped replication slot \"%s\" on publisher", + slotname))); + } + else if (res->status == WALRCV_ERROR && + missing_ok && + res->sqlstate == ERRCODE_UNDEFINED_OBJECT) + { + /* LOG. Error, but missing_ok = true. */ + ereport(LOG, + (errmsg("could not drop replication slot \"%s\" on publisher: %s", + slotname, res->err))); + } + else + { + /* ERROR. 
*/ + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not drop replication slot \"%s\" on publisher: %s", + slotname, res->err))); + } + + walrcv_clear_result(res); + } + PG_FINALLY(); + { + pfree(cmd.data); + } + PG_END_TRY(); +} + +/* + * Internal workhorse for changing a subscription owner + */ +static void +AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_subscription form; + + form = (Form_pg_subscription) GETSTRUCT(tup); + + if (form->subowner == newOwnerId) + return; + + if (!pg_subscription_ownercheck(form->oid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SUBSCRIPTION, + NameStr(form->subname)); + + /* New owner must be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of subscription \"%s\"", + NameStr(form->subname)), + errhint("The owner of a subscription must be a superuser."))); + + form->subowner = newOwnerId; + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(SubscriptionRelationId, + form->oid, + newOwnerId); + + InvokeObjectPostAlterHook(SubscriptionRelationId, + form->oid, 0); + + ApplyLauncherWakeupAtCommit(); +} + +/* + * Change subscription owner -- by name + */ +ObjectAddress +AlterSubscriptionOwner(const char *name, Oid newOwnerId) +{ + Oid subid; + HeapTuple tup; + Relation rel; + ObjectAddress address; + Form_pg_subscription form; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", name))); + + form = (Form_pg_subscription) GETSTRUCT(tup); + subid = form->oid; + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, 
SubscriptionRelationId, subid); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change subscription owner -- by OID + */ +void +AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = table_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription with OID %u does not exist", subid))); + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + table_close(rel, RowExclusiveLock); +} + +/* + * Get the list of tables which belong to specified publications on the + * publisher connection. + * + * Note that we don't support the case where the column list is different for + * the same table in different publications to avoid sending unwanted column + * information for some of the rows. This can happen when both the column + * list and row filter are specified for different publications. + */ +static List * +fetch_table_list(WalReceiverConn *wrconn, List *publications) +{ + WalRcvExecResult *res; + StringInfoData cmd; + TupleTableSlot *slot; + Oid tableRow[3] = {TEXTOID, TEXTOID, NAMEARRAYOID}; + List *tablelist = NIL; + bool check_columnlist = (walrcv_server_version(wrconn) >= 150000); + + initStringInfo(&cmd); + appendStringInfoString(&cmd, "SELECT DISTINCT t.schemaname, t.tablename \n"); + + /* Get column lists for each relation if the publisher supports it */ + if (check_columnlist) + appendStringInfoString(&cmd, ", t.attnames\n"); + + appendStringInfoString(&cmd, "FROM pg_catalog.pg_publication_tables t\n" + " WHERE t.pubname IN ("); + get_publications_str(publications, &cmd, true); + appendStringInfoChar(&cmd, ')'); + + res = walrcv_exec(wrconn, cmd.data, check_columnlist ? 
3 : 2, tableRow); + pfree(cmd.data); + + if (res->status != WALRCV_OK_TUPLES) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not receive list of replicated tables from the publisher: %s", + res->err))); + + /* Process tables. */ + slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple); + while (tuplestore_gettupleslot(res->tuplestore, true, false, slot)) + { + char *nspname; + char *relname; + bool isnull; + RangeVar *rv; + + nspname = TextDatumGetCString(slot_getattr(slot, 1, &isnull)); + Assert(!isnull); + relname = TextDatumGetCString(slot_getattr(slot, 2, &isnull)); + Assert(!isnull); + + rv = makeRangeVar(nspname, relname, -1); + + if (check_columnlist && list_member(tablelist, rv)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use different column lists for table \"%s.%s\" in different publications", + nspname, relname)); + else + tablelist = lappend(tablelist, rv); + + ExecClearTuple(slot); + } + ExecDropSingleTupleTableSlot(slot); + + walrcv_clear_result(res); + + return tablelist; +} + +/* + * This is to report the connection failure while dropping replication slots. + * Here, we report the WARNING for all tablesync slots so that user can drop + * them manually, if required. + */ +static void +ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err) +{ + ListCell *lc; + + foreach(lc, rstates) + { + SubscriptionRelState *rstate = (SubscriptionRelState *) lfirst(lc); + Oid relid = rstate->relid; + + /* Only cleanup resources of tablesync workers */ + if (!OidIsValid(relid)) + continue; + + /* + * Caller needs to ensure that relstate doesn't change underneath us. + * See DropSubscription where we get the relstates. 
+ */ + if (rstate->state != SUBREL_STATE_SYNCDONE) + { + char syncslotname[NAMEDATALEN] = {0}; + + ReplicationSlotNameForTablesync(subid, relid, syncslotname, + sizeof(syncslotname)); + elog(WARNING, "could not drop tablesync replication slot \"%s\"", + syncslotname); + } + } + + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not connect to publisher when attempting to drop replication slot \"%s\": %s", + slotname, err), + /* translator: %s is an SQL ALTER command */ + errhint("Use %s to disable the subscription, and then use %s to disassociate it from the slot.", + "ALTER SUBSCRIPTION ... DISABLE", + "ALTER SUBSCRIPTION ... SET (slot_name = NONE)"))); +} + +/* + * Check for duplicates in the given list of publications and error out if + * found one. Add publications to datums as text datums, if datums is not + * NULL. + */ +static void +check_duplicates_in_publist(List *publist, Datum *datums) +{ + ListCell *cell; + int j = 0; + + foreach(cell, publist) + { + char *name = strVal(lfirst(cell)); + ListCell *pcell; + + foreach(pcell, publist) + { + char *pname = strVal(lfirst(pcell)); + + if (pcell == cell) + break; + + if (strcmp(name, pname) == 0) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("publication name \"%s\" used more than once", + pname))); + } + + if (datums) + datums[j++] = CStringGetTextDatum(name); + } +} + +/* + * Merge current subscription's publications and user-specified publications + * from ADD/DROP PUBLICATIONS. + * + * If addpub is true, we will add the list of publications into oldpublist. + * Otherwise, we will delete the list of publications from oldpublist. The + * returned list is a copy, oldpublist itself is not changed. + * + * subname is the subscription name, for error messages. 
+ */ +static List * +merge_publications(List *oldpublist, List *newpublist, bool addpub, const char *subname) +{ + ListCell *lc; + + oldpublist = list_copy(oldpublist); + + check_duplicates_in_publist(newpublist, NULL); + + foreach(lc, newpublist) + { + char *name = strVal(lfirst(lc)); + ListCell *lc2; + bool found = false; + + foreach(lc2, oldpublist) + { + char *pubname = strVal(lfirst(lc2)); + + if (strcmp(name, pubname) == 0) + { + found = true; + if (addpub) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("publication \"%s\" is already in subscription \"%s\"", + name, subname))); + else + oldpublist = foreach_delete_current(oldpublist, lc2); + + break; + } + } + + if (addpub && !found) + oldpublist = lappend(oldpublist, makeString(name)); + else if (!addpub && !found) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("publication \"%s\" is not in subscription \"%s\"", + name, subname))); + } + + /* + * XXX Probably no strong reason for this, but for now it's to make ALTER + * SUBSCRIPTION ... DROP PUBLICATION consistent with SET PUBLICATION. 
+ */ + if (!oldpublist) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot drop all the publications from a subscription"))); + + return oldpublist; +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c new file mode 100644 index 0000000..97f9a22 --- /dev/null +++ b/src/backend/commands/tablecmds.c @@ -0,0 +1,19402 @@ +/*------------------------------------------------------------------------- + * + * tablecmds.c + * Commands for creating and altering table structures and settings + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/tablecmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/attmap.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/heapam_xlog.h" +#include "access/multixact.h" +#include "access/reloptions.h" +#include "access/relscan.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/toast_compression.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "catalog/catalog.h" +#include "catalog/heap.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_am.h" +#include "catalog/pg_attrdef.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_largeobject.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include 
"catalog/toasting.h" +#include "commands/cluster.h" +#include "commands/comment.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/policy.h" +#include "commands/sequence.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "commands/trigger.h" +#include "commands/typecmds.h" +#include "commands/user.h" +#include "executor/executor.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/parsenodes.h" +#include "optimizer/optimizer.h" +#include "parser/parse_clause.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_oper.h" +#include "parser/parse_relation.h" +#include "parser/parse_type.h" +#include "parser/parse_utilcmd.h" +#include "parser/parser.h" +#include "partitioning/partbounds.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/lock.h" +#include "storage/predicate.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/relcache.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/timestamp.h" +#include "utils/typcache.h" + +/* + * ON COMMIT action list + */ +typedef struct OnCommitItem +{ + Oid relid; /* relid of relation */ + OnCommitAction oncommit; /* what to do at end of xact */ + + /* + * If this entry was created during the current transaction, + * creating_subid is the ID of the creating subxact; if created in a prior + * transaction, creating_subid is 
zero. If deleted during the current + * transaction, deleting_subid is the ID of the deleting subxact; if no + * deletion request is pending, deleting_subid is zero. + */ + SubTransactionId creating_subid; + SubTransactionId deleting_subid; +} OnCommitItem; + +static List *on_commits = NIL; + + +/* + * State information for ALTER TABLE + * + * The pending-work queue for an ALTER TABLE is a List of AlteredTableInfo + * structs, one for each table modified by the operation (the named table + * plus any child tables that are affected). We save lists of subcommands + * to apply to this table (possibly modified by parse transformation steps); + * these lists will be executed in Phase 2. If a Phase 3 step is needed, + * necessary information is stored in the constraints and newvals lists. + * + * Phase 2 is divided into multiple passes; subcommands are executed in + * a pass determined by subcommand type. + */ + +#define AT_PASS_UNSET -1 /* UNSET will cause ERROR */ +#define AT_PASS_DROP 0 /* DROP (all flavors) */ +#define AT_PASS_ALTER_TYPE 1 /* ALTER COLUMN TYPE */ +#define AT_PASS_OLD_INDEX 2 /* re-add existing indexes */ +#define AT_PASS_OLD_CONSTR 3 /* re-add existing constraints */ +/* We could support a RENAME COLUMN pass here, but not currently used */ +#define AT_PASS_ADD_COL 4 /* ADD COLUMN */ +#define AT_PASS_ADD_CONSTR 5 /* ADD constraints (initial examination) */ +#define AT_PASS_COL_ATTRS 6 /* set column attributes, eg NOT NULL */ +#define AT_PASS_ADD_INDEXCONSTR 7 /* ADD index-based constraints */ +#define AT_PASS_ADD_INDEX 8 /* ADD indexes */ +#define AT_PASS_ADD_OTHERCONSTR 9 /* ADD other constraints, defaults */ +#define AT_PASS_MISC 10 /* other stuff */ +#define AT_NUM_PASSES 11 + +typedef struct AlteredTableInfo +{ + /* Information saved before any work commences: */ + Oid relid; /* Relation to work on */ + char relkind; /* Its relkind */ + TupleDesc oldDesc; /* Pre-modification tuple descriptor */ + + /* + * Transiently set during Phase 2, normally 
set to NULL. + * + * ATRewriteCatalogs sets this when it starts, and closes when ATExecCmd + * returns control. This can be exploited by ATExecCmd subroutines to + * close/reopen across transaction boundaries. + */ + Relation rel; + + /* Information saved by Phase 1 for Phase 2: */ + List *subcmds[AT_NUM_PASSES]; /* Lists of AlterTableCmd */ + /* Information saved by Phases 1/2 for Phase 3: */ + List *constraints; /* List of NewConstraint */ + List *newvals; /* List of NewColumnValue */ + List *afterStmts; /* List of utility command parsetrees */ + bool verify_new_notnull; /* T if we should recheck NOT NULL */ + int rewrite; /* Reason for forced rewrite, if any */ + Oid newAccessMethod; /* new access method; 0 means no change */ + Oid newTableSpace; /* new tablespace; 0 means no change */ + bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */ + char newrelpersistence; /* if above is true */ + Expr *partition_constraint; /* for attach partition validation */ + /* true, if validating default due to some other attach/detach */ + bool validate_default; + /* Objects to rebuild after completing ALTER TYPE operations */ + List *changedConstraintOids; /* OIDs of constraints to rebuild */ + List *changedConstraintDefs; /* string definitions of same */ + List *changedIndexOids; /* OIDs of indexes to rebuild */ + List *changedIndexDefs; /* string definitions of same */ + char *replicaIdentityIndex; /* index to reset as REPLICA IDENTITY */ + char *clusterOnIndex; /* index to use for CLUSTER */ + List *changedStatisticsOids; /* OIDs of statistics to rebuild */ + List *changedStatisticsDefs; /* string definitions of same */ +} AlteredTableInfo; + +/* Struct describing one new constraint to check in Phase 3 scan */ +/* Note: new NOT NULL constraints are handled elsewhere */ +typedef struct NewConstraint +{ + char *name; /* Constraint name, or NULL if none */ + ConstrType contype; /* CHECK or FOREIGN */ + Oid refrelid; /* PK rel, if FOREIGN */ + Oid refindid; /* OID of PK's 
index, if FOREIGN */ + Oid conid; /* OID of pg_constraint entry, if FOREIGN */ + Node *qual; /* Check expr or CONSTR_FOREIGN Constraint */ + ExprState *qualstate; /* Execution state for CHECK expr */ +} NewConstraint; + +/* + * Struct describing one new column value that needs to be computed during + * Phase 3 copy (this could be either a new column with a non-null default, or + * a column that we're changing the type of). Columns without such an entry + * are just copied from the old table during ATRewriteTable. Note that the + * expr is an expression over *old* table values, except when is_generated + * is true; then it is an expression over columns of the *new* tuple. + */ +typedef struct NewColumnValue +{ + AttrNumber attnum; /* which column */ + Expr *expr; /* expression to compute */ + ExprState *exprstate; /* execution state */ + bool is_generated; /* is it a GENERATED expression? */ +} NewColumnValue; + +/* + * Error-reporting support for RemoveRelations + */ +struct dropmsgstrings +{ + char kind; + int nonexistent_code; + const char *nonexistent_msg; + const char *skipping_msg; + const char *nota_msg; + const char *drophint_msg; +}; + +static const struct dropmsgstrings dropmsgstringarray[] = { + {RELKIND_RELATION, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + gettext_noop("Use DROP TABLE to remove a table.")}, + {RELKIND_SEQUENCE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("sequence \"%s\" does not exist"), + gettext_noop("sequence \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a sequence"), + gettext_noop("Use DROP SEQUENCE to remove a sequence.")}, + {RELKIND_VIEW, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("view \"%s\" does not exist"), + gettext_noop("view \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a view"), + gettext_noop("Use DROP VIEW to remove a view.")}, + {RELKIND_MATVIEW, + 
ERRCODE_UNDEFINED_TABLE, + gettext_noop("materialized view \"%s\" does not exist"), + gettext_noop("materialized view \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a materialized view"), + gettext_noop("Use DROP MATERIALIZED VIEW to remove a materialized view.")}, + {RELKIND_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, + {RELKIND_COMPOSITE_TYPE, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("type \"%s\" does not exist"), + gettext_noop("type \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a type"), + gettext_noop("Use DROP TYPE to remove a type.")}, + {RELKIND_FOREIGN_TABLE, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("foreign table \"%s\" does not exist"), + gettext_noop("foreign table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a foreign table"), + gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")}, + {RELKIND_PARTITIONED_TABLE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + gettext_noop("Use DROP TABLE to remove a table.")}, + {RELKIND_PARTITIONED_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, + {'\0', 0, NULL, NULL, NULL, NULL} +}; + +/* communication between RemoveRelations and RangeVarCallbackForDropRelation */ +struct DropRelationCallbackState +{ + /* These fields are set by RemoveRelations: */ + char expected_relkind; + LOCKMODE heap_lockmode; + /* These fields are state to track which subsidiary locks are held: */ + Oid heapOid; + Oid partParentOid; + /* These fields are passed back by 
RangeVarCallbackForDropRelation: */ + char actual_relkind; + char actual_relpersistence; +}; + +/* Alter table target-type flags for ATSimplePermissions */ +#define ATT_TABLE 0x0001 +#define ATT_VIEW 0x0002 +#define ATT_MATVIEW 0x0004 +#define ATT_INDEX 0x0008 +#define ATT_COMPOSITE_TYPE 0x0010 +#define ATT_FOREIGN_TABLE 0x0020 +#define ATT_PARTITIONED_INDEX 0x0040 +#define ATT_SEQUENCE 0x0080 + +/* + * ForeignTruncateInfo + * + * Information related to truncation of foreign tables. This is used for + * the elements in a hash table. It uses the server OID as lookup key, + * and includes a per-server list of all foreign tables involved in the + * truncation. + */ +typedef struct ForeignTruncateInfo +{ + Oid serverid; + List *rels; +} ForeignTruncateInfo; + +/* + * Partition tables are expected to be dropped when the parent partitioned + * table gets dropped. Hence for partitioning we use AUTO dependency. + * Otherwise, for regular inheritance use NORMAL dependency. + */ +#define child_dependency_type(child_is_partition) \ + ((child_is_partition) ? 
DEPENDENCY_AUTO : DEPENDENCY_NORMAL) + +static void truncate_check_rel(Oid relid, Form_pg_class reltuple); +static void truncate_check_perms(Oid relid, Form_pg_class reltuple); +static void truncate_check_activity(Relation rel); +static void RangeVarCallbackForTruncate(const RangeVar *relation, + Oid relId, Oid oldRelId, void *arg); +static List *MergeAttributes(List *schema, List *supers, char relpersistence, + bool is_partition, List **supconstr); +static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); +static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); +static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); +static void StoreCatalogInheritance(Oid relationId, List *supers, + bool child_is_partition); +static void StoreCatalogInheritance1(Oid relationId, Oid parentOid, + int32 seqNumber, Relation inhRelation, + bool child_is_partition); +static int findAttrByName(const char *attributeName, List *schema); +static void AlterIndexNamespaces(Relation classRel, Relation rel, + Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved); +static void AlterSeqNamespaces(Relation classRel, Relation rel, + Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved, + LOCKMODE lockmode); +static ObjectAddress ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode); +static bool ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel, + Relation rel, HeapTuple contuple, List **otherrelids, + LOCKMODE lockmode); +static ObjectAddress ATExecValidateConstraint(List **wqueue, + Relation rel, char *constrName, + bool recurse, bool recursing, LOCKMODE lockmode); +static int transformColumnNameList(Oid relId, List *colList, + int16 *attnums, Oid *atttypids); +static int transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, + List **attnamelist, + int16 *attnums, Oid *atttypids, + Oid *opclasses); +static Oid 
transformFkeyCheckAttrs(Relation pkrel, + int numattrs, int16 *attnums, + Oid *opclasses); +static void checkFkeyPermissions(Relation rel, int16 *attnums, int natts); +static CoercionPathType findFkeyCast(Oid targetTypeId, Oid sourceTypeId, + Oid *funcid); +static void validateForeignKeyConstraint(char *conname, + Relation rel, Relation pkrel, + Oid pkindOid, Oid constraintOid); +static void ATController(AlterTableStmt *parsetree, + Relation rel, List *cmds, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATExecCmd(List **wqueue, AlteredTableInfo *tab, + AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context); +static AlterTableCmd *ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, + Relation rel, AlterTableCmd *cmd, + bool recurse, LOCKMODE lockmode, + int cur_pass, + AlterTableUtilityContext *context); +static void ATRewriteTables(AlterTableStmt *parsetree, + List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode); +static AlteredTableInfo *ATGetQueueEntry(List **wqueue, Relation rel); +static void ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets); +static void ATSimpleRecursion(List **wqueue, Relation rel, + AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static void ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode); +static void ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd, + LOCKMODE lockmode, + AlterTableUtilityContext *context); +static List *find_typed_table_dependencies(Oid typeOid, const char *typeName, + 
DropBehavior behavior); +static void ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing, + bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, + Relation rel, AlterTableCmd **cmd, + bool recurse, bool recursing, + LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context); +static bool check_for_column_name_collision(Relation rel, const char *colname, + bool if_not_exists); +static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid); +static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid); +static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing); +static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode); +static void ATPrepSetNotNull(List **wqueue, Relation rel, + AlterTableCmd *cmd, bool recurse, bool recursing, + LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, + const char *colName, LOCKMODE lockmode); +static void ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel, + const char *colName, LOCKMODE lockmode); +static bool NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr); +static bool ConstraintImpliedByRelConstraint(Relation scanrel, + List *testConstraint, List *provenConstraint); +static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName, + Node *newDefault, LOCKMODE lockmode); +static ObjectAddress ATExecCookedColumnDefault(Relation rel, AttrNumber attnum, + Node *newDefault); +static ObjectAddress ATExecAddIdentity(Relation rel, const char *colName, + Node *def, LOCKMODE lockmode); +static ObjectAddress ATExecSetIdentity(Relation rel, const char *colName, + Node *def, LOCKMODE lockmode); +static ObjectAddress ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE 
lockmode); +static void ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode); +static ObjectAddress ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode); +static ObjectAddress ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, + Node *newValue, LOCKMODE lockmode); +static ObjectAddress ATExecSetOptions(Relation rel, const char *colName, + Node *options, bool isReset, LOCKMODE lockmode); +static ObjectAddress ATExecSetStorage(Relation rel, const char *colName, + Node *newValue, LOCKMODE lockmode); +static void ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing, + AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *colName, + DropBehavior behavior, + bool recurse, bool recursing, + bool missing_ok, LOCKMODE lockmode, + ObjectAddresses *addrs); +static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode); +static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel, + CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode); +static ObjectAddress ATExecAddConstraint(List **wqueue, + AlteredTableInfo *tab, Relation rel, + Constraint *newConstraint, bool recurse, bool is_readd, + LOCKMODE lockmode); +static char *ChooseForeignKeyConstraintNameAddition(List *colnames); +static ObjectAddress ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, LOCKMODE lockmode); +static ObjectAddress ATAddCheckConstraint(List **wqueue, + AlteredTableInfo *tab, Relation rel, + Constraint *constr, + bool recurse, bool recursing, bool is_readd, + LOCKMODE lockmode); +static ObjectAddress ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, + Relation rel, Constraint *fkconstraint, + bool recurse, bool recursing, + LOCKMODE lockmode); 
+static ObjectAddress addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, + Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, + Oid parentDelTrigger, Oid parentUpdTrigger); +static void validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums, + int numfksetcols, const int16 *fksetcolsattnums, + List *fksetcols); +static void addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, + Relation rel, Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, LOCKMODE lockmode, + Oid parentInsTrigger, Oid parentUpdTrigger); +static void CloneForeignKeyConstraints(List **wqueue, Relation parentRel, + Relation partitionRel); +static void CloneFkReferenced(Relation parentRel, Relation partitionRel); +static void CloneFkReferencing(List **wqueue, Relation parentRel, + Relation partRel); +static void createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid, + Constraint *fkconstraint, Oid constraintOid, + Oid indexOid, + Oid parentInsTrigger, Oid parentUpdTrigger, + Oid *insertTrigOid, Oid *updateTrigOid); +static void createForeignKeyActionTriggers(Relation rel, Oid refRelOid, + Constraint *fkconstraint, Oid constraintOid, + Oid indexOid, + Oid parentDelTrigger, Oid parentUpdTrigger, + Oid *deleteTrigOid, Oid *updateTrigOid); +static bool tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk, + Oid partRelid, + Oid parentConstrOid, int numfks, + AttrNumber *mapped_conkey, AttrNumber *confkey, + Oid *conpfeqop, + Oid parentInsTrigger, + Oid parentUpdTrigger, + Relation trigrel); +static void GetForeignKeyActionTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *deleteTriggerOid, 
+ Oid *updateTriggerOid); +static void GetForeignKeyCheckTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *insertTriggerOid, + Oid *updateTriggerOid); +static void ATExecDropConstraint(Relation rel, const char *constrName, + DropBehavior behavior, + bool recurse, bool recursing, + bool missing_ok, LOCKMODE lockmode); +static void ATPrepAlterColumnType(List **wqueue, + AlteredTableInfo *tab, Relation rel, + bool recurse, bool recursing, + AlterTableCmd *cmd, LOCKMODE lockmode, + AlterTableUtilityContext *context); +static bool ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno); +static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, + AlterTableCmd *cmd, LOCKMODE lockmode); +static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab); +static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab); +static void RememberStatisticsForRebuilding(Oid indoid, AlteredTableInfo *tab); +static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, + LOCKMODE lockmode); +static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, + char *cmd, List **wqueue, LOCKMODE lockmode, + bool rewrite); +static void RebuildConstraintComment(AlteredTableInfo *tab, int pass, + Oid objid, Relation rel, List *domname, + const char *conname); +static void TryReuseIndex(Oid oldId, IndexStmt *stmt); +static void TryReuseForeignKey(Oid oldId, Constraint *con); +static ObjectAddress ATExecAlterColumnGenericOptions(Relation rel, const char *colName, + List *options, LOCKMODE lockmode); +static void change_owner_fix_column_acls(Oid relationOid, + Oid oldOwnerId, Oid newOwnerId); +static void change_owner_recurse_to_sequences(Oid relationOid, + Oid newOwnerId, LOCKMODE lockmode); +static ObjectAddress ATExecClusterOn(Relation rel, const char *indexName, + LOCKMODE lockmode); +static void ATExecDropCluster(Relation rel, LOCKMODE lockmode); +static void 
ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname); +static bool ATPrepChangePersistence(Relation rel, bool toLogged); +static void ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, + const char *tablespacename, LOCKMODE lockmode); +static void ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode); +static void ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace); +static void ATExecSetRelOptions(Relation rel, List *defList, + AlterTableType operation, + LOCKMODE lockmode); +static void ATExecEnableDisableTrigger(Relation rel, const char *trigname, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode); +static void ATExecEnableDisableRule(Relation rel, const char *rulename, + char fires_when, LOCKMODE lockmode); +static void ATPrepAddInherit(Relation child_rel); +static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode); +static ObjectAddress ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode); +static void drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid, + DependencyType deptype); +static ObjectAddress ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode); +static void ATExecDropOf(Relation rel, LOCKMODE lockmode); +static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode); +static void ATExecGenericOptions(Relation rel, List *options); +static void ATExecSetRowSecurity(Relation rel, bool rls); +static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls); +static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel, + const char *column, Node *newValue, LOCKMODE lockmode); + +static void index_copy_data(Relation rel, RelFileNode newrnode); +static const char *storage_name(char c); + +static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, + Oid oldRelOid, void *arg); +static void 
RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, + Oid oldrelid, void *arg); +static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy); +static void ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation, char strategy); +static void CreateInheritance(Relation child_rel, Relation parent_rel); +static void RemoveInheritance(Relation child_rel, Relation parent_rel, + bool allow_detached); +static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, + PartitionCmd *cmd, + AlterTableUtilityContext *context); +static void AttachPartitionEnsureIndexes(Relation rel, Relation attachrel); +static void QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, + List *partConstraint, + bool validate_default); +static void CloneRowTriggersToPartition(Relation parent, Relation partition); +static void DetachAddConstraintIfNeeded(List **wqueue, Relation partRel); +static void DropClonedTriggersFromPartition(Oid partitionId); +static ObjectAddress ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, + Relation rel, RangeVar *name, + bool concurrent); +static void DetachPartitionFinalize(Relation rel, Relation partRel, + bool concurrent, Oid defaultPartOid); +static ObjectAddress ATExecDetachPartitionFinalize(Relation rel, RangeVar *name); +static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel, + RangeVar *name); +static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl); +static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, + Relation partitionTbl); +static List *GetParentedForeignKeyRefs(Relation partition); +static void ATDetachCheckNoForeignKeyRefs(Relation partition); +static char GetAttributeCompression(Oid atttypid, char *compression); + + +/* ---------------------------------------------------------------- + * DefineRelation + * 
Creates a new relation. + * + * stmt carries parsetree information from an ordinary CREATE TABLE statement. + * The other arguments are used to extend the behavior for other cases: + * relkind: relkind to assign to the new relation + * ownerId: if not InvalidOid, use this as the new relation's owner. + * typaddress: if not null, it's set to the pg_type entry's address. + * queryString: for error reporting + * + * Note that permissions checks are done against current user regardless of + * ownerId. A nonzero ownerId is used when someone is creating a relation + * "on behalf of" someone else, so we still want to see that the current user + * has permissions to do it. + * + * If successful, returns the address of the new relation. + * ---------------------------------------------------------------- + */ +ObjectAddress +DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, + ObjectAddress *typaddress, const char *queryString) +{ + char relname[NAMEDATALEN]; + Oid namespaceId; + Oid relationId; + Oid tablespaceId; + Relation rel; + TupleDesc descriptor; + List *inheritOids; + List *old_constraints; + List *rawDefaults; + List *cookedDefaults; + Datum reloptions; + ListCell *listptr; + AttrNumber attnum; + bool partitioned; + static char *validnsps[] = HEAP_RELOPT_NAMESPACES; + Oid ofTypeId; + ObjectAddress address; + LOCKMODE parentLockmode; + const char *accessMethod = NULL; + Oid accessMethodId = InvalidOid; + + /* + * Truncate relname to appropriate length (probably a waste of time, as + * parser should have done this already). 
+ */ + strlcpy(relname, stmt->relation->relname, NAMEDATALEN); + + /* + * Check consistency of arguments + */ + if (stmt->oncommit != ONCOMMIT_NOOP + && stmt->relation->relpersistence != RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("ON COMMIT can only be used on temporary tables"))); + + if (stmt->partspec != NULL) + { + if (relkind != RELKIND_RELATION) + elog(ERROR, "unexpected relkind: %d", (int) relkind); + + relkind = RELKIND_PARTITIONED_TABLE; + partitioned = true; + } + else + partitioned = false; + + /* + * Look up the namespace in which we are supposed to create the relation, + * check we have permission to create there, lock it against concurrent + * drop, and mark stmt->relation as RELPERSISTENCE_TEMP if a temporary + * namespace is selected. + */ + namespaceId = + RangeVarGetAndCheckCreationNamespace(stmt->relation, NoLock, NULL); + + /* + * Security check: disallow creating temp tables from security-restricted + * code. This is needed because calling code might not expect untrusted + * tables to appear in pg_temp at the front of its search path. + */ + if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP + && InSecurityRestrictedOperation()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot create temporary table within security-restricted operation"))); + + /* + * Determine the lockmode to use when scanning parents. A self-exclusive + * lock is needed here. + * + * For regular inheritance, if two backends attempt to add children to the + * same parent simultaneously, and that parent has no pre-existing + * children, then both will attempt to update the parent's relhassubclass + * field, leading to a "tuple concurrently updated" error. Also, this + * interlocks against a concurrent ANALYZE on the parent table, which + * might otherwise be attempting to clear the parent's relhassubclass + * field, if its previous children were recently dropped. 
+ * + * If the child table is a partition, then we instead grab an exclusive + * lock on the parent because its partition descriptor will be changed by + * addition of the new partition. + */ + parentLockmode = (stmt->partbound != NULL ? AccessExclusiveLock : + ShareUpdateExclusiveLock); + + /* Determine the list of OIDs of the parents. */ + inheritOids = NIL; + foreach(listptr, stmt->inhRelations) + { + RangeVar *rv = (RangeVar *) lfirst(listptr); + Oid parentOid; + + parentOid = RangeVarGetRelid(rv, parentLockmode, false); + + /* + * Reject duplications in the list of parents. + */ + if (list_member_oid(inheritOids, parentOid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" would be inherited from more than once", + get_rel_name(parentOid)))); + + inheritOids = lappend_oid(inheritOids, parentOid); + } + + /* + * Select tablespace to use: an explicitly indicated one, or (in the case + * of a partitioned table) the parent's, if it has one. + */ + if (stmt->tablespacename) + { + tablespaceId = get_tablespace_oid(stmt->tablespacename, false); + + if (partitioned && tablespaceId == MyDatabaseTableSpace) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify default tablespace for partitioned relations"))); + } + else if (stmt->partbound) + { + /* + * For partitions, when no other tablespace is specified, we default + * the tablespace to the parent partitioned table's. + */ + Assert(list_length(inheritOids) == 1); + tablespaceId = get_rel_tablespace(linitial_oid(inheritOids)); + } + else + tablespaceId = InvalidOid; + + /* still nothing? 
use the default */ + if (!OidIsValid(tablespaceId)) + tablespaceId = GetDefaultTablespace(stmt->relation->relpersistence, + partitioned); + + /* Check permissions except when using database's default */ + if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace) + { + AclResult aclresult; + + aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_TABLESPACE, + get_tablespace_name(tablespaceId)); + } + + /* In all cases disallow placing user relations in pg_global */ + if (tablespaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* Identify user ID that will own the table */ + if (!OidIsValid(ownerId)) + ownerId = GetUserId(); + + /* + * Parse and validate reloptions, if any. + */ + reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps, + true, false); + + switch (relkind) + { + case RELKIND_VIEW: + (void) view_reloptions(reloptions, true); + break; + case RELKIND_PARTITIONED_TABLE: + (void) partitioned_table_reloptions(reloptions, true); + break; + default: + (void) heap_reloptions(relkind, reloptions, true); + } + + if (stmt->ofTypename) + { + AclResult aclresult; + + ofTypeId = typenameTypeId(NULL, stmt->ofTypename); + + aclresult = pg_type_aclcheck(ofTypeId, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, ofTypeId); + } + else + ofTypeId = InvalidOid; + + /* + * Look up inheritance ancestors and generate relation schema, including + * inherited attributes. (Note that stmt->tableElts is destructively + * modified by MergeAttributes.) + */ + stmt->tableElts = + MergeAttributes(stmt->tableElts, inheritOids, + stmt->relation->relpersistence, + stmt->partbound != NULL, + &old_constraints); + + /* + * Create a tuple descriptor from the relation schema. 
Note that this + * deals with column names, types, and NOT NULL constraints, but not + * default values or CHECK constraints; we handle those below. + */ + descriptor = BuildDescForRelation(stmt->tableElts); + + /* + * Find columns with default values and prepare for insertion of the + * defaults. Pre-cooked (that is, inherited) defaults go into a list of + * CookedConstraint structs that we'll pass to heap_create_with_catalog, + * while raw defaults go into a list of RawColumnDefault structs that will + * be processed by AddRelationNewConstraints. (We can't deal with raw + * expressions until we can do transformExpr.) + * + * We can set the atthasdef flags now in the tuple descriptor; this just + * saves StoreAttrDefault from having to do an immediate update of the + * pg_attribute rows. + */ + rawDefaults = NIL; + cookedDefaults = NIL; + attnum = 0; + + foreach(listptr, stmt->tableElts) + { + ColumnDef *colDef = lfirst(listptr); + Form_pg_attribute attr; + + attnum++; + attr = TupleDescAttr(descriptor, attnum - 1); + + if (colDef->raw_default != NULL) + { + RawColumnDefault *rawEnt; + + Assert(colDef->cooked_default == NULL); + + rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault)); + rawEnt->attnum = attnum; + rawEnt->raw_default = colDef->raw_default; + rawEnt->missingMode = false; + rawEnt->generated = colDef->generated; + rawDefaults = lappend(rawDefaults, rawEnt); + attr->atthasdef = true; + } + else if (colDef->cooked_default != NULL) + { + CookedConstraint *cooked; + + cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint)); + cooked->contype = CONSTR_DEFAULT; + cooked->conoid = InvalidOid; /* until created */ + cooked->name = NULL; + cooked->attnum = attnum; + cooked->expr = colDef->cooked_default; + cooked->skip_validation = false; + cooked->is_local = true; /* not used for defaults */ + cooked->inhcount = 0; /* ditto */ + cooked->is_no_inherit = false; + cookedDefaults = lappend(cookedDefaults, cooked); + attr->atthasdef = true; + } + + 
if (colDef->identity) + attr->attidentity = colDef->identity; + + if (colDef->generated) + attr->attgenerated = colDef->generated; + + if (colDef->compression) + attr->attcompression = GetAttributeCompression(attr->atttypid, + colDef->compression); + } + + /* + * If the statement hasn't specified an access method, but we're defining + * a type of relation that needs one, use the default. + */ + if (stmt->accessMethod != NULL) + { + accessMethod = stmt->accessMethod; + + if (partitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying a table access method is not supported on a partitioned table"))); + } + else if (RELKIND_HAS_TABLE_AM(relkind)) + accessMethod = default_table_access_method; + + /* look up the access method, verify it is for a table */ + if (accessMethod != NULL) + accessMethodId = get_table_am_oid(accessMethod, false); + + /* + * Create the relation. Inherited defaults and constraints are passed in + * for immediate handling --- since they don't need parsing, they can be + * stored immediately. + */ + relationId = heap_create_with_catalog(relname, + namespaceId, + tablespaceId, + InvalidOid, + InvalidOid, + ofTypeId, + ownerId, + accessMethodId, + descriptor, + list_concat(cookedDefaults, + old_constraints), + relkind, + stmt->relation->relpersistence, + false, + false, + stmt->oncommit, + reloptions, + true, + allowSystemTableMods, + false, + InvalidOid, + typaddress); + + /* + * We must bump the command counter to make the newly-created relation + * tuple visible for opening. + */ + CommandCounterIncrement(); + + /* + * Open the new relation and acquire exclusive lock on it. This isn't + * really necessary for locking out other backends (since they can't see + * the new rel anyway until we commit), but it keeps the lock manager from + * complaining about deadlock risks. 
+ */ + rel = relation_open(relationId, AccessExclusiveLock); + + /* + * Now add any newly specified column default and generation expressions + * to the new relation. These are passed to us in the form of raw + * parsetrees; we need to transform them to executable expression trees + * before they can be added. The most convenient way to do that is to + * apply the parser's transformExpr routine, but transformExpr doesn't + * work unless we have a pre-existing relation. So, the transformation has + * to be postponed to this final step of CREATE TABLE. + * + * This needs to be before processing the partitioning clauses because + * those could refer to generated columns. + */ + if (rawDefaults) + AddRelationNewConstraints(rel, rawDefaults, NIL, + true, true, false, queryString); + + /* + * Make column generation expressions visible for use by partitioning. + */ + CommandCounterIncrement(); + + /* Process and store partition bound, if any. */ + if (stmt->partbound) + { + PartitionBoundSpec *bound; + ParseState *pstate; + Oid parentId = linitial_oid(inheritOids), + defaultPartOid; + Relation parent, + defaultRel = NULL; + ParseNamespaceItem *nsitem; + + /* Already have strong enough lock on the parent */ + parent = table_open(parentId, NoLock); + + /* + * We are going to try to validate the partition bound specification + * against the partition key of parentRel, so it better have one. + */ + if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parent)))); + + /* + * The partition constraint of the default partition depends on the + * partition bounds of every other partition. It is possible that + * another backend might be about to execute a query on the default + * partition table, and that the query relies on previously cached + * default partition constraints. 
We must therefore take a table lock + * strong enough to prevent all queries on the default partition from + * proceeding until we commit and send out a shared-cache-inval notice + * that will make them update their index lists. + * + * Order of locking: The relation being added won't be visible to + * other backends until it is committed, hence here in + * DefineRelation() the order of locking the default partition and the + * relation being added does not matter. But at all other places we + * need to lock the default relation before we lock the relation being + * added or removed i.e. we should take the lock in same order at all + * the places such that lock parent, lock default partition and then + * lock the partition so as to avoid a deadlock. + */ + defaultPartOid = + get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, + true)); + if (OidIsValid(defaultPartOid)) + defaultRel = table_open(defaultPartOid, AccessExclusiveLock); + + /* Transform the bound values */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Add an nsitem containing this relation, so that transformExpr + * called on partition bound expressions is able to report errors + * using a proper context. + */ + nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock, + NULL, false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + + bound = transformPartitionBound(pstate, parent, stmt->partbound); + + /* + * Check first that the new partition's bound is valid and does not + * overlap with any of existing partitions of the parent. + */ + check_new_partition_bound(relname, parent, bound, pstate); + + /* + * If the default partition exists, its partition constraints will + * change after the addition of this new partition such that it won't + * allow any row that qualifies for this new partition. So, check that + * the existing data in the default partition satisfies the constraint + * as it will exist after adding this partition. 
+ */ + if (OidIsValid(defaultPartOid)) + { + check_default_partition_contents(parent, defaultRel, bound); + /* Keep the lock until commit. */ + table_close(defaultRel, NoLock); + } + + /* Update the pg_class entry. */ + StorePartitionBound(rel, parent, bound); + + table_close(parent, NoLock); + } + + /* Store inheritance information for new rel. */ + StoreCatalogInheritance(relationId, inheritOids, stmt->partbound != NULL); + + /* + * Process the partitioning specification (if any) and store the partition + * key information into the catalog. + */ + if (partitioned) + { + ParseState *pstate; + char strategy; + int partnatts; + AttrNumber partattrs[PARTITION_MAX_KEYS]; + Oid partopclass[PARTITION_MAX_KEYS]; + Oid partcollation[PARTITION_MAX_KEYS]; + List *partexprs = NIL; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + partnatts = list_length(stmt->partspec->partParams); + + /* Protect fixed-size arrays here and in executor */ + if (partnatts > PARTITION_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot partition using more than %d columns", + PARTITION_MAX_KEYS))); + + /* + * We need to transform the raw parsetrees corresponding to partition + * expressions into executable expression trees. Like column defaults + * and CHECK constraints, we could not have done the transformation + * earlier. + */ + stmt->partspec = transformPartitionSpec(rel, stmt->partspec, + &strategy); + + ComputePartitionAttrs(pstate, rel, stmt->partspec->partParams, + partattrs, &partexprs, partopclass, + partcollation, strategy); + + StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, + partopclass, partcollation); + + /* make it all visible */ + CommandCounterIncrement(); + } + + /* + * If we're creating a partition, create now all the indexes, triggers, + * FKs defined in the parent. + * + * We can't do it earlier, because DefineIndex wants to know the partition + * key which we just stored. 
+ */ + if (stmt->partbound) + { + Oid parentId = linitial_oid(inheritOids); + Relation parent; + List *idxlist; + ListCell *cell; + + /* Already have strong enough lock on the parent */ + parent = table_open(parentId, NoLock); + idxlist = RelationGetIndexList(parent); + + /* + * For each index in the parent table, create one in the partition + */ + foreach(cell, idxlist) + { + Relation idxRel = index_open(lfirst_oid(cell), AccessShareLock); + AttrMap *attmap; + IndexStmt *idxstmt; + Oid constraintOid; + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (idxRel->rd_index->indisunique) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create foreign partition of partitioned table \"%s\"", + RelationGetRelationName(parent)), + errdetail("Table \"%s\" contains indexes that are unique.", + RelationGetRelationName(parent)))); + else + { + index_close(idxRel, AccessShareLock); + continue; + } + } + + attmap = build_attrmap_by_name(RelationGetDescr(rel), + RelationGetDescr(parent)); + idxstmt = + generateClonedIndexStmt(NULL, idxRel, + attmap, &constraintOid); + DefineIndex(RelationGetRelid(rel), + idxstmt, + InvalidOid, + RelationGetRelid(idxRel), + constraintOid, + false, false, false, false, false); + + index_close(idxRel, AccessShareLock); + } + + list_free(idxlist); + + /* + * If there are any row-level triggers, clone them to the new + * partition. + */ + if (parent->trigdesc != NULL) + CloneRowTriggersToPartition(parent, rel); + + /* + * And foreign keys too. Note that because we're freshly creating the + * table, there is no need to verify these new constraints. + */ + CloneForeignKeyConstraints(NULL, parent, rel); + + table_close(parent, NoLock); + } + + /* + * Now add any newly specified CHECK constraints to the new relation. Same + * as for defaults above, but these need to come after partitioning is set + * up. 
+ */ + if (stmt->constraints) + AddRelationNewConstraints(rel, NIL, stmt->constraints, + true, true, false, queryString); + + ObjectAddressSet(address, RelationRelationId, relationId); + + /* + * Clean up. We keep lock on new relation (although it shouldn't be + * visible to anyone else anyway, until commit). + */ + relation_close(rel, NoLock); + + return address; +} + +/* + * Emit the right error or warning message for a "DROP" command issued on a + * non-existent relation + */ +static void +DropErrorMsgNonExistent(RangeVar *rel, char rightkind, bool missing_ok) +{ + const struct dropmsgstrings *rentry; + + if (rel->schemaname != NULL && + !OidIsValid(LookupNamespaceNoError(rel->schemaname))) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema \"%s\" does not exist", rel->schemaname))); + } + else + { + ereport(NOTICE, + (errmsg("schema \"%s\" does not exist, skipping", + rel->schemaname))); + } + return; + } + + for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++) + { + if (rentry->kind == rightkind) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(rentry->nonexistent_code), + errmsg(rentry->nonexistent_msg, rel->relname))); + } + else + { + ereport(NOTICE, (errmsg(rentry->skipping_msg, rel->relname))); + break; + } + } + } + + Assert(rentry->kind != '\0'); /* Should be impossible */ +} + +/* + * Emit the right error message for a "DROP" command issued on a + * relation of the wrong type + */ +static void +DropErrorMsgWrongType(const char *relname, char wrongkind, char rightkind) +{ + const struct dropmsgstrings *rentry; + const struct dropmsgstrings *wentry; + + for (rentry = dropmsgstringarray; rentry->kind != '\0'; rentry++) + if (rentry->kind == rightkind) + break; + Assert(rentry->kind != '\0'); + + for (wentry = dropmsgstringarray; wentry->kind != '\0'; wentry++) + if (wentry->kind == wrongkind) + break; + /* wrongkind could be something we don't have in our table... 
*/ + + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(rentry->nota_msg, relname), + (wentry->kind != '\0') ? errhint("%s", _(wentry->drophint_msg)) : 0)); +} + +/* + * RemoveRelations + * Implements DROP TABLE, DROP INDEX, DROP SEQUENCE, DROP VIEW, + * DROP MATERIALIZED VIEW, DROP FOREIGN TABLE + */ +void +RemoveRelations(DropStmt *drop) +{ + ObjectAddresses *objects; + char relkind; + ListCell *cell; + int flags = 0; + LOCKMODE lockmode = AccessExclusiveLock; + + /* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */ + if (drop->concurrent) + { + /* + * Note that for temporary relations this lock may get upgraded later + * on, but as no other session can access a temporary relation, this + * is actually fine. + */ + lockmode = ShareUpdateExclusiveLock; + Assert(drop->removeType == OBJECT_INDEX); + if (list_length(drop->objects) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DROP INDEX CONCURRENTLY does not support dropping multiple objects"))); + if (drop->behavior == DROP_CASCADE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DROP INDEX CONCURRENTLY does not support CASCADE"))); + } + + /* + * First we identify all the relations, then we delete them in a single + * performMultipleDeletions() call. This is to avoid unwanted DROP + * RESTRICT errors if one of the relations depends on another. 
+ */ + + /* Determine required relkind */ + switch (drop->removeType) + { + case OBJECT_TABLE: + relkind = RELKIND_RELATION; + break; + + case OBJECT_INDEX: + relkind = RELKIND_INDEX; + break; + + case OBJECT_SEQUENCE: + relkind = RELKIND_SEQUENCE; + break; + + case OBJECT_VIEW: + relkind = RELKIND_VIEW; + break; + + case OBJECT_MATVIEW: + relkind = RELKIND_MATVIEW; + break; + + case OBJECT_FOREIGN_TABLE: + relkind = RELKIND_FOREIGN_TABLE; + break; + + default: + elog(ERROR, "unrecognized drop object type: %d", + (int) drop->removeType); + relkind = 0; /* keep compiler quiet */ + break; + } + + /* Lock and validate each relation; build a list of object addresses */ + objects = new_object_addresses(); + + foreach(cell, drop->objects) + { + RangeVar *rel = makeRangeVarFromNameList((List *) lfirst(cell)); + Oid relOid; + ObjectAddress obj; + struct DropRelationCallbackState state; + + /* + * These next few steps are a great deal like relation_openrv, but we + * don't bother building a relcache entry since we don't need it. + * + * Check for shared-cache-inval messages before trying to access the + * relation. This is needed to cover the case where the name + * identifies a rel that has been dropped and recreated since the + * start of our transaction: if we don't flush the old syscache entry, + * then we'll latch onto that entry and suffer an error later. + */ + AcceptInvalidationMessages(); + + /* Look up the appropriate relation using namespace search. */ + state.expected_relkind = relkind; + state.heap_lockmode = drop->concurrent ? + ShareUpdateExclusiveLock : AccessExclusiveLock; + /* We must initialize these fields to show that no locks are held: */ + state.heapOid = InvalidOid; + state.partParentOid = InvalidOid; + + relOid = RangeVarGetRelidExtended(rel, lockmode, RVR_MISSING_OK, + RangeVarCallbackForDropRelation, + (void *) &state); + + /* Not there? 
*/ + if (!OidIsValid(relOid)) + { + DropErrorMsgNonExistent(rel, relkind, drop->missing_ok); + continue; + } + + /* + * Decide if concurrent mode needs to be used here or not. The + * callback retrieved the rel's persistence for us. + */ + if (drop->concurrent && + state.actual_relpersistence != RELPERSISTENCE_TEMP) + { + Assert(list_length(drop->objects) == 1 && + drop->removeType == OBJECT_INDEX); + flags |= PERFORM_DELETION_CONCURRENTLY; + } + + /* + * Concurrent index drop cannot be used with partitioned indexes, + * either. + */ + if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 && + state.actual_relkind == RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot drop partitioned index \"%s\" concurrently", + rel->relname))); + + /* + * If we're told to drop a partitioned index, we must acquire lock on + * all the children of its parent partitioned table before proceeding. + * Otherwise we'd try to lock the child index partitions before their + * tables, leading to potential deadlock against other sessions that + * will lock those objects in the other order. + */ + if (state.actual_relkind == RELKIND_PARTITIONED_INDEX) + (void) find_all_inheritors(state.heapOid, + state.heap_lockmode, + NULL); + + /* OK, we're ready to delete this one */ + obj.classId = RelationRelationId; + obj.objectId = relOid; + obj.objectSubId = 0; + + add_exact_object_address(&obj, objects); + } + + performMultipleDeletions(objects, drop->behavior, flags); + + free_object_addresses(objects); +} + +/* + * Before acquiring a table lock, check whether we have sufficient rights. + * In the case of DROP INDEX, also try to lock the table before the index. + * Also, if the table to be dropped is a partition, we try to lock the parent + * first. 
+ */ +static void +RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, + void *arg) +{ + HeapTuple tuple; + struct DropRelationCallbackState *state; + char expected_relkind; + bool is_partition; + Form_pg_class classform; + LOCKMODE heap_lockmode; + bool invalid_system_index = false; + + state = (struct DropRelationCallbackState *) arg; + heap_lockmode = state->heap_lockmode; + + /* + * If we previously locked some other index's heap, and the name we're + * looking up no longer refers to that relation, release the now-useless + * lock. + */ + if (relOid != oldRelOid && OidIsValid(state->heapOid)) + { + UnlockRelationOid(state->heapOid, heap_lockmode); + state->heapOid = InvalidOid; + } + + /* + * Similarly, if we previously locked some other partition's heap, and the + * name we're looking up no longer refers to that relation, release the + * now-useless lock. + */ + if (relOid != oldRelOid && OidIsValid(state->partParentOid)) + { + UnlockRelationOid(state->partParentOid, AccessExclusiveLock); + state->partParentOid = InvalidOid; + } + + /* Didn't find a relation, so no need for locking or permission checks. */ + if (!OidIsValid(relOid)) + return; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped, so nothing to do */ + classform = (Form_pg_class) GETSTRUCT(tuple); + is_partition = classform->relispartition; + + /* Pass back some data to save lookups in RemoveRelations */ + state->actual_relkind = classform->relkind; + state->actual_relpersistence = classform->relpersistence; + + /* + * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, + * but RemoveRelations() can only pass one relkind for a given relation. + * It chooses RELKIND_RELATION for both regular and partitioned tables. + * That means we must be careful before giving the wrong type error when + * the relation is RELKIND_PARTITIONED_TABLE. 
An equivalent problem + * exists with indexes. + */ + if (classform->relkind == RELKIND_PARTITIONED_TABLE) + expected_relkind = RELKIND_RELATION; + else if (classform->relkind == RELKIND_PARTITIONED_INDEX) + expected_relkind = RELKIND_INDEX; + else + expected_relkind = classform->relkind; + + if (state->expected_relkind != expected_relkind) + DropErrorMsgWrongType(rel->relname, classform->relkind, + state->expected_relkind); + + /* Allow DROP to either table owner or schema owner */ + if (!pg_class_ownercheck(relOid, GetUserId()) && + !pg_namespace_ownercheck(classform->relnamespace, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, + get_relkind_objtype(classform->relkind), + rel->relname); + + /* + * Check the case of a system index that might have been invalidated by a + * failed concurrent process and allow its drop. For the time being, this + * only concerns indexes of toast relations that became invalid during a + * REINDEX CONCURRENTLY process. + */ + if (IsSystemClass(relOid, classform) && classform->relkind == RELKIND_INDEX) + { + HeapTuple locTuple; + Form_pg_index indexform; + bool indisvalid; + + locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(locTuple)) + { + ReleaseSysCache(tuple); + return; + } + + indexform = (Form_pg_index) GETSTRUCT(locTuple); + indisvalid = indexform->indisvalid; + ReleaseSysCache(locTuple); + + /* Mark object as being an invalid index of system catalogs */ + if (!indisvalid) + invalid_system_index = true; + } + + /* In the case of an invalid index, it is fine to bypass this check */ + if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + rel->relname))); + + ReleaseSysCache(tuple); + + /* + * In DROP INDEX, attempt to acquire lock on the parent table before + * locking the index. 
index_drop() will need this anyway, and since + * regular queries lock tables before their indexes, we risk deadlock if + * we do it the other way around. No error if we don't find a pg_index + * entry, though --- the relation may have been dropped. Note that this + * code will execute for either plain or partitioned indexes. + */ + if (expected_relkind == RELKIND_INDEX && + relOid != oldRelOid) + { + state->heapOid = IndexGetRelation(relOid, true); + if (OidIsValid(state->heapOid)) + LockRelationOid(state->heapOid, heap_lockmode); + } + + /* + * Similarly, if the relation is a partition, we must acquire lock on its + * parent before locking the partition. That's because queries lock the + * parent before its partitions, so we risk deadlock if we do it the other + * way around. + */ + if (is_partition && relOid != oldRelOid) + { + state->partParentOid = get_partition_parent(relOid, true); + if (OidIsValid(state->partParentOid)) + LockRelationOid(state->partParentOid, AccessExclusiveLock); + } +} + +/* + * ExecuteTruncate + * Executes a TRUNCATE command. + * + * This is a multi-relation truncate. We first open and grab exclusive + * lock on all relations involved, checking permissions and otherwise + * verifying that the relation is OK for truncation. Note that if relations + * are foreign tables, at this stage, we have not yet checked that their + * foreign data in external data sources are OK for truncation. These are + * checked when foreign data are actually truncated later. In CASCADE mode, + * relations having FK references to the targeted relations are automatically + * added to the group; in RESTRICT mode, we check that all FK references are + * internal to the group that's being truncated. Finally all the relations + * are truncated and reindexed. 
+ */ +void +ExecuteTruncate(TruncateStmt *stmt) +{ + List *rels = NIL; + List *relids = NIL; + List *relids_logged = NIL; + ListCell *cell; + + /* + * Open, exclusive-lock, and check all the explicitly-specified relations + */ + foreach(cell, stmt->relations) + { + RangeVar *rv = lfirst(cell); + Relation rel; + bool recurse = rv->inh; + Oid myrelid; + LOCKMODE lockmode = AccessExclusiveLock; + + myrelid = RangeVarGetRelidExtended(rv, lockmode, + 0, RangeVarCallbackForTruncate, + NULL); + + /* don't throw error for "TRUNCATE foo, foo" */ + if (list_member_oid(relids, myrelid)) + continue; + + /* open the relation, we already hold a lock on it */ + rel = table_open(myrelid, NoLock); + + /* + * RangeVarGetRelidExtended() has done most checks with its callback, + * but other checks with the now-opened Relation remain. + */ + truncate_check_activity(rel); + + rels = lappend(rels, rel); + relids = lappend_oid(relids, myrelid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, myrelid); + + if (recurse) + { + ListCell *child; + List *children; + + children = find_all_inheritors(myrelid, lockmode, NULL); + + foreach(child, children) + { + Oid childrelid = lfirst_oid(child); + + if (list_member_oid(relids, childrelid)) + continue; + + /* find_all_inheritors already got lock */ + rel = table_open(childrelid, NoLock); + + /* + * It is possible that the parent table has children that are + * temp tables of other backends. We cannot safely access + * such tables (because of buffering issues), and the best + * thing to do is to silently ignore them. Note that this + * check is the same as one of the checks done in + * truncate_check_activity() called below, still it is kept + * here for simplicity. 
+ */ + if (RELATION_IS_OTHER_TEMP(rel)) + { + table_close(rel, lockmode); + continue; + } + + /* + * Inherited TRUNCATE commands perform access permission + * checks on the parent table only. So we skip checking the + * children's permissions and don't call + * truncate_check_perms() here. + */ + truncate_check_rel(RelationGetRelid(rel), rel->rd_rel); + truncate_check_activity(rel); + + rels = lappend(rels, rel); + relids = lappend_oid(relids, childrelid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, childrelid); + } + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot truncate only a partitioned table"), + errhint("Do not specify the ONLY keyword, or use TRUNCATE ONLY on the partitions directly."))); + } + + ExecuteTruncateGuts(rels, relids, relids_logged, + stmt->behavior, stmt->restart_seqs); + + /* And close the rels */ + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + table_close(rel, NoLock); + } +} + +/* + * ExecuteTruncateGuts + * + * Internal implementation of TRUNCATE. This is called by the actual TRUNCATE + * command (see above) as well as replication subscribers that execute a + * replicated TRUNCATE action. + * + * explicit_rels is the list of Relations to truncate that the command + * specified. relids is the list of Oids corresponding to explicit_rels. + * relids_logged is the list of Oids (a subset of relids) that require + * WAL-logging. This is all a bit redundant, but the existing callers have + * this information handy in this form. 
+ */ +void +ExecuteTruncateGuts(List *explicit_rels, + List *relids, + List *relids_logged, + DropBehavior behavior, bool restart_seqs) +{ + List *rels; + List *seq_relids = NIL; + HTAB *ft_htab = NULL; + EState *estate; + ResultRelInfo *resultRelInfos; + ResultRelInfo *resultRelInfo; + SubTransactionId mySubid; + ListCell *cell; + Oid *logrelids; + + /* + * Check the explicitly-specified relations. + * + * In CASCADE mode, suck in all referencing relations as well. This + * requires multiple iterations to find indirectly-dependent relations. At + * each phase, we need to exclusive-lock new rels before looking for their + * dependencies, else we might miss something. Also, we check each rel as + * soon as we open it, to avoid a faux pas such as holding lock for a long + * time on a rel we have no permissions for. + */ + rels = list_copy(explicit_rels); + if (behavior == DROP_CASCADE) + { + for (;;) + { + List *newrelids; + + newrelids = heap_truncate_find_FKs(relids); + if (newrelids == NIL) + break; /* nothing else to add */ + + foreach(cell, newrelids) + { + Oid relid = lfirst_oid(cell); + Relation rel; + + rel = table_open(relid, AccessExclusiveLock); + ereport(NOTICE, + (errmsg("truncate cascades to table \"%s\"", + RelationGetRelationName(rel)))); + truncate_check_rel(relid, rel->rd_rel); + truncate_check_perms(relid, rel->rd_rel); + truncate_check_activity(rel); + rels = lappend(rels, rel); + relids = lappend_oid(relids, relid); + + /* Log this relation only if needed for logical decoding */ + if (RelationIsLogicallyLogged(rel)) + relids_logged = lappend_oid(relids_logged, relid); + } + } + } + + /* + * Check foreign key references. In CASCADE mode, this should be + * unnecessary since we just pulled in all the references; but as a + * cross-check, do it anyway if in an Assert-enabled build. 
+ */ +#ifdef USE_ASSERT_CHECKING + heap_truncate_check_FKs(rels, false); +#else + if (behavior == DROP_RESTRICT) + heap_truncate_check_FKs(rels, false); +#endif + + /* + * If we are asked to restart sequences, find all the sequences, lock them + * (we need AccessExclusiveLock for ResetSequence), and check permissions. + * We want to do this early since it's pointless to do all the truncation + * work only to fail on sequence permissions. + */ + if (restart_seqs) + { + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + List *seqlist = getOwnedSequences(RelationGetRelid(rel)); + ListCell *seqcell; + + foreach(seqcell, seqlist) + { + Oid seq_relid = lfirst_oid(seqcell); + Relation seq_rel; + + seq_rel = relation_open(seq_relid, AccessExclusiveLock); + + /* This check must match AlterSequence! */ + if (!pg_class_ownercheck(seq_relid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SEQUENCE, + RelationGetRelationName(seq_rel)); + + seq_relids = lappend_oid(seq_relids, seq_relid); + + relation_close(seq_rel, NoLock); + } + } + } + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + /* + * To fire triggers, we'll need an EState as well as a ResultRelInfo for + * each relation. We don't need to call ExecOpenIndices, though. + * + * We put the ResultRelInfos in the es_opened_result_relations list, even + * though we don't have a range table and don't populate the + * es_result_relations array. That's a bit bogus, but it's enough to make + * ExecGetTriggerResultRel() find them. 
+ */ + estate = CreateExecutorState(); + resultRelInfos = (ResultRelInfo *) + palloc(list_length(rels) * sizeof(ResultRelInfo)); + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + InitResultRelInfo(resultRelInfo, + rel, + 0, /* dummy rangetable index */ + NULL, + 0); + estate->es_opened_result_relations = + lappend(estate->es_opened_result_relations, resultRelInfo); + resultRelInfo++; + } + + /* + * Process all BEFORE STATEMENT TRUNCATE triggers before we begin + * truncating (this is because one of them might throw an error). Also, if + * we were to allow them to prevent statement execution, that would need + * to be handled here. + */ + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + ExecBSTruncateTriggers(estate, resultRelInfo); + resultRelInfo++; + } + + /* + * OK, truncate each table. + */ + mySubid = GetCurrentSubTransactionId(); + + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + /* Skip partitioned tables as there is nothing to do */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + continue; + + /* + * Build the lists of foreign tables belonging to each foreign server + * and pass each list to the foreign data wrapper's callback function, + * so that each server can truncate its all foreign tables in bulk. + * Each list is saved as a single entry in a hash table that uses the + * server OID as lookup key. 
+ */ + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + Oid serverid = GetForeignServerIdByRelId(RelationGetRelid(rel)); + bool found; + ForeignTruncateInfo *ft_info; + + /* First time through, initialize hashtable for foreign tables */ + if (!ft_htab) + { + HASHCTL hctl; + + memset(&hctl, 0, sizeof(HASHCTL)); + hctl.keysize = sizeof(Oid); + hctl.entrysize = sizeof(ForeignTruncateInfo); + hctl.hcxt = CurrentMemoryContext; + + ft_htab = hash_create("TRUNCATE for Foreign Tables", + 32, /* start small and extend */ + &hctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + } + + /* Find or create cached entry for the foreign table */ + ft_info = hash_search(ft_htab, &serverid, HASH_ENTER, &found); + if (!found) + { + ft_info->serverid = serverid; + ft_info->rels = NIL; + } + + /* + * Save the foreign table in the entry of the server that the + * foreign table belongs to. + */ + ft_info->rels = lappend(ft_info->rels, rel); + continue; + } + + /* + * Normally, we need a transaction-safe truncation here. However, if + * the table was either created in the current (sub)transaction or has + * a new relfilenode in the current (sub)transaction, then we can just + * truncate it in-place, because a rollback would cause the whole + * table or the current physical file to be thrown away anyway. + */ + if (rel->rd_createSubid == mySubid || + rel->rd_newRelfilenodeSubid == mySubid) + { + /* Immediate, non-rollbackable truncation is OK */ + heap_truncate_one_rel(rel); + } + else + { + Oid heap_relid; + Oid toast_relid; + ReindexParams reindex_params = {0}; + + /* + * This effectively deletes all rows in the table, and may be done + * in a serializable transaction. In that case we must record a + * rw-conflict in to this transaction from each transaction + * holding a predicate lock on the table. + */ + CheckTableForSerializableConflictIn(rel); + + /* + * Need the full transaction-safe pushups. 
+ * + * Create a new empty storage file for the relation, and assign it + * as the relfilenode value. The old storage file is scheduled for + * deletion at commit. + */ + RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence); + + heap_relid = RelationGetRelid(rel); + + /* + * The same for the toast table, if any. + */ + toast_relid = rel->rd_rel->reltoastrelid; + if (OidIsValid(toast_relid)) + { + Relation toastrel = relation_open(toast_relid, + AccessExclusiveLock); + + RelationSetNewRelfilenode(toastrel, + toastrel->rd_rel->relpersistence); + table_close(toastrel, NoLock); + } + + /* + * Reconstruct the indexes to match, and we're done. + */ + reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST, + &reindex_params); + } + + pgstat_count_truncate(rel); + } + + /* Now go through the hash table, and truncate foreign tables */ + if (ft_htab) + { + ForeignTruncateInfo *ft_info; + HASH_SEQ_STATUS seq; + + hash_seq_init(&seq, ft_htab); + + PG_TRY(); + { + while ((ft_info = hash_seq_search(&seq)) != NULL) + { + FdwRoutine *routine = GetFdwRoutineByServerId(ft_info->serverid); + + /* truncate_check_rel() has checked that already */ + Assert(routine->ExecForeignTruncate != NULL); + + routine->ExecForeignTruncate(ft_info->rels, + behavior, + restart_seqs); + } + } + PG_FINALLY(); + { + hash_destroy(ft_htab); + } + PG_END_TRY(); + } + + /* + * Restart owned sequences if we were asked to. + */ + foreach(cell, seq_relids) + { + Oid seq_relid = lfirst_oid(cell); + + ResetSequence(seq_relid); + } + + /* + * Write a WAL record to allow this set of actions to be logically + * decoded. + * + * Assemble an array of relids so we can write a single WAL record for the + * whole action. 
+ */ + if (list_length(relids_logged) > 0) + { + xl_heap_truncate xlrec; + int i = 0; + + /* should only get here if wal_level >= logical */ + Assert(XLogLogicalInfoActive()); + + logrelids = palloc(list_length(relids_logged) * sizeof(Oid)); + foreach(cell, relids_logged) + logrelids[i++] = lfirst_oid(cell); + + xlrec.dbId = MyDatabaseId; + xlrec.nrelids = list_length(relids_logged); + xlrec.flags = 0; + if (behavior == DROP_CASCADE) + xlrec.flags |= XLH_TRUNCATE_CASCADE; + if (restart_seqs) + xlrec.flags |= XLH_TRUNCATE_RESTART_SEQS; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapTruncate); + XLogRegisterData((char *) logrelids, list_length(relids_logged) * sizeof(Oid)); + + XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); + + (void) XLogInsert(RM_HEAP_ID, XLOG_HEAP_TRUNCATE); + } + + /* + * Process all AFTER STATEMENT TRUNCATE triggers. + */ + resultRelInfo = resultRelInfos; + foreach(cell, rels) + { + ExecASTruncateTriggers(estate, resultRelInfo); + resultRelInfo++; + } + + /* Handle queued AFTER triggers */ + AfterTriggerEndQuery(estate); + + /* We can clean up the EState now */ + FreeExecutorState(estate); + + /* + * Close any rels opened by CASCADE (can't do this while EState still + * holds refs) + */ + rels = list_difference_ptr(rels, explicit_rels); + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + + table_close(rel, NoLock); + } +} + +/* + * Check that a given relation is safe to truncate. Subroutine for + * ExecuteTruncate() and RangeVarCallbackForTruncate(). + */ +static void +truncate_check_rel(Oid relid, Form_pg_class reltuple) +{ + char *relname = NameStr(reltuple->relname); + + /* + * Only allow truncate on regular tables, foreign tables using foreign + * data wrappers supporting TRUNCATE and partitioned tables (although, the + * latter are only being included here for the following checks; no + * physical truncation will occur in their case.). 
+ */ + if (reltuple->relkind == RELKIND_FOREIGN_TABLE) + { + Oid serverid = GetForeignServerIdByRelId(relid); + FdwRoutine *fdwroutine = GetFdwRoutineByServerId(serverid); + + if (!fdwroutine->ExecForeignTruncate) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot truncate foreign table \"%s\"", + relname))); + } + else if (reltuple->relkind != RELKIND_RELATION && + reltuple->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table", relname))); + + /* + * Most system catalogs can't be truncated at all, or at least not unless + * allow_system_table_mods=on. As an exception, however, we allow + * pg_largeobject to be truncated as part of pg_upgrade, because we need + * to change its relfilenode to match the old cluster, and allowing a + * TRUNCATE command to be executed is the easiest way of doing that. + */ + if (!allowSystemTableMods && IsSystemClass(relid, reltuple) + && (!IsBinaryUpgrade || relid != LargeObjectRelationId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + relname))); + + InvokeObjectTruncateHook(relid); +} + +/* + * Check that current user has the permission to truncate given relation. + */ +static void +truncate_check_perms(Oid relid, Form_pg_class reltuple) +{ + char *relname = NameStr(reltuple->relname); + AclResult aclresult; + + /* Permissions checks */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(reltuple->relkind), + relname); +} + +/* + * Set of extra sanity checks to check if a given relation is safe to + * truncate. This is split with truncate_check_rel() as + * RangeVarCallbackForTruncate() cannot open a Relation yet. + */ +static void +truncate_check_activity(Relation rel) +{ + /* + * Don't allow truncate on temp tables of other backends ... 
their local
	 * buffer manager is not going to cope.
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot truncate temporary tables of other sessions")));

	/*
	 * Also check for active uses of the relation in the current transaction,
	 * including open scans and pending AFTER trigger events.
	 */
	CheckTableNotInUse(rel, "TRUNCATE");
}

/*
 * storage_name
 *	  returns the name corresponding to a typstorage/attstorage enum value
 */
static const char *
storage_name(char c)
{
	switch (c)
	{
		case TYPSTORAGE_PLAIN:
			return "PLAIN";
		case TYPSTORAGE_EXTERNAL:
			return "EXTERNAL";
		case TYPSTORAGE_EXTENDED:
			return "EXTENDED";
		case TYPSTORAGE_MAIN:
			return "MAIN";
		default:
			/* unrecognized storage value */
			return "???";
	}
}

/*----------
 * MergeAttributes
 *		Returns new schema given initial schema and superclasses.
 *
 * Input arguments:
 * 'schema' is the column/attribute definition for the table. (It's a list
 *		of ColumnDef's.) It is destructively changed.
 * 'supers' is a list of OIDs of parent relations, already locked by caller.
 * 'relpersistence' is the persistence type of the table.
 * 'is_partition' tells if the table is a partition.
 *
 * Output arguments:
 * 'supconstr' receives a list of constraints belonging to the parents,
 *		updated as necessary to be valid for the child.
 *
 * Return value:
 * Completed schema list.
 *
 * Notes:
 *	  The order in which the attributes are inherited is very important.
 *	  Intuitively, the inherited attributes should come first. If a table
 *	  inherits from multiple parents, the order of those attributes are
 *	  according to the order of the parents specified in CREATE TABLE.
+ * + * Here's an example: + * + * create table person (name text, age int4, location point); + * create table emp (salary int4, manager text) inherits(person); + * create table student (gpa float8) inherits (person); + * create table stud_emp (percent int4) inherits (emp, student); + * + * The order of the attributes of stud_emp is: + * + * person {1:name, 2:age, 3:location} + * / \ + * {6:gpa} student emp {4:salary, 5:manager} + * \ / + * stud_emp {7:percent} + * + * If the same attribute name appears multiple times, then it appears + * in the result table in the proper location for its first appearance. + * + * Constraints (including NOT NULL constraints) for the child table + * are the union of all relevant constraints, from both the child schema + * and parent tables. + * + * The default value for a child column is defined as: + * (1) If the child schema specifies a default, that value is used. + * (2) If neither the child nor any parent specifies a default, then + * the column will not have a default. + * (3) If conflicting defaults are inherited from different parents + * (and not overridden by the child), an error is raised. + * (4) Otherwise the inherited default is used. + * Rule (3) is new in Postgres 7.1; in earlier releases you got a + * rather arbitrary choice of which parent default to use. + *---------- + */ +static List * +MergeAttributes(List *schema, List *supers, char relpersistence, + bool is_partition, List **supconstr) +{ + List *inhSchema = NIL; + List *constraints = NIL; + bool have_bogus_defaults = false; + int child_attno; + static Node bogus_marker = {0}; /* marks conflicting defaults */ + List *saved_schema = NIL; + ListCell *entry; + + /* + * Check for and reject tables with too many columns. 
We perform this + * check relatively early for two reasons: (a) we don't run the risk of + * overflowing an AttrNumber in subsequent code (b) an O(n^2) algorithm is + * okay if we're processing <= 1600 columns, but could take minutes to + * execute if the user attempts to create a table with hundreds of + * thousands of columns. + * + * Note that we also need to check that we do not exceed this figure after + * including columns from inherited relations. + */ + if (list_length(schema) > MaxHeapAttributeNumber) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("tables can have at most %d columns", + MaxHeapAttributeNumber))); + + /* + * Check for duplicate names in the explicit list of attributes. + * + * Although we might consider merging such entries in the same way that we + * handle name conflicts for inherited attributes, it seems to make more + * sense to assume such conflicts are errors. + * + * We don't use foreach() here because we have two nested loops over the + * schema list, with possible element deletions in the inner one. If we + * used foreach_delete_current() it could only fix up the state of one of + * the loops, so it seems cleaner to use looping over list indexes for + * both loops. Note that any deletion will happen beyond where the outer + * loop is, so its index never needs adjustment. + */ + for (int coldefpos = 0; coldefpos < list_length(schema); coldefpos++) + { + ColumnDef *coldef = list_nth_node(ColumnDef, schema, coldefpos); + + if (!is_partition && coldef->typeName == NULL) + { + /* + * Typed table column option that does not belong to a column from + * the type. This works because the columns from the type come + * first in the list. (We omit this check for partition column + * lists; those are processed separately below.) 
+ */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + coldef->colname))); + } + + /* restpos scans all entries beyond coldef; incr is in loop body */ + for (int restpos = coldefpos + 1; restpos < list_length(schema);) + { + ColumnDef *restdef = list_nth_node(ColumnDef, schema, restpos); + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + if (coldef->is_from_type) + { + /* + * merge the column options into the column from the type + */ + coldef->is_not_null = restdef->is_not_null; + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = restdef->cooked_default; + coldef->constraints = restdef->constraints; + coldef->is_from_type = false; + schema = list_delete_nth_cell(schema, restpos); + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + coldef->colname))); + } + else + restpos++; + } + } + + /* + * In case of a partition, there are no new column definitions, only dummy + * ColumnDefs created for column constraints. Set them aside for now and + * process them at the end. + */ + if (is_partition) + { + saved_schema = schema; + schema = NIL; + } + + /* + * Scan the parents left-to-right, and merge their attributes to form a + * list of inherited attributes (inhSchema). Also check to see if we need + * to inherit an OID column. + */ + child_attno = 0; + foreach(entry, supers) + { + Oid parent = lfirst_oid(entry); + Relation relation; + TupleDesc tupleDesc; + TupleConstr *constr; + AttrMap *newattmap; + List *inherited_defaults; + List *cols_with_defaults; + AttrNumber parent_attno; + ListCell *lc1; + ListCell *lc2; + + /* caller already got lock */ + relation = table_open(parent, NoLock); + + /* + * Check for active uses of the parent partitioned table in the + * current transaction, such as being used in some manner by an + * enclosing command. + */ + if (is_partition) + CheckTableNotInUse(relation, "CREATE TABLE .. 
PARTITION OF"); + + /* + * We do not allow partitioned tables and partitions to participate in + * regular inheritance. + */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + RelationGetRelationName(relation)))); + if (relation->rd_rel->relispartition && !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partition \"%s\"", + RelationGetRelationName(relation)))); + + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("inherited relation \"%s\" is not a table or foreign table", + RelationGetRelationName(relation)))); + + /* + * If the parent is permanent, so must be all of its partitions. Note + * that inheritance allows that case. + */ + if (is_partition && + relation->rd_rel->relpersistence != RELPERSISTENCE_TEMP && + relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create a temporary relation as partition of permanent relation \"%s\"", + RelationGetRelationName(relation)))); + + /* Permanent rels cannot inherit from temporary ones */ + if (relpersistence != RELPERSISTENCE_TEMP && + relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(!is_partition + ? "cannot inherit from temporary relation \"%s\"" + : "cannot create a permanent relation as partition of temporary relation \"%s\"", + RelationGetRelationName(relation)))); + + /* If existing rel is temp, it must belong to this session */ + if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !relation->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg(!is_partition + ? 
"cannot inherit from temporary relation of another session" + : "cannot create as partition of temporary relation of another session"))); + + /* + * We should have an UNDER permission flag for this, but for now, + * demand that creator of a child table own the parent. + */ + if (!pg_class_ownercheck(RelationGetRelid(relation), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(relation->rd_rel->relkind), + RelationGetRelationName(relation)); + + tupleDesc = RelationGetDescr(relation); + constr = tupleDesc->constr; + + /* + * newattmap->attnums[] will contain the child-table attribute numbers + * for the attributes of this parent table. (They are not the same + * for parents after the first one, nor if we have dropped columns.) + */ + newattmap = make_attrmap(tupleDesc->natts); + + /* We can't process inherited defaults until newattmap is complete. */ + inherited_defaults = cols_with_defaults = NIL; + + for (parent_attno = 1; parent_attno <= tupleDesc->natts; + parent_attno++) + { + Form_pg_attribute attribute = TupleDescAttr(tupleDesc, + parent_attno - 1); + char *attributeName = NameStr(attribute->attname); + int exist_attno; + ColumnDef *def; + + /* + * Ignore dropped columns in the parent. + */ + if (attribute->attisdropped) + continue; /* leave newattmap->attnums entry as zero */ + + /* + * Does it conflict with some previously inherited column? + */ + exist_attno = findAttrByName(attributeName, inhSchema); + if (exist_attno > 0) + { + Oid defTypeId; + int32 deftypmod; + Oid defCollId; + + /* + * Yes, try to merge the two column definitions. They must + * have the same type, typmod, and collation. 
+ */ + ereport(NOTICE, + (errmsg("merging multiple inherited definitions of column \"%s\"", + attributeName))); + def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1); + typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod); + if (defTypeId != attribute->atttypid || + deftypmod != attribute->atttypmod) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a type conflict", + attributeName), + errdetail("%s versus %s", + format_type_with_typemod(defTypeId, + deftypmod), + format_type_with_typemod(attribute->atttypid, + attribute->atttypmod)))); + defCollId = GetColumnDefCollation(NULL, def, defTypeId); + if (defCollId != attribute->attcollation) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("inherited column \"%s\" has a collation conflict", + attributeName), + errdetail("\"%s\" versus \"%s\"", + get_collation_name(defCollId), + get_collation_name(attribute->attcollation)))); + + /* Copy/check storage parameter */ + if (def->storage == 0) + def->storage = attribute->attstorage; + else if (def->storage != attribute->attstorage) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a storage parameter conflict", + attributeName), + errdetail("%s versus %s", + storage_name(def->storage), + storage_name(attribute->attstorage)))); + + /* Copy/check compression parameter */ + if (CompressionMethodIsValid(attribute->attcompression)) + { + const char *compression = + GetCompressionMethodName(attribute->attcompression); + + if (def->compression == NULL) + def->compression = pstrdup(compression); + else if (strcmp(def->compression, compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, compression))); + } + + def->inhcount++; + /* Merge of NOT NULL constraints = OR 'em together */ + def->is_not_null |= 
attribute->attnotnull; + /* Default and other constraints are handled below */ + newattmap->attnums[parent_attno - 1] = exist_attno; + + /* Check for GENERATED conflicts */ + if (def->generated != attribute->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("inherited column \"%s\" has a generation conflict", + attributeName))); + } + else + { + /* + * No, create a new inherited column + */ + def = makeNode(ColumnDef); + def->colname = pstrdup(attributeName); + def->typeName = makeTypeNameFromOid(attribute->atttypid, + attribute->atttypmod); + def->inhcount = 1; + def->is_local = false; + def->is_not_null = attribute->attnotnull; + def->is_from_type = false; + def->storage = attribute->attstorage; + def->raw_default = NULL; + def->cooked_default = NULL; + def->generated = attribute->attgenerated; + def->collClause = NULL; + def->collOid = attribute->attcollation; + def->constraints = NIL; + def->location = -1; + if (CompressionMethodIsValid(attribute->attcompression)) + def->compression = + pstrdup(GetCompressionMethodName(attribute->attcompression)); + else + def->compression = NULL; + inhSchema = lappend(inhSchema, def); + newattmap->attnums[parent_attno - 1] = ++child_attno; + } + + /* + * Locate default if any + */ + if (attribute->atthasdef) + { + Node *this_default = NULL; + + /* Find default in constraint structure */ + if (constr != NULL) + { + AttrDefault *attrdef = constr->defval; + + for (int i = 0; i < constr->num_defval; i++) + { + if (attrdef[i].adnum == parent_attno) + { + this_default = stringToNode(attrdef[i].adbin); + break; + } + } + } + if (this_default == NULL) + elog(ERROR, "default expression not found for attribute %d of relation \"%s\"", + parent_attno, RelationGetRelationName(relation)); + + /* + * If it's a GENERATED default, it might contain Vars that + * need to be mapped to the inherited column(s)' new numbers. 
+ * We can't do that till newattmap is ready, so just remember + * all the inherited default expressions for the moment. + */ + inherited_defaults = lappend(inherited_defaults, this_default); + cols_with_defaults = lappend(cols_with_defaults, def); + } + } + + /* + * Now process any inherited default expressions, adjusting attnos + * using the completed newattmap map. + */ + forboth(lc1, inherited_defaults, lc2, cols_with_defaults) + { + Node *this_default = (Node *) lfirst(lc1); + ColumnDef *def = (ColumnDef *) lfirst(lc2); + bool found_whole_row; + + /* Adjust Vars to match new table's column numbering */ + this_default = map_variable_attnos(this_default, + 1, 0, + newattmap, + InvalidOid, &found_whole_row); + + /* + * For the moment we have to reject whole-row variables. We could + * convert them, if we knew the new table's rowtype OID, but that + * hasn't been assigned yet. (A variable could only appear in a + * generation expression, so the error message is correct.) + */ + if (found_whole_row) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert whole-row table reference"), + errdetail("Generation expression for column \"%s\" contains a whole-row reference to table \"%s\".", + def->colname, + RelationGetRelationName(relation)))); + + /* + * If we already had a default from some prior parent, check to + * see if they are the same. If so, no problem; if not, mark the + * column as having a bogus default. Below, we will complain if + * the bogus default isn't overridden by the child schema. + */ + Assert(def->raw_default == NULL); + if (def->cooked_default == NULL) + def->cooked_default = this_default; + else if (!equal(def->cooked_default, this_default)) + { + def->cooked_default = &bogus_marker; + have_bogus_defaults = true; + } + } + + /* + * Now copy the CHECK constraints of this parent, adjusting attnos + * using the completed newattmap map. Identically named constraints + * are merged if possible, else we throw error. 
+ */ + if (constr && constr->num_check > 0) + { + ConstrCheck *check = constr->check; + int i; + + for (i = 0; i < constr->num_check; i++) + { + char *name = check[i].ccname; + Node *expr; + bool found_whole_row; + + /* ignore if the constraint is non-inheritable */ + if (check[i].ccnoinherit) + continue; + + /* Adjust Vars to match new table's column numbering */ + expr = map_variable_attnos(stringToNode(check[i].ccbin), + 1, 0, + newattmap, + InvalidOid, &found_whole_row); + + /* + * For the moment we have to reject whole-row variables. We + * could convert them, if we knew the new table's rowtype OID, + * but that hasn't been assigned yet. + */ + if (found_whole_row) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert whole-row table reference"), + errdetail("Constraint \"%s\" contains a whole-row reference to table \"%s\".", + name, + RelationGetRelationName(relation)))); + + /* check for duplicate */ + if (!MergeCheckConstraint(constraints, name, expr)) + { + /* nope, this is a new one */ + CookedConstraint *cooked; + + cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint)); + cooked->contype = CONSTR_CHECK; + cooked->conoid = InvalidOid; /* until created */ + cooked->name = pstrdup(name); + cooked->attnum = 0; /* not used for constraints */ + cooked->expr = expr; + cooked->skip_validation = false; + cooked->is_local = false; + cooked->inhcount = 1; + cooked->is_no_inherit = false; + constraints = lappend(constraints, cooked); + } + } + } + + free_attrmap(newattmap); + + /* + * Close the parent rel, but keep our lock on it until xact commit. + * That will prevent someone else from deleting or ALTERing the parent + * before the child is committed. + */ + table_close(relation, NoLock); + } + + /* + * If we had no inherited attributes, the result schema is just the + * explicitly declared columns. Otherwise, we need to merge the declared + * columns into the inherited schema list. 
Although, we never have any + * explicitly declared columns if the table is a partition. + */ + if (inhSchema != NIL) + { + int schema_attno = 0; + + foreach(entry, schema) + { + ColumnDef *newdef = lfirst(entry); + char *attributeName = newdef->colname; + int exist_attno; + + schema_attno++; + + /* + * Does it conflict with some previously inherited column? + */ + exist_attno = findAttrByName(attributeName, inhSchema); + if (exist_attno > 0) + { + ColumnDef *def; + Oid defTypeId, + newTypeId; + int32 deftypmod, + newtypmod; + Oid defcollid, + newcollid; + + /* + * Partitions have only one parent and have no column + * definitions of their own, so conflict should never occur. + */ + Assert(!is_partition); + + /* + * Yes, try to merge the two column definitions. They must + * have the same type, typmod, and collation. + */ + if (exist_attno == schema_attno) + ereport(NOTICE, + (errmsg("merging column \"%s\" with inherited definition", + attributeName))); + else + ereport(NOTICE, + (errmsg("moving and merging column \"%s\" with inherited definition", attributeName), + errdetail("User-specified column moved to the position of the inherited column."))); + def = (ColumnDef *) list_nth(inhSchema, exist_attno - 1); + typenameTypeIdAndMod(NULL, def->typeName, &defTypeId, &deftypmod); + typenameTypeIdAndMod(NULL, newdef->typeName, &newTypeId, &newtypmod); + if (defTypeId != newTypeId || deftypmod != newtypmod) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a type conflict", + attributeName), + errdetail("%s versus %s", + format_type_with_typemod(defTypeId, + deftypmod), + format_type_with_typemod(newTypeId, + newtypmod)))); + defcollid = GetColumnDefCollation(NULL, def, defTypeId); + newcollid = GetColumnDefCollation(NULL, newdef, newTypeId); + if (defcollid != newcollid) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("column \"%s\" has a collation conflict", + attributeName), + errdetail("\"%s\" versus \"%s\"", + 
get_collation_name(defcollid), + get_collation_name(newcollid)))); + + /* + * Identity is never inherited. The new column can have an + * identity definition, so we always just take that one. + */ + def->identity = newdef->identity; + + /* Copy storage parameter */ + if (def->storage == 0) + def->storage = newdef->storage; + else if (newdef->storage != 0 && def->storage != newdef->storage) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a storage parameter conflict", + attributeName), + errdetail("%s versus %s", + storage_name(def->storage), + storage_name(newdef->storage)))); + + /* Copy compression parameter */ + if (def->compression == NULL) + def->compression = newdef->compression; + else if (newdef->compression != NULL) + { + if (strcmp(def->compression, newdef->compression) != 0) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" has a compression method conflict", + attributeName), + errdetail("%s versus %s", def->compression, newdef->compression))); + } + + /* Mark the column as locally defined */ + def->is_local = true; + /* Merge of NOT NULL constraints = OR 'em together */ + def->is_not_null |= newdef->is_not_null; + + /* + * Check for conflicts related to generated columns. + * + * If the parent column is generated, the child column must be + * unadorned and will be made a generated column. (We could + * in theory allow the child column definition specifying the + * exact same generation expression, but that's a bit + * complicated to implement and doesn't seem very useful.) We + * also check that the child column doesn't specify a default + * value or identity, which matches the rules for a single + * column in parse_util.c. 
+ */ + if (def->generated) + { + if (newdef->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("child column \"%s\" specifies generation expression", + def->colname), + errhint("Omit the generation expression in the definition of the child table column to inherit the generation expression from the parent table."))); + if (newdef->raw_default && !newdef->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits from generated column but specifies default", + def->colname))); + if (newdef->identity) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits from generated column but specifies identity", + def->colname))); + } + + /* + * If the parent column is not generated, then take whatever + * the child column definition says. + */ + else + { + if (newdef->generated) + def->generated = newdef->generated; + } + + /* If new def has a default, override previous default */ + if (newdef->raw_default != NULL) + { + def->raw_default = newdef->raw_default; + def->cooked_default = newdef->cooked_default; + } + } + else + { + /* + * No, attach new column to result schema + */ + inhSchema = lappend(inhSchema, newdef); + } + } + + schema = inhSchema; + + /* + * Check that we haven't exceeded the legal # of columns after merging + * in inherited columns. + */ + if (list_length(schema) > MaxHeapAttributeNumber) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("tables can have at most %d columns", + MaxHeapAttributeNumber))); + } + + /* + * Now that we have the column definition list for a partition, we can + * check whether the columns referenced in the column constraint specs + * actually exist. Also, we merge NOT NULL and defaults into each + * corresponding column definition. 
+ */ + if (is_partition) + { + foreach(entry, saved_schema) + { + ColumnDef *restdef = lfirst(entry); + bool found = false; + ListCell *l; + + foreach(l, schema) + { + ColumnDef *coldef = lfirst(l); + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + found = true; + coldef->is_not_null |= restdef->is_not_null; + + /* + * Override the parent's default value for this column + * (coldef->cooked_default) with the partition's local + * definition (restdef->raw_default), if there's one. It + * should be physically impossible to get a cooked default + * in the local definition or a raw default in the + * inherited definition, but make sure they're nulls, for + * future-proofing. + */ + Assert(restdef->cooked_default == NULL); + Assert(coldef->raw_default == NULL); + if (restdef->raw_default) + { + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = NULL; + } + } + } + + /* complain for constraints on columns not in parent */ + if (!found) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + restdef->colname))); + } + } + + /* + * If we found any conflicting parent default values, check to make sure + * they were overridden by the child. 
+ */ + if (have_bogus_defaults) + { + foreach(entry, schema) + { + ColumnDef *def = lfirst(entry); + + if (def->cooked_default == &bogus_marker) + { + if (def->generated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits conflicting generation expressions", + def->colname))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_DEFINITION), + errmsg("column \"%s\" inherits conflicting default values", + def->colname), + errhint("To resolve the conflict, specify a default explicitly."))); + } + } + } + + *supconstr = constraints; + return schema; +} + + +/* + * MergeCheckConstraint + * Try to merge an inherited CHECK constraint with previous ones + * + * If we inherit identically-named constraints from multiple parents, we must + * merge them, or throw an error if they don't have identical definitions. + * + * constraints is a list of CookedConstraint structs for previous constraints. + * + * Returns true if merged (constraint is a duplicate), or false if it's + * got a so-far-unique name, or throws error if conflict. + */ +static bool +MergeCheckConstraint(List *constraints, char *name, Node *expr) +{ + ListCell *lc; + + foreach(lc, constraints) + { + CookedConstraint *ccon = (CookedConstraint *) lfirst(lc); + + Assert(ccon->contype == CONSTR_CHECK); + + /* Non-matching names never conflict */ + if (strcmp(ccon->name, name) != 0) + continue; + + if (equal(expr, ccon->expr)) + { + /* OK to merge */ + ccon->inhcount++; + return true; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("check constraint name \"%s\" appears multiple times but with different expressions", + name))); + } + + return false; +} + + +/* + * StoreCatalogInheritance + * Updates the system catalogs with proper inheritance information. + * + * supers is a list of the OIDs of the new relation's direct ancestors. 
+ */ +static void +StoreCatalogInheritance(Oid relationId, List *supers, + bool child_is_partition) +{ + Relation relation; + int32 seqNumber; + ListCell *entry; + + /* + * sanity checks + */ + AssertArg(OidIsValid(relationId)); + + if (supers == NIL) + return; + + /* + * Store INHERITS information in pg_inherits using direct ancestors only. + * Also enter dependencies on the direct ancestors, and make sure they are + * marked with relhassubclass = true. + * + * (Once upon a time, both direct and indirect ancestors were found here + * and then entered into pg_ipl. Since that catalog doesn't exist + * anymore, there's no need to look for indirect ancestors.) + */ + relation = table_open(InheritsRelationId, RowExclusiveLock); + + seqNumber = 1; + foreach(entry, supers) + { + Oid parentOid = lfirst_oid(entry); + + StoreCatalogInheritance1(relationId, parentOid, seqNumber, relation, + child_is_partition); + seqNumber++; + } + + table_close(relation, RowExclusiveLock); +} + +/* + * Make catalog entries showing relationId as being an inheritance child + * of parentOid. inhRelation is the already-opened pg_inherits catalog. + */ +static void +StoreCatalogInheritance1(Oid relationId, Oid parentOid, + int32 seqNumber, Relation inhRelation, + bool child_is_partition) +{ + ObjectAddress childobject, + parentobject; + + /* store the pg_inherits row */ + StoreSingleInheritance(relationId, parentOid, seqNumber); + + /* + * Store a dependency too + */ + parentobject.classId = RelationRelationId; + parentobject.objectId = parentOid; + parentobject.objectSubId = 0; + childobject.classId = RelationRelationId; + childobject.objectId = relationId; + childobject.objectSubId = 0; + + recordDependencyOn(&childobject, &parentobject, + child_dependency_type(child_is_partition)); + + /* + * Post creation hook of this inheritance. Since object_access_hook + * doesn't take multiple object identifiers, we relay oid of parent + * relation using auxiliary_id argument. 
+ */ + InvokeObjectPostAlterHookArg(InheritsRelationId, + relationId, 0, + parentOid, false); + + /* + * Mark the parent as having subclasses. + */ + SetRelationHasSubclass(parentOid, true); +} + +/* + * Look for an existing schema entry with the given name. + * + * Returns the index (starting with 1) if attribute already exists in schema, + * 0 if it doesn't. + */ +static int +findAttrByName(const char *attributeName, List *schema) +{ + ListCell *s; + int i = 1; + + foreach(s, schema) + { + ColumnDef *def = lfirst(s); + + if (strcmp(attributeName, def->colname) == 0) + return i; + + i++; + } + return 0; +} + + +/* + * SetRelationHasSubclass + * Set the value of the relation's relhassubclass field in pg_class. + * + * NOTE: caller must be holding an appropriate lock on the relation. + * ShareUpdateExclusiveLock is sufficient. + * + * NOTE: an important side-effect of this operation is that an SI invalidation + * message is sent out to all backends --- including me --- causing plans + * referencing the relation to be rebuilt with the new list of children. + * This must happen even if we find that no change is needed in the pg_class + * row. + */ +void +SetRelationHasSubclass(Oid relationId, bool relhassubclass) +{ + Relation relationRelation; + HeapTuple tuple; + Form_pg_class classtuple; + + /* + * Fetch a modifiable copy of the tuple, modify it, update pg_class. 
+ */ + relationRelation = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", relationId); + classtuple = (Form_pg_class) GETSTRUCT(tuple); + + if (classtuple->relhassubclass != relhassubclass) + { + classtuple->relhassubclass = relhassubclass; + CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple); + } + else + { + /* no need to change tuple, but force relcache rebuild anyway */ + CacheInvalidateRelcacheByTuple(tuple); + } + + heap_freetuple(tuple); + table_close(relationRelation, RowExclusiveLock); +} + +/* + * CheckRelationTableSpaceMove + * Check if relation can be moved to new tablespace. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * Returns true if the relation can be moved to the new tablespace; raises + * an error if it is not possible to do the move; returns false if the move + * would have no effect. + */ +bool +CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId) +{ + Oid oldTableSpaceId; + + /* + * No work if no change in tablespace. Note that MyDatabaseTableSpace is + * stored as 0. + */ + oldTableSpaceId = rel->rd_rel->reltablespace; + if (newTableSpaceId == oldTableSpaceId || + (newTableSpaceId == MyDatabaseTableSpace && oldTableSpaceId == 0)) + return false; + + /* + * We cannot support moving mapped relations into different tablespaces. + * (In particular this eliminates all shared catalogs.) 
+ */ + if (RelationIsMapped(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move system relation \"%s\"", + RelationGetRelationName(rel)))); + + /* Cannot move a non-shared relation into pg_global */ + if (newTableSpaceId == GLOBALTABLESPACE_OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("only shared relations can be placed in pg_global tablespace"))); + + /* + * Do not allow moving temp tables of other backends ... their local + * buffer manager is not going to cope. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move temporary tables of other sessions"))); + + return true; +} + +/* + * SetRelationTableSpace + * Set new reltablespace and relfilenode in pg_class entry. + * + * newTableSpaceId is the new tablespace for the relation, and + * newRelFileNode its new filenode. If newRelFileNode is InvalidOid, + * this field is not updated. + * + * NOTE: The caller must hold AccessExclusiveLock on the relation. + * + * The caller of this routine had better check if a relation can be + * moved to this new tablespace by calling CheckRelationTableSpaceMove() + * first, and is responsible for making the change visible with + * CommandCounterIncrement(). + */ +void +SetRelationTableSpace(Relation rel, + Oid newTableSpaceId, + Oid newRelFileNode) +{ + Relation pg_class; + HeapTuple tuple; + Form_pg_class rd_rel; + Oid reloid = RelationGetRelid(rel); + + Assert(CheckRelationTableSpaceMove(rel, newTableSpaceId)); + + /* Get a modifiable copy of the relation's pg_class row. */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(reloid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", reloid); + rd_rel = (Form_pg_class) GETSTRUCT(tuple); + + /* Update the pg_class row. */ + rd_rel->reltablespace = (newTableSpaceId == MyDatabaseTableSpace) ? 
+ InvalidOid : newTableSpaceId; + if (OidIsValid(newRelFileNode)) + rd_rel->relfilenode = newRelFileNode; + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + + /* + * Record dependency on tablespace. This is only required for relations + * that have no physical storage. + */ + if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + changeDependencyOnTablespace(RelationRelationId, reloid, + rd_rel->reltablespace); + + heap_freetuple(tuple); + table_close(pg_class, RowExclusiveLock); +} + +/* + * renameatt_check - basic sanity checks before attribute rename + */ +static void +renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) +{ + char relkind = classform->relkind; + + if (classform->reloftype && !recursing) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot rename column of typed table"))); + + /* + * Renaming the columns of sequences or toast tables doesn't actually + * break anything from the system's point of view, since internal + * references are by attnum. But it doesn't seem right to allow users to + * change names that are hardcoded into the system, hence the following + * restriction. + */ + if (relkind != RELKIND_RELATION && + relkind != RELKIND_VIEW && + relkind != RELKIND_MATVIEW && + relkind != RELKIND_COMPOSITE_TYPE && + relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX && + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot rename columns of relation \"%s\"", + NameStr(classform->relname)), + errdetail_relkind_not_supported(relkind))); + + /* + * permissions checking. only the owner of a class can change its schema. 
+ */ + if (!pg_class_ownercheck(myrelid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(myrelid)), + NameStr(classform->relname)); + if (!allowSystemTableMods && IsSystemClass(myrelid, classform)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + NameStr(classform->relname)))); +} + +/* + * renameatt_internal - workhorse for renameatt + * + * Return value is the attribute number in the 'myrelid' relation. + */ +static AttrNumber +renameatt_internal(Oid myrelid, + const char *oldattname, + const char *newattname, + bool recurse, + bool recursing, + int expected_parents, + DropBehavior behavior) +{ + Relation targetrelation; + Relation attrelation; + HeapTuple atttup; + Form_pg_attribute attform; + AttrNumber attnum; + + /* + * Grab an exclusive lock on the target table, which we will NOT release + * until end of transaction. + */ + targetrelation = relation_open(myrelid, AccessExclusiveLock); + renameatt_check(myrelid, RelationGetForm(targetrelation), recursing); + + /* + * if the 'recurse' flag is set then we are supposed to rename this + * attribute in all classes that inherit from 'relname' (as well as in + * 'relname'). + * + * any permissions or problems with duplicate attributes will cause the + * whole transaction to abort, which is what we want -- all or nothing. + */ + if (recurse) + { + List *child_oids, + *child_numparents; + ListCell *lo, + *li; + + /* + * we need the number of parents for each child so that the recursive + * calls to renameatt() can determine whether there are any parents + * outside the inheritance hierarchy being processed. + */ + child_oids = find_all_inheritors(myrelid, AccessExclusiveLock, + &child_numparents); + + /* + * find_all_inheritors does the recursive search of the inheritance + * hierarchy, so all we have to do is process all of the relids in the + * list that it returns. 
+ */ + forboth(lo, child_oids, li, child_numparents) + { + Oid childrelid = lfirst_oid(lo); + int numparents = lfirst_int(li); + + if (childrelid == myrelid) + continue; + /* note we need not recurse again */ + renameatt_internal(childrelid, oldattname, newattname, false, true, numparents, behavior); + } + } + else + { + /* + * If we are told not to recurse, there had better not be any child + * tables; else the rename would put them out of step. + * + * expected_parents will only be 0 if we are not already recursing. + */ + if (expected_parents == 0 && + find_inheritance_children(myrelid, NoLock) != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("inherited column \"%s\" must be renamed in child tables too", + oldattname))); + } + + /* rename attributes in typed tables of composite type */ + if (targetrelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + { + List *child_oids; + ListCell *lo; + + child_oids = find_typed_table_dependencies(targetrelation->rd_rel->reltype, + RelationGetRelationName(targetrelation), + behavior); + + foreach(lo, child_oids) + renameatt_internal(lfirst_oid(lo), oldattname, newattname, true, true, 0, behavior); + } + + attrelation = table_open(AttributeRelationId, RowExclusiveLock); + + atttup = SearchSysCacheCopyAttName(myrelid, oldattname); + if (!HeapTupleIsValid(atttup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + oldattname))); + attform = (Form_pg_attribute) GETSTRUCT(atttup); + + attnum = attform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rename system column \"%s\"", + oldattname))); + + /* + * if the attribute is inherited, forbid the renaming. if this is a + * top-level call to renameatt(), then expected_parents will be 0, so the + * effect of this code will be to prohibit the renaming if the attribute + * is inherited at all. 
if this is a recursive call to renameatt(), + * expected_parents will be the number of parents the current relation has + * within the inheritance hierarchy being processed, so we'll prohibit the + * renaming only if there are additional parents from elsewhere. + */ + if (attform->attinhcount > expected_parents) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot rename inherited column \"%s\"", + oldattname))); + + /* new name should not already exist */ + (void) check_for_column_name_collision(targetrelation, newattname, false); + + /* apply the update */ + namestrcpy(&(attform->attname), newattname); + + CatalogTupleUpdate(attrelation, &atttup->t_self, atttup); + + InvokeObjectPostAlterHook(RelationRelationId, myrelid, attnum); + + heap_freetuple(atttup); + + table_close(attrelation, RowExclusiveLock); + + relation_close(targetrelation, NoLock); /* close rel but keep lock */ + + return attnum; +} + +/* + * Perform permissions and integrity checks before acquiring a relation lock. + */ +static void +RangeVarCallbackForRenameAttribute(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + HeapTuple tuple; + Form_pg_class form; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped */ + form = (Form_pg_class) GETSTRUCT(tuple); + renameatt_check(relid, form, false); + ReleaseSysCache(tuple); +} + +/* + * renameatt - changes the name of an attribute in a relation + * + * The returned ObjectAddress is that of the renamed column. + */ +ObjectAddress +renameatt(RenameStmt *stmt) +{ + Oid relid; + AttrNumber attnum; + ObjectAddress address; + + /* lock level taken here should match renameatt_internal */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + stmt->missing_ok ? 
RVR_MISSING_OK : 0, + RangeVarCallbackForRenameAttribute, + NULL); + + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + + attnum = + renameatt_internal(relid, + stmt->subname, /* old att name */ + stmt->newname, /* new att name */ + stmt->relation->inh, /* recursive? */ + false, /* recursing? */ + 0, /* expected inhcount */ + stmt->behavior); + + ObjectAddressSubSet(address, RelationRelationId, relid, attnum); + + return address; +} + +/* + * same logic as renameatt_internal + */ +static ObjectAddress +rename_constraint_internal(Oid myrelid, + Oid mytypid, + const char *oldconname, + const char *newconname, + bool recurse, + bool recursing, + int expected_parents) +{ + Relation targetrelation = NULL; + Oid constraintOid; + HeapTuple tuple; + Form_pg_constraint con; + ObjectAddress address; + + AssertArg(!myrelid || !mytypid); + + if (mytypid) + { + constraintOid = get_domain_constraint_oid(mytypid, oldconname, false); + } + else + { + targetrelation = relation_open(myrelid, AccessExclusiveLock); + + /* + * don't tell it whether we're recursing; we allow changing typed + * tables here + */ + renameatt_check(myrelid, RelationGetForm(targetrelation), false); + + constraintOid = get_relation_constraint_oid(myrelid, oldconname, false); + } + + tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", + constraintOid); + con = (Form_pg_constraint) GETSTRUCT(tuple); + + if (myrelid && con->contype == CONSTRAINT_CHECK && !con->connoinherit) + { + if (recurse) + { + List *child_oids, + *child_numparents; + ListCell *lo, + *li; + + child_oids = find_all_inheritors(myrelid, AccessExclusiveLock, + &child_numparents); + + forboth(lo, child_oids, li, child_numparents) + { + Oid childrelid = lfirst_oid(lo); + int numparents = lfirst_int(li); + + if (childrelid == 
myrelid) + continue; + + rename_constraint_internal(childrelid, InvalidOid, oldconname, newconname, false, true, numparents); + } + } + else + { + if (expected_parents == 0 && + find_inheritance_children(myrelid, NoLock) != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("inherited constraint \"%s\" must be renamed in child tables too", + oldconname))); + } + + if (con->coninhcount > expected_parents) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot rename inherited constraint \"%s\"", + oldconname))); + } + + if (con->conindid + && (con->contype == CONSTRAINT_PRIMARY + || con->contype == CONSTRAINT_UNIQUE + || con->contype == CONSTRAINT_EXCLUSION)) + /* rename the index; this renames the constraint as well */ + RenameRelationInternal(con->conindid, newconname, false, true); + else + RenameConstraintById(constraintOid, newconname); + + ObjectAddressSet(address, ConstraintRelationId, constraintOid); + + ReleaseSysCache(tuple); + + if (targetrelation) + { + /* + * Invalidate relcache so as others can see the new constraint name. + */ + CacheInvalidateRelcache(targetrelation); + + relation_close(targetrelation, NoLock); /* close rel but keep lock */ + } + + return address; +} + +ObjectAddress +RenameConstraint(RenameStmt *stmt) +{ + Oid relid = InvalidOid; + Oid typid = InvalidOid; + + if (stmt->renameType == OBJECT_DOMCONSTRAINT) + { + Relation rel; + HeapTuple tup; + + typid = typenameTypeId(NULL, makeTypeNameFromNameList(castNode(List, stmt->object))); + rel = table_open(TypeRelationId, RowExclusiveLock); + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typid); + checkDomainOwner(tup); + ReleaseSysCache(tup); + table_close(rel, NoLock); + } + else + { + /* lock level taken here should match rename_constraint_internal */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + stmt->missing_ok ? 
RVR_MISSING_OK : 0, + RangeVarCallbackForRenameAttribute, + NULL); + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + } + + return + rename_constraint_internal(relid, typid, + stmt->subname, + stmt->newname, + (stmt->relation && + stmt->relation->inh), /* recursive? */ + false, /* recursing? */ + 0 /* expected inhcount */ ); +} + +/* + * Execute ALTER TABLE/INDEX/SEQUENCE/VIEW/MATERIALIZED VIEW/FOREIGN TABLE + * RENAME + */ +ObjectAddress +RenameRelation(RenameStmt *stmt) +{ + bool is_index_stmt = stmt->renameType == OBJECT_INDEX; + Oid relid; + ObjectAddress address; + + /* + * Grab an exclusive lock on the target table, index, sequence, view, + * materialized view, or foreign table, which we will NOT release until + * end of transaction. + * + * Lock level used here should match RenameRelationInternal, to avoid lock + * escalation. However, because ALTER INDEX can be used with any relation + * type, we mustn't believe without verification. + */ + for (;;) + { + LOCKMODE lockmode; + char relkind; + bool obj_is_index; + + lockmode = is_index_stmt ? ShareUpdateExclusiveLock : AccessExclusiveLock; + + relid = RangeVarGetRelidExtended(stmt->relation, lockmode, + stmt->missing_ok ? RVR_MISSING_OK : 0, + RangeVarCallbackForAlterRelation, + (void *) stmt); + + if (!OidIsValid(relid)) + { + ereport(NOTICE, + (errmsg("relation \"%s\" does not exist, skipping", + stmt->relation->relname))); + return InvalidObjectAddress; + } + + /* + * We allow mismatched statement and object types (e.g., ALTER INDEX + * to rename a table), but we might've used the wrong lock level. If + * that happens, retry with the correct lock level. We don't bother + * if we already acquired AccessExclusiveLock with an index, however. 
+ */ + relkind = get_rel_relkind(relid); + obj_is_index = (relkind == RELKIND_INDEX || + relkind == RELKIND_PARTITIONED_INDEX); + if (obj_is_index || is_index_stmt == obj_is_index) + break; + + UnlockRelationOid(relid, lockmode); + is_index_stmt = obj_is_index; + } + + /* Do the work */ + RenameRelationInternal(relid, stmt->newname, false, is_index_stmt); + + ObjectAddressSet(address, RelationRelationId, relid); + + return address; +} + +/* + * RenameRelationInternal - change the name of a relation + */ +void +RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index) +{ + Relation targetrelation; + Relation relrelation; /* for RELATION relation */ + HeapTuple reltup; + Form_pg_class relform; + Oid namespaceId; + + /* + * Grab a lock on the target relation, which we will NOT release until end + * of transaction. We need at least a self-exclusive lock so that + * concurrent DDL doesn't overwrite the rename if they start updating + * while still seeing the old version. The lock also guards against + * triggering relcache reloads in concurrent sessions, which might not + * handle this information changing under them. For indexes, we can use a + * reduced lock level because RelationReloadIndexInfo() handles indexes + * specially. + */ + targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock); + namespaceId = RelationGetNamespace(targetrelation); + + /* + * Find relation's pg_class tuple, and make sure newrelname isn't in use. 
+ */ + relrelation = table_open(RelationRelationId, RowExclusiveLock); + + reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid)); + if (!HeapTupleIsValid(reltup)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for relation %u", myrelid); + relform = (Form_pg_class) GETSTRUCT(reltup); + + if (get_relname_relid(newrelname, namespaceId) != InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + newrelname))); + + /* + * RenameRelation is careful not to believe the caller's idea of the + * relation kind being handled. We don't have to worry about this, but + * let's not be totally oblivious to it. We can process an index as + * not-an-index, but not the other way around. + */ + Assert(!is_index || + is_index == (targetrelation->rd_rel->relkind == RELKIND_INDEX || + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)); + + /* + * Update pg_class tuple with new relname. (Scribbling on reltup is OK + * because it's a copy...) + */ + namestrcpy(&(relform->relname), newrelname); + + CatalogTupleUpdate(relrelation, &reltup->t_self, reltup); + + InvokeObjectPostAlterHookArg(RelationRelationId, myrelid, 0, + InvalidOid, is_internal); + + heap_freetuple(reltup); + table_close(relrelation, RowExclusiveLock); + + /* + * Also rename the associated type, if any. + */ + if (OidIsValid(targetrelation->rd_rel->reltype)) + RenameTypeInternal(targetrelation->rd_rel->reltype, + newrelname, namespaceId); + + /* + * Also rename the associated constraint, if any. + */ + if (targetrelation->rd_rel->relkind == RELKIND_INDEX || + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + { + Oid constraintId = get_index_constraint(myrelid); + + if (OidIsValid(constraintId)) + RenameConstraintById(constraintId, newrelname); + } + + /* + * Close rel, but keep lock! 
+ */ + relation_close(targetrelation, NoLock); +} + +/* + * ResetRelRewrite - reset relrewrite + */ +void +ResetRelRewrite(Oid myrelid) +{ + Relation relrelation; /* for RELATION relation */ + HeapTuple reltup; + Form_pg_class relform; + + /* + * Find relation's pg_class tuple. + */ + relrelation = table_open(RelationRelationId, RowExclusiveLock); + + reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid)); + if (!HeapTupleIsValid(reltup)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for relation %u", myrelid); + relform = (Form_pg_class) GETSTRUCT(reltup); + + /* + * Update pg_class tuple. + */ + relform->relrewrite = InvalidOid; + + CatalogTupleUpdate(relrelation, &reltup->t_self, reltup); + + heap_freetuple(reltup); + table_close(relrelation, RowExclusiveLock); +} + +/* + * Disallow ALTER TABLE (and similar commands) when the current backend has + * any open reference to the target table besides the one just acquired by + * the calling command; this implies there's an open cursor or active plan. + * We need this check because our lock doesn't protect us against stomping + * on our own foot, only other people's feet! + * + * For ALTER TABLE, the only case known to cause serious trouble is ALTER + * COLUMN TYPE, and some changes are obviously pretty benign, so this could + * possibly be relaxed to only error out for certain types of alterations. + * But the use-case for allowing any of these things is not obvious, so we + * won't work hard at it for now. + * + * We also reject these commands if there are any pending AFTER trigger events + * for the rel. This is certainly necessary for the rewriting variants of + * ALTER TABLE, because they don't preserve tuple TIDs and so the pending + * events would try to fetch the wrong tuples. It might be overly cautious + * in other cases, but again it seems better to err on the side of paranoia. 
+ * + * REINDEX calls this with "rel" referencing the index to be rebuilt; here + * we are worried about active indexscans on the index. The trigger-event + * check can be skipped, since we are doing no damage to the parent table. + * + * The statement name (eg, "ALTER TABLE") is passed for use in error messages. + */ +void +CheckTableNotInUse(Relation rel, const char *stmt) +{ + int expected_refcnt; + + expected_refcnt = rel->rd_isnailed ? 2 : 1; + if (rel->rd_refcnt != expected_refcnt) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + /* translator: first %s is a SQL command, eg ALTER TABLE */ + errmsg("cannot %s \"%s\" because it is being used by active queries in this session", + stmt, RelationGetRelationName(rel)))); + + if (rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + AfterTriggerPendingOnRel(RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + /* translator: first %s is a SQL command, eg ALTER TABLE */ + errmsg("cannot %s \"%s\" because it has pending trigger events", + stmt, RelationGetRelationName(rel)))); +} + +/* + * AlterTableLookupRelation + * Look up, and lock, the OID for the relation named by an alter table + * statement. + */ +Oid +AlterTableLookupRelation(AlterTableStmt *stmt, LOCKMODE lockmode) +{ + return RangeVarGetRelidExtended(stmt->relation, lockmode, + stmt->missing_ok ? RVR_MISSING_OK : 0, + RangeVarCallbackForAlterRelation, + (void *) stmt); +} + +/* + * AlterTable + * Execute ALTER TABLE, which can be a list of subcommands + * + * ALTER TABLE is performed in three phases: + * 1. Examine subcommands and perform pre-transformation checking. + * 2. Validate and transform subcommands, and update system catalogs. + * 3. Scan table(s) to check new constraints, and optionally recopy + * the data into new table(s). + * Phase 3 is not performed unless one or more of the subcommands requires + * it. 
The intention of this design is to allow multiple independent + * updates of the table schema to be performed with only one pass over the + * data. + * + * ATPrepCmd performs phase 1. A "work queue" entry is created for + * each table to be affected (there may be multiple affected tables if the + * commands traverse a table inheritance hierarchy). Also we do preliminary + * validation of the subcommands. Because earlier subcommands may change + * the catalog state seen by later commands, there are limits to what can + * be done in this phase. Generally, this phase acquires table locks, + * checks permissions and relkind, and recurses to find child tables. + * + * ATRewriteCatalogs performs phase 2 for each affected table. + * Certain subcommands need to be performed before others to avoid + * unnecessary conflicts; for example, DROP COLUMN should come before + * ADD COLUMN. Therefore phase 1 divides the subcommands into multiple + * lists, one for each logical "pass" of phase 2. + * + * ATRewriteTables performs phase 3 for those tables that need it. + * + * For most subcommand types, phases 2 and 3 do no explicit recursion, + * since phase 1 already does it. However, for certain subcommand types + * it is only possible to determine how to recurse at phase 2 time; for + * those cases, phase 1 sets the cmd->recurse flag (or, in some older coding, + * changes the command subtype of a "Recurse" variant XXX to be cleaned up.) + * + * Thanks to the magic of MVCC, an error anywhere along the way rolls back + * the whole operation; we don't have to do anything special to clean up. + * + * The caller must lock the relation, with an appropriate lock level + * for the subcommands requested, using AlterTableGetLockLevel(stmt->cmds) + * or higher. We pass the lock level down + * so that we can apply it recursively to inherited tables. Note that the + * lock level we want as we recurse might well be higher than required for + * that specific subcommand. 
So we pass down the overall lock requirement, + * rather than reassess it at lower levels. + * + * The caller also provides a "context" which is to be passed back to + * utility.c when we need to execute a subcommand such as CREATE INDEX. + * Some of the fields therein, such as the relid, are used here as well. + */ +void +AlterTable(AlterTableStmt *stmt, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + Relation rel; + + /* Caller is required to provide an adequate lock. */ + rel = relation_open(context->relid, NoLock); + + CheckTableNotInUse(rel, "ALTER TABLE"); + + ATController(stmt, rel, stmt->cmds, stmt->relation->inh, lockmode, context); +} + +/* + * AlterTableInternal + * + * ALTER TABLE with target specified by OID + * + * We do not reject if the relation is already open, because it's quite + * likely that one or more layers of caller have it open. That means it + * is unsafe to use this entry point for alterations that could break + * existing query plans. On the assumption it's not used for such, we + * don't have to reject pending AFTER triggers, either. + * + * Also, since we don't have an AlterTableUtilityContext, this cannot be + * used for any subcommand types that require parse transformation or + * could generate subcommands that have to be passed to ProcessUtility. + */ +void +AlterTableInternal(Oid relid, List *cmds, bool recurse) +{ + Relation rel; + LOCKMODE lockmode = AlterTableGetLockLevel(cmds); + + rel = relation_open(relid, lockmode); + + EventTriggerAlterTableRelid(relid); + + ATController(NULL, rel, cmds, recurse, lockmode, NULL); +} + +/* + * AlterTableGetLockLevel + * + * Sets the overall lock level required for the supplied list of subcommands. + * Policy for doing this set according to needs of AlterTable(), see + * comments there for overall explanation. + * + * Function is called before and after parsing, so it must give same + * answer each time it is called. 
Some subcommands are transformed + * into other subcommand types, so the transform must never be made to a + * lower lock level than previously assigned. All transforms are noted below. + * + * Since this is called before we lock the table we cannot use table metadata + * to influence the type of lock we acquire. + * + * There should be no lockmodes hardcoded into the subcommand functions. All + * lockmode decisions for ALTER TABLE are made here only. The one exception is + * ALTER TABLE RENAME which is treated as a different statement type T_RenameStmt + * and does not travel through this section of code and cannot be combined with + * any of the subcommands given here. + * + * Note that Hot Standby only knows about AccessExclusiveLocks on the primary + * so any changes that might affect SELECTs running on standbys need to use + * AccessExclusiveLocks even if you think a lesser lock would do, unless you + * have a solution for that also. + * + * Also note that pg_dump uses only an AccessShareLock, meaning that anything + * that takes a lock less than AccessExclusiveLock can change object definitions + * while pg_dump is running. Be careful to check that the appropriate data is + * derived by pg_dump using an MVCC snapshot, rather than syscache lookups, + * otherwise we might end up with an inconsistent dump that can't restore. + */ +LOCKMODE +AlterTableGetLockLevel(List *cmds) +{ + /* + * This only works if we read catalog tables using MVCC snapshots. + */ + ListCell *lcmd; + LOCKMODE lockmode = ShareUpdateExclusiveLock; + + foreach(lcmd, cmds) + { + AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd); + LOCKMODE cmd_lockmode = AccessExclusiveLock; /* default for compiler */ + + switch (cmd->subtype) + { + /* + * These subcommands rewrite the heap, so require full locks. 
+ */ + case AT_AddColumn: /* may rewrite heap, in some cases and visible + * to SELECT */ + case AT_SetAccessMethod: /* must rewrite heap */ + case AT_SetTableSpace: /* must rewrite heap */ + case AT_AlterColumnType: /* must rewrite heap */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands may require addition of toast tables. If + * we add a toast table to a table currently being scanned, we + * might miss data added to the new toast table by concurrent + * insert transactions. + */ + case AT_SetStorage: /* may add toast tables, see + * ATRewriteCatalogs() */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Removing constraints can affect SELECTs that have been + * optimized assuming the constraint holds true. See also + * CloneFkReferenced. + */ + case AT_DropConstraint: /* as DROP INDEX */ + case AT_DropNotNull: /* may change some SQL plans */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Subcommands that may be visible to concurrent SELECTs + */ + case AT_DropColumn: /* change visible to SELECT */ + case AT_AddColumnToView: /* CREATE VIEW */ + case AT_DropOids: /* used to equiv to DropColumn */ + case AT_EnableAlwaysRule: /* may change SELECT rules */ + case AT_EnableReplicaRule: /* may change SELECT rules */ + case AT_EnableRule: /* may change SELECT rules */ + case AT_DisableRule: /* may change SELECT rules */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Changing owner may remove implicit SELECT privileges + */ + case AT_ChangeOwner: /* change visible to SELECT */ + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Changing foreign table options may affect optimization. + */ + case AT_GenericOptions: + case AT_AlterColumnGenericOptions: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect write operations only. 
+ */ + case AT_EnableTrig: + case AT_EnableAlwaysTrig: + case AT_EnableReplicaTrig: + case AT_EnableTrigAll: + case AT_EnableTrigUser: + case AT_DisableTrig: + case AT_DisableTrigAll: + case AT_DisableTrigUser: + cmd_lockmode = ShareRowExclusiveLock; + break; + + /* + * These subcommands affect write operations only. XXX + * Theoretically, these could be ShareRowExclusiveLock. + */ + case AT_ColumnDefault: + case AT_CookedColumnDefault: + case AT_AlterConstraint: + case AT_AddIndex: /* from ADD CONSTRAINT */ + case AT_AddIndexConstraint: + case AT_ReplicaIdentity: + case AT_SetNotNull: + case AT_EnableRowSecurity: + case AT_DisableRowSecurity: + case AT_ForceRowSecurity: + case AT_NoForceRowSecurity: + case AT_AddIdentity: + case AT_DropIdentity: + case AT_SetIdentity: + case AT_DropExpression: + case AT_SetCompression: + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_AddConstraint: + case AT_AddConstraintRecurse: /* becomes AT_AddConstraint */ + case AT_ReAddConstraint: /* becomes AT_AddConstraint */ + case AT_ReAddDomainConstraint: /* becomes AT_AddConstraint */ + if (IsA(cmd->def, Constraint)) + { + Constraint *con = (Constraint *) cmd->def; + + switch (con->contype) + { + case CONSTR_EXCLUSION: + case CONSTR_PRIMARY: + case CONSTR_UNIQUE: + + /* + * Cases essentially the same as CREATE INDEX. We + * could reduce the lock strength to ShareLock if + * we can work out how to allow concurrent catalog + * updates. XXX Might be set down to + * ShareRowExclusiveLock but requires further + * analysis. + */ + cmd_lockmode = AccessExclusiveLock; + break; + case CONSTR_FOREIGN: + + /* + * We add triggers to both tables when we add a + * Foreign Key, so the lock level must be at least + * as strong as CREATE TRIGGER. + */ + cmd_lockmode = ShareRowExclusiveLock; + break; + + default: + cmd_lockmode = AccessExclusiveLock; + } + } + break; + + /* + * These subcommands affect inheritance behaviour. 
Queries + * started before us will continue to see the old inheritance + * behaviour, while queries started after we commit will see + * new behaviour. No need to prevent reads or writes to the + * subtable while we hook it up though. Changing the TupDesc + * may be a problem, so keep highest lock. + */ + case AT_AddInherit: + case AT_DropInherit: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect implicit row type conversion. They + * have affects similar to CREATE/DROP CAST on queries. don't + * provide for invalidating parse trees as a result of such + * changes, so we keep these at AccessExclusiveLock. + */ + case AT_AddOf: + case AT_DropOf: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * Only used by CREATE OR REPLACE VIEW which must conflict + * with an SELECTs currently using the view. + */ + case AT_ReplaceRelOptions: + cmd_lockmode = AccessExclusiveLock; + break; + + /* + * These subcommands affect general strategies for performance + * and maintenance, though don't change the semantic results + * from normal data reads and writes. Delaying an ALTER TABLE + * behind currently active writes only delays the point where + * the new strategy begins to take effect, so there is no + * benefit in waiting. In this case the minimum restriction + * applies: we don't currently allow concurrent catalog + * updates. + */ + case AT_SetStatistics: /* Uses MVCC in getTableAttrs() */ + case AT_ClusterOn: /* Uses MVCC in getIndexes() */ + case AT_DropCluster: /* Uses MVCC in getIndexes() */ + case AT_SetOptions: /* Uses MVCC in getTableAttrs() */ + case AT_ResetOptions: /* Uses MVCC in getTableAttrs() */ + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_SetLogged: + case AT_SetUnLogged: + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_ValidateConstraint: /* Uses MVCC in getConstraints() */ + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + /* + * Rel options are more complex than first appears. 
Options + * are set here for tables, views and indexes; for historical + * reasons these can all be used with ALTER TABLE, so we can't + * decide between them using the basic grammar. + */ + case AT_SetRelOptions: /* Uses MVCC in getIndexes() and + * getTables() */ + case AT_ResetRelOptions: /* Uses MVCC in getIndexes() and + * getTables() */ + cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def); + break; + + case AT_AttachPartition: + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_DetachPartition: + if (((PartitionCmd *) cmd->def)->concurrent) + cmd_lockmode = ShareUpdateExclusiveLock; + else + cmd_lockmode = AccessExclusiveLock; + break; + + case AT_DetachPartitionFinalize: + cmd_lockmode = ShareUpdateExclusiveLock; + break; + + case AT_CheckNotNull: + + /* + * This only examines the table's schema; but lock must be + * strong enough to prevent concurrent DROP NOT NULL. + */ + cmd_lockmode = AccessShareLock; + break; + + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + break; + } + + /* + * Take the greatest lockmode from any subcommand + */ + if (cmd_lockmode > lockmode) + lockmode = cmd_lockmode; + } + + return lockmode; +} + +/* + * ATController provides top level control over the phases. + * + * parsetree is passed in to allow it to be passed to event triggers + * when requested. 
+ */ +static void +ATController(AlterTableStmt *parsetree, + Relation rel, List *cmds, bool recurse, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + List *wqueue = NIL; + ListCell *lcmd; + + /* Phase 1: preliminary examination of commands, create work queue */ + foreach(lcmd, cmds) + { + AlterTableCmd *cmd = (AlterTableCmd *) lfirst(lcmd); + + ATPrepCmd(&wqueue, rel, cmd, recurse, false, lockmode, context); + } + + /* Close the relation, but keep lock until commit */ + relation_close(rel, NoLock); + + /* Phase 2: update system catalogs */ + ATRewriteCatalogs(&wqueue, lockmode, context); + + /* Phase 3: scan/rewrite tables as needed, and run afterStmts */ + ATRewriteTables(parsetree, &wqueue, lockmode, context); +} + +/* + * ATPrepCmd + * + * Traffic cop for ALTER TABLE Phase 1 operations, including simple + * recursion and permission checks. + * + * Caller must have acquired appropriate lock type on relation already. + * This lock should be held until commit. + */ +static void +ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, + bool recurse, bool recursing, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + AlteredTableInfo *tab; + int pass = AT_PASS_UNSET; + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + + /* + * Disallow any ALTER TABLE other than ALTER TABLE DETACH FINALIZE on + * partitions that are pending detach. + */ + if (rel->rd_rel->relispartition && + cmd->subtype != AT_DetachPartitionFinalize && + PartitionHasPendingDetach(RelationGetRelid(rel))) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot alter partition \"%s\" with an incomplete detach", + RelationGetRelationName(rel)), + errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation.")); + + /* + * Copy the original subcommand for each table, so we can scribble on it. 
+ * This avoids conflicts when different child tables need to make + * different parse transformations (for example, the same column may have + * different column numbers in different children). + */ + cmd = copyObject(cmd); + + /* + * Do permissions and relkind checking, recursion to child tables if + * needed, and any additional phase-1 processing needed. (But beware of + * adding any processing that looks at table details that another + * subcommand could change. In some cases we reject multiple subcommands + * that could try to change the same state in contrary ways.) + */ + switch (cmd->subtype) + { + case AT_AddColumn: /* ADD COLUMN */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + ATPrepAddColumn(wqueue, rel, recurse, recursing, false, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; + case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ + ATSimplePermissions(cmd->subtype, rel, ATT_VIEW); + ATPrepAddColumn(wqueue, rel, recurse, recursing, true, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_ADD_COL; + break; + case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */ + + /* + * We allow defaults on views so that INSERT into a view can have + * default-ish behavior. This works because the rewriter + * substitutes default values into INSERTs before it expands + * rules. + */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = cmd->def ? 
AT_PASS_ADD_OTHERCONSTR : AT_PASS_DROP; + break; + case AT_CookedColumnDefault: /* add a pre-cooked default */ + /* This is currently used only in CREATE TABLE */ + /* (so the permission check really isn't necessary) */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_ADD_OTHERCONSTR; + break; + case AT_AddIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_ADD_OTHERCONSTR; + break; + case AT_SetIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* This should run after AddIdentity, so do it in MISC pass */ + pass = AT_PASS_MISC; + break; + case AT_DropIdentity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_DROP; + break; + case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepDropNotNull(rel, recurse, recursing); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + pass = AT_PASS_DROP; + break; + case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Need command-specific recursion decision */ + ATPrepSetNotNull(wqueue, rel, cmd, recurse, recursing, + lockmode, context); + pass = AT_PASS_COL_ATTRS; + break; + case AT_CheckNotNull: /* check column is already marked NOT NULL */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_COL_ATTRS; + break; + case AT_DropExpression: /* ALTER COLUMN DROP EXPRESSION */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, 
recurse, lockmode, context); + ATPrepDropExpression(rel, cmd, recurse, recursing, lockmode); + pass = AT_PASS_DROP; + break; + case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | ATT_PARTITIONED_INDEX | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetOptions: /* ALTER COLUMN SET ( options ) */ + case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE); + /* This command never recurses */ + pass = AT_PASS_MISC; + break; + case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_FOREIGN_TABLE); + ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetCompression: /* ALTER COLUMN SET COMPRESSION */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DropColumn: /* DROP COLUMN */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + ATPrepDropColumn(wqueue, rel, recurse, recursing, cmd, + lockmode, context); + /* Recursion occurs during execution phase */ + pass = AT_PASS_DROP; + break; + case AT_AddIndex: /* ADD INDEX */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_ADD_INDEX; + break; + case AT_AddConstraint: /* ADD CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = 
AT_AddConstraintRecurse; + pass = AT_PASS_ADD_CONSTR; + break; + case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_ADD_INDEXCONSTR; + break; + case AT_DropConstraint: /* DROP CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATCheckPartitionsNotInUse(rel, lockmode); + /* Other recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = AT_DropConstraintRecurse; + pass = AT_PASS_DROP; + break; + case AT_AlterColumnType: /* ALTER COLUMN TYPE */ + ATSimplePermissions(cmd->subtype, rel, + ATT_TABLE | ATT_COMPOSITE_TYPE | ATT_FOREIGN_TABLE); + /* See comments for ATPrepAlterColumnType */ + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, recurse, lockmode, + AT_PASS_UNSET, context); + Assert(cmd != NULL); + /* Performs own recursion */ + ATPrepAlterColumnType(wqueue, tab, rel, recurse, recursing, cmd, + lockmode, context); + pass = AT_PASS_ALTER_TYPE; + break; + case AT_AlterColumnGenericOptions: + ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_ChangeOwner: /* ALTER OWNER */ + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_ClusterOn: /* CLUSTER ON */ + case AT_DropCluster: /* SET WITHOUT CLUSTER */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + /* These commands never recurse */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_SetLogged: /* SET LOGGED */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE); + if (tab->chgPersistence) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change persistence 
setting twice"))); + tab->chgPersistence = ATPrepChangePersistence(rel, true); + /* force rewrite if necessary; see comment in ATRewriteTables */ + if (tab->chgPersistence) + { + tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE; + tab->newrelpersistence = RELPERSISTENCE_PERMANENT; + } + pass = AT_PASS_MISC; + break; + case AT_SetUnLogged: /* SET UNLOGGED */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_SEQUENCE); + if (tab->chgPersistence) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change persistence setting twice"))); + tab->chgPersistence = ATPrepChangePersistence(rel, false); + /* force rewrite if necessary; see comment in ATRewriteTables */ + if (tab->chgPersistence) + { + tab->rewrite |= AT_REWRITE_ALTER_PERSISTENCE; + tab->newrelpersistence = RELPERSISTENCE_UNLOGGED; + } + pass = AT_PASS_MISC; + break; + case AT_DropOids: /* SET WITHOUT OIDS */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + pass = AT_PASS_DROP; + break; + case AT_SetAccessMethod: /* SET ACCESS METHOD */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + + /* partitioned tables don't have an access method */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change access method of a partitioned table"))); + + /* check if another access method change was already requested */ + if (OidIsValid(tab->newAccessMethod)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot have multiple SET ACCESS METHOD subcommands"))); + + ATPrepSetAccessMethod(tab, rel, cmd->name); + pass = AT_PASS_MISC; /* does not matter; no work in Phase 2 */ + break; + case AT_SetTableSpace: /* SET TABLESPACE */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW | ATT_INDEX | + ATT_PARTITIONED_INDEX); + /* This command never recurses */ + ATPrepSetTableSpace(tab, rel, cmd->name, lockmode); + pass = AT_PASS_MISC; /* doesn't 
actually matter */ + break; + case AT_SetRelOptions: /* SET (...) */ + case AT_ResetRelOptions: /* RESET (...) */ + case AT_ReplaceRelOptions: /* reset them all, then set just these */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_VIEW | ATT_MATVIEW | ATT_INDEX); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AddInherit: /* INHERIT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + ATPrepAddInherit(rel); + pass = AT_PASS_MISC; + break; + case AT_DropInherit: /* NO INHERIT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* This command never recurses */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AlterConstraint: /* ALTER CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* Recursion occurs during execution phase */ + pass = AT_PASS_MISC; + break; + case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Recursion occurs during execution phase */ + /* No command-specific prep needed except saving recurse flag */ + if (recurse) + cmd->subtype = AT_ValidateConstraintRecurse; + pass = AT_PASS_MISC; + break; + case AT_ReplicaIdentity: /* REPLICA IDENTITY ... 
*/ + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_MATVIEW); + pass = AT_PASS_MISC; + /* This command never recurses */ + /* No command-specific prep needed */ + break; + case AT_EnableTrig: /* ENABLE TRIGGER variants */ + case AT_EnableAlwaysTrig: + case AT_EnableReplicaTrig: + case AT_EnableTrigAll: + case AT_EnableTrigUser: + case AT_DisableTrig: /* DISABLE TRIGGER variants */ + case AT_DisableTrigAll: + case AT_DisableTrigUser: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE); + /* Set up recursion for phase 2; no other prep needed */ + if (recurse) + cmd->recurse = true; + pass = AT_PASS_MISC; + break; + case AT_EnableRule: /* ENABLE/DISABLE RULE variants */ + case AT_EnableAlwaysRule: + case AT_EnableReplicaRule: + case AT_DisableRule: + case AT_AddOf: /* OF */ + case AT_DropOf: /* NOT OF */ + case AT_EnableRowSecurity: + case AT_DisableRowSecurity: + case AT_ForceRowSecurity: + case AT_NoForceRowSecurity: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* These commands never recurse */ + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_GenericOptions: + ATSimplePermissions(cmd->subtype, rel, ATT_FOREIGN_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_AttachPartition: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE | ATT_PARTITIONED_INDEX); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DetachPartition: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + case AT_DetachPartitionFinalize: + ATSimplePermissions(cmd->subtype, rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + pass = AT_PASS_UNSET; /* keep compiler quiet */ + break; + } + Assert(pass > AT_PASS_UNSET); + + /* Add the subcommand to the 
appropriate list for phase 2 */ + tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd); +} + +/* + * ATRewriteCatalogs + * + * Traffic cop for ALTER TABLE Phase 2 operations. Subcommands are + * dispatched in a "safe" execution order (designed to avoid unnecessary + * conflicts). + */ +static void +ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode, + AlterTableUtilityContext *context) +{ + int pass; + ListCell *ltab; + + /* + * We process all the tables "in parallel", one pass at a time. This is + * needed because we may have to propagate work from one table to another + * (specifically, ALTER TYPE on a foreign key's PK has to dispatch the + * re-adding of the foreign key constraint to the other table). Work can + * only be propagated into later passes, however. + */ + for (pass = 0; pass < AT_NUM_PASSES; pass++) + { + /* Go through each table that needs to be processed */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + List *subcmds = tab->subcmds[pass]; + ListCell *lcmd; + + if (subcmds == NIL) + continue; + + /* + * Open the relation and store it in tab. This allows subroutines + * close and reopen, if necessary. Appropriate lock was obtained + * by phase 1, needn't get it again. + */ + tab->rel = relation_open(tab->relid, NoLock); + + foreach(lcmd, subcmds) + ATExecCmd(wqueue, tab, + lfirst_node(AlterTableCmd, lcmd), + lockmode, pass, context); + + /* + * After the ALTER TYPE pass, do cleanup work (this is not done in + * ATExecAlterColumnType since it should be done only once if + * multiple columns of a table are altered). + */ + if (pass == AT_PASS_ALTER_TYPE) + ATPostAlterTypeCleanup(wqueue, tab, lockmode); + + if (tab->rel) + { + relation_close(tab->rel, NoLock); + tab->rel = NULL; + } + } + } + + /* Check to see if a toast table must be added. 
*/ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + + /* + * If the table is source table of ATTACH PARTITION command, we did + * not modify anything about it that will change its toasting + * requirement, so no need to check. + */ + if (((tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) && + tab->partition_constraint == NULL) || + tab->relkind == RELKIND_MATVIEW) + AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode); + } +} + +/* + * ATExecCmd: dispatch a subcommand to appropriate execution routine + */ +static void +ATExecCmd(List **wqueue, AlteredTableInfo *tab, + AlterTableCmd *cmd, LOCKMODE lockmode, int cur_pass, + AlterTableUtilityContext *context) +{ + ObjectAddress address = InvalidObjectAddress; + Relation rel = tab->rel; + + switch (cmd->subtype) + { + case AT_AddColumn: /* ADD COLUMN */ + case AT_AddColumnToView: /* add column via CREATE OR REPLACE VIEW */ + address = ATExecAddColumn(wqueue, tab, rel, &cmd, + false, false, + lockmode, cur_pass, context); + break; + case AT_AddColumnRecurse: + address = ATExecAddColumn(wqueue, tab, rel, &cmd, + true, false, + lockmode, cur_pass, context); + break; + case AT_ColumnDefault: /* ALTER COLUMN DEFAULT */ + address = ATExecColumnDefault(rel, cmd->name, cmd->def, lockmode); + break; + case AT_CookedColumnDefault: /* add a pre-cooked default */ + address = ATExecCookedColumnDefault(rel, cmd->num, cmd->def); + break; + case AT_AddIdentity: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + address = ATExecAddIdentity(rel, cmd->name, cmd->def, lockmode); + break; + case AT_SetIdentity: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + address = ATExecSetIdentity(rel, cmd->name, cmd->def, lockmode); + break; + case AT_DropIdentity: + address = ATExecDropIdentity(rel, cmd->name, cmd->missing_ok, lockmode); + 
break; + case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ + address = ATExecDropNotNull(rel, cmd->name, lockmode); + break; + case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */ + address = ATExecSetNotNull(tab, rel, cmd->name, lockmode); + break; + case AT_CheckNotNull: /* check column is already marked NOT NULL */ + ATExecCheckNotNull(tab, rel, cmd->name, lockmode); + break; + case AT_DropExpression: + address = ATExecDropExpression(rel, cmd->name, cmd->missing_ok, lockmode); + break; + case AT_SetStatistics: /* ALTER COLUMN SET STATISTICS */ + address = ATExecSetStatistics(rel, cmd->name, cmd->num, cmd->def, lockmode); + break; + case AT_SetOptions: /* ALTER COLUMN SET ( options ) */ + address = ATExecSetOptions(rel, cmd->name, cmd->def, false, lockmode); + break; + case AT_ResetOptions: /* ALTER COLUMN RESET ( options ) */ + address = ATExecSetOptions(rel, cmd->name, cmd->def, true, lockmode); + break; + case AT_SetStorage: /* ALTER COLUMN SET STORAGE */ + address = ATExecSetStorage(rel, cmd->name, cmd->def, lockmode); + break; + case AT_SetCompression: + address = ATExecSetCompression(tab, rel, cmd->name, cmd->def, + lockmode); + break; + case AT_DropColumn: /* DROP COLUMN */ + address = ATExecDropColumn(wqueue, rel, cmd->name, + cmd->behavior, false, false, + cmd->missing_ok, lockmode, + NULL); + break; + case AT_DropColumnRecurse: /* DROP COLUMN with recursion */ + address = ATExecDropColumn(wqueue, rel, cmd->name, + cmd->behavior, true, false, + cmd->missing_ok, lockmode, + NULL); + break; + case AT_AddIndex: /* ADD INDEX */ + address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, false, + lockmode); + break; + case AT_ReAddIndex: /* ADD INDEX */ + address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true, + lockmode); + break; + case AT_ReAddStatistics: /* ADD STATISTICS */ + address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def, + true, lockmode); + break; + case AT_AddConstraint: /* ADD CONSTRAINT */ + /* Transform the 
command only during initial examination */ + if (cur_pass == AT_PASS_ADD_CONSTR) + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, + false, lockmode, + cur_pass, context); + /* Depending on constraint type, might be no more work to do now */ + if (cmd != NULL) + address = + ATExecAddConstraint(wqueue, tab, rel, + (Constraint *) cmd->def, + false, false, lockmode); + break; + case AT_AddConstraintRecurse: /* ADD CONSTRAINT with recursion */ + /* Transform the command only during initial examination */ + if (cur_pass == AT_PASS_ADD_CONSTR) + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, + true, lockmode, + cur_pass, context); + /* Depending on constraint type, might be no more work to do now */ + if (cmd != NULL) + address = + ATExecAddConstraint(wqueue, tab, rel, + (Constraint *) cmd->def, + true, false, lockmode); + break; + case AT_ReAddConstraint: /* Re-add pre-existing check constraint */ + address = + ATExecAddConstraint(wqueue, tab, rel, (Constraint *) cmd->def, + true, true, lockmode); + break; + case AT_ReAddDomainConstraint: /* Re-add pre-existing domain check + * constraint */ + address = + AlterDomainAddConstraint(((AlterDomainStmt *) cmd->def)->typeName, + ((AlterDomainStmt *) cmd->def)->def, + NULL); + break; + case AT_ReAddComment: /* Re-add existing comment */ + address = CommentObject((CommentStmt *) cmd->def); + break; + case AT_AddIndexConstraint: /* ADD CONSTRAINT USING INDEX */ + address = ATExecAddIndexConstraint(tab, rel, (IndexStmt *) cmd->def, + lockmode); + break; + case AT_AlterConstraint: /* ALTER CONSTRAINT */ + address = ATExecAlterConstraint(rel, cmd, false, false, lockmode); + break; + case AT_ValidateConstraint: /* VALIDATE CONSTRAINT */ + address = ATExecValidateConstraint(wqueue, rel, cmd->name, false, + false, lockmode); + break; + case AT_ValidateConstraintRecurse: /* VALIDATE CONSTRAINT with + * recursion */ + address = ATExecValidateConstraint(wqueue, rel, cmd->name, true, + false, lockmode); + break; + case 
AT_DropConstraint: /* DROP CONSTRAINT */ + ATExecDropConstraint(rel, cmd->name, cmd->behavior, + false, false, + cmd->missing_ok, lockmode); + break; + case AT_DropConstraintRecurse: /* DROP CONSTRAINT with recursion */ + ATExecDropConstraint(rel, cmd->name, cmd->behavior, + true, false, + cmd->missing_ok, lockmode); + break; + case AT_AlterColumnType: /* ALTER COLUMN TYPE */ + /* parse transformation was done earlier */ + address = ATExecAlterColumnType(tab, rel, cmd, lockmode); + break; + case AT_AlterColumnGenericOptions: /* ALTER COLUMN OPTIONS */ + address = + ATExecAlterColumnGenericOptions(rel, cmd->name, + (List *) cmd->def, lockmode); + break; + case AT_ChangeOwner: /* ALTER OWNER */ + ATExecChangeOwner(RelationGetRelid(rel), + get_rolespec_oid(cmd->newowner, false), + false, lockmode); + break; + case AT_ClusterOn: /* CLUSTER ON */ + address = ATExecClusterOn(rel, cmd->name, lockmode); + break; + case AT_DropCluster: /* SET WITHOUT CLUSTER */ + ATExecDropCluster(rel, lockmode); + break; + case AT_SetLogged: /* SET LOGGED */ + case AT_SetUnLogged: /* SET UNLOGGED */ + break; + case AT_DropOids: /* SET WITHOUT OIDS */ + /* nothing to do here, oid columns don't exist anymore */ + break; + case AT_SetAccessMethod: /* SET ACCESS METHOD */ + /* handled specially in Phase 3 */ + break; + case AT_SetTableSpace: /* SET TABLESPACE */ + + /* + * Only do this for partitioned tables and indexes, for which this + * is just a catalog change. Other relation types which have + * storage are handled by Phase 3. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + ATExecSetTableSpaceNoStorage(rel, tab->newTableSpace); + + break; + case AT_SetRelOptions: /* SET (...) */ + case AT_ResetRelOptions: /* RESET (...) 
*/ + case AT_ReplaceRelOptions: /* replace entire option list */ + ATExecSetRelOptions(rel, (List *) cmd->def, cmd->subtype, lockmode); + break; + case AT_EnableTrig: /* ENABLE TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ON_ORIGIN, false, + cmd->recurse, + lockmode); + break; + case AT_EnableAlwaysTrig: /* ENABLE ALWAYS TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ALWAYS, false, + cmd->recurse, + lockmode); + break; + case AT_EnableReplicaTrig: /* ENABLE REPLICA TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_FIRES_ON_REPLICA, false, + cmd->recurse, + lockmode); + break; + case AT_DisableTrig: /* DISABLE TRIGGER name */ + ATExecEnableDisableTrigger(rel, cmd->name, + TRIGGER_DISABLED, false, + cmd->recurse, + lockmode); + break; + case AT_EnableTrigAll: /* ENABLE TRIGGER ALL */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_FIRES_ON_ORIGIN, false, + cmd->recurse, + lockmode); + break; + case AT_DisableTrigAll: /* DISABLE TRIGGER ALL */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_DISABLED, false, + cmd->recurse, + lockmode); + break; + case AT_EnableTrigUser: /* ENABLE TRIGGER USER */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_FIRES_ON_ORIGIN, true, + cmd->recurse, + lockmode); + break; + case AT_DisableTrigUser: /* DISABLE TRIGGER USER */ + ATExecEnableDisableTrigger(rel, NULL, + TRIGGER_DISABLED, true, + cmd->recurse, + lockmode); + break; + + case AT_EnableRule: /* ENABLE RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ON_ORIGIN, lockmode); + break; + case AT_EnableAlwaysRule: /* ENABLE ALWAYS RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ALWAYS, lockmode); + break; + case AT_EnableReplicaRule: /* ENABLE REPLICA RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_FIRES_ON_REPLICA, lockmode); + break; + case AT_DisableRule: /* DISABLE RULE name */ + ATExecEnableDisableRule(rel, cmd->name, + RULE_DISABLED, 
lockmode); + break; + + case AT_AddInherit: + address = ATExecAddInherit(rel, (RangeVar *) cmd->def, lockmode); + break; + case AT_DropInherit: + address = ATExecDropInherit(rel, (RangeVar *) cmd->def, lockmode); + break; + case AT_AddOf: + address = ATExecAddOf(rel, (TypeName *) cmd->def, lockmode); + break; + case AT_DropOf: + ATExecDropOf(rel, lockmode); + break; + case AT_ReplicaIdentity: + ATExecReplicaIdentity(rel, (ReplicaIdentityStmt *) cmd->def, lockmode); + break; + case AT_EnableRowSecurity: + ATExecSetRowSecurity(rel, true); + break; + case AT_DisableRowSecurity: + ATExecSetRowSecurity(rel, false); + break; + case AT_ForceRowSecurity: + ATExecForceNoForceRowSecurity(rel, true); + break; + case AT_NoForceRowSecurity: + ATExecForceNoForceRowSecurity(rel, false); + break; + case AT_GenericOptions: + ATExecGenericOptions(rel, (List *) cmd->def); + break; + case AT_AttachPartition: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def, + context); + else + ATExecAttachPartitionIdx(wqueue, rel, + ((PartitionCmd *) cmd->def)->name); + break; + case AT_DetachPartition: + cmd = ATParseTransformCmd(wqueue, tab, rel, cmd, false, lockmode, + cur_pass, context); + Assert(cmd != NULL); + /* ATPrepCmd ensures it must be a table */ + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + ATExecDetachPartition(wqueue, tab, rel, + ((PartitionCmd *) cmd->def)->name, + ((PartitionCmd *) cmd->def)->concurrent); + break; + case AT_DetachPartitionFinalize: + ATExecDetachPartitionFinalize(rel, ((PartitionCmd *) cmd->def)->name); + break; + default: /* oops */ + elog(ERROR, "unrecognized alter table type: %d", + (int) cmd->subtype); + break; + } + + /* + * Report the subcommand to interested event triggers. 
+ */
+	if (cmd)
+		EventTriggerCollectAlterTableSubcmd((Node *) cmd, address);
+
+	/*
+	 * Bump the command counter to ensure the next subcommand in the sequence
+	 * can see the changes so far
+	 */
+	CommandCounterIncrement();
+}
+
+/*
+ * ATParseTransformCmd: perform parse transformation for one subcommand
+ *
+ * Returns the transformed subcommand tree, if there is one, else NULL.
+ *
+ * The parser may hand back additional AlterTableCmd(s) and/or other
+ * utility statements, either before or after the original subcommand.
+ * Other AlterTableCmds are scheduled into the appropriate slot of the
+ * AlteredTableInfo (they had better be for later passes than the current one).
+ * Utility statements that are supposed to happen before the AlterTableCmd
+ * are executed immediately.  Those that are supposed to happen afterwards
+ * are added to the tab->afterStmts list to be done at the very end.
+ */
+static AlterTableCmd *
+ATParseTransformCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
+					AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+					int cur_pass, AlterTableUtilityContext *context)
+{
+	AlterTableCmd *newcmd = NULL;
+	AlterTableStmt *atstmt = makeNode(AlterTableStmt);
+	List	   *beforeStmts;
+	List	   *afterStmts;
+	ListCell   *lc;
+
+	/* Gin up an AlterTableStmt with just this subcommand and this table */
+	atstmt->relation =
+		makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+					 pstrdup(RelationGetRelationName(rel)),
+					 -1);
+	/* inh carries the caller's recursion decision into the transform */
+	atstmt->relation->inh = recurse;
+	atstmt->cmds = list_make1(cmd);
+	atstmt->objtype = OBJECT_TABLE; /* needn't be picky here */
+	atstmt->missing_ok = false;
+
+	/* Transform the AlterTableStmt */
+	atstmt = transformAlterTableStmt(RelationGetRelid(rel),
+									 atstmt,
+									 context->queryString,
+									 &beforeStmts,
+									 &afterStmts);
+
+	/* Execute any statements that should happen before these subcommand(s) */
+	foreach(lc, beforeStmts)
+	{
+		Node	   *stmt = (Node *) lfirst(lc);
+
+		ProcessUtilityForAlterTable(stmt, context);
+		CommandCounterIncrement();
+	}
+
+	/* Examine the transformed subcommands and schedule them appropriately */
+	foreach(lc, atstmt->cmds)
+	{
+		AlterTableCmd *cmd2 = lfirst_node(AlterTableCmd, lc);
+		int			pass;
+
+		/*
+		 * This switch need only cover the subcommand types that can be added
+		 * by parse_utilcmd.c; otherwise, we'll use the default strategy of
+		 * executing the subcommand immediately, as a substitute for the
+		 * original subcommand.  (Note, however, that this does cause
+		 * AT_AddConstraint subcommands to be rescheduled into later passes,
+		 * which is important for index and foreign key constraints.)
+		 *
+		 * We assume we needn't do any phase-1 checks for added subcommands.
+		 */
+		switch (cmd2->subtype)
+		{
+			case AT_SetNotNull:
+				/* Need command-specific recursion decision */
+				ATPrepSetNotNull(wqueue, rel, cmd2,
+								 recurse, false,
+								 lockmode, context);
+				pass = AT_PASS_COL_ATTRS;
+				break;
+			case AT_AddIndex:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEX;
+				break;
+			case AT_AddIndexConstraint:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_ADD_INDEXCONSTR;
+				break;
+			case AT_AddConstraint:
+				/* Recursion occurs during execution phase */
+				if (recurse)
+					cmd2->subtype = AT_AddConstraintRecurse;
+				/* index-backed constraints are scheduled with index adds */
+				switch (castNode(Constraint, cmd2->def)->contype)
+				{
+					case CONSTR_PRIMARY:
+					case CONSTR_UNIQUE:
+					case CONSTR_EXCLUSION:
+						pass = AT_PASS_ADD_INDEXCONSTR;
+						break;
+					default:
+						pass = AT_PASS_ADD_OTHERCONSTR;
+						break;
+				}
+				break;
+			case AT_AlterColumnGenericOptions:
+				/* This command never recurses */
+				/* No command-specific prep needed */
+				pass = AT_PASS_MISC;
+				break;
+			default:
+				/* anything else executes in the pass we are already in */
+				pass = cur_pass;
+				break;
+		}
+
+		if (pass < cur_pass)
+		{
+			/* Cannot schedule into a pass we already finished */
+			elog(ERROR, "ALTER TABLE scheduling failure: too late for pass %d",
+				 pass);
+		}
+		else if (pass > cur_pass)
+		{
+			/* OK, queue it up for later */
+			tab->subcmds[pass] = lappend(tab->subcmds[pass], cmd2);
+		}
+		else
+		{
+			/*
+			 * We should see at most one subcommand for the current pass,
+			 * which is the transformed version of the original subcommand.
+			 */
+			if (newcmd == NULL && cmd->subtype == cmd2->subtype)
+			{
+				/* Found the transformed version of our subcommand */
+				newcmd = cmd2;
+			}
+			else
+				elog(ERROR, "ALTER TABLE scheduling failure: bogus item for pass %d",
+					 pass);
+		}
+	}
+
+	/* Queue up any after-statements to happen at the end */
+	tab->afterStmts = list_concat(tab->afterStmts, afterStmts);
+
+	/* NULL means the original subcommand was consumed/replaced entirely */
+	return newcmd;
+}
+
+/*
+ * ATRewriteTables: ALTER TABLE phase 3
+ */
+static void
+ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
+				AlterTableUtilityContext *context)
+{
+	ListCell   *ltab;
+
+	/* Go through each table that needs to be checked or rewritten */
+	foreach(ltab, *wqueue)
+	{
+		AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+		/* Relations without storage may be ignored here */
+		if (!RELKIND_HAS_STORAGE(tab->relkind))
+			continue;
+
+		/*
+		 * If we change column data types, the operation has to be propagated
+		 * to tables that use this table's rowtype as a column type.
+		 * tab->newvals will also be non-NULL in the case where we're adding a
+		 * column with a default.  We choose to forbid that case as well,
+		 * since composite types might eventually support defaults.
+		 *
+		 * (Eventually we'll probably need to check for composite type
+		 * dependencies even when we're just scanning the table without a
+		 * rewrite, but at the moment a composite type does not enforce any
+		 * constraints, so it's not necessary/appropriate to enforce them just
+		 * during ALTER.)
+ */ + if (tab->newvals != NIL || tab->rewrite > 0) + { + Relation rel; + + rel = table_open(tab->relid, NoLock); + find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL); + table_close(rel, NoLock); + } + + /* + * We only need to rewrite the table if at least one column needs to + * be recomputed, or we are changing its persistence or access method. + * + * There are two reasons for requiring a rewrite when changing + * persistence: on one hand, we need to ensure that the buffers + * belonging to each of the two relations are marked with or without + * BM_PERMANENT properly. On the other hand, since rewriting creates + * and assigns a new relfilenode, we automatically create or drop an + * init fork for the relation as appropriate. + */ + if (tab->rewrite > 0 && tab->relkind != RELKIND_SEQUENCE) + { + /* Build a temporary relation and copy data */ + Relation OldHeap; + Oid OIDNewHeap; + Oid NewAccessMethod; + Oid NewTableSpace; + char persistence; + + OldHeap = table_open(tab->relid, NoLock); + + /* + * We don't support rewriting of system catalogs; there are too + * many corner cases and too little benefit. In particular this + * is certainly not going to work for mapped catalogs. + */ + if (IsSystemRelation(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite system relation \"%s\"", + RelationGetRelationName(OldHeap)))); + + if (RelationIsUsedAsCatalogTable(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite table \"%s\" used as a catalog table", + RelationGetRelationName(OldHeap)))); + + /* + * Don't allow rewrite on temp tables of other backends ... their + * local buffer manager is not going to cope. 
+ */ + if (RELATION_IS_OTHER_TEMP(OldHeap)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rewrite temporary tables of other sessions"))); + + /* + * Select destination tablespace (same as original unless user + * requested a change) + */ + if (tab->newTableSpace) + NewTableSpace = tab->newTableSpace; + else + NewTableSpace = OldHeap->rd_rel->reltablespace; + + /* + * Select destination access method (same as original unless user + * requested a change) + */ + if (OidIsValid(tab->newAccessMethod)) + NewAccessMethod = tab->newAccessMethod; + else + NewAccessMethod = OldHeap->rd_rel->relam; + + /* + * Select persistence of transient table (same as original unless + * user requested a change) + */ + persistence = tab->chgPersistence ? + tab->newrelpersistence : OldHeap->rd_rel->relpersistence; + + table_close(OldHeap, NoLock); + + /* + * Fire off an Event Trigger now, before actually rewriting the + * table. + * + * We don't support Event Trigger for nested commands anywhere, + * here included, and parsetree is given NULL when coming from + * AlterTableInternal. + * + * And fire it only once. + */ + if (parsetree) + EventTriggerTableRewrite((Node *) parsetree, + tab->relid, + tab->rewrite); + + /* + * Create transient table that will receive the modified data. + * + * Ensure it is marked correctly as logged or unlogged. We have + * to do this here so that buffers for the new relfilenode will + * have the right persistence set, and at the same time ensure + * that the original filenode's buffers will get read in with the + * correct setting (i.e. the original one). Otherwise a rollback + * after the rewrite would possibly result with buffers for the + * original filenode having the wrong persistence setting. + * + * NB: This relies on swap_relation_files() also swapping the + * persistence. That wouldn't work for pg_class, but that can't be + * unlogged anyway. 
+ */ + OIDNewHeap = make_new_heap(tab->relid, NewTableSpace, NewAccessMethod, + persistence, lockmode); + + /* + * Copy the heap data into the new table with the desired + * modifications, and test the current data within the table + * against new constraints generated by ALTER TABLE commands. + */ + ATRewriteTable(tab, OIDNewHeap, lockmode); + + /* + * Swap the physical files of the old and new heaps, then rebuild + * indexes and discard the old heap. We can use RecentXmin for + * the table's new relfrozenxid because we rewrote all the tuples + * in ATRewriteTable, so no older Xid remains in the table. Also, + * we never try to swap toast tables by content, since we have no + * interest in letting this code work on system catalogs. + */ + finish_heap_swap(tab->relid, OIDNewHeap, + false, false, true, + !OidIsValid(tab->newTableSpace), + RecentXmin, + ReadNextMultiXactId(), + persistence); + + InvokeObjectPostAlterHook(RelationRelationId, tab->relid, 0); + } + else if (tab->rewrite > 0 && tab->relkind == RELKIND_SEQUENCE) + { + if (tab->chgPersistence) + SequenceChangePersistence(tab->relid, tab->newrelpersistence); + } + else + { + /* + * If required, test the current data within the table against new + * constraints generated by ALTER TABLE commands, but don't + * rebuild data. + */ + if (tab->constraints != NIL || tab->verify_new_notnull || + tab->partition_constraint != NULL) + ATRewriteTable(tab, InvalidOid, lockmode); + + /* + * If we had SET TABLESPACE but no reason to reconstruct tuples, + * just do a block-by-block copy. + */ + if (tab->newTableSpace) + ATExecSetTableSpace(tab->relid, tab->newTableSpace, lockmode); + } + + /* + * Also change persistence of owned sequences, so that it matches the + * table persistence. 
+ */ + if (tab->chgPersistence) + { + List *seqlist = getOwnedSequences(tab->relid); + ListCell *lc; + + foreach(lc, seqlist) + { + Oid seq_relid = lfirst_oid(lc); + + SequenceChangePersistence(seq_relid, tab->newrelpersistence); + } + } + } + + /* + * Foreign key constraints are checked in a final pass, since (a) it's + * generally best to examine each one separately, and (b) it's at least + * theoretically possible that we have changed both relations of the + * foreign key, and we'd better have finished both rewrites before we try + * to read the tables. + */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + Relation rel = NULL; + ListCell *lcon; + + /* Relations without storage may be ignored here too */ + if (!RELKIND_HAS_STORAGE(tab->relkind)) + continue; + + foreach(lcon, tab->constraints) + { + NewConstraint *con = lfirst(lcon); + + if (con->contype == CONSTR_FOREIGN) + { + Constraint *fkconstraint = (Constraint *) con->qual; + Relation refrel; + + if (rel == NULL) + { + /* Long since locked, no need for another */ + rel = table_open(tab->relid, NoLock); + } + + refrel = table_open(con->refrelid, RowShareLock); + + validateForeignKeyConstraint(fkconstraint->conname, rel, refrel, + con->refindid, + con->conid); + + /* + * No need to mark the constraint row as validated, we did + * that when we inserted the row earlier. 
+ */ + + table_close(refrel, NoLock); + } + } + + if (rel) + table_close(rel, NoLock); + } + + /* Finally, run any afterStmts that were queued up */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + ListCell *lc; + + foreach(lc, tab->afterStmts) + { + Node *stmt = (Node *) lfirst(lc); + + ProcessUtilityForAlterTable(stmt, context); + CommandCounterIncrement(); + } + } +} + +/* + * ATRewriteTable: scan or rewrite one table + * + * OIDNewHeap is InvalidOid if we don't need to rewrite + */ +static void +ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) +{ + Relation oldrel; + Relation newrel; + TupleDesc oldTupDesc; + TupleDesc newTupDesc; + bool needscan = false; + List *notnull_attrs; + int i; + ListCell *l; + EState *estate; + CommandId mycid; + BulkInsertState bistate; + int ti_options; + ExprState *partqualstate = NULL; + + /* + * Open the relation(s). We have surely already locked the existing + * table. + */ + oldrel = table_open(tab->relid, NoLock); + oldTupDesc = tab->oldDesc; + newTupDesc = RelationGetDescr(oldrel); /* includes all mods */ + + if (OidIsValid(OIDNewHeap)) + newrel = table_open(OIDNewHeap, lockmode); + else + newrel = NULL; + + /* + * Prepare a BulkInsertState and options for table_tuple_insert. The FSM + * is empty, so don't bother using it. 
+ */ + if (newrel) + { + mycid = GetCurrentCommandId(true); + bistate = GetBulkInsertState(); + ti_options = TABLE_INSERT_SKIP_FSM; + } + else + { + /* keep compiler quiet about using these uninitialized */ + mycid = 0; + bistate = NULL; + ti_options = 0; + } + + /* + * Generate the constraint and default execution states + */ + + estate = CreateExecutorState(); + + /* Build the needed expression execution states */ + foreach(l, tab->constraints) + { + NewConstraint *con = lfirst(l); + + switch (con->contype) + { + case CONSTR_CHECK: + needscan = true; + con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate); + break; + case CONSTR_FOREIGN: + /* Nothing to do here */ + break; + default: + elog(ERROR, "unrecognized constraint type: %d", + (int) con->contype); + } + } + + /* Build expression execution states for partition check quals */ + if (tab->partition_constraint) + { + needscan = true; + partqualstate = ExecPrepareExpr(tab->partition_constraint, estate); + } + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + /* expr already planned */ + ex->exprstate = ExecInitExpr((Expr *) ex->expr, NULL); + } + + notnull_attrs = NIL; + if (newrel || tab->verify_new_notnull) + { + /* + * If we are rebuilding the tuples OR if we added any new but not + * verified NOT NULL constraints, check all not-null constraints. This + * is a bit of overkill but it minimizes risk of bugs, and + * heap_attisnull is a pretty cheap test anyway. 
+ */ + for (i = 0; i < newTupDesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(newTupDesc, i); + + if (attr->attnotnull && !attr->attisdropped) + notnull_attrs = lappend_int(notnull_attrs, i); + } + if (notnull_attrs) + needscan = true; + } + + if (newrel || needscan) + { + ExprContext *econtext; + TupleTableSlot *oldslot; + TupleTableSlot *newslot; + TableScanDesc scan; + MemoryContext oldCxt; + List *dropped_attrs = NIL; + ListCell *lc; + Snapshot snapshot; + + if (newrel) + ereport(DEBUG1, + (errmsg_internal("rewriting table \"%s\"", + RelationGetRelationName(oldrel)))); + else + ereport(DEBUG1, + (errmsg_internal("verifying table \"%s\"", + RelationGetRelationName(oldrel)))); + + if (newrel) + { + /* + * All predicate locks on the tuples or pages are about to be made + * invalid, because we move tuples around. Promote them to + * relation locks. + */ + TransferPredicateLocksToHeapRelation(oldrel); + } + + econtext = GetPerTupleExprContext(estate); + + /* + * Create necessary tuple slots. When rewriting, two slots are needed, + * otherwise one suffices. In the case where one slot suffices, we + * need to use the new tuple descriptor, otherwise some constraints + * can't be evaluated. Note that even when the tuple layout is the + * same and no rewrite is required, the tupDescs might not be + * (consider ADD COLUMN without a default). + */ + if (tab->rewrite) + { + Assert(newrel != NULL); + oldslot = MakeSingleTupleTableSlot(oldTupDesc, + table_slot_callbacks(oldrel)); + newslot = MakeSingleTupleTableSlot(newTupDesc, + table_slot_callbacks(newrel)); + + /* + * Set all columns in the new slot to NULL initially, to ensure + * columns added as part of the rewrite are initialized to NULL. + * That is necessary as tab->newvals will not contain an + * expression for columns with a NULL default, e.g. when adding a + * column without a default together with a column with a default + * requiring an actual rewrite. 
+ */ + ExecStoreAllNullTuple(newslot); + } + else + { + oldslot = MakeSingleTupleTableSlot(newTupDesc, + table_slot_callbacks(oldrel)); + newslot = NULL; + } + + /* + * Any attributes that are dropped according to the new tuple + * descriptor can be set to NULL. We precompute the list of dropped + * attributes to avoid needing to do so in the per-tuple loop. + */ + for (i = 0; i < newTupDesc->natts; i++) + { + if (TupleDescAttr(newTupDesc, i)->attisdropped) + dropped_attrs = lappend_int(dropped_attrs, i); + } + + /* + * Scan through the rows, generating a new row if needed and then + * checking all the constraints. + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(oldrel, snapshot, 0, NULL); + + /* + * Switch to per-tuple memory context and reset it for each tuple + * produced, so we don't leak memory. + */ + oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot)) + { + TupleTableSlot *insertslot; + + if (tab->rewrite > 0) + { + /* Extract data from old tuple */ + slot_getallattrs(oldslot); + ExecClearTuple(newslot); + + /* copy attributes */ + memcpy(newslot->tts_values, oldslot->tts_values, + sizeof(Datum) * oldslot->tts_nvalid); + memcpy(newslot->tts_isnull, oldslot->tts_isnull, + sizeof(bool) * oldslot->tts_nvalid); + + /* Set dropped attributes to null in new tuple */ + foreach(lc, dropped_attrs) + newslot->tts_isnull[lfirst_int(lc)] = true; + + /* + * Constraints and GENERATED expressions might reference the + * tableoid column, so fill tts_tableOid with the desired + * value. (We must do this each time, because it gets + * overwritten with newrel's OID during storing.) + */ + newslot->tts_tableOid = RelationGetRelid(oldrel); + + /* + * Process supplied expressions to replace selected columns. + * + * First, evaluate expressions whose inputs come from the old + * tuple. 
+ */ + econtext->ecxt_scantuple = oldslot; + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + if (ex->is_generated) + continue; + + newslot->tts_values[ex->attnum - 1] + = ExecEvalExpr(ex->exprstate, + econtext, + &newslot->tts_isnull[ex->attnum - 1]); + } + + ExecStoreVirtualTuple(newslot); + + /* + * Now, evaluate any expressions whose inputs come from the + * new tuple. We assume these columns won't reference each + * other, so that there's no ordering dependency. + */ + econtext->ecxt_scantuple = newslot; + + foreach(l, tab->newvals) + { + NewColumnValue *ex = lfirst(l); + + if (!ex->is_generated) + continue; + + newslot->tts_values[ex->attnum - 1] + = ExecEvalExpr(ex->exprstate, + econtext, + &newslot->tts_isnull[ex->attnum - 1]); + } + + insertslot = newslot; + } + else + { + /* + * If there's no rewrite, old and new table are guaranteed to + * have the same AM, so we can just use the old slot to verify + * new constraints etc. + */ + insertslot = oldslot; + } + + /* Now check any constraints on the possibly-changed tuple */ + econtext->ecxt_scantuple = insertslot; + + foreach(l, notnull_attrs) + { + int attn = lfirst_int(l); + + if (slot_attisnull(insertslot, attn + 1)) + { + Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn); + + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("column \"%s\" of relation \"%s\" contains null values", + NameStr(attr->attname), + RelationGetRelationName(oldrel)), + errtablecol(oldrel, attn + 1))); + } + } + + foreach(l, tab->constraints) + { + NewConstraint *con = lfirst(l); + + switch (con->contype) + { + case CONSTR_CHECK: + if (!ExecCheck(con->qualstate, econtext)) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("check constraint \"%s\" of relation \"%s\" is violated by some row", + con->name, + RelationGetRelationName(oldrel)), + errtableconstraint(oldrel, con->name))); + break; + case CONSTR_FOREIGN: + /* Nothing to do here */ + break; + default: + elog(ERROR, 
"unrecognized constraint type: %d", + (int) con->contype); + } + } + + if (partqualstate && !ExecCheck(partqualstate, econtext)) + { + if (tab->validate_default) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("updated partition constraint for default partition \"%s\" would be violated by some row", + RelationGetRelationName(oldrel)), + errtable(oldrel))); + else + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("partition constraint of relation \"%s\" is violated by some row", + RelationGetRelationName(oldrel)), + errtable(oldrel))); + } + + /* Write the tuple out to the new relation */ + if (newrel) + table_tuple_insert(newrel, insertslot, mycid, + ti_options, bistate); + + ResetExprContext(econtext); + + CHECK_FOR_INTERRUPTS(); + } + + MemoryContextSwitchTo(oldCxt); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + ExecDropSingleTupleTableSlot(oldslot); + if (newslot) + ExecDropSingleTupleTableSlot(newslot); + } + + FreeExecutorState(estate); + + table_close(oldrel, NoLock); + if (newrel) + { + FreeBulkInsertState(bistate); + + table_finish_bulk_insert(newrel, ti_options); + + table_close(newrel, NoLock); + } +} + +/* + * ATGetQueueEntry: find or create an entry in the ALTER TABLE work queue + */ +static AlteredTableInfo * +ATGetQueueEntry(List **wqueue, Relation rel) +{ + Oid relid = RelationGetRelid(rel); + AlteredTableInfo *tab; + ListCell *ltab; + + foreach(ltab, *wqueue) + { + tab = (AlteredTableInfo *) lfirst(ltab); + if (tab->relid == relid) + return tab; + } + + /* + * Not there, so add it. Note that we make a copy of the relation's + * existing descriptor before anything interesting can happen to it. 
+ */ + tab = (AlteredTableInfo *) palloc0(sizeof(AlteredTableInfo)); + tab->relid = relid; + tab->rel = NULL; /* set later */ + tab->relkind = rel->rd_rel->relkind; + tab->oldDesc = CreateTupleDescCopyConstr(RelationGetDescr(rel)); + tab->newAccessMethod = InvalidOid; + tab->newTableSpace = InvalidOid; + tab->newrelpersistence = RELPERSISTENCE_PERMANENT; + tab->chgPersistence = false; + + *wqueue = lappend(*wqueue, tab); + + return tab; +} + +static const char * +alter_table_type_to_string(AlterTableType cmdtype) +{ + switch (cmdtype) + { + case AT_AddColumn: + case AT_AddColumnRecurse: + case AT_AddColumnToView: + return "ADD COLUMN"; + case AT_ColumnDefault: + case AT_CookedColumnDefault: + return "ALTER COLUMN ... SET DEFAULT"; + case AT_DropNotNull: + return "ALTER COLUMN ... DROP NOT NULL"; + case AT_SetNotNull: + return "ALTER COLUMN ... SET NOT NULL"; + case AT_DropExpression: + return "ALTER COLUMN ... DROP EXPRESSION"; + case AT_CheckNotNull: + return NULL; /* not real grammar */ + case AT_SetStatistics: + return "ALTER COLUMN ... SET STATISTICS"; + case AT_SetOptions: + return "ALTER COLUMN ... SET"; + case AT_ResetOptions: + return "ALTER COLUMN ... RESET"; + case AT_SetStorage: + return "ALTER COLUMN ... SET STORAGE"; + case AT_SetCompression: + return "ALTER COLUMN ... 
SET COMPRESSION"; + case AT_DropColumn: + case AT_DropColumnRecurse: + return "DROP COLUMN"; + case AT_AddIndex: + case AT_ReAddIndex: + return NULL; /* not real grammar */ + case AT_AddConstraint: + case AT_AddConstraintRecurse: + case AT_ReAddConstraint: + case AT_ReAddDomainConstraint: + case AT_AddIndexConstraint: + return "ADD CONSTRAINT"; + case AT_AlterConstraint: + return "ALTER CONSTRAINT"; + case AT_ValidateConstraint: + case AT_ValidateConstraintRecurse: + return "VALIDATE CONSTRAINT"; + case AT_DropConstraint: + case AT_DropConstraintRecurse: + return "DROP CONSTRAINT"; + case AT_ReAddComment: + return NULL; /* not real grammar */ + case AT_AlterColumnType: + return "ALTER COLUMN ... SET DATA TYPE"; + case AT_AlterColumnGenericOptions: + return "ALTER COLUMN ... OPTIONS"; + case AT_ChangeOwner: + return "OWNER TO"; + case AT_ClusterOn: + return "CLUSTER ON"; + case AT_DropCluster: + return "SET WITHOUT CLUSTER"; + case AT_SetAccessMethod: + return "SET ACCESS METHOD"; + case AT_SetLogged: + return "SET LOGGED"; + case AT_SetUnLogged: + return "SET UNLOGGED"; + case AT_DropOids: + return "SET WITHOUT OIDS"; + case AT_SetTableSpace: + return "SET TABLESPACE"; + case AT_SetRelOptions: + return "SET"; + case AT_ResetRelOptions: + return "RESET"; + case AT_ReplaceRelOptions: + return NULL; /* not real grammar */ + case AT_EnableTrig: + return "ENABLE TRIGGER"; + case AT_EnableAlwaysTrig: + return "ENABLE ALWAYS TRIGGER"; + case AT_EnableReplicaTrig: + return "ENABLE REPLICA TRIGGER"; + case AT_DisableTrig: + return "DISABLE TRIGGER"; + case AT_EnableTrigAll: + return "ENABLE TRIGGER ALL"; + case AT_DisableTrigAll: + return "DISABLE TRIGGER ALL"; + case AT_EnableTrigUser: + return "ENABLE TRIGGER USER"; + case AT_DisableTrigUser: + return "DISABLE TRIGGER USER"; + case AT_EnableRule: + return "ENABLE RULE"; + case AT_EnableAlwaysRule: + return "ENABLE ALWAYS RULE"; + case AT_EnableReplicaRule: + return "ENABLE REPLICA RULE"; + case AT_DisableRule: + return 
"DISABLE RULE"; + case AT_AddInherit: + return "INHERIT"; + case AT_DropInherit: + return "NO INHERIT"; + case AT_AddOf: + return "OF"; + case AT_DropOf: + return "NOT OF"; + case AT_ReplicaIdentity: + return "REPLICA IDENTITY"; + case AT_EnableRowSecurity: + return "ENABLE ROW SECURITY"; + case AT_DisableRowSecurity: + return "DISABLE ROW SECURITY"; + case AT_ForceRowSecurity: + return "FORCE ROW SECURITY"; + case AT_NoForceRowSecurity: + return "NO FORCE ROW SECURITY"; + case AT_GenericOptions: + return "OPTIONS"; + case AT_AttachPartition: + return "ATTACH PARTITION"; + case AT_DetachPartition: + return "DETACH PARTITION"; + case AT_DetachPartitionFinalize: + return "DETACH PARTITION ... FINALIZE"; + case AT_AddIdentity: + return "ALTER COLUMN ... ADD IDENTITY"; + case AT_SetIdentity: + return "ALTER COLUMN ... SET"; + case AT_DropIdentity: + return "ALTER COLUMN ... DROP IDENTITY"; + case AT_ReAddStatistics: + return NULL; /* not real grammar */ + } + + return NULL; +} + +/* + * ATSimplePermissions + * + * - Ensure that it is a relation (or possibly a view) + * - Ensure this user is the owner + * - Ensure that it is not a system table + */ +static void +ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets) +{ + int actual_target; + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + actual_target = ATT_TABLE; + break; + case RELKIND_VIEW: + actual_target = ATT_VIEW; + break; + case RELKIND_MATVIEW: + actual_target = ATT_MATVIEW; + break; + case RELKIND_INDEX: + actual_target = ATT_INDEX; + break; + case RELKIND_PARTITIONED_INDEX: + actual_target = ATT_PARTITIONED_INDEX; + break; + case RELKIND_COMPOSITE_TYPE: + actual_target = ATT_COMPOSITE_TYPE; + break; + case RELKIND_FOREIGN_TABLE: + actual_target = ATT_FOREIGN_TABLE; + break; + case RELKIND_SEQUENCE: + actual_target = ATT_SEQUENCE; + break; + default: + actual_target = 0; + break; + } + + /* Wrong target type? 
*/
+	if ((actual_target & allowed_targets) == 0)
+	{
+		const char *action_str = alter_table_type_to_string(cmdtype);
+
+		/* NULL action_str marks internal-only subtypes with no real grammar */
+		if (action_str)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+			/* translator: %s is a group of some SQL keywords */
+					 errmsg("ALTER action %s cannot be performed on relation \"%s\"",
+							action_str, RelationGetRelationName(rel)),
+					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+		else
+			/* internal error? */
+			elog(ERROR, "invalid ALTER action attempted on relation \"%s\"",
+				 RelationGetRelationName(rel));
+	}
+
+	/* Permissions checks: the caller must be the relation's owner */
+	if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+					   RelationGetRelationName(rel));
+
+	/* System catalogs are off-limits unless allow_system_table_mods is set */
+	if (!allowSystemTableMods && IsSystemRelation(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("permission denied: \"%s\" is a system catalog",
+						RelationGetRelationName(rel))));
+}
+
+/*
+ * ATSimpleRecursion
+ *
+ * Simple table recursion sufficient for most ALTER TABLE operations.
+ * All direct and indirect children are processed in an unspecified order.
+ * Note that if a child inherits from the original table via multiple
+ * inheritance paths, it will be visited just once.
+ */
+static void
+ATSimpleRecursion(List **wqueue, Relation rel,
+				  AlterTableCmd *cmd, bool recurse, LOCKMODE lockmode,
+				  AlterTableUtilityContext *context)
+{
+	/*
+	 * Propagate to children, if desired and if there are (or might be) any
+	 * children.
+	 */
+	if (recurse && rel->rd_rel->relhassubclass)
+	{
+		Oid			relid = RelationGetRelid(rel);
+		ListCell   *child;
+		List	   *children;
+
+		children = find_all_inheritors(relid, lockmode, NULL);
+
+		/*
+		 * find_all_inheritors does the recursive search of the inheritance
+		 * hierarchy, so all we have to do is process all of the relids in the
+		 * list that it returns.
+		 */
+		foreach(child, children)
+		{
+			Oid			childrelid = lfirst_oid(child);
+			Relation	childrel;
+
+			/* the inheritors list includes the parent itself; skip it */
+			if (childrelid == relid)
+				continue;
+			/* find_all_inheritors already got lock */
+			childrel = relation_open(childrelid, NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+			ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
+			relation_close(childrel, NoLock);
+		}
+	}
+}
+
+/*
+ * Obtain list of partitions of the given table, locking them all at the given
+ * lockmode and ensuring that they all pass CheckTableNotInUse.
+ *
+ * This function is a no-op if the given relation is not a partitioned table;
+ * in particular, nothing is done if it's a legacy inheritance parent.
+ */
+static void
+ATCheckPartitionsNotInUse(Relation rel, LOCKMODE lockmode)
+{
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		List	   *inh;
+		ListCell   *cell;
+
+		/* locks are taken here, parent first, in find_all_inheritors order */
+		inh = find_all_inheritors(RelationGetRelid(rel), lockmode, NULL);
+		/* first element is the parent rel; must ignore it */
+		for_each_from(cell, inh, 1)
+		{
+			Relation	childrel;
+
+			/* find_all_inheritors already got lock */
+			childrel = table_open(lfirst_oid(cell), NoLock);
+			CheckTableNotInUse(childrel, "ALTER TABLE");
+			table_close(childrel, NoLock);
+		}
+		list_free(inh);
+	}
+}
+
+/*
+ * ATTypedTableRecursion
+ *
+ * Propagate ALTER TYPE operations to the typed tables of that type.
+ * Also check the RESTRICT/CASCADE behavior.  Given CASCADE, also permit
+ * recursion to inheritance children of the typed tables.
+ */
+static void
+ATTypedTableRecursion(List **wqueue, Relation rel, AlterTableCmd *cmd,
+					  LOCKMODE lockmode, AlterTableUtilityContext *context)
+{
+	ListCell   *child;
+	List	   *children;
+
+	Assert(rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);
+
+	/* RESTRICT vs CASCADE is enforced inside find_typed_table_dependencies */
+	children = find_typed_table_dependencies(rel->rd_rel->reltype,
+											 RelationGetRelationName(rel),
+											 cmd->behavior);
+
+	foreach(child, children)
+	{
+		Oid			childrelid = lfirst_oid(child);
+		Relation	childrel;
+
+		/* unlike ATSimpleRecursion, the child's lock is acquired here */
+		childrel = relation_open(childrelid, lockmode);
+		CheckTableNotInUse(childrel, "ALTER TABLE");
+		ATPrepCmd(wqueue, childrel, cmd, true, true, lockmode, context);
+		relation_close(childrel, NoLock);
+	}
+}
+
+
+/*
+ * find_composite_type_dependencies
+ *
+ * Check to see if the type "typeOid" is being used as a column in some table
+ * (possibly nested several levels deep in composite types, arrays, etc!).
+ * Eventually, we'd like to propagate the check or rewrite operation
+ * into such tables, but for now, just error out if we find any.
+ *
+ * Caller should provide either the associated relation of a rowtype,
+ * or a type name (not both) for use in the error message, if any.
+ *
+ * Note that "typeOid" is not necessarily a composite type; it could also be
+ * another container type such as an array or range, or a domain over one of
+ * these things.  The name of this function is therefore somewhat historical,
+ * but it's not worth changing.
+ *
+ * We assume that functions and views depending on the type are not reasons
+ * to reject the ALTER.  (How safe is this really?)
+ */
+void
+find_composite_type_dependencies(Oid typeOid, Relation origRelation,
+								 const char *origTypeName)
+{
+	Relation	depRel;
+	ScanKeyData key[2];
+	SysScanDesc depScan;
+	HeapTuple	depTup;
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	/*
+	 * We scan pg_depend to find those things that depend on the given type.
+	 * (We assume we can ignore refobjsubid for a type.)
+	 */
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	/* match pg_depend rows whose referenced object is exactly this type */
+	ScanKeyInit(&key[0],
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(TypeRelationId));
+	ScanKeyInit(&key[1],
+				Anum_pg_depend_refobjid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(typeOid));
+
+	/* index scan on (refclassid, refobjid) via the reference index */
+	depScan = systable_beginscan(depRel, DependReferenceIndexId, true,
+								 NULL, 2, key);
+
+	while (HeapTupleIsValid(depTup = systable_getnext(depScan)))
+	{
+		Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup);
+		Relation	rel;
+		TupleDesc	tupleDesc;
+		Form_pg_attribute att;
+
+		/* Check for directly dependent types */
+		if (pg_depend->classid == TypeRelationId)
+		{
+			/*
+			 * This must be an array, domain, or range containing the given
+			 * type, so recursively check for uses of this type.  Note that
+			 * any error message will mention the original type not the
+			 * container; this is intentional.
+			 */
+			find_composite_type_dependencies(pg_depend->objid,
+											 origRelation, origTypeName);
+			continue;
+		}
+
+		/* Else, ignore dependees that aren't relations */
+		if (pg_depend->classid != RelationRelationId)
+			continue;
+
+		rel = relation_open(pg_depend->objid, AccessShareLock);
+		tupleDesc = RelationGetDescr(rel);
+
+		/*
+		 * If objsubid identifies a specific column, refer to that in error
+		 * messages.  Otherwise, search to see if there's a user column of the
+		 * type.  (We assume system columns are never of interesting types.)
+		 * The search is needed because an index containing an expression
+		 * column of the target type will just be recorded as a whole-relation
+		 * dependency.  If we do not find a column of the type, the dependency
+		 * must indicate that the type is transiently referenced in an index
+		 * expression but not stored on disk, which we assume is OK, just as
+		 * we do for references in views.  (It could also be that the target
+		 * type is embedded in some container type that is stored in an index
+		 * column, but the previous recursion should catch such cases.)
+ */ + if (pg_depend->objsubid > 0 && pg_depend->objsubid <= tupleDesc->natts) + att = TupleDescAttr(tupleDesc, pg_depend->objsubid - 1); + else + { + att = NULL; + for (int attno = 1; attno <= tupleDesc->natts; attno++) + { + att = TupleDescAttr(tupleDesc, attno - 1); + if (att->atttypid == typeOid && !att->attisdropped) + break; + att = NULL; + } + if (att == NULL) + { + /* No such column, so assume OK */ + relation_close(rel, AccessShareLock); + continue; + } + } + + /* + * We definitely should reject if the relation has storage. If it's + * partitioned, then perhaps we don't have to reject: if there are + * partitions then we'll fail when we find one, else there is no + * stored data to worry about. However, it's possible that the type + * change would affect conclusions about whether the type is sortable + * or hashable and thus (if it's a partitioning column) break the + * partitioning rule. For now, reject for partitioned rels too. + */ + if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind) || + RELKIND_HAS_PARTITIONS(rel->rd_rel->relkind)) + { + if (origTypeName) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it", + origTypeName, + RelationGetRelationName(rel), + NameStr(att->attname)))); + else if (origRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter type \"%s\" because column \"%s.%s\" uses it", + RelationGetRelationName(origRelation), + RelationGetRelationName(rel), + NameStr(att->attname)))); + else if (origRelation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter foreign table \"%s\" because column \"%s.%s\" uses its row type", + RelationGetRelationName(origRelation), + RelationGetRelationName(rel), + NameStr(att->attname)))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter table 
\"%s\" because column \"%s.%s\" uses its row type",
                            RelationGetRelationName(origRelation),
                            RelationGetRelationName(rel),
                            NameStr(att->attname))));
        }
        else if (OidIsValid(rel->rd_rel->reltype))
        {
            /*
             * A view or composite type itself isn't a problem, but we must
             * recursively check for indirect dependencies via its rowtype.
             */
            find_composite_type_dependencies(rel->rd_rel->reltype,
                                             origRelation, origTypeName);
        }

        relation_close(rel, AccessShareLock);
    }

    systable_endscan(depScan);

    relation_close(depRel, AccessShareLock);
}


/*
 * find_typed_table_dependencies
 *
 * Check to see if a composite type is being used as the type of a
 * typed table.  Abort if any are found and behavior is RESTRICT.
 * Else return the list of tables.
 */
static List *
find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior behavior)
{
    Relation    classRel;
    ScanKeyData key[1];
    TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;

    classRel = table_open(RelationRelationId, AccessShareLock);

    /* Scan pg_class for relations whose reloftype matches the target type */
    ScanKeyInit(&key[0],
                Anum_pg_class_reloftype,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(typeOid));

    scan = table_beginscan_catalog(classRel, 1, key);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);

        /* Under RESTRICT, any dependent typed table is an immediate error */
        if (behavior == DROP_RESTRICT)
            ereport(ERROR,
                    (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
                     errmsg("cannot alter type \"%s\" because it is the type of a typed table",
                            typeName),
                     errhint("Use ALTER ... CASCADE to alter the typed tables too.")));
        else
            result = lappend_oid(result, classform->oid);
    }

    table_endscan(scan);
    table_close(classRel, AccessShareLock);

    return result;
}


/*
 * check_of_type
 *
 * Check whether a type is suitable for CREATE TABLE OF/ALTER TABLE OF.  If it
 * isn't suitable, throw an error.  Currently, we require that the type
 * originated with CREATE TYPE AS.  We could support any row type, but doing so
 * would require handling a number of extra corner cases in the DDL commands.
 * (Also, allowing domain-over-composite would open up a can of worms about
 * whether and how the domain's constraints should apply to derived tables.)
 */
void
check_of_type(HeapTuple typetuple)
{
    Form_pg_type typ = (Form_pg_type) GETSTRUCT(typetuple);
    bool        typeOk = false;

    if (typ->typtype == TYPTYPE_COMPOSITE)
    {
        Relation    typeRelation;

        Assert(OidIsValid(typ->typrelid));
        typeRelation = relation_open(typ->typrelid, AccessShareLock);
        typeOk = (typeRelation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE);

        /*
         * Close the parent rel, but keep our AccessShareLock on it until xact
         * commit.  That will prevent someone else from deleting or ALTERing
         * the type before the typed table creation/conversion commits.
         */
        relation_close(typeRelation, NoLock);
    }
    /*
     * Not composite, or composite but owned by a relation other than a
     * stand-alone composite type (e.g. a table's implicit rowtype).
     */
    if (!typeOk)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("type %s is not a composite type",
                        format_type_be(typ->oid))));
}


/*
 * ALTER TABLE ADD COLUMN
 *
 * Adds an additional attribute to a relation making the assumption that
 * CHECK, NOT NULL, and FOREIGN KEY constraints will be removed from the
 * AT_AddColumn AlterTableCmd by parse_utilcmd.c and added as independent
 * AlterTableCmd's.
 *
 * ADD COLUMN cannot use the normal ALTER TABLE recursion mechanism, because we
 * have to decide at runtime whether to recurse or not depending on whether we
 * actually add a column or merely merge with an existing column.  (We can't
 * check this in a static pre-pass because it won't handle multiple inheritance
 * situations correctly.)
 */
static void
ATPrepAddColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
                bool is_view, AlterTableCmd *cmd, LOCKMODE lockmode,
                AlterTableUtilityContext *context)
{
    /* Typed tables take their columns from the type; direct ADD is rejected */
    if (rel->rd_rel->reloftype && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot add column to typed table")));

    if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
        ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);

    if (recurse && !is_view)
        cmd->subtype = AT_AddColumnRecurse;
}

/*
 * Add a column to a table.  The return value is the address of the
 * new column in the parent relation.
 *
 * cmd is pass-by-ref so that we can replace it with the parse-transformed
 * copy (but that happens only after we check for IF NOT EXISTS).
 */
static ObjectAddress
ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
                AlterTableCmd **cmd,
                bool recurse, bool recursing,
                LOCKMODE lockmode, int cur_pass,
                AlterTableUtilityContext *context)
{
    Oid         myrelid = RelationGetRelid(rel);
    ColumnDef  *colDef = castNode(ColumnDef, (*cmd)->def);
    bool        if_not_exists = (*cmd)->missing_ok;
    Relation    pgclass,
                attrdesc;
    HeapTuple   reltup;
    FormData_pg_attribute attribute;
    int         newattnum;
    char        relkind;
    HeapTuple   typeTuple;
    Oid         typeOid;
    int32       typmod;
    Oid         collOid;
    Form_pg_type tform;
    Expr       *defval;
    List       *children;
    ListCell   *child;
    AlterTableCmd *childcmd;
    AclResult   aclresult;
    ObjectAddress address;
    TupleDesc   tupdesc;
    /* one-element array so the single new tuple fits InsertPgAttributeTuples */
    FormData_pg_attribute *aattr[] = {&attribute};

    /* At top level, permission check was done in ATPrepCmd, else do it */
    if (recursing)
        ATSimplePermissions((*cmd)->subtype, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

    if (rel->rd_rel->relispartition && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot add column to a partition")));

    attrdesc = table_open(AttributeRelationId, RowExclusiveLock);

    /*
     * Are we adding the column to a recursion child?  If so, check whether to
     * merge with an existing definition for the column.  If we do merge, we
     * must not recurse.  Children will already have the column, and recursing
     * into them would mess up attinhcount.
     */
    if (colDef->inhcount > 0)
    {
        HeapTuple   tuple;

        /* Does child already have a column by this name? */
        tuple = SearchSysCacheCopyAttName(myrelid, colDef->colname);
        if (HeapTupleIsValid(tuple))
        {
            Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);
            Oid         ctypeId;
            int32       ctypmod;
            Oid         ccollid;

            /* Child column must match on type, typmod, and collation */
            typenameTypeIdAndMod(NULL, colDef->typeName, &ctypeId, &ctypmod);
            if (ctypeId != childatt->atttypid ||
                ctypmod != childatt->atttypmod)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("child table \"%s\" has different type for column \"%s\"",
                                RelationGetRelationName(rel), colDef->colname)));
            ccollid = GetColumnDefCollation(NULL, colDef, ctypeId);
            if (ccollid != childatt->attcollation)
                ereport(ERROR,
                        (errcode(ERRCODE_COLLATION_MISMATCH),
                         errmsg("child table \"%s\" has different collation for column \"%s\"",
                                RelationGetRelationName(rel), colDef->colname),
                         errdetail("\"%s\" versus \"%s\"",
                                   get_collation_name(ccollid),
                                   get_collation_name(childatt->attcollation))));

            /* Bump the existing child att's inhcount */
            childatt->attinhcount++;
            CatalogTupleUpdate(attrdesc, &tuple->t_self, tuple);

            heap_freetuple(tuple);

            /* Inform the user about the merge */
            ereport(NOTICE,
                    (errmsg("merging definition of column \"%s\" for child \"%s\"",
                            colDef->colname, RelationGetRelationName(rel))));

            table_close(attrdesc, RowExclusiveLock);
            return InvalidObjectAddress;
        }
    }

    /* skip if the name already exists and if_not_exists is true */
    if (!check_for_column_name_collision(rel, colDef->colname, if_not_exists))
    {
        table_close(attrdesc, RowExclusiveLock);
        return InvalidObjectAddress;
    }

    /*
     * Okay, we need to add the column, so go ahead and do parse
     * transformation.  This can result in queueing up, or even immediately
     * executing, subsidiary operations (such as creation of unique indexes);
     * so we mustn't do it until we have made the if_not_exists check.
     *
     * When recursing, the command was already transformed and we needn't do
     * so again.  Also, if context isn't given we can't transform.  (That
     * currently happens only for AT_AddColumnToView; we expect that view.c
     * passed us a ColumnDef that doesn't need work.)
     */
    if (context != NULL && !recursing)
    {
        *cmd = ATParseTransformCmd(wqueue, tab, rel, *cmd, recurse, lockmode,
                                   cur_pass, context);
        Assert(*cmd != NULL);
        colDef = castNode(ColumnDef, (*cmd)->def);
    }

    /*
     * Cannot add identity column if table has children, because identity does
     * not inherit.  (Adding column and identity separately will work.)
     */
    if (colDef->identity &&
        recurse &&
        find_inheritance_children(myrelid, NoLock) != NIL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot recursively add identity column to table that has child tables")));

    pgclass = table_open(RelationRelationId, RowExclusiveLock);

    reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(myrelid));
    if (!HeapTupleIsValid(reltup))
        elog(ERROR, "cache lookup failed for relation %u", myrelid);
    relkind = ((Form_pg_class) GETSTRUCT(reltup))->relkind;

    /* Determine the new attribute's number */
    newattnum = ((Form_pg_class) GETSTRUCT(reltup))->relnatts + 1;
    if (newattnum > MaxHeapAttributeNumber)
        ereport(ERROR,
                (errcode(ERRCODE_TOO_MANY_COLUMNS),
                 errmsg("tables can have at most %d columns",
                        MaxHeapAttributeNumber)));

    typeTuple = typenameType(NULL, colDef->typeName, &typmod);
    tform = (Form_pg_type) GETSTRUCT(typeTuple);
    typeOid = tform->oid;

    aclresult = pg_type_aclcheck(typeOid, GetUserId(), ACL_USAGE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error_type(aclresult, typeOid);

    collOid = GetColumnDefCollation(NULL, colDef, typeOid);

    /* make sure datatype is legal for a column */
    CheckAttributeType(colDef->colname, typeOid, collOid,
                       list_make1_oid(rel->rd_rel->reltype),
                       0);

    /*
     * Construct new attribute's pg_attribute entry.  (Variable-length fields
     * are handled by InsertPgAttributeTuples().)
     */
    attribute.attrelid = myrelid;
    namestrcpy(&(attribute.attname), colDef->colname);
    attribute.atttypid = typeOid;
    /* newattnum is always > 0 here, so this selects the default target (-1) */
    attribute.attstattarget = (newattnum > 0) ? -1 : 0;
    attribute.attlen = tform->typlen;
    attribute.attnum = newattnum;
    attribute.attndims = list_length(colDef->typeName->arrayBounds);
    attribute.atttypmod = typmod;
    attribute.attbyval = tform->typbyval;
    attribute.attalign = tform->typalign;
    attribute.attstorage = tform->typstorage;
    attribute.attcompression = GetAttributeCompression(typeOid,
                                                       colDef->compression);
    attribute.attnotnull = colDef->is_not_null;
    attribute.atthasdef = false;
    attribute.atthasmissing = false;
    attribute.attidentity = colDef->identity;
    attribute.attgenerated = colDef->generated;
    attribute.attisdropped = false;
    attribute.attislocal = colDef->is_local;
    attribute.attinhcount = colDef->inhcount;
    attribute.attcollation = collOid;

    ReleaseSysCache(typeTuple);

    tupdesc = CreateTupleDesc(lengthof(aattr), (FormData_pg_attribute **) &aattr);

    InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);

    table_close(attrdesc, RowExclusiveLock);

    /*
     * Update pg_class tuple as appropriate
     */
    ((Form_pg_class) GETSTRUCT(reltup))->relnatts = newattnum;

    CatalogTupleUpdate(pgclass, &reltup->t_self, reltup);

    heap_freetuple(reltup);

    /* Post creation hook for new attribute */
    InvokeObjectPostCreateHook(RelationRelationId, myrelid, newattnum);

    table_close(pgclass, RowExclusiveLock);

    /* Make the attribute's catalog entry visible */
    CommandCounterIncrement();

    /*
     * Store the DEFAULT, if any, in the catalogs
     */
    if (colDef->raw_default)
    {
        RawColumnDefault *rawEnt;

        rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
        rawEnt->attnum = attribute.attnum;
        rawEnt->raw_default = copyObject(colDef->raw_default);

        /*
         * Attempt to skip a complete table rewrite by storing the specified
         * DEFAULT value outside of the heap.  This may be disabled inside
         * AddRelationNewConstraints if the optimization cannot be applied.
         */
        rawEnt->missingMode = (!colDef->generated);

        rawEnt->generated = colDef->generated;

        /*
         * This function is intended for CREATE TABLE, so it processes a
         * _list_ of defaults, but we just do one.
         */
        AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
                                  false, true, false, NULL);

        /* Make the additional catalog changes visible */
        CommandCounterIncrement();

        /*
         * Did the request for a missing value work? If not we'll have to do a
         * rewrite
         */
        if (!rawEnt->missingMode)
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
    }

    /*
     * Tell Phase 3 to fill in the default expression, if there is one.
     *
     * If there is no default, Phase 3 doesn't have to do anything, because
     * that effectively means that the default is NULL.  The heap tuple access
     * routines always check for attnum > # of attributes in tuple, and return
     * NULL if so, so without any modification of the tuple data we will get
     * the effect of NULL values in the new column.
     *
     * An exception occurs when the new column is of a domain type: the domain
     * might have a NOT NULL constraint, or a check constraint that indirectly
     * rejects nulls.  If there are any domain constraints then we construct
     * an explicit NULL default value that will be passed through
     * CoerceToDomain processing.  (This is a tad inefficient, since it causes
     * rewriting the table which we really don't have to do, but the present
     * design of domain processing doesn't offer any simple way of checking
     * the constraints more directly.)
     *
     * Note: we use build_column_default, and not just the cooked default
     * returned by AddRelationNewConstraints, so that the right thing happens
     * when a datatype's default applies.
     *
     * Note: it might seem that this should happen at the end of Phase 2, so
     * that the effects of subsequent subcommands can be taken into account.
     * It's intentional that we do it now, though.  The new column should be
     * filled according to what is said in the ADD COLUMN subcommand, so that
     * the effects are the same as if this subcommand had been run by itself
     * and the later subcommands had been issued in new ALTER TABLE commands.
     *
     * We can skip this entirely for relations without storage, since Phase 3
     * is certainly not going to touch them.  System attributes don't have
     * interesting defaults, either.
     */
    if (RELKIND_HAS_STORAGE(relkind) && attribute.attnum > 0)
    {
        /*
         * For an identity column, we can't use build_column_default(),
         * because the sequence ownership isn't set yet.  So do it manually.
         */
        if (colDef->identity)
        {
            NextValueExpr *nve = makeNode(NextValueExpr);

            nve->seqid = RangeVarGetRelid(colDef->identitySequence, NoLock, false);
            nve->typeId = typeOid;

            defval = (Expr *) nve;

            /* must do a rewrite for identity columns */
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;
        }
        else
            defval = (Expr *) build_column_default(rel, attribute.attnum);

        if (!defval && DomainHasConstraints(typeOid))
        {
            Oid         baseTypeId;
            int32       baseTypeMod;
            Oid         baseTypeColl;

            baseTypeMod = typmod;
            baseTypeId = getBaseTypeAndTypmod(typeOid, &baseTypeMod);
            baseTypeColl = get_typcollation(baseTypeId);
            defval = (Expr *) makeNullConst(baseTypeId, baseTypeMod, baseTypeColl);
            defval = (Expr *) coerce_to_target_type(NULL,
                                                    (Node *) defval,
                                                    baseTypeId,
                                                    typeOid,
                                                    typmod,
                                                    COERCION_ASSIGNMENT,
                                                    COERCE_IMPLICIT_CAST,
                                                    -1);
            if (defval == NULL) /* should not happen */
                elog(ERROR, "failed to coerce base type to domain");
        }

        if (defval)
        {
            NewColumnValue *newval;

            newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
            newval->attnum = attribute.attnum;
            newval->expr = expression_planner(defval);
            newval->is_generated = (colDef->generated != '\0');

            tab->newvals = lappend(tab->newvals, newval);
        }

        if (DomainHasConstraints(typeOid))
            tab->rewrite |= AT_REWRITE_DEFAULT_VAL;

        if (!TupleDescAttr(rel->rd_att, attribute.attnum - 1)->atthasmissing)
        {
            /*
             * If the new column is NOT NULL, and there is no missing value,
             * tell Phase 3 it needs to check for NULLs.
             */
            tab->verify_new_notnull |= colDef->is_not_null;
        }
    }

    /*
     * Add needed dependency entries for the new column.
     */
    add_column_datatype_dependency(myrelid, newattnum, attribute.atttypid);
    add_column_collation_dependency(myrelid, newattnum, attribute.attcollation);

    /*
     * Propagate to children as appropriate.  Unlike most other ALTER
     * routines, we have to do this one level of recursion at a time; we can't
     * use find_all_inheritors to do it in one pass.
     */
    children =
        find_inheritance_children(RelationGetRelid(rel), lockmode);

    /*
     * If we are told not to recurse, there had better not be any child
     * tables; else the addition would put them out of step.
     */
    if (children && !recurse)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("column must be added to child tables too")));

    /* Children should see column as singly inherited */
    if (!recursing)
    {
        childcmd = copyObject(*cmd);
        colDef = castNode(ColumnDef, childcmd->def);
        colDef->inhcount = 1;
        colDef->is_local = false;
    }
    else
        childcmd = *cmd;        /* no need to copy again */

    foreach(child, children)
    {
        Oid         childrelid = lfirst_oid(child);
        Relation    childrel;
        AlteredTableInfo *childtab;

        /* find_inheritance_children already got lock */
        childrel = table_open(childrelid, NoLock);
        CheckTableNotInUse(childrel, "ALTER TABLE");

        /* Find or create work queue entry for this table */
        childtab = ATGetQueueEntry(wqueue, childrel);

        /* Recurse to child; return value is ignored */
        ATExecAddColumn(wqueue, childtab, childrel,
                        &childcmd, recurse, true,
                        lockmode, cur_pass, context);

        table_close(childrel, NoLock);
    }

    ObjectAddressSubSet(address, RelationRelationId, myrelid, newattnum);
    return address;
}

/*
 * If a new or renamed column will collide with the name of an existing
 * column and if_not_exists is false then error out, else do nothing.
 */
static bool
check_for_column_name_collision(Relation rel, const char *colname,
                                bool if_not_exists)
{
    HeapTuple   attTuple;
    int         attnum;

    /*
     * this test is deliberately not attisdropped-aware, since if one tries to
     * add a column matching a dropped column name, it's gonna fail anyway.
     */
    attTuple = SearchSysCache2(ATTNAME,
                               ObjectIdGetDatum(RelationGetRelid(rel)),
                               PointerGetDatum(colname));
    if (!HeapTupleIsValid(attTuple))
        return true;

    attnum = ((Form_pg_attribute) GETSTRUCT(attTuple))->attnum;
    ReleaseSysCache(attTuple);

    /*
     * We throw a different error message for conflicts with system column
     * names, since they are normally not shown and the user might otherwise
     * be confused about the reason for the conflict.
     */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_COLUMN),
                 errmsg("column name \"%s\" conflicts with a system column name",
                        colname)));
    else
    {
        if (if_not_exists)
        {
            ereport(NOTICE,
                    (errcode(ERRCODE_DUPLICATE_COLUMN),
                     errmsg("column \"%s\" of relation \"%s\" already exists, skipping",
                            colname, RelationGetRelationName(rel))));
            return false;
        }

        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" already exists",
                        colname, RelationGetRelationName(rel))));
    }

    /* not reached: every path above either returns or raises an error */
    return true;
}

/*
 * Install a column's dependency on its datatype.
 */
static void
add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid)
{
    ObjectAddress myself,
                referenced;

    /* dependent object is the column (relation OID + attribute number) */
    myself.classId = RelationRelationId;
    myself.objectId = relid;
    myself.objectSubId = attnum;
    referenced.classId = TypeRelationId;
    referenced.objectId = typid;
    referenced.objectSubId = 0;
    recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
}

/*
 * Install a column's dependency on its collation.
 */
static void
add_column_collation_dependency(Oid relid, int32 attnum, Oid collid)
{
    ObjectAddress myself,
                referenced;

    /* We know the default collation is pinned, so don't bother recording it */
    if (OidIsValid(collid) && collid != DEFAULT_COLLATION_OID)
    {
        myself.classId = RelationRelationId;
        myself.objectId = relid;
        myself.objectSubId = attnum;
        referenced.classId = CollationRelationId;
        referenced.objectId = collid;
        referenced.objectSubId = 0;
        recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
    }
}

/*
 * ALTER TABLE ALTER COLUMN DROP NOT NULL
 */

static void
ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
{
    /*
     * If the parent is a partitioned table, like check constraints, we do not
     * support removing the NOT NULL while partitions exist.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);

        Assert(partdesc != NULL);
        if (partdesc->nparts > 0 && !recurse && !recursing)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                     errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
                     errhint("Do not specify the ONLY keyword.")));
    }
}

/*
 * Return the address of the modified column.  If the column was already
 * nullable, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;
    Form_pg_attribute attTup;
    AttrNumber  attnum;
    Relation    attr_rel;
    List       *indexoidlist;
    ListCell   *indexoidscan;
    ObjectAddress address;

    /*
     * lookup the attribute
     */
    attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

    tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(tuple);
    attnum = attTup->attnum;

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /* identity columns are implicitly NOT NULL; that cannot be dropped */
    if (attTup->attidentity)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is an identity column",
                        colName, RelationGetRelationName(rel))));

    /*
     * Check that the attribute is not in a primary key or in an index used as
     * a replica identity.
     *
     * Note: we'll throw error even if the pkey index is not valid.
     */

    /* Loop over all indexes on the relation */
    indexoidlist = RelationGetIndexList(rel);

    foreach(indexoidscan, indexoidlist)
    {
        Oid         indexoid = lfirst_oid(indexoidscan);
        HeapTuple   indexTuple;
        Form_pg_index indexStruct;
        int         i;

        indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
        if (!HeapTupleIsValid(indexTuple))
            elog(ERROR, "cache lookup failed for index %u", indexoid);
        indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);

        /*
         * If the index is not a primary key or an index used as replica
         * identity, skip the check.
         */
        if (indexStruct->indisprimary || indexStruct->indisreplident)
        {
            /*
             * Loop over each attribute in the primary key or the index used
             * as replica identity and see if it matches the to-be-altered
             * attribute.  (Only the indnkeyatts key columns are examined.)
             */
            for (i = 0; i < indexStruct->indnkeyatts; i++)
            {
                if (indexStruct->indkey.values[i] == attnum)
                {
                    if (indexStruct->indisprimary)
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                                 errmsg("column \"%s\" is in a primary key",
                                        colName)));
                    else
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                                 errmsg("column \"%s\" is in index used as replica identity",
                                        colName)));
                }
            }
        }

        ReleaseSysCache(indexTuple);
    }

    list_free(indexoidlist);

    /* If rel is partition, shouldn't drop NOT NULL if parent has the same */
    if (rel->rd_rel->relispartition)
    {
        Oid         parentId = get_partition_parent(RelationGetRelid(rel), false);
        Relation    parent = table_open(parentId, AccessShareLock);
        TupleDesc   tupDesc = RelationGetDescr(parent);
        AttrNumber  parent_attnum;

        parent_attnum = get_attnum(parentId, colName);
        if (TupleDescAttr(tupDesc, parent_attnum - 1)->attnotnull)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                     errmsg("column \"%s\" is marked NOT NULL in parent table",
                            colName)));
        table_close(parent, AccessShareLock);
    }

    /*
     * Okay, actually perform the catalog change ... if needed
     */
    if (attTup->attnotnull)
    {
        attTup->attnotnull = false;

        CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

        ObjectAddressSubSet(address, RelationRelationId,
                            RelationGetRelid(rel), attnum);
    }
    else
        address = InvalidObjectAddress;

    /* the post-alter hook fires whether or not we changed anything */
    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    table_close(attr_rel, RowExclusiveLock);

    return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET NOT NULL
 */

static void
ATPrepSetNotNull(List **wqueue, Relation rel,
                 AlterTableCmd *cmd, bool recurse, bool recursing,
                 LOCKMODE lockmode, AlterTableUtilityContext *context)
{
    /*
     * If we're already recursing, there's nothing to do; the topmost
     * invocation of ATSimpleRecursion already visited all children.
     */
    if (recursing)
        return;

    /*
     * If the target column is already marked NOT NULL, we can skip recursing
     * to children, because their columns should already be marked NOT NULL as
     * well.  But there's no point in checking here unless the relation has
     * some children; else we can just wait till execution to check.  (If it
     * does have children, however, this can save taking per-child locks
     * unnecessarily.  This greatly improves concurrency in some parallel
     * restore scenarios.)
     *
     * Unfortunately, we can only apply this optimization to partitioned
     * tables, because traditional inheritance doesn't enforce that child
     * columns be NOT NULL when their parent is.  (That's a bug that should
     * get fixed someday.)
     */
    if (rel->rd_rel->relhassubclass &&
        rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        HeapTuple   tuple;
        bool        attnotnull;

        tuple = SearchSysCacheAttName(RelationGetRelid(rel), cmd->name);

        /* Might as well throw the error now, if name is bad */
        if (!HeapTupleIsValid(tuple))
            ereport(ERROR,
                    (errcode(ERRCODE_UNDEFINED_COLUMN),
                     errmsg("column \"%s\" of relation \"%s\" does not exist",
                            cmd->name, RelationGetRelationName(rel))));

        attnotnull = ((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull;
        ReleaseSysCache(tuple);
        if (attnotnull)
            return;
    }

    /*
     * If we have ALTER TABLE ONLY ... SET NOT NULL on a partitioned table,
     * apply ALTER TABLE ... CHECK NOT NULL to every child.  Otherwise, use
     * normal recursion logic.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
        !recurse)
    {
        AlterTableCmd *newcmd = makeNode(AlterTableCmd);

        newcmd->subtype = AT_CheckNotNull;
        newcmd->name = pstrdup(cmd->name);
        ATSimpleRecursion(wqueue, rel, newcmd, true, lockmode, context);
    }
    else
        ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode, context);
}

/*
 * Return the address of the modified column.  If the column was already NOT
 * NULL, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                 const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;
    AttrNumber  attnum;
    Relation    attr_rel;
    ObjectAddress address;

    /*
     * lookup the attribute
     */
    attr_rel = table_open(AttributeRelationId, RowExclusiveLock);

    tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    attnum = ((Form_pg_attribute) GETSTRUCT(tuple))->attnum;

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /*
     * Okay, actually perform the catalog change ... if needed
     */
    if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
    {
        ((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull = true;

        CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

        /*
         * Ordinarily phase 3 must ensure that no NULLs exist in columns that
         * are set NOT NULL; however, if we can find a constraint which proves
         * this then we can skip that.  We needn't bother looking if we've
         * already found that we must verify some other NOT NULL constraint.
         */
        if (!tab->verify_new_notnull &&
            !NotNullImpliedByRelConstraints(rel, (Form_pg_attribute) GETSTRUCT(tuple)))
        {
            /* Tell Phase 3 it needs to test the constraint */
            tab->verify_new_notnull = true;
        }

        ObjectAddressSubSet(address, RelationRelationId,
                            RelationGetRelid(rel), attnum);
    }
    else
        address = InvalidObjectAddress;

    /* the post-alter hook fires whether or not we changed anything */
    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    table_close(attr_rel, RowExclusiveLock);

    return address;
}

/*
 * ALTER TABLE ALTER COLUMN CHECK NOT NULL
 *
 * This doesn't exist in the grammar, but we generate AT_CheckNotNull
 * commands against the partitions of a partitioned table if the user
 * writes ALTER TABLE ONLY ... SET NOT NULL on the partitioned table,
 * or tries to create a primary key on it (which internally creates
 * AT_SetNotNull on the partitioned table).  Such a command doesn't
 * allow us to actually modify any partition, but we want to let it
 * go through if the partitions are already properly marked.
 *
 * In future, this might need to adjust the child table's state, likely
 * by incrementing an inheritance count for the attnotnull constraint.
 * For now we need only check for the presence of the flag.
 */
static void
ATExecCheckNotNull(AlteredTableInfo *tab, Relation rel,
                   const char *colName, LOCKMODE lockmode)
{
    HeapTuple   tuple;

    tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);

    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    if (!((Form_pg_attribute) GETSTRUCT(tuple))->attnotnull)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("constraint must be added to child tables too"),
                 errdetail("Column \"%s\" of relation \"%s\" is not already NOT NULL.",
                           colName, RelationGetRelationName(rel)),
                 errhint("Do not specify the ONLY keyword.")));

    ReleaseSysCache(tuple);
}

/*
 * NotNullImpliedByRelConstraints
 *		Does rel's existing constraints imply NOT NULL for the given attribute?
 */
static bool
NotNullImpliedByRelConstraints(Relation rel, Form_pg_attribute attr)
{
    NullTest   *nnulltest = makeNode(NullTest);

    /* Build "attr IS NOT NULL" over varno 1, i.e. the relation itself */
    nnulltest->arg = (Expr *) makeVar(1,
                                      attr->attnum,
                                      attr->atttypid,
                                      attr->atttypmod,
                                      attr->attcollation,
                                      0);
    nnulltest->nulltesttype = IS_NOT_NULL;

    /*
     * argisrow = false is correct even for a composite column, because
     * attnotnull does not represent a SQL-spec IS NOT NULL test in such a
     * case, just IS DISTINCT FROM NULL.
     */
    nnulltest->argisrow = false;
    nnulltest->location = -1;

    if (ConstraintImpliedByRelConstraint(rel, list_make1(nnulltest), NIL))
    {
        ereport(DEBUG1,
                (errmsg_internal("existing constraints on column \"%s.%s\" are sufficient to prove that it does not contain nulls",
                                 RelationGetRelationName(rel), NameStr(attr->attname))));
        return true;
    }

    return false;
}

/*
 * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecColumnDefault(Relation rel, const char *colName,
                    Node *newDefault, LOCKMODE lockmode)
{
    TupleDesc   tupdesc = RelationGetDescr(rel);
    AttrNumber  attnum;
    ObjectAddress address;

    /*
     * get the number of the attribute
     */
    attnum = get_attnum(RelationGetRelid(rel), colName);
    if (attnum == InvalidAttrNumber)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));

    /* Prevent them from altering a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /* DROP DEFAULT on an identity column gets a hint to use DROP IDENTITY */
    if (TupleDescAttr(tupdesc, attnum - 1)->attidentity)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is an identity column",
                        colName, RelationGetRelationName(rel)),
                 newDefault ? 0 : errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP IDENTITY instead.")));

    /* similarly, DROP DEFAULT on a stored generated column hints at DROP EXPRESSION */
    if (TupleDescAttr(tupdesc, attnum - 1)->attgenerated)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("column \"%s\" of relation \"%s\" is a generated column",
                        colName, RelationGetRelationName(rel)),
                 newDefault || TupleDescAttr(tupdesc, attnum - 1)->attgenerated != ATTRIBUTE_GENERATED_STORED ? 0 :
                 errhint("Use ALTER TABLE ... ALTER COLUMN ... DROP EXPRESSION instead.")));

    /*
     * Remove any old default for the column.  We use RESTRICT here for
     * safety, but at present we do not expect anything to depend on the
     * default.
     *
     * We treat removing the existing default as an internal operation when it
     * is preparatory to adding a new default, but as a user-initiated
     * operation when the user asked for a drop.
     */
    RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
                      newDefault != NULL);

    if (newDefault)
    {
        /* SET DEFAULT */
        RawColumnDefault *rawEnt;

        rawEnt = (RawColumnDefault *) palloc(sizeof(RawColumnDefault));
        rawEnt->attnum = attnum;
        rawEnt->raw_default = newDefault;
        rawEnt->missingMode = false;
        rawEnt->generated = '\0';

        /*
         * This function is intended for CREATE TABLE, so it processes a
         * _list_ of defaults, but we just do one.
         */
        AddRelationNewConstraints(rel, list_make1(rawEnt), NIL,
                                  false, true, false, NULL);
    }

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);
    return address;
}

/*
 * Add a pre-cooked default expression.
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecCookedColumnDefault(Relation rel, AttrNumber attnum,
                          Node *newDefault)
{
    ObjectAddress address;

    /* We assume no checking is required */

    /*
     * Remove any old default for the column.  We use RESTRICT here for
     * safety, but at present we do not expect anything to depend on the
     * default.  (In ordinary cases, there could not be a default in place
     * anyway, but it's possible when combining LIKE with inheritance.)
     */
    RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, false,
                      true);

    (void) StoreAttrDefault(rel, attnum, newDefault, true, false);

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);
    return address;
}

/*
 * ALTER TABLE ALTER COLUMN ADD IDENTITY
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecAddIdentity(Relation rel, const char *colName,
				  Node *def, LOCKMODE lockmode)
{
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	ObjectAddress address;
	ColumnDef  *cdef = castNode(ColumnDef, def);

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	/* Can't alter a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * Creating a column as identity implies NOT NULL, so adding the identity
	 * to an existing column that is not NOT NULL would create a state that
	 * cannot be reproduced without contortions.
	 */
	if (!attTup->attnotnull)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" must be declared NOT NULL before identity can be added",
						colName, RelationGetRelationName(rel))));

	if (attTup->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" is already an identity column",
						colName, RelationGetRelationName(rel))));

	if (attTup->atthasdef)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" already has a default value",
						colName, RelationGetRelationName(rel))));

	/* Record the identity kind (ALWAYS/BY DEFAULT) in pg_attribute. */
	attTup->attidentity = cdef->identity;
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attTup->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET { GENERATED or sequence options }
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecSetIdentity(Relation rel, const char *colName, Node *def, LOCKMODE lockmode)
{
	ListCell   *option;
	DefElem    *generatedEl = NULL;
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	ObjectAddress address;

	/* Scan the option list; only "generated" is recognized here. */
	foreach(option, castNode(List, def))
	{
		DefElem    *defel = lfirst_node(DefElem, option);

		if (strcmp(defel->defname, "generated") == 0)
		{
			if (generatedEl)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("conflicting or redundant options")));
			generatedEl = defel;
		}
		else
			elog(ERROR, "option \"%s\" not recognized",
				 defel->defname);
	}

	/*
	 * Even if there is nothing to change here, we run all the checks.  There
	 * will be a subsequent ALTER SEQUENCE that relies on everything being
	 * there.
	 */

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	if (!attTup->attidentity)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
						colName, RelationGetRelationName(rel))));

	if (generatedEl)
	{
		/* The DefElem's int value is the new attidentity character. */
		attTup->attidentity = defGetInt32(generatedEl);
		CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel),
								  attTup->attnum);
		ObjectAddressSubSet(address, RelationRelationId,
							RelationGetRelid(rel), attnum);
	}
	else
		address = InvalidObjectAddress;

	heap_freetuple(tuple);
	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN DROP IDENTITY
 *
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecDropIdentity(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	ObjectAddress address;
	Oid			seqid;
	ObjectAddress seqaddress;

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Not an identity column: error, or NOTICE-and-skip with IF EXISTS. */
	if (!attTup->attidentity)
	{
		if (!missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("column \"%s\" of relation \"%s\" is not an identity column",
							colName, RelationGetRelationName(rel))));
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" is not an identity column, skipping",
							colName, RelationGetRelationName(rel))));
			heap_freetuple(tuple);
			table_close(attrelation, RowExclusiveLock);
			return InvalidObjectAddress;
		}
	}

	attTup->attidentity = '\0';
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attTup->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	/*
	 * drop the internal sequence: first sever its INTERNAL dependency on the
	 * column (else performDeletion would refuse), then delete it.  The CCI
	 * makes the dependency removal visible to the deletion.
	 */
	seqid = getIdentitySequence(RelationGetRelid(rel), attnum, false);
	deleteDependencyRecordsForClass(RelationRelationId, seqid,
									RelationRelationId, DEPENDENCY_INTERNAL);
	CommandCounterIncrement();
	seqaddress.classId = RelationRelationId;
	seqaddress.objectId = seqid;
	seqaddress.objectSubId = 0;
	performDeletion(&seqaddress, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN DROP EXPRESSION
 */
static void
ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recursing, LOCKMODE lockmode)
{
	/*
	 * Reject ONLY if there are child tables.  We could implement this, but it
	 * is a bit complicated.  GENERATED clauses must be attached to the column
	 * definition and cannot be added later like DEFAULT, so if a child table
	 * has a generation expression that the parent does not have, the child
	 * column will necessarily be an attlocal column.  So to implement ONLY
	 * here, we'd need extra code to update attislocal of the direct child
	 * tables, somewhat similar to how DROP COLUMN does it, so that the
	 * resulting state can be properly dumped and restored.
	 */
	if (!recurse &&
		find_inheritance_children(RelationGetRelid(rel), lockmode))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));

	/*
	 * Cannot drop generation expression from inherited columns.
	 */
	if (!recursing)
	{
		HeapTuple	tuple;
		Form_pg_attribute attTup;

		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), cmd->name);
		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							cmd->name, RelationGetRelationName(rel))));

		attTup = (Form_pg_attribute) GETSTRUCT(tuple);

		if (attTup->attinhcount > 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop generation expression from inherited column")));
	}
}

/*
 * Return the address of the affected column.
 */
static ObjectAddress
ATExecDropExpression(Relation rel, const char *colName, bool missing_ok, LOCKMODE lockmode)
{
	HeapTuple	tuple;
	Form_pg_attribute attTup;
	AttrNumber	attnum;
	Relation	attrelation;
	Oid			attrdefoid;
	ObjectAddress address;

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	attTup = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = attTup->attnum;

	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Only stored generated columns qualify; honor IF EXISTS otherwise. */
	if (attTup->attgenerated != ATTRIBUTE_GENERATED_STORED)
	{
		if (!missing_ok)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("column \"%s\" of relation \"%s\" is not a stored generated column",
							colName, RelationGetRelationName(rel))));
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" is not a stored generated column, skipping",
							colName, RelationGetRelationName(rel))));
			heap_freetuple(tuple);
			table_close(attrelation, RowExclusiveLock);
			return InvalidObjectAddress;
		}
	}

	/*
	 * Mark the column as no longer generated.  (The atthasdef flag needs to
	 * get cleared too, but RemoveAttrDefault will handle that.)
	 */
	attTup->attgenerated = '\0';
	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	/*
	 * Drop the dependency records of the GENERATED expression, in particular
	 * its INTERNAL dependency on the column, which would otherwise cause
	 * dependency.c to refuse to perform the deletion.
	 */
	attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);
	if (!OidIsValid(attrdefoid))
		elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
			 RelationGetRelid(rel), attnum);
	(void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);

	/* Make above changes visible */
	CommandCounterIncrement();

	/*
	 * Get rid of the GENERATED expression itself.  We use RESTRICT here for
	 * safety, but at present we do not expect anything to depend on the
	 * default.
	 */
	RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT,
					  false, false);

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET STATISTICS
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newValue, LOCKMODE lockmode)
{
	int			newtarget;
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	ObjectAddress address;

	/*
	 * We allow referencing columns by numbers only for indexes, since table
	 * column numbers could contain gaps if columns are later dropped.
	 */
	if (rel->rd_rel->relkind != RELKIND_INDEX &&
		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX &&
		!colName)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot refer to non-index column by number")));

	Assert(IsA(newValue, Integer));
	newtarget = intVal(newValue);

	/*
	 * Limit target to a sane range: -1 means "use default"; values above
	 * 10000 are silently clamped with a WARNING.
	 */
	if (newtarget < -1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("statistics target %d is too low",
						newtarget)));
	}
	else if (newtarget > 10000)
	{
		newtarget = 10000;
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("lowering statistics target to %d",
						newtarget)));
	}

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	/* Look up the target column by name, or by number for indexes. */
	if (colName)
	{
		tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							colName, RelationGetRelationName(rel))));
	}
	else
	{
		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(rel), colNum);

		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column number %d of relation \"%s\" does not exist",
							colNum, RelationGetRelationName(rel))));
	}

	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * For indexes, only expression columns (indkey entry 0) can carry their
	 * own statistics target, and INCLUDE columns never can.
	 */
	if (rel->rd_rel->relkind == RELKIND_INDEX ||
		rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
	{
		if (attnum > rel->rd_index->indnkeyatts)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot alter statistics on included column \"%s\" of index \"%s\"",
							NameStr(attrtuple->attname), RelationGetRelationName(rel))));
		else if (rel->rd_index->indkey.values[attnum - 1] != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot alter statistics on non-expression column \"%s\" of index \"%s\"",
							NameStr(attrtuple->attname), RelationGetRelationName(rel)),
					 errhint("Alter statistics on table column instead.")));
	}

	attrtuple->attstattarget = newtarget;

	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	heap_freetuple(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * ALTER TABLE ALTER COLUMN SET/RESET ( options )
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetOptions(Relation rel, const char *colName, Node *options,
				 bool isReset, LOCKMODE lockmode)
{
	Relation	attrelation;
	HeapTuple	tuple,
				newtuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	Datum		datum,
				newOptions;
	bool		isnull;
	ObjectAddress address;
	Datum		repl_val[Natts_pg_attribute];
	bool		repl_null[Natts_pg_attribute];
	bool		repl_repl[Natts_pg_attribute];

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);

	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/* Generate new proposed attoptions (text array) */
	datum = SysCacheGetAttr(ATTNAME, tuple, Anum_pg_attribute_attoptions,
							&isnull);
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 castNode(List, options), NULL, NULL,
									 false, isReset);
	/* Validate new options */
	(void) attribute_reloptions(newOptions, true);

	/* Build new tuple. */
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));
	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_attribute_attoptions - 1] = newOptions;
	else
		repl_null[Anum_pg_attribute_attoptions - 1] = true;
	repl_repl[Anum_pg_attribute_attoptions - 1] = true;
	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrelation),
								 repl_val, repl_null, repl_repl);

	/* Update system catalog. */
	CatalogTupleUpdate(attrelation, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);

	heap_freetuple(newtuple);

	ReleaseSysCache(tuple);

	table_close(attrelation, RowExclusiveLock);

	return address;
}

/*
 * Helper function for ATExecSetStorage and ATExecSetCompression
 *
 * Set the attstorage and/or attcompression fields for index columns
 * associated with the specified table column.
 */
static void
SetIndexStorageProperties(Relation rel, Relation attrelation,
						  AttrNumber attnum,
						  bool setstorage, char newstorage,
						  bool setcompression, char newcompression,
						  LOCKMODE lockmode)
{
	ListCell   *lc;

	foreach(lc, RelationGetIndexList(rel))
	{
		Oid			indexoid = lfirst_oid(lc);
		Relation	indrel;
		AttrNumber	indattnum = 0;
		HeapTuple	tuple;

		indrel = index_open(indexoid, lockmode);

		/* Find which index column (if any) is the given table column. */
		for (int i = 0; i < indrel->rd_index->indnatts; i++)
		{
			if (indrel->rd_index->indkey.values[i] == attnum)
			{
				indattnum = i + 1;
				break;
			}
		}

		/* Column not used by this index; skip it. */
		if (indattnum == 0)
		{
			index_close(indrel, lockmode);
			continue;
		}

		tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum);

		if (HeapTupleIsValid(tuple))
		{
			Form_pg_attribute attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

			if (setstorage)
				attrtuple->attstorage = newstorage;

			if (setcompression)
				attrtuple->attcompression = newcompression;

			CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

			InvokeObjectPostAlterHook(RelationRelationId,
									  RelationGetRelid(rel),
									  attrtuple->attnum);

			heap_freetuple(tuple);
		}

		index_close(indrel, lockmode);
	}
}

/*
 * ALTER TABLE ALTER COLUMN SET STORAGE
 *
 * Return value is the address of the modified column
 */
static ObjectAddress
ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE lockmode)
{
	char	   *storagemode;
	char		newstorage;
	Relation	attrelation;
	HeapTuple	tuple;
	Form_pg_attribute attrtuple;
	AttrNumber	attnum;
	ObjectAddress address;

	Assert(IsA(newValue, String));
	storagemode = strVal(newValue);

	/* Map the user-visible keyword onto a TYPSTORAGE code. */
	if (pg_strcasecmp(storagemode, "plain") == 0)
		newstorage = TYPSTORAGE_PLAIN;
	else if (pg_strcasecmp(storagemode, "external") == 0)
		newstorage = TYPSTORAGE_EXTERNAL;
	else if (pg_strcasecmp(storagemode, "extended") == 0)
		newstorage = TYPSTORAGE_EXTENDED;
	else if (pg_strcasecmp(storagemode, "main") == 0)
		newstorage = TYPSTORAGE_MAIN;
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid storage type \"%s\"",
						storagemode)));
		newstorage = 0;			/* keep compiler quiet */
	}

	attrelation = table_open(AttributeRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);

	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));
	attrtuple = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = attrtuple->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"",
						colName)));

	/*
	 * safety check: do not allow toasted storage modes unless column datatype
	 * is TOAST-aware.
	 */
	if (newstorage == TYPSTORAGE_PLAIN || TypeIsToastable(attrtuple->atttypid))
		attrtuple->attstorage = newstorage;
	else
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("column data type %s can only have storage PLAIN",
						format_type_be(attrtuple->atttypid))));

	CatalogTupleUpdate(attrelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  attrtuple->attnum);

	heap_freetuple(tuple);

	/*
	 * Apply the change to indexes as well (only for simple index columns,
	 * matching behavior of index.c ConstructTupleDescriptor()).
	 */
	SetIndexStorageProperties(rel, attrelation, attnum,
							  true, newstorage,
							  false, 0,
							  lockmode);

	table_close(attrelation, RowExclusiveLock);

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}


/*
 * ALTER TABLE DROP COLUMN
 *
 * DROP COLUMN cannot use the normal ALTER TABLE recursion mechanism,
 * because we have to decide at runtime whether to recurse or not depending
 * on whether attinhcount goes to zero or not.  (We can't check this in a
 * static pre-pass because it won't handle multiple inheritance situations
 * correctly.)
 */
static void
ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
				 AlterTableCmd *cmd, LOCKMODE lockmode,
				 AlterTableUtilityContext *context)
{
	if (rel->rd_rel->reloftype && !recursing)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot drop column from typed table")));

	if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
		ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);

	if (recurse)
		cmd->subtype = AT_DropColumnRecurse;
}

/*
 * Drops column 'colName' from relation 'rel' and returns the address of the
 * dropped column.  The column is also dropped (or marked as no longer
 * inherited from relation) from the relation's inheritance children, if any.
 *
 * In the recursive invocations for inheritance child relations, instead of
 * dropping the column directly (if to be dropped at all), its object address
 * is added to 'addrs', which must be non-NULL in such invocations.  All
 * columns are dropped at the same time after all the children have been
 * checked recursively.
 */
static ObjectAddress
ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
				 DropBehavior behavior,
				 bool recurse, bool recursing,
				 bool missing_ok, LOCKMODE lockmode,
				 ObjectAddresses *addrs)
{
	HeapTuple	tuple;
	Form_pg_attribute targetatt;
	AttrNumber	attnum;
	List	   *children;
	ObjectAddress object;
	bool		is_expr;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_DropColumn, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* Initialize addrs on the first invocation */
	Assert(!recursing || addrs != NULL);
	if (!recursing)
		addrs = new_object_addresses();

	/*
	 * get the number of the attribute
	 */
	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
	{
		if (!missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_COLUMN),
					 errmsg("column \"%s\" of relation \"%s\" does not exist",
							colName, RelationGetRelationName(rel))));
		}
		else
		{
			ereport(NOTICE,
					(errmsg("column \"%s\" of relation \"%s\" does not exist, skipping",
							colName, RelationGetRelationName(rel))));
			return InvalidObjectAddress;
		}
	}
	targetatt = (Form_pg_attribute) GETSTRUCT(tuple);

	attnum = targetatt->attnum;

	/* Can't drop a system attribute */
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot drop system column \"%s\"",
						colName)));

	/*
	 * Don't drop inherited columns, unless recursing (presumably from a drop
	 * of the parent column)
	 */
	if (targetatt->attinhcount > 0 && !recursing)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot drop inherited column \"%s\"",
						colName)));

	/*
	 * Don't drop columns used in the partition key, either.  (If we let this
	 * go through, the key column's dependencies would cause a cascaded drop
	 * of the whole table, which is surely not what the user expected.)
	 */
	if (has_partition_attrs(rel,
							bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
							&is_expr))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot drop column \"%s\" because it is part of the partition key of relation \"%s\"",
						colName, RelationGetRelationName(rel))));

	ReleaseSysCache(tuple);

	/*
	 * Propagate to children as appropriate.  Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	children =
		find_inheritance_children(RelationGetRelid(rel), lockmode);

	if (children)
	{
		Relation	attr_rel;
		ListCell   *child;

		/*
		 * In case of a partitioned table, the column must be dropped from the
		 * partitions as well.
		 */
		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop column from only the partitioned table when partitions exist"),
					 errhint("Do not specify the ONLY keyword.")));

		attr_rel = table_open(AttributeRelationId, RowExclusiveLock);
		foreach(child, children)
		{
			Oid			childrelid = lfirst_oid(child);
			Relation	childrel;
			Form_pg_attribute childatt;

			/* find_inheritance_children already got lock */
			childrel = table_open(childrelid, NoLock);
			CheckTableNotInUse(childrel, "ALTER TABLE");

			tuple = SearchSysCacheCopyAttName(childrelid, colName);
			if (!HeapTupleIsValid(tuple))	/* shouldn't happen */
				elog(ERROR, "cache lookup failed for attribute \"%s\" of relation %u",
					 colName, childrelid);
			childatt = (Form_pg_attribute) GETSTRUCT(tuple);

			if (childatt->attinhcount <= 0) /* shouldn't happen */
				elog(ERROR, "relation %u has non-inherited attribute \"%s\"",
					 childrelid, colName);

			if (recurse)
			{
				/*
				 * If the child column has other definition sources, just
				 * decrement its inheritance count; if not, recurse to delete
				 * it.
				 */
				if (childatt->attinhcount == 1 && !childatt->attislocal)
				{
					/* Time to delete this child column, too */
					ATExecDropColumn(wqueue, childrel, colName,
									 behavior, true, true,
									 false, lockmode, addrs);
				}
				else
				{
					/* Child column must survive my deletion */
					childatt->attinhcount--;

					CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

					/* Make update visible */
					CommandCounterIncrement();
				}
			}
			else
			{
				/*
				 * If we were told to drop ONLY in this table (no recursion),
				 * we need to mark the inheritors' attributes as locally
				 * defined rather than inherited.
				 */
				childatt->attinhcount--;
				childatt->attislocal = true;

				CatalogTupleUpdate(attr_rel, &tuple->t_self, tuple);

				/* Make update visible */
				CommandCounterIncrement();
			}

			heap_freetuple(tuple);

			table_close(childrel, NoLock);
		}
		table_close(attr_rel, RowExclusiveLock);
	}

	/* Add object to delete */
	object.classId = RelationRelationId;
	object.objectId = RelationGetRelid(rel);
	object.objectSubId = attnum;
	add_exact_object_address(&object, addrs);

	if (!recursing)
	{
		/* Recursion has ended, drop everything that was collected */
		performMultipleDeletions(addrs, behavior, 0);
		free_object_addresses(addrs);
	}

	return object;
}

/*
 * ALTER TABLE ADD INDEX
 *
 * There is no such command in the grammar, but parse_utilcmd.c converts
 * UNIQUE and PRIMARY KEY constraints into AT_AddIndex subcommands.  This lets
 * us schedule creation of the index at the appropriate time during ALTER.
 *
 * Return value is the address of the new index.
+ */ +static ObjectAddress +ATExecAddIndex(AlteredTableInfo *tab, Relation rel, + IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode) +{ + bool check_rights; + bool skip_build; + bool quiet; + ObjectAddress address; + + Assert(IsA(stmt, IndexStmt)); + Assert(!stmt->concurrent); + + /* The IndexStmt has already been through transformIndexStmt */ + Assert(stmt->transformed); + + /* suppress schema rights check when rebuilding existing index */ + check_rights = !is_rebuild; + /* skip index build if phase 3 will do it or we're reusing an old one */ + skip_build = tab->rewrite > 0 || OidIsValid(stmt->oldNode); + /* suppress notices when rebuilding existing index */ + quiet = is_rebuild; + + address = DefineIndex(RelationGetRelid(rel), + stmt, + InvalidOid, /* no predefined OID */ + InvalidOid, /* no parent index */ + InvalidOid, /* no parent constraint */ + true, /* is_alter_table */ + check_rights, + false, /* check_not_in_use - we did it already */ + skip_build, + quiet); + + /* + * If TryReuseIndex() stashed a relfilenode for us, we used it for the new + * index instead of building from scratch. Restore associated fields. + * This may store InvalidSubTransactionId in both fields, in which case + * relcache.c will assume it can rebuild the relcache entry. Hence, do + * this after the CCI that made catalog rows visible to any rebuild. The + * DROP of the old edition of this index will have scheduled the storage + * for deletion at commit, so cancel that pending deletion. 
	 */
	if (OidIsValid(stmt->oldNode))
	{
		Relation	irel = index_open(address.objectId, NoLock);

		irel->rd_createSubid = stmt->oldCreateSubid;
		irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
		RelationPreserveStorage(irel->rd_node, true);
		index_close(irel, NoLock);
	}

	return address;
}

/*
 * ALTER TABLE ADD STATISTICS
 *
 * This is no such command in the grammar, but we use this internally to add
 * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
 * column type change.
 *
 * NOTE(review): is_rebuild and lockmode are currently unreferenced in this
 * body; they appear to be kept for signature symmetry with the other ATExec*
 * routines — confirm before removing.
 */
static ObjectAddress
ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
					CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
{
	ObjectAddress address;

	Assert(IsA(stmt, CreateStatsStmt));

	/* The CreateStatsStmt has already been through transformStatsStmt */
	Assert(stmt->transformed);

	/* All the real work happens in the statistics-object creation path. */
	address = CreateStatistics(stmt);

	return address;
}

/*
 * ALTER TABLE ADD CONSTRAINT USING INDEX
 *
 * Promote an existing unique index into a UNIQUE or PRIMARY KEY constraint.
 *
 * Returns the address of the new constraint.
 */
static ObjectAddress
ATExecAddIndexConstraint(AlteredTableInfo *tab, Relation rel,
						 IndexStmt *stmt, LOCKMODE lockmode)
{
	Oid			index_oid = stmt->indexOid;
	Relation	indexRel;
	char	   *indexName;
	IndexInfo  *indexInfo;
	char	   *constraintName;
	char		constraintType;
	ObjectAddress address;
	bits16		flags;

	Assert(IsA(stmt, IndexStmt));
	Assert(OidIsValid(index_oid));
	Assert(stmt->isconstraint);

	/*
	 * Doing this on partitioned tables is not a simple feature to implement,
	 * so let's punt for now.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX is not supported on partitioned tables")));

	indexRel = index_open(index_oid, AccessShareLock);

	indexName = pstrdup(RelationGetRelationName(indexRel));

	indexInfo = BuildIndexInfo(indexRel);

	/* this should have been checked at parse time */
	if (!indexInfo->ii_Unique)
		elog(ERROR, "index \"%s\" is not unique", indexName);

	/*
	 * Determine name to assign to constraint. We require a constraint to
	 * have the same name as the underlying index; therefore, use the index's
	 * existing name as the default constraint name, and if the user
	 * explicitly gives some other name for the constraint, rename the index
	 * to match.
	 */
	constraintName = stmt->idxname;
	if (constraintName == NULL)
		constraintName = indexName;
	else if (strcmp(constraintName, indexName) != 0)
	{
		ereport(NOTICE,
				(errmsg("ALTER TABLE / ADD CONSTRAINT USING INDEX will rename index \"%s\" to \"%s\"",
						indexName, constraintName)));
		RenameRelationInternal(index_oid, constraintName, false, true);
	}

	/* Extra checks needed if making primary key */
	if (stmt->primary)
		index_check_primary_key(rel, indexInfo, true, stmt);

	/* Note we currently don't support EXCLUSION constraints here */
	if (stmt->primary)
		constraintType = CONSTRAINT_PRIMARY;
	else
		constraintType = CONSTRAINT_UNIQUE;

	/* Create the catalog entries for the constraint */
	flags = INDEX_CONSTR_CREATE_UPDATE_INDEX |
		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS |
		(stmt->initdeferred ? INDEX_CONSTR_CREATE_INIT_DEFERRED : 0) |
		(stmt->deferrable ? INDEX_CONSTR_CREATE_DEFERRABLE : 0) |
		(stmt->primary ? INDEX_CONSTR_CREATE_MARK_AS_PRIMARY : 0);

	address = index_constraint_create(rel,
									  index_oid,
									  InvalidOid,
									  indexInfo,
									  constraintName,
									  constraintType,
									  flags,
									  allowSystemTableMods,
									  false);	/* is_internal */

	index_close(indexRel, NoLock);

	return address;
}

/*
 * ALTER TABLE ADD CONSTRAINT
 *
 * Dispatch on constraint type to the appropriate worker routine.
 *
 * Return value is the address of the new constraint; if no constraint was
 * added, InvalidObjectAddress is returned.
 */
static ObjectAddress
ATExecAddConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
					Constraint *newConstraint, bool recurse, bool is_readd,
					LOCKMODE lockmode)
{
	ObjectAddress address = InvalidObjectAddress;

	Assert(IsA(newConstraint, Constraint));

	/*
	 * Currently, we only expect to see CONSTR_CHECK and CONSTR_FOREIGN nodes
	 * arriving here (see the preprocessing done in parse_utilcmd.c). Use a
	 * switch anyway to make it easier to add more code later.
	 */
	switch (newConstraint->contype)
	{
		case CONSTR_CHECK:
			address =
				ATAddCheckConstraint(wqueue, tab, rel,
									 newConstraint, recurse, false, is_readd,
									 lockmode);
			break;

		case CONSTR_FOREIGN:

			/*
			 * Assign or validate constraint name
			 */
			if (newConstraint->conname)
			{
				if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
										 RelationGetRelid(rel),
										 newConstraint->conname))
					ereport(ERROR,
							(errcode(ERRCODE_DUPLICATE_OBJECT),
							 errmsg("constraint \"%s\" for relation \"%s\" already exists",
									newConstraint->conname,
									RelationGetRelationName(rel))));
			}
			else
				newConstraint->conname =
					ChooseConstraintName(RelationGetRelationName(rel),
										 ChooseForeignKeyConstraintNameAddition(newConstraint->fk_attrs),
										 "fkey",
										 RelationGetNamespace(rel),
										 NIL);

			address = ATAddForeignKeyConstraint(wqueue, tab, rel,
												newConstraint,
												recurse, false,
												lockmode);
			break;

		default:
			elog(ERROR, "unrecognized constraint type: %d",
				 (int) newConstraint->contype);
	}

	return address;
}

/*
 * Generate the column-name portion of the
 * constraint name for a new foreign
 * key given the list of column names that reference the referenced
 * table. This will be passed to ChooseConstraintName along with the parent
 * table name and the "fkey" suffix.
 *
 * We know that less than NAMEDATALEN characters will actually be used, so we
 * can truncate the result once we've generated that many.
 *
 * XXX see also ChooseExtendedStatisticNameAddition and
 * ChooseIndexNameAddition.
 */
static char *
ChooseForeignKeyConstraintNameAddition(List *colnames)
{
	char		buf[NAMEDATALEN * 2];
	int			buflen = 0;
	ListCell   *lc;

	buf[0] = '\0';
	foreach(lc, colnames)
	{
		const char *name = strVal(lfirst(lc));

		if (buflen > 0)
			buf[buflen++] = '_';	/* insert _ between names */

		/*
		 * At this point we have buflen <= NAMEDATALEN. name should be less
		 * than NAMEDATALEN already, but use strlcpy for paranoia.
		 */
		strlcpy(buf + buflen, name, NAMEDATALEN);
		buflen += strlen(buf + buflen);
		if (buflen >= NAMEDATALEN)
			break;
	}
	return pstrdup(buf);
}

/*
 * Add a check constraint to a single table and its children. Returns the
 * address of the constraint added to the parent relation, if one gets added,
 * or InvalidObjectAddress otherwise.
 *
 * Subroutine for ATExecAddConstraint.
 *
 * We must recurse to child tables during execution, rather than using
 * ALTER TABLE's normal prep-time recursion. The reason is that all the
 * constraints *must* be given the same name, else they won't be seen as
 * related later. If the user didn't explicitly specify a name, then
 * AddRelationNewConstraints would normally assign different names to the
 * child constraints. To fix that, we must capture the name assigned at
 * the parent table and pass that down.
 */
static ObjectAddress
ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
					 Constraint *constr, bool recurse, bool recursing,
					 bool is_readd, LOCKMODE lockmode)
{
	List	   *newcons;
	ListCell   *lcon;
	List	   *children;
	ListCell   *child;
	ObjectAddress address = InvalidObjectAddress;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_AddConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/*
	 * Call AddRelationNewConstraints to do the work, making sure it works on
	 * a copy of the Constraint so transformExpr can't modify the original. It
	 * returns a list of cooked constraints.
	 *
	 * If the constraint ends up getting merged with a pre-existing one, it's
	 * omitted from the returned list, which is what we want: we do not need
	 * to do any validation work. That can only happen at child tables,
	 * though, since we disallow merging at the top level.
	 *
	 * NOTE(review): "recursing | is_readd" is a bitwise OR of two bools,
	 * which acts as a logical OR here; merging is allowed either when
	 * recursing to a child or when re-adding during a rewrite.
	 */
	newcons = AddRelationNewConstraints(rel, NIL,
										list_make1(copyObject(constr)),
										recursing | is_readd,	/* allow_merge */
										!recursing, /* is_local */
										is_readd,	/* is_internal */
										NULL);	/* queryString not available
												 * here */

	/* we don't expect more than one constraint here */
	Assert(list_length(newcons) <= 1);

	/* Add each to-be-validated constraint to Phase 3's queue */
	foreach(lcon, newcons)
	{
		CookedConstraint *ccon = (CookedConstraint *) lfirst(lcon);

		if (!ccon->skip_validation)
		{
			NewConstraint *newcon;

			newcon = (NewConstraint *) palloc0(sizeof(NewConstraint));
			newcon->name = ccon->name;
			newcon->contype = ccon->contype;
			newcon->qual = ccon->expr;

			tab->constraints = lappend(tab->constraints, newcon);
		}

		/* Save the actually assigned name if it was defaulted */
		if (constr->conname == NULL)
			constr->conname = ccon->name;

		ObjectAddressSet(address, ConstraintRelationId, ccon->conoid);
	}

	/* At this point we must have a locked-down name to use */
	Assert(constr->conname != NULL);

	/* Advance command counter in case same table is visited multiple times */
	CommandCounterIncrement();

	/*
	 * If the constraint got merged with an existing constraint, we're done.
	 * We mustn't recurse to child tables in this case, because they've
	 * already got the constraint, and visiting them again would lead to an
	 * incorrect value for coninhcount.
	 */
	if (newcons == NIL)
		return address;

	/*
	 * If adding a NO INHERIT constraint, no need to find our children.
	 */
	if (constr->is_no_inherit)
		return address;

	/*
	 * Propagate to children as appropriate. Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	children =
		find_inheritance_children(RelationGetRelid(rel), lockmode);

	/*
	 * Check if ONLY was specified with ALTER TABLE. If so, allow the
	 * constraint creation only if there are no children currently. Error out
	 * otherwise.
	 */
	if (!recurse && children != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("constraint must be added to child tables too")));

	foreach(child, children)
	{
		Oid			childrelid = lfirst_oid(child);
		Relation	childrel;
		AlteredTableInfo *childtab;

		/* find_inheritance_children already got lock */
		childrel = table_open(childrelid, NoLock);
		CheckTableNotInUse(childrel, "ALTER TABLE");

		/* Find or create work queue entry for this table */
		childtab = ATGetQueueEntry(wqueue, childrel);

		/* Recurse to child */
		ATAddCheckConstraint(wqueue, childtab, childrel,
							 constr, recurse, true, is_readd, lockmode);

		table_close(childrel, NoLock);
	}

	return address;
}

/*
 * Add a foreign-key constraint to a single table; return the new constraint's
 * address.
 *
 * Subroutine for ATExecAddConstraint. Must already hold exclusive
 * lock on the rel, and have done appropriate validity checks for it.
 * We do permissions checks here, however.
 *
 * When the referenced or referencing tables (or both) are partitioned,
 * multiple pg_constraint rows are required -- one for each partitioned table
 * and each partition on each side (fortunately, not one for every combination
 * thereof). We also need action triggers on each leaf partition on the
 * referenced side, and check triggers on each leaf partition on the
 * referencing side.
 */
static ObjectAddress
ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
						  Constraint *fkconstraint,
						  bool recurse, bool recursing, LOCKMODE lockmode)
{
	Relation	pkrel;
	int16		pkattnum[INDEX_MAX_KEYS];
	int16		fkattnum[INDEX_MAX_KEYS];
	Oid			pktypoid[INDEX_MAX_KEYS];
	Oid			fktypoid[INDEX_MAX_KEYS];
	Oid			opclasses[INDEX_MAX_KEYS];
	Oid			pfeqoperators[INDEX_MAX_KEYS];
	Oid			ppeqoperators[INDEX_MAX_KEYS];
	Oid			ffeqoperators[INDEX_MAX_KEYS];
	int16		fkdelsetcols[INDEX_MAX_KEYS];
	int			i;
	int			numfks,
				numpks,
				numfkdelsetcols;
	Oid			indexOid;
	bool		old_check_ok;
	ObjectAddress address;
	ListCell   *old_pfeqop_item = list_head(fkconstraint->old_conpfeqop);

	/*
	 * Grab ShareRowExclusiveLock on the pk table, so that someone doesn't
	 * delete rows out from under us.
	 */
	if (OidIsValid(fkconstraint->old_pktable_oid))
		pkrel = table_open(fkconstraint->old_pktable_oid, ShareRowExclusiveLock);
	else
		pkrel = table_openrv(fkconstraint->pktable, ShareRowExclusiveLock);

	/*
	 * Validity checks (permission checks wait till we have the column
	 * numbers)
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		if (!recurse)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot use ONLY for foreign key on partitioned table \"%s\" referencing relation \"%s\"",
							RelationGetRelationName(rel),
							RelationGetRelationName(pkrel))));
		if (fkconstraint->skip_validation && !fkconstraint->initially_valid)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot add NOT VALID foreign key on partitioned table \"%s\" referencing relation \"%s\"",
							RelationGetRelationName(rel),
							RelationGetRelationName(pkrel)),
					 errdetail("This feature is not yet supported on partitioned tables.")));
	}

	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("referenced relation \"%s\" is not a table",
						RelationGetRelationName(pkrel))));

	if (!allowSystemTableMods && IsSystemRelation(pkrel))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						RelationGetRelationName(pkrel))));

	/*
	 * References from permanent or unlogged tables to temp tables, and from
	 * permanent tables to unlogged tables, are disallowed because the
	 * referenced data can vanish out from under us. References from temp
	 * tables to any other table type are also disallowed, because other
	 * backends might need to run the RI triggers on the perm table, but they
	 * can't reliably see tuples in the local buffers of other backends.
	 */
	switch (rel->rd_rel->relpersistence)
	{
		case RELPERSISTENCE_PERMANENT:
			if (!RelationIsPermanent(pkrel))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on permanent tables may reference only permanent tables")));
			break;
		case RELPERSISTENCE_UNLOGGED:
			if (!RelationIsPermanent(pkrel)
				&& pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on unlogged tables may reference only permanent or unlogged tables")));
			break;
		case RELPERSISTENCE_TEMP:
			if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on temporary tables may reference only temporary tables")));
			if (!pkrel->rd_islocaltemp || !rel->rd_islocaltemp)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
						 errmsg("constraints on temporary tables must involve temporary tables of this session")));
			break;
	}

	/*
	 * Look up the referencing attributes to make sure they exist, and record
	 * their attnums and type OIDs.
	 */
	MemSet(pkattnum, 0, sizeof(pkattnum));
	MemSet(fkattnum, 0, sizeof(fkattnum));
	MemSet(pktypoid, 0, sizeof(pktypoid));
	MemSet(fktypoid, 0, sizeof(fktypoid));
	MemSet(opclasses, 0, sizeof(opclasses));
	MemSet(pfeqoperators, 0, sizeof(pfeqoperators));
	MemSet(ppeqoperators, 0, sizeof(ppeqoperators));
	MemSet(ffeqoperators, 0, sizeof(ffeqoperators));
	MemSet(fkdelsetcols, 0, sizeof(fkdelsetcols));

	numfks = transformColumnNameList(RelationGetRelid(rel),
									 fkconstraint->fk_attrs,
									 fkattnum, fktypoid);

	/* type OIDs are not needed for the ON DELETE SET column list */
	numfkdelsetcols = transformColumnNameList(RelationGetRelid(rel),
											  fkconstraint->fk_del_set_cols,
											  fkdelsetcols, NULL);
	validateFkOnDeleteSetColumns(numfks, fkattnum,
								 numfkdelsetcols, fkdelsetcols,
								 fkconstraint->fk_del_set_cols);

	/*
	 * If the attribute list for the referenced table was omitted, lookup the
	 * definition of the primary key and use it. Otherwise, validate the
	 * supplied attribute list. In either case, discover the index OID and
	 * index opclasses, and the attnums and type OIDs of the attributes.
	 */
	if (fkconstraint->pk_attrs == NIL)
	{
		numpks = transformFkeyGetPrimaryKey(pkrel, &indexOid,
											&fkconstraint->pk_attrs,
											pkattnum, pktypoid,
											opclasses);
	}
	else
	{
		numpks = transformColumnNameList(RelationGetRelid(pkrel),
										 fkconstraint->pk_attrs,
										 pkattnum, pktypoid);
		/* Look for an index matching the column list */
		indexOid = transformFkeyCheckAttrs(pkrel, numpks, pkattnum,
										   opclasses);
	}

	/*
	 * Now we can check permissions.
	 */
	checkFkeyPermissions(pkrel, pkattnum, numpks);

	/*
	 * Check some things for generated columns.
	 */
	for (i = 0; i < numfks; i++)
	{
		char		attgenerated = TupleDescAttr(RelationGetDescr(rel), fkattnum[i] - 1)->attgenerated;

		if (attgenerated)
		{
			/*
			 * Check restrictions on UPDATE/DELETE actions, per SQL standard
			 */
			if (fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETNULL ||
				fkconstraint->fk_upd_action == FKCONSTR_ACTION_SETDEFAULT ||
				fkconstraint->fk_upd_action == FKCONSTR_ACTION_CASCADE)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid %s action for foreign key constraint containing generated column",
								"ON UPDATE")));
			if (fkconstraint->fk_del_action == FKCONSTR_ACTION_SETNULL ||
				fkconstraint->fk_del_action == FKCONSTR_ACTION_SETDEFAULT)
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid %s action for foreign key constraint containing generated column",
								"ON DELETE")));
		}
	}

	/*
	 * Look up the equality operators to use in the constraint.
	 *
	 * Note that we have to be careful about the difference between the actual
	 * PK column type and the opclass' declared input type, which might be
	 * only binary-compatible with it. The declared opcintype is the right
	 * thing to probe pg_amop with.
	 */
	if (numfks != numpks)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_FOREIGN_KEY),
				 errmsg("number of referencing and referenced columns for foreign key disagree")));

	/*
	 * On the strength of a previous constraint, we might avoid scanning
	 * tables to validate this one. See below.
	 */
	old_check_ok = (fkconstraint->old_conpfeqop != NIL);
	Assert(!old_check_ok || numfks == list_length(fkconstraint->old_conpfeqop));

	/* numfks == numpks was verified just above, so either index is fine */
	for (i = 0; i < numpks; i++)
	{
		Oid			pktype = pktypoid[i];
		Oid			fktype = fktypoid[i];
		Oid			fktyped;
		HeapTuple	cla_ht;
		Form_pg_opclass cla_tup;
		Oid			amid;
		Oid			opfamily;
		Oid			opcintype;
		Oid			pfeqop;
		Oid			ppeqop;
		Oid			ffeqop;
		int16		eqstrategy;
		Oid			pfeqop_right;

		/* We need several fields out of the pg_opclass entry */
		cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclasses[i]));
		if (!HeapTupleIsValid(cla_ht))
			elog(ERROR, "cache lookup failed for opclass %u", opclasses[i]);
		cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
		amid = cla_tup->opcmethod;
		opfamily = cla_tup->opcfamily;
		opcintype = cla_tup->opcintype;
		ReleaseSysCache(cla_ht);

		/*
		 * Check it's a btree; currently this can never fail since no other
		 * index AMs support unique indexes. If we ever did have other types
		 * of unique indexes, we'd need a way to determine which operator
		 * strategy number is equality. (Is it reasonable to insist that
		 * every such index AM use btree's number for equality?)
		 */
		if (amid != BTREE_AM_OID)
			elog(ERROR, "only b-tree indexes are supported for foreign keys");
		eqstrategy = BTEqualStrategyNumber;

		/*
		 * There had better be a primary equality operator for the index.
		 * We'll use it for PK = PK comparisons.
		 */
		ppeqop = get_opfamily_member(opfamily, opcintype, opcintype,
									 eqstrategy);

		if (!OidIsValid(ppeqop))
			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
				 eqstrategy, opcintype, opcintype, opfamily);

		/*
		 * Are there equality operators that take exactly the FK type? Assume
		 * we should look through any domain here.
		 */
		fktyped = getBaseType(fktype);

		pfeqop = get_opfamily_member(opfamily, opcintype, fktyped,
									 eqstrategy);
		if (OidIsValid(pfeqop))
		{
			pfeqop_right = fktyped;
			ffeqop = get_opfamily_member(opfamily, fktyped, fktyped,
										 eqstrategy);
		}
		else
		{
			/* keep compiler quiet */
			pfeqop_right = InvalidOid;
			ffeqop = InvalidOid;
		}

		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
		{
			/*
			 * Otherwise, look for an implicit cast from the FK type to the
			 * opcintype, and if found, use the primary equality operator.
			 * This is a bit tricky because opcintype might be a polymorphic
			 * type such as ANYARRAY or ANYENUM; so what we have to test is
			 * whether the two actual column types can be concurrently cast to
			 * that type. (Otherwise, we'd fail to reject combinations such
			 * as int[] and point[].)
			 */
			Oid			input_typeids[2];
			Oid			target_typeids[2];

			input_typeids[0] = pktype;
			input_typeids[1] = fktype;
			target_typeids[0] = opcintype;
			target_typeids[1] = opcintype;
			if (can_coerce_type(2, input_typeids, target_typeids,
								COERCION_IMPLICIT))
			{
				pfeqop = ffeqop = ppeqop;
				pfeqop_right = opcintype;
			}
		}

		if (!(OidIsValid(pfeqop) && OidIsValid(ffeqop)))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("foreign key constraint \"%s\" cannot be implemented",
							fkconstraint->conname),
					 errdetail("Key columns \"%s\" and \"%s\" "
							   "are of incompatible types: %s and %s.",
							   strVal(list_nth(fkconstraint->fk_attrs, i)),
							   strVal(list_nth(fkconstraint->pk_attrs, i)),
							   format_type_be(fktype),
							   format_type_be(pktype))));

		if (old_check_ok)
		{
			/*
			 * When a pfeqop changes, revalidate the constraint. We could
			 * permit intra-opfamily changes, but that adds subtle complexity
			 * without any concrete benefit for core types. We need not
			 * assess ppeqop or ffeqop, which RI_Initial_Check() does not use.
			 */
			old_check_ok = (pfeqop == lfirst_oid(old_pfeqop_item));
			old_pfeqop_item = lnext(fkconstraint->old_conpfeqop,
									old_pfeqop_item);
		}
		if (old_check_ok)
		{
			Oid			old_fktype;
			Oid			new_fktype;
			CoercionPathType old_pathtype;
			CoercionPathType new_pathtype;
			Oid			old_castfunc;
			Oid			new_castfunc;
			Form_pg_attribute attr = TupleDescAttr(tab->oldDesc,
												   fkattnum[i] - 1);

			/*
			 * Identify coercion pathways from each of the old and new FK-side
			 * column types to the right (foreign) operand type of the pfeqop.
			 * We may assume that pg_constraint.conkey is not changing.
			 */
			old_fktype = attr->atttypid;
			new_fktype = fktype;
			old_pathtype = findFkeyCast(pfeqop_right, old_fktype,
										&old_castfunc);
			new_pathtype = findFkeyCast(pfeqop_right, new_fktype,
										&new_castfunc);

			/*
			 * Upon a change to the cast from the FK column to its pfeqop
			 * operand, revalidate the constraint. For this evaluation, a
			 * binary coercion cast is equivalent to no cast at all. While
			 * type implementors should design implicit casts with an eye
			 * toward consistency of operations like equality, we cannot
			 * assume here that they have done so.
			 *
			 * A function with a polymorphic argument could change behavior
			 * arbitrarily in response to get_fn_expr_argtype(). Therefore,
			 * when the cast destination is polymorphic, we only avoid
			 * revalidation if the input type has not changed at all. Given
			 * just the core data types and operator classes, this requirement
			 * prevents no would-be optimizations.
			 *
			 * If the cast converts from a base type to a domain thereon, then
			 * that domain type must be the opcintype of the unique index.
			 * Necessarily, the primary key column must then be of the domain
			 * type. Since the constraint was previously valid, all values on
			 * the foreign side necessarily exist on the primary side and in
			 * turn conform to the domain. Consequently, we need not treat
			 * domains specially here.
			 *
			 * Since we require that all collations share the same notion of
			 * equality (which they do, because texteq reduces to bitwise
			 * equality), we don't compare collation here.
			 *
			 * We need not directly consider the PK type. It's necessarily
			 * binary coercible to the opcintype of the unique index column,
			 * and ri_triggers.c will only deal with PK datums in terms of
			 * that opcintype. Changing the opcintype also changes pfeqop.
			 */
			old_check_ok = (new_pathtype == old_pathtype &&
							new_castfunc == old_castfunc &&
							(!IsPolymorphicType(pfeqop_right) ||
							 new_fktype == old_fktype));
		}

		pfeqoperators[i] = pfeqop;
		ppeqoperators[i] = ppeqop;
		ffeqoperators[i] = ffeqop;
	}

	/*
	 * Create all the constraint and trigger objects, recursing to partitions
	 * as necessary. First handle the referenced side.
	 */
	address = addFkRecurseReferenced(wqueue, fkconstraint, rel, pkrel,
									 indexOid,
									 InvalidOid,	/* no parent constraint */
									 numfks,
									 pkattnum,
									 fkattnum,
									 pfeqoperators,
									 ppeqoperators,
									 ffeqoperators,
									 numfkdelsetcols,
									 fkdelsetcols,
									 old_check_ok,
									 InvalidOid, InvalidOid);

	/* Now handle the referencing side. */
	addFkRecurseReferencing(wqueue, fkconstraint, rel, pkrel,
							indexOid,
							address.objectId,
							numfks,
							pkattnum,
							fkattnum,
							pfeqoperators,
							ppeqoperators,
							ffeqoperators,
							numfkdelsetcols,
							fkdelsetcols,
							old_check_ok,
							lockmode,
							InvalidOid, InvalidOid);

	/*
	 * Done. Close pk table, but keep lock until we've committed.
	 */
	table_close(pkrel, NoLock);

	return address;
}

/*
 * validateFkOnDeleteSetColumns
 *		Verifies that columns used in ON DELETE SET NULL/DEFAULT (...)
 *		column lists are valid.
 */
void
validateFkOnDeleteSetColumns(int numfks, const int16 *fkattnums,
							 int numfksetcols, const int16 *fksetcolsattnums,
							 List *fksetcols)
{
	/* Each SET-column attnum must appear somewhere in the FK column list */
	for (int i = 0; i < numfksetcols; i++)
	{
		int16		setcol_attnum = fksetcolsattnums[i];
		bool		seen = false;

		for (int j = 0; j < numfks; j++)
		{
			if (fkattnums[j] == setcol_attnum)
			{
				seen = true;
				break;
			}
		}

		if (!seen)
		{
			char	   *col = strVal(list_nth(fksetcols, i));

			ereport(ERROR,
					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
					 errmsg("column \"%s\" referenced in ON DELETE SET action must be part of foreign key", col)));
		}
	}
}

/*
 * addFkRecurseReferenced
 *		subroutine for ATAddForeignKeyConstraint; recurses on the referenced
 *		side of the constraint
 *
 * Create pg_constraint rows for the referenced side of the constraint,
 * referencing the parent of the referencing side; also create action triggers
 * on leaf partitions. If the table is partitioned, recurse to handle each
 * partition.
 *
 * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
 * of an ALTER TABLE sequence.
 * fkconstraint is the constraint being added.
 * rel is the root referencing relation.
 * pkrel is the referenced relation; might be a partition, if recursing.
 * indexOid is the OID of the index (on pkrel) implementing this constraint.
 * parentConstr is the OID of a parent constraint; InvalidOid if this is a
 * top-level constraint.
 * numfks is the number of columns in the foreign key
 * pkattnum is the attnum array of referenced attributes.
 * fkattnum is the attnum array of referencing attributes.
 * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT
 * (...) clause
 * fkdelsetcols is the attnum array of the columns in the ON DELETE SET
 * NULL/DEFAULT clause
 * pf/pp/ffeqoperators are OID array of operators between columns.
 * old_check_ok signals that this constraint replaces an existing one that
 * was already validated (thus this one doesn't need validation).
 * parentDelTrigger and parentUpdTrigger, when being recursively called on
 * a partition, are the OIDs of the parent action triggers for DELETE and
 * UPDATE respectively.
 */
static ObjectAddress
addFkRecurseReferenced(List **wqueue, Constraint *fkconstraint, Relation rel,
					   Relation pkrel, Oid indexOid, Oid parentConstr,
					   int numfks,
					   int16 *pkattnum, int16 *fkattnum, Oid *pfeqoperators,
					   Oid *ppeqoperators, Oid *ffeqoperators,
					   int numfkdelsetcols, int16 *fkdelsetcols,
					   bool old_check_ok,
					   Oid parentDelTrigger, Oid parentUpdTrigger)
{
	ObjectAddress address;
	Oid			constrOid;
	char	   *conname;
	bool		conislocal;
	int			coninhcount;
	bool		connoinherit;
	Oid			deleteTriggerOid,
				updateTriggerOid;

	/*
	 * Verify relkind for each referenced partition. At the top level, this
	 * is redundant with a previous check, but we need it when recursing.
	 */
	if (pkrel->rd_rel->relkind != RELKIND_RELATION &&
		pkrel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("referenced relation \"%s\" is not a table",
						RelationGetRelationName(pkrel))));

	/*
	 * Caller supplies us with a constraint name; however, it may be used in
	 * this partition, so come up with a different one in that case.
	 */
	if (ConstraintNameIsUsed(CONSTRAINT_RELATION,
							 RelationGetRelid(rel),
							 fkconstraint->conname))
		conname = ChooseConstraintName(RelationGetRelationName(rel),
									   ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs),
									   "fkey",
									   RelationGetNamespace(rel), NIL);
	else
		conname = fkconstraint->conname;

	if (OidIsValid(parentConstr))
	{
		conislocal = false;
		coninhcount = 1;
		connoinherit = false;
	}
	else
	{
		conislocal = true;
		coninhcount = 0;

		/*
		 * always inherit for partitioned tables, never for legacy inheritance
		 */
		connoinherit = rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE;
	}

	/*
	 * Record the FK constraint in pg_constraint.
	 */
	constrOid = CreateConstraintEntry(conname,
									  RelationGetNamespace(rel),
									  CONSTRAINT_FOREIGN,
									  fkconstraint->deferrable,
									  fkconstraint->initdeferred,
									  fkconstraint->initially_valid,
									  parentConstr,
									  RelationGetRelid(rel),
									  fkattnum,
									  numfks,
									  numfks,
									  InvalidOid,	/* not a domain constraint */
									  indexOid,
									  RelationGetRelid(pkrel),
									  pkattnum,
									  pfeqoperators,
									  ppeqoperators,
									  ffeqoperators,
									  numfks,
									  fkconstraint->fk_upd_action,
									  fkconstraint->fk_del_action,
									  fkdelsetcols,
									  numfkdelsetcols,
									  fkconstraint->fk_matchtype,
									  NULL, /* no exclusion constraint */
									  NULL, /* no check constraint */
									  NULL,
									  conislocal,	/* islocal */
									  coninhcount,	/* inhcount */
									  connoinherit, /* conNoInherit */
									  false);	/* is_internal */

	ObjectAddressSet(address, ConstraintRelationId, constrOid);

	/*
	 * Mark the child constraint as part of the parent constraint; it must not
	 * be dropped on its own. (This constraint is deleted when the partition
	 * is detached, but a special check needs to occur that the partition
	 * contains no referenced values.)
	 */
	if (OidIsValid(parentConstr))
	{
		ObjectAddress referenced;

		ObjectAddressSet(referenced, ConstraintRelationId, parentConstr);
		recordDependencyOn(&address, &referenced, DEPENDENCY_INTERNAL);
	}

	/* make new constraint visible, in case we add more */
	CommandCounterIncrement();

	/*
	 * Create the action triggers that enforce the constraint.
	 */
	createForeignKeyActionTriggers(rel, RelationGetRelid(pkrel),
								   fkconstraint,
								   constrOid, indexOid,
								   parentDelTrigger, parentUpdTrigger,
								   &deleteTriggerOid, &updateTriggerOid);

	/*
	 * If the referenced table is partitioned, recurse on ourselves to handle
	 * each partition. We need one pg_constraint row created for each
	 * partition in addition to the pg_constraint row for the parent table.
	 */
	if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		PartitionDesc pd = RelationGetPartitionDesc(pkrel, true);

		for (int i = 0; i < pd->nparts; i++)
		{
			Relation	partRel;
			AttrMap    *map;
			AttrNumber *mapped_pkattnum;
			Oid			partIndexId;

			partRel = table_open(pd->oids[i], ShareRowExclusiveLock);

			/*
			 * Map the attribute numbers in the referenced side of the FK
			 * definition to match the partition's column layout.
			 *
			 * When no mapping is needed, mapped_pkattnum simply aliases the
			 * caller's pkattnum array; only the palloc'd copy is freed below.
			 */
			map = build_attrmap_by_name_if_req(RelationGetDescr(partRel),
											   RelationGetDescr(pkrel));
			if (map)
			{
				mapped_pkattnum = palloc(sizeof(AttrNumber) * numfks);
				for (int j = 0; j < numfks; j++)
					mapped_pkattnum[j] = map->attnums[pkattnum[j] - 1];
			}
			else
				mapped_pkattnum = pkattnum;

			/* do the deed */
			partIndexId = index_get_partition(partRel, indexOid);
			if (!OidIsValid(partIndexId))
				elog(ERROR, "index for %u not found in partition %s",
					 indexOid, RelationGetRelationName(partRel));
			addFkRecurseReferenced(wqueue, fkconstraint, rel, partRel,
								   partIndexId, constrOid, numfks,
								   mapped_pkattnum, fkattnum,
								   pfeqoperators, ppeqoperators, ffeqoperators,
								   numfkdelsetcols, fkdelsetcols,
								   old_check_ok,
								   deleteTriggerOid, updateTriggerOid);

			/* Done -- clean up (but keep the lock) */
			table_close(partRel, NoLock);
			if (map)
			{
				pfree(mapped_pkattnum);
				free_attrmap(map);
			}
		}
	}

	return address;
}

/*
 * addFkRecurseReferencing
 *		subroutine for ATAddForeignKeyConstraint and CloneFkReferencing
 *
 * If the referencing relation is a plain relation, create the necessary check
 * triggers that implement the constraint, and set up for Phase 3 constraint
 * verification. If the referencing relation is a partitioned table, then
 * we create a pg_constraint row for it and recurse on this routine for each
 * partition.
 *
 * We assume that the referenced relation is locked against concurrent
 * deletions. If it's a partitioned relation, every partition must be so
 * locked.
 *
 * wqueue is the ALTER TABLE work queue; can be NULL when not running as part
 * of an ALTER TABLE sequence.
 * fkconstraint is the constraint being added.
 * rel is the referencing relation; might be a partition, if recursing.
 * pkrel is the root referenced relation.
 * indexOid is the OID of the index (on pkrel) implementing this constraint.
 * parentConstr is the OID of the parent constraint (there is always one).
+ * numfks is the number of columns in the foreign key + * pkattnum is the attnum array of referenced attributes. + * fkattnum is the attnum array of referencing attributes. + * pf/pp/ffeqoperators are OID array of operators between columns. + * numfkdelsetcols is the number of columns in the ON DELETE SET NULL/DEFAULT + * (...) clause + * fkdelsetcols is the attnum array of the columns in the ON DELETE SET + * NULL/DEFAULT clause + * old_check_ok signals that this constraint replaces an existing one that + * was already validated (thus this one doesn't need validation). + * lockmode is the lockmode to acquire on partitions when recursing. + * parentInsTrigger and parentUpdTrigger, when being recursively called on + * a partition, are the OIDs of the parent check triggers for INSERT and + * UPDATE respectively. + */ +static void +addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel, + Relation pkrel, Oid indexOid, Oid parentConstr, + int numfks, int16 *pkattnum, int16 *fkattnum, + Oid *pfeqoperators, Oid *ppeqoperators, Oid *ffeqoperators, + int numfkdelsetcols, int16 *fkdelsetcols, + bool old_check_ok, LOCKMODE lockmode, + Oid parentInsTrigger, Oid parentUpdTrigger) +{ + Oid insertTriggerOid, + updateTriggerOid; + + AssertArg(OidIsValid(parentConstr)); + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("foreign key constraints are not supported on foreign tables"))); + + /* + * Add the check triggers to it and, if necessary, schedule it to be + * checked in Phase 3. + * + * If the relation is partitioned, drill down to do it to its partitions. 
+ */ + createForeignKeyCheckTriggers(RelationGetRelid(rel), + RelationGetRelid(pkrel), + fkconstraint, + parentConstr, + indexOid, + parentInsTrigger, parentUpdTrigger, + &insertTriggerOid, &updateTriggerOid); + + if (rel->rd_rel->relkind == RELKIND_RELATION) + { + /* + * Tell Phase 3 to check that the constraint is satisfied by existing + * rows. We can skip this during table creation, when requested + * explicitly by specifying NOT VALID in an ADD FOREIGN KEY command, + * and when we're recreating a constraint following a SET DATA TYPE + * operation that did not impugn its validity. + */ + if (wqueue && !old_check_ok && !fkconstraint->skip_validation) + { + NewConstraint *newcon; + AlteredTableInfo *tab; + + tab = ATGetQueueEntry(wqueue, rel); + + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = get_constraint_name(parentConstr); + newcon->contype = CONSTR_FOREIGN; + newcon->refrelid = RelationGetRelid(pkrel); + newcon->refindid = indexOid; + newcon->conid = parentConstr; + newcon->qual = (Node *) fkconstraint; + + tab->constraints = lappend(tab->constraints, newcon); + } + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc pd = RelationGetPartitionDesc(rel, true); + Relation trigrel; + + /* + * Triggers of the foreign keys will be manipulated a bunch of times + * in the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines + * called below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Recurse to take appropriate action on each partition; either we + * find an existing constraint to reparent to ours, or we create a new + * one. 
+ */ + for (int i = 0; i < pd->nparts; i++) + { + Oid partitionId = pd->oids[i]; + Relation partition = table_open(partitionId, lockmode); + List *partFKs; + AttrMap *attmap; + AttrNumber mapped_fkattnum[INDEX_MAX_KEYS]; + bool attached; + char *conname; + Oid constrOid; + ObjectAddress address, + referenced; + ListCell *cell; + + CheckTableNotInUse(partition, "ALTER TABLE"); + + attmap = build_attrmap_by_name(RelationGetDescr(partition), + RelationGetDescr(rel)); + for (int j = 0; j < numfks; j++) + mapped_fkattnum[j] = attmap->attnums[fkattnum[j] - 1]; + + /* Check whether an existing constraint can be repurposed */ + partFKs = copyObject(RelationGetFKeyList(partition)); + attached = false; + foreach(cell, partFKs) + { + ForeignKeyCacheInfo *fk; + + fk = lfirst_node(ForeignKeyCacheInfo, cell); + if (tryAttachPartitionForeignKey(fk, + partitionId, + parentConstr, + numfks, + mapped_fkattnum, + pkattnum, + pfeqoperators, + insertTriggerOid, + updateTriggerOid, + trigrel)) + { + attached = true; + break; + } + } + if (attached) + { + table_close(partition, NoLock); + continue; + } + + /* + * No luck finding a good constraint to reuse; create our own. 
+ */ + if (ConstraintNameIsUsed(CONSTRAINT_RELATION, + RelationGetRelid(partition), + fkconstraint->conname)) + conname = ChooseConstraintName(RelationGetRelationName(partition), + ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), + "fkey", + RelationGetNamespace(partition), NIL); + else + conname = fkconstraint->conname; + constrOid = + CreateConstraintEntry(conname, + RelationGetNamespace(partition), + CONSTRAINT_FOREIGN, + fkconstraint->deferrable, + fkconstraint->initdeferred, + fkconstraint->initially_valid, + parentConstr, + partitionId, + mapped_fkattnum, + numfks, + numfks, + InvalidOid, + indexOid, + RelationGetRelid(pkrel), + pkattnum, + pfeqoperators, + ppeqoperators, + ffeqoperators, + numfks, + fkconstraint->fk_upd_action, + fkconstraint->fk_del_action, + fkdelsetcols, + numfkdelsetcols, + fkconstraint->fk_matchtype, + NULL, + NULL, + NULL, + false, + 1, + false, + false); + + /* + * Give this constraint partition-type dependencies on the parent + * constraint as well as the table. 
+ */ + ObjectAddressSet(address, ConstraintRelationId, constrOid); + ObjectAddressSet(referenced, ConstraintRelationId, parentConstr); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, partitionId); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC); + + /* Make all this visible before recursing */ + CommandCounterIncrement(); + + /* call ourselves to finalize the creation and we're done */ + addFkRecurseReferencing(wqueue, fkconstraint, partition, pkrel, + indexOid, + constrOid, + numfks, + pkattnum, + mapped_fkattnum, + pfeqoperators, + ppeqoperators, + ffeqoperators, + numfkdelsetcols, + fkdelsetcols, + old_check_ok, + lockmode, + insertTriggerOid, + updateTriggerOid); + + table_close(partition, NoLock); + } + + table_close(trigrel, RowExclusiveLock); + } +} + +/* + * CloneForeignKeyConstraints + * Clone foreign keys from a partitioned table to a newly acquired + * partition. + * + * partitionRel is a partition of parentRel, so we can be certain that it has + * the same columns with the same datatypes. The columns may be in different + * order, though. + * + * wqueue must be passed to set up phase 3 constraint checking, unless the + * referencing-side partition is known to be empty (such as in CREATE TABLE / + * PARTITION OF). + */ +static void +CloneForeignKeyConstraints(List **wqueue, Relation parentRel, + Relation partitionRel) +{ + /* This only works for declarative partitioning */ + Assert(parentRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + /* + * Clone constraints for which the parent is on the referenced side. + */ + CloneFkReferenced(parentRel, partitionRel); + + /* + * Now clone constraints where the parent is on the referencing side. 
+ */ + CloneFkReferencing(wqueue, parentRel, partitionRel); +} + +/* + * CloneFkReferenced + * Subroutine for CloneForeignKeyConstraints + * + * Find all the FKs that have the parent relation on the referenced side; + * clone those constraints to the given partition. This is to be called + * when the partition is being created or attached. + * + * This ignores self-referencing FKs; those are handled by CloneFkReferencing. + * + * This recurses to partitions, if the relation being attached is partitioned. + * Recursion is done by calling addFkRecurseReferenced. + */ +static void +CloneFkReferenced(Relation parentRel, Relation partitionRel) +{ + Relation pg_constraint; + AttrMap *attmap; + ListCell *cell; + SysScanDesc scan; + ScanKeyData key[2]; + HeapTuple tuple; + List *clone = NIL; + Relation trigrel; + + /* + * Search for any constraints where this partition's parent is in the + * referenced side. However, we must not clone any constraint whose + * parent constraint is also going to be cloned, to avoid duplicates. So + * do it in two steps: first construct the list of constraints to clone, + * then go over that list cloning those whose parents are not in the list. + * (We must not rely on the parent being seen first, since the catalog + * scan could return children first.) + */ + pg_constraint = table_open(ConstraintRelationId, RowShareLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_confrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parentRel))); + ScanKeyInit(&key[1], + Anum_pg_constraint_contype, BTEqualStrategyNumber, + F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN)); + /* This is a seqscan, as we don't have a usable index ... 
*/ + scan = systable_beginscan(pg_constraint, InvalidOid, true, + NULL, 2, key); + while ((tuple = systable_getnext(scan)) != NULL) + { + Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + clone = lappend_oid(clone, constrForm->oid); + } + systable_endscan(scan); + table_close(pg_constraint, RowShareLock); + + /* + * Triggers of the foreign keys will be manipulated a bunch of times in + * the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines called + * below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + attmap = build_attrmap_by_name(RelationGetDescr(partitionRel), + RelationGetDescr(parentRel)); + foreach(cell, clone) + { + Oid constrOid = lfirst_oid(cell); + Form_pg_constraint constrForm; + Relation fkRel; + Oid indexOid; + Oid partIndexId; + int numfks; + AttrNumber conkey[INDEX_MAX_KEYS]; + AttrNumber mapped_confkey[INDEX_MAX_KEYS]; + AttrNumber confkey[INDEX_MAX_KEYS]; + Oid conpfeqop[INDEX_MAX_KEYS]; + Oid conppeqop[INDEX_MAX_KEYS]; + Oid conffeqop[INDEX_MAX_KEYS]; + int numfkdelsetcols; + AttrNumber confdelsetcols[INDEX_MAX_KEYS]; + Constraint *fkconstraint; + Oid deleteTriggerOid, + updateTriggerOid; + + tuple = SearchSysCache1(CONSTROID, constrOid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", constrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* + * As explained above: don't try to clone a constraint for which we're + * going to clone the parent. + */ + if (list_member_oid(clone, constrForm->conparentid)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Don't clone self-referencing foreign keys, which can be in the + * partitioned table or in the partition-to-be. 
+ */ + if (constrForm->conrelid == RelationGetRelid(parentRel) || + constrForm->conrelid == RelationGetRelid(partitionRel)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Because we're only expanding the key space at the referenced side, + * we don't need to prevent any operation in the referencing table, so + * AccessShareLock suffices (assumes that dropping the constraint + * acquires AEL). + */ + fkRel = table_open(constrForm->conrelid, AccessShareLock); + + indexOid = constrForm->conindid; + DeconstructFkConstraintRow(tuple, + &numfks, + conkey, + confkey, + conpfeqop, + conppeqop, + conffeqop, + &numfkdelsetcols, + confdelsetcols); + + for (int i = 0; i < numfks; i++) + mapped_confkey[i] = attmap->attnums[confkey[i] - 1]; + + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + fkconstraint->conname = NameStr(constrForm->conname); + fkconstraint->deferrable = constrForm->condeferrable; + fkconstraint->initdeferred = constrForm->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + /* ->fk_attrs determined below */ + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = constrForm->confmatchtype; + fkconstraint->fk_upd_action = constrForm->confupdtype; + fkconstraint->fk_del_action = constrForm->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + + /* set up colnames that are used to generate the constraint name */ + for (int i = 0; i < numfks; i++) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(fkRel), + conkey[i] - 1); + fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs, + makeString(NameStr(att->attname))); + } + + /* + * Add the new foreign key constraint pointing to the new partition. 
+ * Because this new partition appears in the referenced side of the + * constraint, we don't need to set up for Phase 3 check. + */ + partIndexId = index_get_partition(partitionRel, indexOid); + if (!OidIsValid(partIndexId)) + elog(ERROR, "index for %u not found in partition %s", + indexOid, RelationGetRelationName(partitionRel)); + + /* + * Get the "action" triggers belonging to the constraint to pass as + * parent OIDs for similar triggers that will be created on the + * partition in addFkRecurseReferenced(). + */ + GetForeignKeyActionTriggers(trigrel, constrOid, + constrForm->confrelid, constrForm->conrelid, + &deleteTriggerOid, &updateTriggerOid); + + addFkRecurseReferenced(NULL, + fkconstraint, + fkRel, + partitionRel, + partIndexId, + constrOid, + numfks, + mapped_confkey, + conkey, + conpfeqop, + conppeqop, + conffeqop, + numfkdelsetcols, + confdelsetcols, + true, + deleteTriggerOid, + updateTriggerOid); + + table_close(fkRel, NoLock); + ReleaseSysCache(tuple); + } + + table_close(trigrel, RowExclusiveLock); +} + +/* + * CloneFkReferencing + * Subroutine for CloneForeignKeyConstraints + * + * For each FK constraint of the parent relation in the given list, find an + * equivalent constraint in its partition relation that can be reparented; + * if one cannot be found, create a new constraint in the partition as its + * child. + * + * If wqueue is given, it is used to set up phase-3 verification for each + * cloned constraint; if omitted, we assume that such verification is not + * needed (example: the partition is being created anew). 
+ */ +static void +CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel) +{ + AttrMap *attmap; + List *partFKs; + List *clone = NIL; + ListCell *cell; + Relation trigrel; + + /* obtain a list of constraints that we need to clone */ + foreach(cell, RelationGetFKeyList(parentRel)) + { + ForeignKeyCacheInfo *fk = lfirst(cell); + + clone = lappend_oid(clone, fk->conoid); + } + + /* + * Silently do nothing if there's nothing to do. In particular, this + * avoids throwing a spurious error for foreign tables. + */ + if (clone == NIL) + return; + + if (partRel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("foreign key constraints are not supported on foreign tables"))); + + /* + * Triggers of the foreign keys will be manipulated a bunch of times in + * the loop below. To avoid repeatedly opening/closing the trigger + * catalog relation, we open it here and pass it to the subroutines called + * below. + */ + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * The constraint key may differ, if the columns in the partition are + * different. This map is used to convert them. 
+ */ + attmap = build_attrmap_by_name(RelationGetDescr(partRel), + RelationGetDescr(parentRel)); + + partFKs = copyObject(RelationGetFKeyList(partRel)); + + foreach(cell, clone) + { + Oid parentConstrOid = lfirst_oid(cell); + Form_pg_constraint constrForm; + Relation pkrel; + HeapTuple tuple; + int numfks; + AttrNumber conkey[INDEX_MAX_KEYS]; + AttrNumber mapped_conkey[INDEX_MAX_KEYS]; + AttrNumber confkey[INDEX_MAX_KEYS]; + Oid conpfeqop[INDEX_MAX_KEYS]; + Oid conppeqop[INDEX_MAX_KEYS]; + Oid conffeqop[INDEX_MAX_KEYS]; + int numfkdelsetcols; + AttrNumber confdelsetcols[INDEX_MAX_KEYS]; + Constraint *fkconstraint; + bool attached; + Oid indexOid; + Oid constrOid; + ObjectAddress address, + referenced; + ListCell *cell; + Oid insertTriggerOid, + updateTriggerOid; + + tuple = SearchSysCache1(CONSTROID, parentConstrOid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", + parentConstrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* Don't clone constraints whose parents are being cloned */ + if (list_member_oid(clone, constrForm->conparentid)) + { + ReleaseSysCache(tuple); + continue; + } + + /* + * Need to prevent concurrent deletions. If pkrel is a partitioned + * relation, that means to lock all partitions. + */ + pkrel = table_open(constrForm->confrelid, ShareRowExclusiveLock); + if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + (void) find_all_inheritors(RelationGetRelid(pkrel), + ShareRowExclusiveLock, NULL); + + DeconstructFkConstraintRow(tuple, &numfks, conkey, confkey, + conpfeqop, conppeqop, conffeqop, + &numfkdelsetcols, confdelsetcols); + for (int i = 0; i < numfks; i++) + mapped_conkey[i] = attmap->attnums[conkey[i] - 1]; + + /* + * Get the "check" triggers belonging to the constraint to pass as + * parent OIDs for similar triggers that will be created on the + * partition in addFkRecurseReferencing(). 
They are also passed to + * tryAttachPartitionForeignKey() below to simply assign as parents to + * the partition's existing "check" triggers, that is, if the + * corresponding constraints is deemed attachable to the parent + * constraint. + */ + GetForeignKeyCheckTriggers(trigrel, constrForm->oid, + constrForm->confrelid, constrForm->conrelid, + &insertTriggerOid, &updateTriggerOid); + + /* + * Before creating a new constraint, see whether any existing FKs are + * fit for the purpose. If one is, attach the parent constraint to + * it, and don't clone anything. This way we avoid the expensive + * verification step and don't end up with a duplicate FK, and we + * don't need to recurse to partitions for this constraint. + */ + attached = false; + foreach(cell, partFKs) + { + ForeignKeyCacheInfo *fk = lfirst_node(ForeignKeyCacheInfo, cell); + + if (tryAttachPartitionForeignKey(fk, + RelationGetRelid(partRel), + parentConstrOid, + numfks, + mapped_conkey, + confkey, + conpfeqop, + insertTriggerOid, + updateTriggerOid, + trigrel)) + { + attached = true; + table_close(pkrel, NoLock); + break; + } + } + if (attached) + { + ReleaseSysCache(tuple); + continue; + } + + /* No dice. 
Set up to create our own constraint */ + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + /* ->conname determined below */ + fkconstraint->deferrable = constrForm->condeferrable; + fkconstraint->initdeferred = constrForm->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + /* ->fk_attrs determined below */ + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = constrForm->confmatchtype; + fkconstraint->fk_upd_action = constrForm->confupdtype; + fkconstraint->fk_del_action = constrForm->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + for (int i = 0; i < numfks; i++) + { + Form_pg_attribute att; + + att = TupleDescAttr(RelationGetDescr(partRel), + mapped_conkey[i] - 1); + fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs, + makeString(NameStr(att->attname))); + } + if (ConstraintNameIsUsed(CONSTRAINT_RELATION, + RelationGetRelid(partRel), + NameStr(constrForm->conname))) + fkconstraint->conname = + ChooseConstraintName(RelationGetRelationName(partRel), + ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), + "fkey", + RelationGetNamespace(partRel), NIL); + else + fkconstraint->conname = pstrdup(NameStr(constrForm->conname)); + + indexOid = constrForm->conindid; + constrOid = + CreateConstraintEntry(fkconstraint->conname, + constrForm->connamespace, + CONSTRAINT_FOREIGN, + fkconstraint->deferrable, + fkconstraint->initdeferred, + constrForm->convalidated, + parentConstrOid, + RelationGetRelid(partRel), + mapped_conkey, + numfks, + numfks, + InvalidOid, /* not a domain constraint */ + indexOid, + constrForm->confrelid, /* same foreign rel */ + confkey, + conpfeqop, + conppeqop, + conffeqop, + numfks, + fkconstraint->fk_upd_action, + fkconstraint->fk_del_action, + confdelsetcols, + numfkdelsetcols, + 
fkconstraint->fk_matchtype, + NULL, + NULL, + NULL, + false, /* islocal */ + 1, /* inhcount */ + false, /* conNoInherit */ + true); + + /* Set up partition dependencies for the new constraint */ + ObjectAddressSet(address, ConstraintRelationId, constrOid); + ObjectAddressSet(referenced, ConstraintRelationId, parentConstrOid); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, + RelationGetRelid(partRel)); + recordDependencyOn(&address, &referenced, DEPENDENCY_PARTITION_SEC); + + /* Done with the cloned constraint's tuple */ + ReleaseSysCache(tuple); + + /* Make all this visible before recursing */ + CommandCounterIncrement(); + + addFkRecurseReferencing(wqueue, + fkconstraint, + partRel, + pkrel, + indexOid, + constrOid, + numfks, + confkey, + mapped_conkey, + conpfeqop, + conppeqop, + conffeqop, + numfkdelsetcols, + confdelsetcols, + false, /* no old check exists */ + AccessExclusiveLock, + insertTriggerOid, + updateTriggerOid); + table_close(pkrel, NoLock); + } + + table_close(trigrel, RowExclusiveLock); +} + +/* + * When the parent of a partition receives [the referencing side of] a foreign + * key, we must propagate that foreign key to the partition. However, the + * partition might already have an equivalent foreign key; this routine + * compares the given ForeignKeyCacheInfo (in the partition) to the FK defined + * by the other parameters. If they are equivalent, create the link between + * the two constraints and return true. + * + * If the given FK does not match the one defined by rest of the params, + * return false. 
+ */ +static bool +tryAttachPartitionForeignKey(ForeignKeyCacheInfo *fk, + Oid partRelid, + Oid parentConstrOid, + int numfks, + AttrNumber *mapped_conkey, + AttrNumber *confkey, + Oid *conpfeqop, + Oid parentInsTrigger, + Oid parentUpdTrigger, + Relation trigrel) +{ + HeapTuple parentConstrTup; + Form_pg_constraint parentConstr; + HeapTuple partcontup; + Form_pg_constraint partConstr; + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + Oid insertTriggerOid, + updateTriggerOid; + + parentConstrTup = SearchSysCache1(CONSTROID, + ObjectIdGetDatum(parentConstrOid)); + if (!HeapTupleIsValid(parentConstrTup)) + elog(ERROR, "cache lookup failed for constraint %u", parentConstrOid); + parentConstr = (Form_pg_constraint) GETSTRUCT(parentConstrTup); + + /* + * Do some quick & easy initial checks. If any of these fail, we cannot + * use this constraint. + */ + if (fk->confrelid != parentConstr->confrelid || fk->nkeys != numfks) + { + ReleaseSysCache(parentConstrTup); + return false; + } + for (int i = 0; i < numfks; i++) + { + if (fk->conkey[i] != mapped_conkey[i] || + fk->confkey[i] != confkey[i] || + fk->conpfeqop[i] != conpfeqop[i]) + { + ReleaseSysCache(parentConstrTup); + return false; + } + } + + /* + * Looks good so far; do some more extensive checks. Presumably the check + * for 'convalidated' could be dropped, since we don't really care about + * that, but let's be careful for now. 
+ */ + partcontup = SearchSysCache1(CONSTROID, + ObjectIdGetDatum(fk->conoid)); + if (!HeapTupleIsValid(partcontup)) + elog(ERROR, "cache lookup failed for constraint %u", fk->conoid); + partConstr = (Form_pg_constraint) GETSTRUCT(partcontup); + if (OidIsValid(partConstr->conparentid) || + !partConstr->convalidated || + partConstr->condeferrable != parentConstr->condeferrable || + partConstr->condeferred != parentConstr->condeferred || + partConstr->confupdtype != parentConstr->confupdtype || + partConstr->confdeltype != parentConstr->confdeltype || + partConstr->confmatchtype != parentConstr->confmatchtype) + { + ReleaseSysCache(parentConstrTup); + ReleaseSysCache(partcontup); + return false; + } + + ReleaseSysCache(partcontup); + ReleaseSysCache(parentConstrTup); + + /* + * Looks good! Attach this constraint. The action triggers in the new + * partition become redundant -- the parent table already has equivalent + * ones, and those will be able to reach the partition. Remove the ones + * in the partition. We identify them because they have our constraint + * OID, as well as being on the referenced rel. + */ + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(fk->conoid)); + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + ObjectAddress trigger; + + if (trgform->tgconstrrelid != fk->conrelid) + continue; + if (trgform->tgrelid != fk->confrelid) + continue; + + /* + * The constraint is originally set up to contain this trigger as an + * implementation object, so there's a dependency record that links + * the two; however, since the trigger is no longer needed, we remove + * the dependency link in order to be able to drop the trigger while + * keeping the constraint intact. 
+ */ + deleteDependencyRecordsFor(TriggerRelationId, + trgform->oid, + false); + /* make dependency deletion visible to performDeletion */ + CommandCounterIncrement(); + ObjectAddressSet(trigger, TriggerRelationId, + trgform->oid); + performDeletion(&trigger, DROP_RESTRICT, 0); + /* make trigger drop visible, in case the loop iterates */ + CommandCounterIncrement(); + } + + systable_endscan(scan); + + ConstraintSetParentConstraint(fk->conoid, parentConstrOid, partRelid); + + /* + * Like the constraint, attach partition's "check" triggers to the + * corresponding parent triggers. + */ + GetForeignKeyCheckTriggers(trigrel, + fk->conoid, fk->confrelid, fk->conrelid, + &insertTriggerOid, &updateTriggerOid); + Assert(OidIsValid(insertTriggerOid) && OidIsValid(parentInsTrigger)); + TriggerSetParentTrigger(trigrel, insertTriggerOid, parentInsTrigger, + partRelid); + Assert(OidIsValid(updateTriggerOid) && OidIsValid(parentUpdTrigger)); + TriggerSetParentTrigger(trigrel, updateTriggerOid, parentUpdTrigger, + partRelid); + + CommandCounterIncrement(); + return true; +} + +/* + * GetForeignKeyActionTriggers + * Returns delete and update "action" triggers of the given relation + * belonging to the given constraint + */ +static void +GetForeignKeyActionTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *deleteTriggerOid, + Oid *updateTriggerOid) +{ + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + + *deleteTriggerOid = *updateTriggerOid = InvalidOid; + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + + if (trgform->tgconstrrelid != conrelid) + continue; + if (trgform->tgrelid != confrelid) + continue; + /* Only ever look at "action" triggers on the PK side. 
*/ + if (RI_FKey_trigger_type(trgform->tgfoid) != RI_TRIGGER_PK) + continue; + if (TRIGGER_FOR_DELETE(trgform->tgtype)) + { + Assert(*deleteTriggerOid == InvalidOid); + *deleteTriggerOid = trgform->oid; + } + else if (TRIGGER_FOR_UPDATE(trgform->tgtype)) + { + Assert(*updateTriggerOid == InvalidOid); + *updateTriggerOid = trgform->oid; + } +#ifndef USE_ASSERT_CHECKING + /* In an assert-enabled build, continue looking to find duplicates */ + if (OidIsValid(*deleteTriggerOid) && OidIsValid(*updateTriggerOid)) + break; +#endif + } + + if (!OidIsValid(*deleteTriggerOid)) + elog(ERROR, "could not find ON DELETE action trigger of foreign key constraint %u", + conoid); + if (!OidIsValid(*updateTriggerOid)) + elog(ERROR, "could not find ON UPDATE action trigger of foreign key constraint %u", + conoid); + + systable_endscan(scan); +} + +/* + * GetForeignKeyCheckTriggers + * Returns insert and update "check" triggers of the given relation + * belonging to the given constraint + */ +static void +GetForeignKeyCheckTriggers(Relation trigrel, + Oid conoid, Oid confrelid, Oid conrelid, + Oid *insertTriggerOid, + Oid *updateTriggerOid) +{ + ScanKeyData key; + SysScanDesc scan; + HeapTuple trigtup; + + *insertTriggerOid = *updateTriggerOid = InvalidOid; + ScanKeyInit(&key, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + scan = systable_beginscan(trigrel, TriggerConstraintIndexId, true, + NULL, 1, &key); + while ((trigtup = systable_getnext(scan)) != NULL) + { + Form_pg_trigger trgform = (Form_pg_trigger) GETSTRUCT(trigtup); + + if (trgform->tgconstrrelid != confrelid) + continue; + if (trgform->tgrelid != conrelid) + continue; + /* Only ever look at "check" triggers on the FK side. 
*/ + if (RI_FKey_trigger_type(trgform->tgfoid) != RI_TRIGGER_FK) + continue; + if (TRIGGER_FOR_INSERT(trgform->tgtype)) + { + Assert(*insertTriggerOid == InvalidOid); + *insertTriggerOid = trgform->oid; + } + else if (TRIGGER_FOR_UPDATE(trgform->tgtype)) + { + Assert(*updateTriggerOid == InvalidOid); + *updateTriggerOid = trgform->oid; + } +#ifndef USE_ASSERT_CHECKING + /* In an assert-enabled build, continue looking to find duplicates. */ + if (OidIsValid(*insertTriggerOid) && OidIsValid(*updateTriggerOid)) + break; +#endif + } + + if (!OidIsValid(*insertTriggerOid)) + elog(ERROR, "could not find ON INSERT check triggers of foreign key constraint %u", + conoid); + if (!OidIsValid(*updateTriggerOid)) + elog(ERROR, "could not find ON UPDATE check triggers of foreign key constraint %u", + conoid); + + systable_endscan(scan); +} + +/* + * ALTER TABLE ALTER CONSTRAINT + * + * Update the attributes of a constraint. + * + * Currently only works for Foreign Key constraints. + * + * If the constraint is modified, returns its address; otherwise, return + * InvalidObjectAddress. 
+ */ +static ObjectAddress +ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, bool recurse, + bool recursing, LOCKMODE lockmode) +{ + Constraint *cmdcon; + Relation conrel; + Relation tgrel; + SysScanDesc scan; + ScanKeyData skey[3]; + HeapTuple contuple; + Form_pg_constraint currcon; + ObjectAddress address; + List *otherrelids = NIL; + ListCell *lc; + + cmdcon = castNode(Constraint, cmd->def); + + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Find and check the target constraint + */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(cmdcon->conname)); + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, + true, NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(contuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of relation \"%s\" does not exist", + cmdcon->conname, RelationGetRelationName(rel)))); + + currcon = (Form_pg_constraint) GETSTRUCT(contuple); + if (currcon->contype != CONSTRAINT_FOREIGN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key constraint", + cmdcon->conname, RelationGetRelationName(rel)))); + + /* + * If it's not the topmost constraint, raise an error. + * + * Altering a non-topmost constraint leaves some triggers untouched, since + * they are not directly connected to this constraint; also, pg_dump would + * ignore the deferrability status of the individual constraint, since it + * only dumps topmost constraints. 
Avoid these problems by refusing this + * operation and telling the user to alter the parent constraint instead. + */ + if (OidIsValid(currcon->conparentid)) + { + HeapTuple tp; + Oid parent = currcon->conparentid; + char *ancestorname = NULL; + char *ancestortable = NULL; + + /* Loop to find the topmost constraint */ + while (HeapTupleIsValid(tp = SearchSysCache1(CONSTROID, ObjectIdGetDatum(parent)))) + { + Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(tp); + + /* If no parent, this is the constraint we want */ + if (!OidIsValid(contup->conparentid)) + { + ancestorname = pstrdup(NameStr(contup->conname)); + ancestortable = get_rel_name(contup->conrelid); + ReleaseSysCache(tp); + break; + } + + parent = contup->conparentid; + ReleaseSysCache(tp); + } + + ereport(ERROR, + (errmsg("cannot alter constraint \"%s\" on relation \"%s\"", + cmdcon->conname, RelationGetRelationName(rel)), + ancestorname && ancestortable ? + errdetail("Constraint \"%s\" is derived from constraint \"%s\" of relation \"%s\".", + cmdcon->conname, ancestorname, ancestortable) : 0, + errhint("You may alter the constraint it derives from, instead."))); + } + + /* + * Do the actual catalog work. We can skip changing if already in the + * desired state, but not if a partitioned table: partitions need to be + * processed regardless, in case they had the constraint locally changed. + */ + address = InvalidObjectAddress; + if (currcon->condeferrable != cmdcon->deferrable || + currcon->condeferred != cmdcon->initdeferred || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + if (ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, rel, contuple, + &otherrelids, lockmode)) + ObjectAddressSet(address, ConstraintRelationId, currcon->oid); + } + + /* + * ATExecConstrRecurse already invalidated relcache for the relations + * having the constraint itself; here we also invalidate for relations + * that have any triggers that are part of the constraint. 
+ */ + foreach(lc, otherrelids) + CacheInvalidateRelcacheByRelid(lfirst_oid(lc)); + + systable_endscan(scan); + + table_close(tgrel, RowExclusiveLock); + table_close(conrel, RowExclusiveLock); + + return address; +} + +/* + * Recursive subroutine of ATExecAlterConstraint. Returns true if the + * constraint is altered. + * + * *otherrelids is appended OIDs of relations containing affected triggers. + * + * Note that we must recurse even when the values are correct, in case + * indirect descendants have had their constraints altered locally. + * (This could be avoided if we forbade altering constraints in partitions + * but existing releases don't do that.) + */ +static bool +ATExecAlterConstrRecurse(Constraint *cmdcon, Relation conrel, Relation tgrel, + Relation rel, HeapTuple contuple, List **otherrelids, + LOCKMODE lockmode) +{ + Form_pg_constraint currcon; + Oid conoid; + Oid refrelid; + bool changed = false; + + currcon = (Form_pg_constraint) GETSTRUCT(contuple); + conoid = currcon->oid; + refrelid = currcon->confrelid; + + /* + * Update pg_constraint with the flags from cmdcon. + * + * If called to modify a constraint that's already in the desired state, + * silently do nothing. + */ + if (currcon->condeferrable != cmdcon->deferrable || + currcon->condeferred != cmdcon->initdeferred) + { + HeapTuple copyTuple; + Form_pg_constraint copy_con; + HeapTuple tgtuple; + ScanKeyData tgkey; + SysScanDesc tgscan; + + copyTuple = heap_copytuple(contuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->condeferrable = cmdcon->deferrable; + copy_con->condeferred = cmdcon->initdeferred; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, + conoid, 0); + + heap_freetuple(copyTuple); + changed = true; + + /* Make new constraint flags visible to others */ + CacheInvalidateRelcache(rel); + + /* + * Now we need to update the multiple entries in pg_trigger that + * implement the constraint. 
+ */ + ScanKeyInit(&tgkey, + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true, + NULL, 1, &tgkey); + while (HeapTupleIsValid(tgtuple = systable_getnext(tgscan))) + { + Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tgtuple); + Form_pg_trigger copy_tg; + HeapTuple copyTuple; + + /* + * Remember OIDs of other relation(s) involved in FK constraint. + * (Note: it's likely that we could skip forcing a relcache inval + * for other rels that don't have a trigger whose properties + * change, but let's be conservative.) + */ + if (tgform->tgrelid != RelationGetRelid(rel)) + *otherrelids = list_append_unique_oid(*otherrelids, + tgform->tgrelid); + + /* + * Update deferrability of RI_FKey_noaction_del, + * RI_FKey_noaction_upd, RI_FKey_check_ins and RI_FKey_check_upd + * triggers, but not others; see createForeignKeyActionTriggers + * and CreateFKCheckTrigger. + */ + if (tgform->tgfoid != F_RI_FKEY_NOACTION_DEL && + tgform->tgfoid != F_RI_FKEY_NOACTION_UPD && + tgform->tgfoid != F_RI_FKEY_CHECK_INS && + tgform->tgfoid != F_RI_FKEY_CHECK_UPD) + continue; + + copyTuple = heap_copytuple(tgtuple); + copy_tg = (Form_pg_trigger) GETSTRUCT(copyTuple); + + copy_tg->tgdeferrable = cmdcon->deferrable; + copy_tg->tginitdeferred = cmdcon->initdeferred; + CatalogTupleUpdate(tgrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0); + + heap_freetuple(copyTuple); + } + + systable_endscan(tgscan); + } + + /* + * If the table at either end of the constraint is partitioned, we need to + * recurse and handle every constraint that is a child of this one. + * + * (This assumes that the recurse flag is forcibly set for partitioned + * tables, and not set for legacy inheritance, though we don't check for + * that here.) 
+ */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + get_rel_relkind(refrelid) == RELKIND_PARTITIONED_TABLE) + { + ScanKeyData pkey; + SysScanDesc pscan; + HeapTuple childtup; + + ScanKeyInit(&pkey, + Anum_pg_constraint_conparentid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(conoid)); + + pscan = systable_beginscan(conrel, ConstraintParentIndexId, + true, NULL, 1, &pkey); + + while (HeapTupleIsValid(childtup = systable_getnext(pscan))) + { + Form_pg_constraint childcon = (Form_pg_constraint) GETSTRUCT(childtup); + Relation childrel; + + childrel = table_open(childcon->conrelid, lockmode); + ATExecAlterConstrRecurse(cmdcon, conrel, tgrel, childrel, childtup, + otherrelids, lockmode); + table_close(childrel, NoLock); + } + + systable_endscan(pscan); + } + + return changed; +} + +/* + * ALTER TABLE VALIDATE CONSTRAINT + * + * XXX The reason we handle recursion here rather than at Phase 1 is because + * there's no good way to skip recursing when handling foreign keys: there is + * no need to lock children in that case, yet we wouldn't be able to avoid + * doing so at that level. + * + * Return value is the address of the validated constraint. If the constraint + * was already validated, InvalidObjectAddress is returned. 
+ */ +static ObjectAddress +ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, + bool recurse, bool recursing, LOCKMODE lockmode) +{ + Relation conrel; + SysScanDesc scan; + ScanKeyData skey[3]; + HeapTuple tuple; + Form_pg_constraint con; + ObjectAddress address; + + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + /* + * Find and check the target constraint + */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, + true, NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(tuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of relation \"%s\" does not exist", + constrName, RelationGetRelationName(rel)))); + + con = (Form_pg_constraint) GETSTRUCT(tuple); + if (con->contype != CONSTRAINT_FOREIGN && + con->contype != CONSTRAINT_CHECK) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key or check constraint", + constrName, RelationGetRelationName(rel)))); + + if (!con->convalidated) + { + AlteredTableInfo *tab; + HeapTuple copyTuple; + Form_pg_constraint copy_con; + + if (con->contype == CONSTRAINT_FOREIGN) + { + NewConstraint *newcon; + Constraint *fkconstraint; + + /* Queue validation for phase 3 */ + fkconstraint = makeNode(Constraint); + /* for now this is all we need */ + fkconstraint->conname = constrName; + + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = constrName; + newcon->contype = CONSTR_FOREIGN; + newcon->refrelid = con->confrelid; + 
newcon->refindid = con->conindid; + newcon->conid = con->oid; + newcon->qual = (Node *) fkconstraint; + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + tab->constraints = lappend(tab->constraints, newcon); + + /* + * We disallow creating invalid foreign keys to or from + * partitioned tables, so ignoring the recursion bit is okay. + */ + } + else if (con->contype == CONSTRAINT_CHECK) + { + List *children = NIL; + ListCell *child; + NewConstraint *newcon; + bool isnull; + Datum val; + char *conbin; + + /* + * If we're recursing, the parent has already done this, so skip + * it. Also, if the constraint is a NO INHERIT constraint, we + * shouldn't try to look for it in the children. + */ + if (!recursing && !con->connoinherit) + children = find_all_inheritors(RelationGetRelid(rel), + lockmode, NULL); + + /* + * For CHECK constraints, we must ensure that we only mark the + * constraint as validated on the parent if it's already validated + * on the children. + * + * We recurse before validating on the parent, to reduce risk of + * deadlocks. + */ + foreach(child, children) + { + Oid childoid = lfirst_oid(child); + Relation childrel; + + if (childoid == RelationGetRelid(rel)) + continue; + + /* + * If we are told not to recurse, there had better not be any + * child tables, because we can't mark the constraint on the + * parent valid unless it is valid for all child tables. 
+ */ + if (!recurse) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be validated on child tables too"))); + + /* find_all_inheritors already got lock */ + childrel = table_open(childoid, NoLock); + + ATExecValidateConstraint(wqueue, childrel, constrName, false, + true, lockmode); + table_close(childrel, NoLock); + } + + /* Queue validation for phase 3 */ + newcon = (NewConstraint *) palloc0(sizeof(NewConstraint)); + newcon->name = constrName; + newcon->contype = CONSTR_CHECK; + newcon->refrelid = InvalidOid; + newcon->refindid = InvalidOid; + newcon->conid = con->oid; + + val = SysCacheGetAttr(CONSTROID, tuple, + Anum_pg_constraint_conbin, &isnull); + if (isnull) + elog(ERROR, "null conbin for constraint %u", con->oid); + + conbin = TextDatumGetCString(val); + newcon->qual = (Node *) stringToNode(conbin); + + /* Find or create work queue entry for this table */ + tab = ATGetQueueEntry(wqueue, rel); + tab->constraints = lappend(tab->constraints, newcon); + + /* + * Invalidate relcache so that others see the new validated + * constraint. + */ + CacheInvalidateRelcache(rel); + } + + /* + * Now update the catalog, while we have the door open. 
+ */ + copyTuple = heap_copytuple(tuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->convalidated = true; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0); + + heap_freetuple(copyTuple); + + ObjectAddressSet(address, ConstraintRelationId, con->oid); + } + else + address = InvalidObjectAddress; /* already validated */ + + systable_endscan(scan); + + table_close(conrel, RowExclusiveLock); + + return address; +} + + +/* + * transformColumnNameList - transform list of column names + * + * Lookup each name and return its attnum and, optionally, type OID + * + * Note: the name of this function suggests that it's general-purpose, + * but actually it's only used to look up names appearing in foreign-key + * clauses. The error messages would need work to use it in other cases, + * and perhaps the validity checks as well. + */ +static int +transformColumnNameList(Oid relId, List *colList, + int16 *attnums, Oid *atttypids) +{ + ListCell *l; + int attnum; + + attnum = 0; + foreach(l, colList) + { + char *attname = strVal(lfirst(l)); + HeapTuple atttuple; + Form_pg_attribute attform; + + atttuple = SearchSysCacheAttName(relId, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" referenced in foreign key constraint does not exist", + attname))); + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + if (attform->attnum < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("system columns cannot be used in foreign keys"))); + if (attnum >= INDEX_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d keys in a foreign key", + INDEX_MAX_KEYS))); + attnums[attnum] = attform->attnum; + if (atttypids != NULL) + atttypids[attnum] = attform->atttypid; + ReleaseSysCache(atttuple); + attnum++; + } + + return attnum; +} + +/* + * transformFkeyGetPrimaryKey - 
 *
 * Look up the names, attnums, and types of the primary key attributes
 * for the pkrel.  Also return the index OID and index opclasses of the
 * index supporting the primary key.
 *
 * All parameters except pkrel are output parameters.  Also, the function
 * return value is the number of attributes in the primary key.
 *
 * Used when the column list in the REFERENCES specification is omitted.
 */
static int
transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid,
						   List **attnamelist,
						   int16 *attnums, Oid *atttypids,
						   Oid *opclasses)
{
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	HeapTuple	indexTuple = NULL;
	Form_pg_index indexStruct = NULL;
	Datum		indclassDatum;
	bool		isnull;
	oidvector  *indclass;
	int			i;

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache until we find one marked primary key
	 * (hopefully there isn't more than one such).  Insist it's valid, too.
	 */
	*indexOid = InvalidOid;

	indexoidlist = RelationGetIndexList(pkrel);

	foreach(indexoidscan, indexoidlist)
	{
		Oid			indexoid = lfirst_oid(indexoidscan);

		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexoid);
		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);
		if (indexStruct->indisprimary && indexStruct->indisvalid)
		{
			/*
			 * Refuse to use a deferrable primary key.  This is per SQL spec,
			 * and there would be a lot of interesting semantic problems if we
			 * tried to allow it.
			 */
			if (!indexStruct->indimmediate)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("cannot use a deferrable primary key for referenced table \"%s\"",
								RelationGetRelationName(pkrel))));

			/* Found it; keep indexTuple/indexStruct for use below */
			*indexOid = indexoid;
			break;
		}
		ReleaseSysCache(indexTuple);
	}

	list_free(indexoidlist);

	/*
	 * Check that we found it
	 */
	if (!OidIsValid(*indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("there is no primary key for referenced table \"%s\"",
						RelationGetRelationName(pkrel))));

	/* Must get indclass the hard way */
	indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
									Anum_pg_index_indclass, &isnull);
	Assert(!isnull);
	indclass = (oidvector *) DatumGetPointer(indclassDatum);

	/*
	 * Now build the list of PK attributes from the indkey definition (we
	 * assume a primary key cannot have expressional elements)
	 */
	*attnamelist = NIL;
	for (i = 0; i < indexStruct->indnkeyatts; i++)
	{
		int			pkattno = indexStruct->indkey.values[i];

		attnums[i] = pkattno;
		atttypids[i] = attnumTypeId(pkrel, pkattno);
		opclasses[i] = indclass->values[i];
		*attnamelist = lappend(*attnamelist,
							   makeString(pstrdup(NameStr(*attnumAttName(pkrel, pkattno)))));
	}

	ReleaseSysCache(indexTuple);

	/* i is now the number of key attributes in the PK index */
	return i;
}

/*
 * transformFkeyCheckAttrs -
 *
 * Make sure that the attributes of a referenced table belong to a unique
 * (or primary key) constraint.  Return the OID of the index supporting
 * the constraint, as well as the opclasses associated with the index
 * columns.
 */
static Oid
transformFkeyCheckAttrs(Relation pkrel,
						int numattrs, int16 *attnums,
						Oid *opclasses) /* output parameter */
{
	Oid			indexoid = InvalidOid;
	bool		found = false;
	bool		found_deferrable = false;
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	int			i,
				j;

	/*
	 * Reject duplicate appearances of columns in the referenced-columns list.
	 * Such a case is forbidden by the SQL standard, and even if we thought it
	 * useful to allow it, there would be ambiguity about how to match the
	 * list to unique indexes (in particular, it'd be unclear which index
	 * opclass goes with which FK column).
	 */
	for (i = 0; i < numattrs; i++)
	{
		for (j = i + 1; j < numattrs; j++)
		{
			if (attnums[i] == attnums[j])
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_FOREIGN_KEY),
						 errmsg("foreign key referenced-columns list must not contain duplicates")));
		}
	}

	/*
	 * Get the list of index OIDs for the table from the relcache, and look up
	 * each one in the pg_index syscache, and match unique indexes to the list
	 * of attnums we are given.
	 */
	indexoidlist = RelationGetIndexList(pkrel);

	foreach(indexoidscan, indexoidlist)
	{
		HeapTuple	indexTuple;
		Form_pg_index indexStruct;

		indexoid = lfirst_oid(indexoidscan);
		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexoid);
		indexStruct = (Form_pg_index) GETSTRUCT(indexTuple);

		/*
		 * Must have the right number of columns; must be unique and not a
		 * partial index; forget it if there are any expressions, too. Invalid
		 * indexes are out as well.
		 */
		if (indexStruct->indnkeyatts == numattrs &&
			indexStruct->indisunique &&
			indexStruct->indisvalid &&
			heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) &&
			heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
		{
			Datum		indclassDatum;
			bool		isnull;
			oidvector  *indclass;

			/* Must get indclass the hard way */
			indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
											Anum_pg_index_indclass, &isnull);
			Assert(!isnull);
			indclass = (oidvector *) DatumGetPointer(indclassDatum);

			/*
			 * The given attnum list may match the index columns in any order.
			 * Check for a match, and extract the appropriate opclasses while
			 * we're at it.
			 *
			 * We know that attnums[] is duplicate-free per the test at the
			 * start of this function, and we checked above that the number of
			 * index columns agrees, so if we find a match for each attnums[]
			 * entry then we must have a one-to-one match in some order.
			 */
			for (i = 0; i < numattrs; i++)
			{
				found = false;
				for (j = 0; j < numattrs; j++)
				{
					if (attnums[i] == indexStruct->indkey.values[j])
					{
						opclasses[i] = indclass->values[j];
						found = true;
						break;
					}
				}
				if (!found)
					break;
			}

			/*
			 * Refuse to use a deferrable unique/primary key.  This is per SQL
			 * spec, and there would be a lot of interesting semantic problems
			 * if we tried to allow it.
			 */
			if (found && !indexStruct->indimmediate)
			{
				/*
				 * Remember that we found an otherwise matching index, so that
				 * we can generate a more appropriate error message.
				 */
				found_deferrable = true;
				found = false;
			}
		}
		ReleaseSysCache(indexTuple);
		if (found)
			break;
	}

	if (!found)
	{
		if (found_deferrable)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot use a deferrable unique constraint for referenced table \"%s\"",
							RelationGetRelationName(pkrel))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_FOREIGN_KEY),
					 errmsg("there is no unique constraint matching given keys for referenced table \"%s\"",
							RelationGetRelationName(pkrel))));
	}

	list_free(indexoidlist);

	return indexoid;
}

/*
 * findFkeyCast -
 *
 * Wrapper around find_coercion_pathway() for ATAddForeignKeyConstraint().
 * Caller has equal regard for binary coercibility and for an exact match.
+*/ +static CoercionPathType +findFkeyCast(Oid targetTypeId, Oid sourceTypeId, Oid *funcid) +{ + CoercionPathType ret; + + if (targetTypeId == sourceTypeId) + { + ret = COERCION_PATH_RELABELTYPE; + *funcid = InvalidOid; + } + else + { + ret = find_coercion_pathway(targetTypeId, sourceTypeId, + COERCION_IMPLICIT, funcid); + if (ret == COERCION_PATH_NONE) + /* A previously-relied-upon cast is now gone. */ + elog(ERROR, "could not find cast from %u to %u", + sourceTypeId, targetTypeId); + } + + return ret; +} + +/* + * Permissions checks on the referenced table for ADD FOREIGN KEY + * + * Note: we have already checked that the user owns the referencing table, + * else we'd have failed much earlier; no additional checks are needed for it. + */ +static void +checkFkeyPermissions(Relation rel, int16 *attnums, int natts) +{ + Oid roleid = GetUserId(); + AclResult aclresult; + int i; + + /* Okay if we have relation-level REFERENCES permission */ + aclresult = pg_class_aclcheck(RelationGetRelid(rel), roleid, + ACL_REFERENCES); + if (aclresult == ACLCHECK_OK) + return; + /* Else we must have REFERENCES on each column */ + for (i = 0; i < natts; i++) + { + aclresult = pg_attribute_aclcheck(RelationGetRelid(rel), attnums[i], + roleid, ACL_REFERENCES); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + } +} + +/* + * Scan the existing rows in a table to verify they meet a proposed FK + * constraint. + * + * Caller must have opened and locked both relations appropriately. 
 */
static void
validateForeignKeyConstraint(char *conname,
							 Relation rel,
							 Relation pkrel,
							 Oid pkindOid,
							 Oid constraintOid)
{
	TupleTableSlot *slot;
	TableScanDesc scan;
	Trigger		trig;
	Snapshot	snapshot;
	MemoryContext oldcxt;
	MemoryContext perTupCxt;

	ereport(DEBUG1,
			(errmsg_internal("validating foreign key constraint \"%s\"", conname)));

	/*
	 * Build a trigger call structure; we'll need it either way.
	 */
	MemSet(&trig, 0, sizeof(trig));
	trig.tgoid = InvalidOid;
	trig.tgname = conname;
	trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN;
	trig.tgisinternal = true;
	trig.tgconstrrelid = RelationGetRelid(pkrel);
	trig.tgconstrindid = pkindOid;
	trig.tgconstraint = constraintOid;
	trig.tgdeferrable = false;
	trig.tginitdeferred = false;
	/* we needn't fill in remaining fields */

	/*
	 * See if we can do it with a single LEFT JOIN query.  A false result
	 * indicates we must proceed with the fire-the-trigger method.
	 */
	if (RI_Initial_Check(&trig, rel, pkrel))
		return;

	/*
	 * Scan through each tuple, calling RI_FKey_check_ins (insert trigger) as
	 * if that tuple had just been inserted.  If any of those fail, it should
	 * ereport(ERROR) and that's that.
	 */
	snapshot = RegisterSnapshot(GetLatestSnapshot());
	slot = table_slot_create(rel, NULL);
	scan = table_beginscan(rel, snapshot, 0, NULL);

	/* Per-tuple context, reset each iteration, keeps trigger leakage bounded */
	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
									  "validateForeignKeyConstraint",
									  ALLOCSET_SMALL_SIZES);
	oldcxt = MemoryContextSwitchTo(perTupCxt);

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
	{
		LOCAL_FCINFO(fcinfo, 0);
		TriggerData trigdata = {0};

		CHECK_FOR_INTERRUPTS();

		/*
		 * Make a call to the trigger function
		 *
		 * No parameters are passed, but we do set a context
		 */
		MemSet(fcinfo, 0, SizeForFunctionCallInfo(0));

		/*
		 * We assume RI_FKey_check_ins won't look at flinfo...
		 */
		trigdata.type = T_TriggerData;
		trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
		trigdata.tg_relation = rel;
		trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, false, NULL);
		trigdata.tg_trigslot = slot;
		trigdata.tg_trigger = &trig;

		fcinfo->context = (Node *) &trigdata;

		/* Raises an error on any row violating the FK constraint */
		RI_FKey_check_ins(fcinfo);

		MemoryContextReset(perTupCxt);
	}

	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(perTupCxt);
	table_endscan(scan);
	UnregisterSnapshot(snapshot);
	ExecDropSingleTupleTableSlot(slot);
}

/*
 * CreateFKCheckTrigger
 *		Creates the insert (on_insert=true) or update "check" trigger that
 *		implements a given foreign key
 *
 * Returns the OID of the so created trigger.
 */
static Oid
CreateFKCheckTrigger(Oid myRelOid, Oid refRelOid, Constraint *fkconstraint,
					 Oid constraintOid, Oid indexOid, Oid parentTrigOid,
					 bool on_insert)
{
	ObjectAddress trigAddress;
	CreateTrigStmt *fk_trigger;

	/*
	 * Note: for a self-referential FK (referencing and referenced tables are
	 * the same), it is important that the ON UPDATE action fires before the
	 * CHECK action, since both triggers will fire on the same row during an
	 * UPDATE event; otherwise the CHECK trigger will be checking a non-final
	 * state of the row.  Triggers fire in name order, so we ensure this by
	 * using names like "RI_ConstraintTrigger_a_NNNN" for the action triggers
	 * and "RI_ConstraintTrigger_c_NNNN" for the check triggers.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_c";
	fk_trigger->relation = NULL;

	/* Either ON INSERT or ON UPDATE */
	if (on_insert)
	{
		fk_trigger->funcname = SystemFuncName("RI_FKey_check_ins");
		fk_trigger->events = TRIGGER_TYPE_INSERT;
	}
	else
	{
		fk_trigger->funcname = SystemFuncName("RI_FKey_check_upd");
		fk_trigger->events = TRIGGER_TYPE_UPDATE;
	}

	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->deferrable = fkconstraint->deferrable;
	fk_trigger->initdeferred = fkconstraint->initdeferred;
	fk_trigger->constrrel = NULL;

	trigAddress = CreateTrigger(fk_trigger, NULL, myRelOid, refRelOid,
								constraintOid, indexOid, InvalidOid,
								parentTrigOid, NULL, true, false);

	/* Make changes-so-far visible */
	CommandCounterIncrement();

	return trigAddress.objectId;
}

/*
 * createForeignKeyActionTriggers
 *		Create the referenced-side "action" triggers that implement a foreign
 *		key.
 *
 * Returns the OIDs of the so created triggers in *deleteTrigOid and
 * *updateTrigOid.
 */
static void
createForeignKeyActionTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
							   Oid constraintOid, Oid indexOid,
							   Oid parentDelTrigger, Oid parentUpdTrigger,
							   Oid *deleteTrigOid, Oid *updateTrigOid)
{
	CreateTrigStmt *fk_trigger;
	ObjectAddress trigAddress;

	/*
	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
	 * DELETE action on the referenced table.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_a";
	fk_trigger->relation = NULL;
	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->events = TRIGGER_TYPE_DELETE;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->constrrel = NULL;
	/* Only NO ACTION triggers may be deferred; see also the ON UPDATE case */
	switch (fkconstraint->fk_del_action)
	{
		case FKCONSTR_ACTION_NOACTION:
			fk_trigger->deferrable = fkconstraint->deferrable;
			fk_trigger->initdeferred = fkconstraint->initdeferred;
			fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_del");
			break;
		case FKCONSTR_ACTION_RESTRICT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_del");
			break;
		case FKCONSTR_ACTION_CASCADE:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_del");
			break;
		case FKCONSTR_ACTION_SETNULL:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_del");
			break;
		case FKCONSTR_ACTION_SETDEFAULT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_del");
			break;
		default:
			elog(ERROR, "unrecognized FK action type: %d",
				 (int) fkconstraint->fk_del_action);
			break;
	}

	trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
								RelationGetRelid(rel),
								constraintOid, indexOid, InvalidOid,
								parentDelTrigger, NULL, true, false);
	if (deleteTrigOid)
		*deleteTrigOid = trigAddress.objectId;

	/* Make changes-so-far visible */
	CommandCounterIncrement();

	/*
	 * Build and execute a CREATE CONSTRAINT TRIGGER statement for the ON
	 * UPDATE action on the referenced table.
	 */
	fk_trigger = makeNode(CreateTrigStmt);
	fk_trigger->replace = false;
	fk_trigger->isconstraint = true;
	fk_trigger->trigname = "RI_ConstraintTrigger_a";
	fk_trigger->relation = NULL;
	fk_trigger->args = NIL;
	fk_trigger->row = true;
	fk_trigger->timing = TRIGGER_TYPE_AFTER;
	fk_trigger->events = TRIGGER_TYPE_UPDATE;
	fk_trigger->columns = NIL;
	fk_trigger->whenClause = NULL;
	fk_trigger->transitionRels = NIL;
	fk_trigger->constrrel = NULL;
	switch (fkconstraint->fk_upd_action)
	{
		case FKCONSTR_ACTION_NOACTION:
			fk_trigger->deferrable = fkconstraint->deferrable;
			fk_trigger->initdeferred = fkconstraint->initdeferred;
			fk_trigger->funcname = SystemFuncName("RI_FKey_noaction_upd");
			break;
		case FKCONSTR_ACTION_RESTRICT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_restrict_upd");
			break;
		case FKCONSTR_ACTION_CASCADE:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_cascade_upd");
			break;
		case FKCONSTR_ACTION_SETNULL:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setnull_upd");
			break;
		case FKCONSTR_ACTION_SETDEFAULT:
			fk_trigger->deferrable = false;
			fk_trigger->initdeferred = false;
			fk_trigger->funcname = SystemFuncName("RI_FKey_setdefault_upd");
			break;
		default:
			elog(ERROR, "unrecognized FK action type: %d",
				 (int) fkconstraint->fk_upd_action);
			break;
	}

	trigAddress = CreateTrigger(fk_trigger, NULL, refRelOid,
								RelationGetRelid(rel),
								constraintOid, indexOid, InvalidOid,
								parentUpdTrigger, NULL, true, false);
	if (updateTrigOid)
		*updateTrigOid = trigAddress.objectId;
}

/*
 * createForeignKeyCheckTriggers
 *		Create the referencing-side "check" triggers that implement a foreign
 *		key.
 *
 * Returns the OIDs of the so created triggers in *insertTrigOid and
 * *updateTrigOid.
 */
static void
createForeignKeyCheckTriggers(Oid myRelOid, Oid refRelOid,
							  Constraint *fkconstraint, Oid constraintOid,
							  Oid indexOid,
							  Oid parentInsTrigger, Oid parentUpdTrigger,
							  Oid *insertTrigOid, Oid *updateTrigOid)
{
	/* One AFTER INSERT check trigger and one AFTER UPDATE check trigger */
	*insertTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
										  constraintOid, indexOid,
										  parentInsTrigger, true);
	*updateTrigOid = CreateFKCheckTrigger(myRelOid, refRelOid, fkconstraint,
										  constraintOid, indexOid,
										  parentUpdTrigger, false);
}

/*
 * ALTER TABLE DROP CONSTRAINT
 *
 * Like DROP COLUMN, we can't use the normal ALTER TABLE recursion mechanism.
 */
static void
ATExecDropConstraint(Relation rel, const char *constrName,
					 DropBehavior behavior,
					 bool recurse, bool recursing,
					 bool missing_ok, LOCKMODE lockmode)
{
	List	   *children;
	ListCell   *child;
	Relation	conrel;
	Form_pg_constraint con;
	SysScanDesc scan;
	ScanKeyData skey[3];
	HeapTuple	tuple;
	bool		found = false;
	bool		is_no_inherit_constraint = false;
	char		contype;

	/* At top level, permission check was done in ATPrepCmd, else do it */
	if (recursing)
		ATSimplePermissions(AT_DropConstraint, rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	conrel = table_open(ConstraintRelationId, RowExclusiveLock);

	/*
	 * Find and drop the target constraint
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	ScanKeyInit(&skey[1],
				Anum_pg_constraint_contypid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(InvalidOid));
	ScanKeyInit(&skey[2],
				Anum_pg_constraint_conname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(constrName));
	scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
							  true, NULL, 3, skey);

	/* There can be at most one matching row */
	if (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		ObjectAddress conobj;

		con = (Form_pg_constraint) GETSTRUCT(tuple);

		/* Don't drop inherited constraints */
		if (con->coninhcount > 0 && !recursing)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot drop inherited constraint \"%s\" of relation \"%s\"",
							constrName, RelationGetRelationName(rel))));

		is_no_inherit_constraint = con->connoinherit;
		contype = con->contype;

		/*
		 * If it's a foreign-key constraint, we'd better lock the referenced
		 * table and check that that's not in use, just as we've already done
		 * for the constrained table (else we might, eg, be dropping a trigger
		 * that has unfired events).  But we can/must skip that in the
		 * self-referential case.
		 */
		if (contype == CONSTRAINT_FOREIGN &&
			con->confrelid != RelationGetRelid(rel))
		{
			Relation	frel;

			/* Must match lock taken by RemoveTriggerById: */
			frel = table_open(con->confrelid, AccessExclusiveLock);
			CheckTableNotInUse(frel, "ALTER TABLE");
			table_close(frel, NoLock);
		}

		/*
		 * Perform the actual constraint deletion
		 */
		conobj.classId = ConstraintRelationId;
		conobj.objectId = con->oid;
		conobj.objectSubId = 0;

		performDeletion(&conobj, behavior, 0);

		found = true;
	}

	systable_endscan(scan);

	if (!found)
	{
		if (!missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
							constrName, RelationGetRelationName(rel))));
		}
		else
		{
			ereport(NOTICE,
					(errmsg("constraint \"%s\" of relation \"%s\" does not exist, skipping",
							constrName, RelationGetRelationName(rel))));
			table_close(conrel, RowExclusiveLock);
			return;
		}
	}

	/*
	 * For partitioned tables, non-CHECK inherited constraints are dropped via
	 * the dependency mechanism, so we're done here.
	 */
	if (contype != CONSTRAINT_CHECK &&
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		table_close(conrel, RowExclusiveLock);
		return;
	}

	/*
	 * Propagate to children as appropriate.  Unlike most other ALTER
	 * routines, we have to do this one level of recursion at a time; we can't
	 * use find_all_inheritors to do it in one pass.
	 */
	if (!is_no_inherit_constraint)
		children = find_inheritance_children(RelationGetRelid(rel), lockmode);
	else
		children = NIL;

	/*
	 * For a partitioned table, if partitions exist and we are told not to
	 * recurse, it's a user error.  It doesn't make sense to have a constraint
	 * be defined only on the parent, especially if it's a partitioned table.
	 */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
		children != NIL && !recurse)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("cannot remove constraint from only the partitioned table when partitions exist"),
				 errhint("Do not specify the ONLY keyword.")));

	foreach(child, children)
	{
		Oid			childrelid = lfirst_oid(child);
		Relation	childrel;
		HeapTuple	copy_tuple;

		/* find_inheritance_children already got lock */
		childrel = table_open(childrelid, NoLock);
		CheckTableNotInUse(childrel, "ALTER TABLE");

		ScanKeyInit(&skey[0],
					Anum_pg_constraint_conrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(childrelid));
		ScanKeyInit(&skey[1],
					Anum_pg_constraint_contypid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(InvalidOid));
		ScanKeyInit(&skey[2],
					Anum_pg_constraint_conname,
					BTEqualStrategyNumber, F_NAMEEQ,
					CStringGetDatum(constrName));
		scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId,
								  true, NULL, 3, skey);

		/* There can be at most one matching row */
		if (!HeapTupleIsValid(tuple = systable_getnext(scan)))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("constraint \"%s\" of relation \"%s\" does not exist",
							constrName,
							RelationGetRelationName(childrel))));

		copy_tuple = heap_copytuple(tuple);

		systable_endscan(scan);

		con = (Form_pg_constraint) GETSTRUCT(copy_tuple);

		/* Right now only CHECK constraints can be inherited */
		if
(con->contype != CONSTRAINT_CHECK) + elog(ERROR, "inherited constraint is not a CHECK constraint"); + + if (con->coninhcount <= 0) /* shouldn't happen */ + elog(ERROR, "relation %u has non-inherited constraint \"%s\"", + childrelid, constrName); + + if (recurse) + { + /* + * If the child constraint has other definition sources, just + * decrement its inheritance count; if not, recurse to delete it. + */ + if (con->coninhcount == 1 && !con->conislocal) + { + /* Time to delete this child constraint, too */ + ATExecDropConstraint(childrel, constrName, behavior, + true, true, + false, lockmode); + } + else + { + /* Child constraint must survive my deletion */ + con->coninhcount--; + CatalogTupleUpdate(conrel, ©_tuple->t_self, copy_tuple); + + /* Make update visible */ + CommandCounterIncrement(); + } + } + else + { + /* + * If we were told to drop ONLY in this table (no recursion), we + * need to mark the inheritors' constraints as locally defined + * rather than inherited. + */ + con->coninhcount--; + con->conislocal = true; + + CatalogTupleUpdate(conrel, ©_tuple->t_self, copy_tuple); + + /* Make update visible */ + CommandCounterIncrement(); + } + + heap_freetuple(copy_tuple); + + table_close(childrel, NoLock); + } + + table_close(conrel, RowExclusiveLock); +} + +/* + * ALTER COLUMN TYPE + * + * Unlike other subcommand types, we do parse transformation for ALTER COLUMN + * TYPE during phase 1 --- the AlterTableCmd passed in here is already + * transformed (and must be, because we rely on some transformed fields). + * + * The point of this is that the execution of all ALTER COLUMN TYPEs for a + * table will be done "in parallel" during phase 3, so all the USING + * expressions should be parsed assuming the original column types. Also, + * this allows a USING expression to refer to a field that will be dropped. 
 *
 * To make this work safely, AT_PASS_DROP then AT_PASS_ALTER_TYPE must be
 * the first two execution steps in phase 2; they must not see the effects
 * of any other subcommand types, since the USING expressions are parsed
 * against the unmodified table's state.
 */
static void
ATPrepAlterColumnType(List **wqueue,
                      AlteredTableInfo *tab, Relation rel,
                      bool recurse, bool recursing,
                      AlterTableCmd *cmd, LOCKMODE lockmode,
                      AlterTableUtilityContext *context)
{
    char       *colName = cmd->name;
    ColumnDef  *def = (ColumnDef *) cmd->def;
    TypeName   *typeName = def->typeName;
    Node       *transform = def->cooked_default;   /* USING expr, if any */
    HeapTuple   tuple;
    Form_pg_attribute attTup;
    AttrNumber  attnum;
    Oid         targettype;
    int32       targettypmod;
    Oid         targetcollid;
    NewColumnValue *newval;
    ParseState *pstate = make_parsestate(NULL);
    AclResult   aclresult;
    bool        is_expr;

    /* Typed tables take their row shape from a type; can't alter it here */
    if (rel->rd_rel->reloftype && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot alter column type of typed table")));

    /* lookup the attribute so we can check inheritance status */
    tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(tuple))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(tuple);
    attnum = attTup->attnum;

    /* Can't alter a system attribute */
    if (attnum <= 0)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter system column \"%s\"",
                        colName)));

    /*
     * Don't alter inherited columns.  At outer level, there had better not be
     * any inherited definition; when recursing, we assume this was checked at
     * the parent level (see below).
     */
    if (attTup->attinhcount > 0 && !recursing)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot alter inherited column \"%s\"",
                        colName)));

    /* Don't alter columns used in the partition key */
    if (has_partition_attrs(rel,
                            bms_make_singleton(attnum - FirstLowInvalidHeapAttributeNumber),
                            &is_expr))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("cannot alter column \"%s\" because it is part of the partition key of relation \"%s\"",
                        colName, RelationGetRelationName(rel))));

    /* Look up the target type */
    typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);

    /* Caller must have USAGE privilege on the target type */
    aclresult = pg_type_aclcheck(targettype, GetUserId(), ACL_USAGE);
    if (aclresult != ACLCHECK_OK)
        aclcheck_error_type(aclresult, targettype);

    /* And the collation */
    targetcollid = GetColumnDefCollation(NULL, def, targettype);

    /* make sure datatype is legal for a column */
    CheckAttributeType(colName, targettype, targetcollid,
                       list_make1_oid(rel->rd_rel->reltype),
                       0);

    if (tab->relkind == RELKIND_RELATION ||
        tab->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /*
         * Set up an expression to transform the old data value to the new
         * type.  If a USING option was given, use the expression as
         * transformed by transformAlterTableStmt, else just take the old
         * value and try to coerce it.  We do this first so that type
         * incompatibility can be detected before we waste effort, and because
         * we need the expression to be parsed against the original table row
         * type.
         */
        if (!transform)
        {
            transform = (Node *) makeVar(1, attnum,
                                         attTup->atttypid, attTup->atttypmod,
                                         attTup->attcollation,
                                         0);
        }

        transform = coerce_to_target_type(pstate,
                                          transform, exprType(transform),
                                          targettype, targettypmod,
                                          COERCION_ASSIGNMENT,
                                          COERCE_IMPLICIT_CAST,
                                          -1);
        if (transform == NULL)
        {
            /* error text depends on whether USING was specified or not */
            if (def->cooked_default != NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("result of USING clause for column \"%s\""
                                " cannot be cast automatically to type %s",
                                colName, format_type_be(targettype)),
                         errhint("You might need to add an explicit cast.")));
            else
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype)),
                /* translator: USING is SQL, don't translate it */
                         errhint("You might need to specify \"USING %s::%s\".",
                                 quote_identifier(colName),
                                 format_type_with_typemod(targettype,
                                                          targettypmod))));
        }

        /* Fix collations after all else */
        assign_expr_collations(pstate, transform);

        /* Plan the expr now so we can accurately assess the need to rewrite. */
        transform = (Node *) expression_planner((Expr *) transform);

        /*
         * Add a work queue item to make ATRewriteTable update the column
         * contents.
         */
        newval = (NewColumnValue *) palloc0(sizeof(NewColumnValue));
        newval->attnum = attnum;
        newval->expr = (Expr *) transform;
        newval->is_generated = false;

        tab->newvals = lappend(tab->newvals, newval);
        if (ATColumnChangeRequiresRewrite(transform, attnum))
            tab->rewrite |= AT_REWRITE_COLUMN_REWRITE;
    }
    else if (transform)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
                        RelationGetRelationName(rel))));

    if (!RELKIND_HAS_STORAGE(tab->relkind))
    {
        /*
         * For relations without storage, do this check now.  Regular tables
         * will check it later when the table is being rewritten.
         */
        find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
    }

    ReleaseSysCache(tuple);

    /*
     * Recurse manually by queueing a new command for each child, if
     * necessary.  We cannot apply ATSimpleRecursion here because we need to
     * remap attribute numbers in the USING expression, if any.
     *
     * If we are told not to recurse, there had better not be any child
     * tables; else the alter would put them out of step.
     */
    if (recurse)
    {
        Oid         relid = RelationGetRelid(rel);
        List       *child_oids,
                   *child_numparents;
        ListCell   *lo,
                   *li;

        child_oids = find_all_inheritors(relid, lockmode,
                                         &child_numparents);

        /*
         * find_all_inheritors does the recursive search of the inheritance
         * hierarchy, so all we have to do is process all of the relids in the
         * list that it returns.
         */
        forboth(lo, child_oids, li, child_numparents)
        {
            Oid         childrelid = lfirst_oid(lo);
            int         numparents = lfirst_int(li);
            Relation    childrel;
            HeapTuple   childtuple;
            Form_pg_attribute childattTup;

            /* the parent itself is in the list too; skip it */
            if (childrelid == relid)
                continue;

            /* find_all_inheritors already got lock */
            childrel = relation_open(childrelid, NoLock);
            CheckTableNotInUse(childrel, "ALTER TABLE");

            /*
             * Verify that the child doesn't have any inherited definitions of
             * this column that came from outside this inheritance hierarchy.
             * (renameatt makes a similar test, though in a different way
             * because of its different recursion mechanism.)
             */
            childtuple = SearchSysCacheAttName(RelationGetRelid(childrel),
                                               colName);
            if (!HeapTupleIsValid(childtuple))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" does not exist",
                                colName, RelationGetRelationName(childrel))));
            childattTup = (Form_pg_attribute) GETSTRUCT(childtuple);

            if (childattTup->attinhcount > numparents)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                         errmsg("cannot alter inherited column \"%s\" of relation \"%s\"",
                                colName, RelationGetRelationName(childrel))));

            ReleaseSysCache(childtuple);

            /*
             * Remap the attribute numbers.  If no USING expression was
             * specified, there is no need for this step.
             */
            if (def->cooked_default)
            {
                AttrMap    *attmap;
                bool        found_whole_row;

                /* create a copy to scribble on */
                cmd = copyObject(cmd);

                attmap = build_attrmap_by_name(RelationGetDescr(childrel),
                                               RelationGetDescr(rel));
                ((ColumnDef *) cmd->def)->cooked_default =
                    map_variable_attnos(def->cooked_default,
                                        1, 0,
                                        attmap,
                                        InvalidOid, &found_whole_row);
                if (found_whole_row)
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("cannot convert whole-row table reference"),
                             errdetail("USING expression contains a whole-row table reference.")));
                pfree(attmap);
            }
            ATPrepCmd(wqueue, childrel, cmd, false, true, lockmode, context);
            relation_close(childrel, NoLock);
        }
    }
    else if (!recursing &&
             find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("type of inherited column \"%s\" must be changed in child tables too",
                        colName)));

    /* Composite types propagate the change to dependent typed tables */
    if (tab->relkind == RELKIND_COMPOSITE_TYPE)
        ATTypedTableRecursion(wqueue, rel, cmd, lockmode, context);
}

/*
 * When the data type of a column is changed, a rewrite might not be required
 * if the new type is sufficiently identical to the old one, and the USING
 * clause isn't trying to insert some other
value. It's safe to skip the + * rewrite in these cases: + * + * - the old type is binary coercible to the new type + * - the new type is an unconstrained domain over the old type + * - {NEW,OLD} or {OLD,NEW} is {timestamptz,timestamp} and the timezone is UTC + * + * In the case of a constrained domain, we could get by with scanning the + * table and checking the constraint rather than actually rewriting it, but we + * don't currently try to do that. + */ +static bool +ATColumnChangeRequiresRewrite(Node *expr, AttrNumber varattno) +{ + Assert(expr != NULL); + + for (;;) + { + /* only one varno, so no need to check that */ + if (IsA(expr, Var) && ((Var *) expr)->varattno == varattno) + return false; + else if (IsA(expr, RelabelType)) + expr = (Node *) ((RelabelType *) expr)->arg; + else if (IsA(expr, CoerceToDomain)) + { + CoerceToDomain *d = (CoerceToDomain *) expr; + + if (DomainHasConstraints(d->resulttype)) + return true; + expr = (Node *) d->arg; + } + else if (IsA(expr, FuncExpr)) + { + FuncExpr *f = (FuncExpr *) expr; + + switch (f->funcid) + { + case F_TIMESTAMPTZ_TIMESTAMP: + case F_TIMESTAMP_TIMESTAMPTZ: + if (TimestampTimestampTzRequiresRewrite()) + return true; + else + expr = linitial(f->args); + break; + default: + return true; + } + } + else + return true; + } +} + +/* + * ALTER COLUMN .. SET DATA TYPE + * + * Return the address of the modified column. 
 */
static ObjectAddress
ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
                      AlterTableCmd *cmd, LOCKMODE lockmode)
{
    char       *colName = cmd->name;
    ColumnDef  *def = (ColumnDef *) cmd->def;
    TypeName   *typeName = def->typeName;
    HeapTuple   heapTup;
    Form_pg_attribute attTup,
                attOldTup;
    AttrNumber  attnum;
    HeapTuple   typeTuple;
    Form_pg_type tform;
    Oid         targettype;
    int32       targettypmod;
    Oid         targetcollid;
    Node       *defaultexpr;
    Relation    attrelation;
    Relation    depRel;
    ScanKeyData key[3];
    SysScanDesc scan;
    HeapTuple   depTup;
    ObjectAddress address;

    /*
     * Clear all the missing values if we're rewriting the table, since this
     * renders them pointless.
     */
    if (tab->rewrite)
    {
        Relation    newrel;

        newrel = table_open(RelationGetRelid(rel), NoLock);
        RelationClearMissing(newrel);
        relation_close(newrel, NoLock);
        /* make sure we don't conflict with later attribute modifications */
        CommandCounterIncrement();
    }

    attrelation = table_open(AttributeRelationId, RowExclusiveLock);

    /* Look up the target column */
    heapTup = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName);
    if (!HeapTupleIsValid(heapTup)) /* shouldn't happen */
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_COLUMN),
                 errmsg("column \"%s\" of relation \"%s\" does not exist",
                        colName, RelationGetRelationName(rel))));
    attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
    attnum = attTup->attnum;
    attOldTup = TupleDescAttr(tab->oldDesc, attnum - 1);

    /* Check for multiple ALTER TYPE on same column --- can't cope */
    if (attTup->atttypid != attOldTup->atttypid ||
        attTup->atttypmod != attOldTup->atttypmod)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot alter type of column \"%s\" twice",
                        colName)));

    /* Look up the target type (should not fail, since prep found it) */
    typeTuple = typenameType(NULL, typeName, &targettypmod);
    tform = (Form_pg_type) GETSTRUCT(typeTuple);
    targettype = tform->oid;
    /* And the collation */
    targetcollid = GetColumnDefCollation(NULL, def, targettype);

    /*
     * If there is a default expression for the column, get it and ensure we
     * can coerce it to the new datatype.  (We must do this before changing
     * the column type, because build_column_default itself will try to
     * coerce, and will not issue the error message we want if it fails.)
     *
     * We remove any implicit coercion steps at the top level of the old
     * default expression; this has been agreed to satisfy the principle of
     * least surprise.  (The conversion to the new column type should act like
     * it started from what the user sees as the stored expression, and the
     * implicit coercions aren't going to be shown.)
     */
    if (attTup->atthasdef)
    {
        defaultexpr = build_column_default(rel, attnum);
        Assert(defaultexpr);
        defaultexpr = strip_implicit_coercions(defaultexpr);
        defaultexpr = coerce_to_target_type(NULL,   /* no UNKNOWN params */
                                            defaultexpr, exprType(defaultexpr),
                                            targettype, targettypmod,
                                            COERCION_ASSIGNMENT,
                                            COERCE_IMPLICIT_CAST,
                                            -1);
        if (defaultexpr == NULL)
        {
            if (attTup->attgenerated)
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("generation expression for column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype))));
            else
                ereport(ERROR,
                        (errcode(ERRCODE_DATATYPE_MISMATCH),
                         errmsg("default for column \"%s\" cannot be cast automatically to type %s",
                                colName, format_type_be(targettype))));
        }
    }
    else
        defaultexpr = NULL;

    /*
     * Find everything that depends on the column (constraints, indexes, etc),
     * and record enough information to let us recreate the objects.
     *
     * The actual recreation does not happen here, but only after we have
     * performed all the individual ALTER TYPE operations.  We have to save
     * the info before executing ALTER TYPE, though, else the deparser will
     * get confused.
     */
    depRel = table_open(DependRelationId, RowExclusiveLock);

    /* scan pg_depend for entries that reference this column */
    ScanKeyInit(&key[0],
                Anum_pg_depend_refclassid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_refobjid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationGetRelid(rel)));
    ScanKeyInit(&key[2],
                Anum_pg_depend_refobjsubid,
                BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum((int32) attnum));

    scan = systable_beginscan(depRel, DependReferenceIndexId, true,
                              NULL, 3, key);

    while (HeapTupleIsValid(depTup = systable_getnext(scan)))
    {
        Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress foundObject;

        foundObject.classId = foundDep->classid;
        foundObject.objectId = foundDep->objid;
        foundObject.objectSubId = foundDep->objsubid;

        switch (getObjectClass(&foundObject))
        {
            case OCLASS_CLASS:
                {
                    char        relKind = get_rel_relkind(foundObject.objectId);

                    if (relKind == RELKIND_INDEX ||
                        relKind == RELKIND_PARTITIONED_INDEX)
                    {
                        Assert(foundObject.objectSubId == 0);
                        RememberIndexForRebuilding(foundObject.objectId, tab);
                    }
                    else if (relKind == RELKIND_SEQUENCE)
                    {
                        /*
                         * This must be a SERIAL column's sequence.  We need
                         * not do anything to it.
                         */
                        Assert(foundObject.objectSubId == 0);
                    }
                    else
                    {
                        /* Not expecting any other direct dependencies... */
                        elog(ERROR, "unexpected object depending on column: %s",
                             getObjectDescription(&foundObject, false));
                    }
                    break;
                }

            case OCLASS_CONSTRAINT:
                Assert(foundObject.objectSubId == 0);
                RememberConstraintForRebuilding(foundObject.objectId, tab);
                break;

            case OCLASS_REWRITE:
                /* XXX someday see if we can cope with revising views */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used by a view or rule"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_TRIGGER:

                /*
                 * A trigger can depend on a column because the column is
                 * specified as an update target, or because the column is
                 * used in the trigger's WHEN condition.  The first case would
                 * not require any extra work, but the second case would
                 * require updating the WHEN expression, which will take a
                 * significant amount of new code.  Since we can't easily tell
                 * which case applies, we punt for both.  FIXME someday.
                 */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used in a trigger definition"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_POLICY:

                /*
                 * A policy can depend on a column because the column is
                 * specified in the policy's USING or WITH CHECK qual
                 * expressions.  It might be possible to rewrite and recheck
                 * the policy expression, but punt for now.  It's certainly
                 * easy enough to remove and recreate the policy; still, FIXME
                 * someday.
                 */
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot alter type of a column used in a policy definition"),
                         errdetail("%s depends on column \"%s\"",
                                   getObjectDescription(&foundObject, false),
                                   colName)));
                break;

            case OCLASS_DEFAULT:
                {
                    ObjectAddress col = GetAttrDefaultColumnAddress(foundObject.objectId);

                    if (col.objectId == RelationGetRelid(rel) &&
                        col.objectSubId == attnum)
                    {
                        /*
                         * Ignore the column's own default expression, which
                         * we will deal with below.
                         */
                        Assert(defaultexpr);
                    }
                    else
                    {
                        /*
                         * This must be a reference from the expression of a
                         * generated column elsewhere in the same table.
                         * Changing the type of a column that is used by a
                         * generated column is not allowed by SQL standard, so
                         * just punt for now.  It might be doable with some
                         * thinking and effort.
                         */
                        ereport(ERROR,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("cannot alter type of a column used by a generated column"),
                                 errdetail("Column \"%s\" is used by generated column \"%s\".",
                                           colName,
                                           get_attname(col.objectId,
                                                       col.objectSubId,
                                                       false))));
                    }
                    break;
                }

            case OCLASS_STATISTIC_EXT:

                /*
                 * Give the extended-stats machinery a chance to fix anything
                 * that this column type change would break.
                 */
                RememberStatisticsForRebuilding(foundObject.objectId, tab);
                break;

            case OCLASS_PROC:
            case OCLASS_TYPE:
            case OCLASS_CAST:
            case OCLASS_COLLATION:
            case OCLASS_CONVERSION:
            case OCLASS_LANGUAGE:
            case OCLASS_LARGEOBJECT:
            case OCLASS_OPERATOR:
            case OCLASS_OPCLASS:
            case OCLASS_OPFAMILY:
            case OCLASS_AM:
            case OCLASS_AMOP:
            case OCLASS_AMPROC:
            case OCLASS_SCHEMA:
            case OCLASS_TSPARSER:
            case OCLASS_TSDICT:
            case OCLASS_TSTEMPLATE:
            case OCLASS_TSCONFIG:
            case OCLASS_ROLE:
            case OCLASS_DATABASE:
            case OCLASS_TBLSPACE:
            case OCLASS_FDW:
            case OCLASS_FOREIGN_SERVER:
            case OCLASS_USER_MAPPING:
            case OCLASS_DEFACL:
            case OCLASS_EXTENSION:
            case OCLASS_EVENT_TRIGGER:
            case OCLASS_PARAMETER_ACL:
            case OCLASS_PUBLICATION:
            case OCLASS_PUBLICATION_NAMESPACE:
            case OCLASS_PUBLICATION_REL:
            case OCLASS_SUBSCRIPTION:
            case OCLASS_TRANSFORM:

                /*
                 * We don't expect any of these sorts of objects to depend on
                 * a column.
                 */
                elog(ERROR, "unexpected object depending on column: %s",
                     getObjectDescription(&foundObject, false));
                break;

                /*
                 * There's intentionally no default: case here; we want the
                 * compiler to warn if a new OCLASS hasn't been handled above.
                 */
        }
    }

    systable_endscan(scan);

    /*
     * Now scan for dependencies of this column on other things.  The only
     * things we should find are the dependency on the column datatype and
     * possibly a collation dependency.  Those can be removed.
     */
    ScanKeyInit(&key[0],
                Anum_pg_depend_classid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationRelationId));
    ScanKeyInit(&key[1],
                Anum_pg_depend_objid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationGetRelid(rel)));
    ScanKeyInit(&key[2],
                Anum_pg_depend_objsubid,
                BTEqualStrategyNumber, F_INT4EQ,
                Int32GetDatum((int32) attnum));

    scan = systable_beginscan(depRel, DependDependerIndexId, true,
                              NULL, 3, key);

    while (HeapTupleIsValid(depTup = systable_getnext(scan)))
    {
        Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(depTup);
        ObjectAddress foundObject;

        foundObject.classId = foundDep->refclassid;
        foundObject.objectId = foundDep->refobjid;
        foundObject.objectSubId = foundDep->refobjsubid;

        if (foundDep->deptype != DEPENDENCY_NORMAL)
            elog(ERROR, "found unexpected dependency type '%c'",
                 foundDep->deptype);
        if (!(foundDep->refclassid == TypeRelationId &&
              foundDep->refobjid == attTup->atttypid) &&
            !(foundDep->refclassid == CollationRelationId &&
              foundDep->refobjid == attTup->attcollation))
            elog(ERROR, "found unexpected dependency for column: %s",
                 getObjectDescription(&foundObject, false));

        CatalogTupleDelete(depRel, &depTup->t_self);
    }

    systable_endscan(scan);

    table_close(depRel, RowExclusiveLock);

    /*
     * Here we go --- change the recorded column type and collation.  (Note
     * heapTup is a copy of the syscache entry, so okay to scribble on.)  First
     * fix up the missing value if any.
     */
    if (attTup->atthasmissing)
    {
        Datum       missingval;
        bool        missingNull;

        /* if rewrite is true the missing value should already be cleared */
        Assert(tab->rewrite == 0);

        /* Get the missing value datum */
        missingval = heap_getattr(heapTup,
                                  Anum_pg_attribute_attmissingval,
                                  attrelation->rd_att,
                                  &missingNull);

        /* if it's a null array there is nothing to do */

        if (!missingNull)
        {
            /*
             * Get the datum out of the array and repack it in a new array
             * built with the new type data.  We assume that since the table
             * doesn't need rewriting, the actual Datum doesn't need to be
             * changed, only the array metadata.
             */

            int         one = 1;
            bool        isNull;
            Datum       valuesAtt[Natts_pg_attribute];
            bool        nullsAtt[Natts_pg_attribute];
            bool        replacesAtt[Natts_pg_attribute];
            HeapTuple   newTup;

            MemSet(valuesAtt, 0, sizeof(valuesAtt));
            MemSet(nullsAtt, false, sizeof(nullsAtt));
            MemSet(replacesAtt, false, sizeof(replacesAtt));

            missingval = array_get_element(missingval,
                                           1,
                                           &one,
                                           0,
                                           attTup->attlen,
                                           attTup->attbyval,
                                           attTup->attalign,
                                           &isNull);
            missingval = PointerGetDatum(construct_array(&missingval,
                                                         1,
                                                         targettype,
                                                         tform->typlen,
                                                         tform->typbyval,
                                                         tform->typalign));

            valuesAtt[Anum_pg_attribute_attmissingval - 1] = missingval;
            replacesAtt[Anum_pg_attribute_attmissingval - 1] = true;
            nullsAtt[Anum_pg_attribute_attmissingval - 1] = false;

            newTup = heap_modify_tuple(heapTup, RelationGetDescr(attrelation),
                                       valuesAtt, nullsAtt, replacesAtt);
            heap_freetuple(heapTup);
            heapTup = newTup;
            attTup = (Form_pg_attribute) GETSTRUCT(heapTup);
        }
    }

    /* Apply the new type's physical properties to the pg_attribute row */
    attTup->atttypid = targettype;
    attTup->atttypmod = targettypmod;
    attTup->attcollation = targetcollid;
    attTup->attndims = list_length(typeName->arrayBounds);
    attTup->attlen = tform->typlen;
    attTup->attbyval = tform->typbyval;
    attTup->attalign = tform->typalign;
    attTup->attstorage = tform->typstorage;
    attTup->attcompression = InvalidCompressionMethod;

    ReleaseSysCache(typeTuple);

    CatalogTupleUpdate(attrelation, &heapTup->t_self, heapTup);

    table_close(attrelation, RowExclusiveLock);

    /* Install dependencies on new datatype and collation */
    add_column_datatype_dependency(RelationGetRelid(rel), attnum, targettype);
    add_column_collation_dependency(RelationGetRelid(rel), attnum, targetcollid);

    /*
     * Drop any pg_statistic entry for the column, since it's now wrong type
     */
    RemoveStatistics(RelationGetRelid(rel), attnum);

    InvokeObjectPostAlterHook(RelationRelationId,
                              RelationGetRelid(rel), attnum);

    /*
     * Update the default, if present, by brute force --- remove and re-add
     * the default.  Probably unsafe to take shortcuts, since the new version
     * may well have additional dependencies.  (It's okay to do this now,
     * rather than after other ALTER TYPE commands, since the default won't
     * depend on other column types.)
     */
    if (defaultexpr)
    {
        /*
         * If it's a GENERATED default, drop its dependency records, in
         * particular its INTERNAL dependency on the column, which would
         * otherwise cause dependency.c to refuse to perform the deletion.
         */
        if (attTup->attgenerated)
        {
            Oid         attrdefoid = GetAttrDefaultOid(RelationGetRelid(rel), attnum);

            if (!OidIsValid(attrdefoid))
                elog(ERROR, "could not find attrdef tuple for relation %u attnum %d",
                     RelationGetRelid(rel), attnum);
            (void) deleteDependencyRecordsFor(AttrDefaultRelationId, attrdefoid, false);
        }

        /*
         * Make updates-so-far visible, particularly the new pg_attribute row
         * which will be updated again.
         */
        CommandCounterIncrement();

        /*
         * We use RESTRICT here for safety, but at present we do not expect
         * anything to depend on the default.
         */
        RemoveAttrDefault(RelationGetRelid(rel), attnum, DROP_RESTRICT, true,
                          true);

        StoreAttrDefault(rel, attnum, defaultexpr, true, false);
    }

    ObjectAddressSubSet(address, RelationRelationId,
                        RelationGetRelid(rel), attnum);

    /* Cleanup */
    heap_freetuple(heapTup);

    return address;
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a replica identity
 * needs to be reset.
 */
static void
RememberReplicaIdentityForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /* nothing to do unless this index is the table's replica identity */
    if (!get_index_isreplident(indoid))
        return;

    if (tab->replicaIdentityIndex)
        elog(ERROR, "relation %u has multiple indexes marked as replica identity", tab->relid);

    tab->replicaIdentityIndex = get_rel_name(indoid);
}

/*
 * Subroutine for ATExecAlterColumnType: remember any clustered index.
 */
static void
RememberClusterOnForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /* nothing to do unless this index is the table's clustered index */
    if (!get_index_isclustered(indoid))
        return;

    if (tab->clusterOnIndex)
        elog(ERROR, "relation %u has multiple clustered indexes", tab->relid);

    tab->clusterOnIndex = get_rel_name(indoid);
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a constraint needs
 * to be rebuilt (which we might already know).
 */
static void
RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same constraint twice, and if a constraint
     * depends on more than one column whose type is to be altered, we must
     * capture its definition string before applying any of the column type
     * changes.  ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedConstraintOids, conoid))
    {
        /* OK, capture the constraint's existing definition string */
        char       *defstring = pg_get_constraintdef_command(conoid);
        Oid         indoid;

        tab->changedConstraintOids = lappend_oid(tab->changedConstraintOids,
                                                 conoid);
        tab->changedConstraintDefs = lappend(tab->changedConstraintDefs,
                                             defstring);

        /*
         * For the index of a constraint, if any, remember if it is used for
         * the table's replica identity or if it is a clustered index, so that
         * ATPostAlterTypeCleanup() can queue up commands necessary to restore
         * those properties.
         */
        indoid = get_constraint_index(conoid);
        if (OidIsValid(indoid))
        {
            RememberReplicaIdentityForRebuilding(indoid, tab);
            RememberClusterOnForRebuilding(indoid, tab);
        }
    }
}

/*
 * Subroutine for ATExecAlterColumnType: remember that an index needs
 * to be rebuilt (which we might already know).
 */
static void
RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same index twice, and if an index depends
     * on more than one column whose type is to be altered, we must capture
     * its definition string before applying any of the column type changes.
     * ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedIndexOids, indoid))
    {
        /*
         * Before adding it as an index-to-rebuild, we'd better see if it
         * belongs to a constraint, and if so rebuild the constraint instead.
         * Typically this check fails, because constraint indexes normally
         * have only dependencies on their constraint.  But it's possible for
         * such an index to also have direct dependencies on table columns,
         * for example with a partial exclusion constraint.
         */
        Oid         conoid = get_index_constraint(indoid);

        if (OidIsValid(conoid))
        {
            RememberConstraintForRebuilding(conoid, tab);
        }
        else
        {
            /* OK, capture the index's existing definition string */
            char       *defstring = pg_get_indexdef_string(indoid);

            tab->changedIndexOids = lappend_oid(tab->changedIndexOids,
                                                indoid);
            tab->changedIndexDefs = lappend(tab->changedIndexDefs,
                                            defstring);

            /*
             * Remember if this index is used for the table's replica identity
             * or if it is a clustered index, so that ATPostAlterTypeCleanup()
             * can queue up commands necessary to restore those properties.
             */
            RememberReplicaIdentityForRebuilding(indoid, tab);
            RememberClusterOnForRebuilding(indoid, tab);
        }
    }
}

/*
 * Subroutine for ATExecAlterColumnType: remember that a statistics object
 * needs to be rebuilt (which we might already know).
 */
static void
RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
{
    /*
     * This de-duplication check is critical for two independent reasons: we
     * mustn't try to recreate the same statistics object twice, and if the
     * statistics object depends on more than one column whose type is to be
     * altered, we must capture its definition string before applying any of
     * the type changes.  ruleutils.c will get confused if we ask again later.
     */
    if (!list_member_oid(tab->changedStatisticsOids, stxoid))
    {
        /* OK, capture the statistics object's existing definition string */
        char       *defstring = pg_get_statisticsobjdef_string(stxoid);

        tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
                                                 stxoid);
        tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
                                             defstring);
    }
}

/*
 * Cleanup after we've finished all the ALTER TYPE operations for a
 * particular relation.  We have to drop and recreate all the indexes
 * and constraints that depend on the altered columns.
We do the + * actual dropping here, but re-creation is managed by adding work + * queue entries to do those steps later. + */ +static void +ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) +{ + ObjectAddress obj; + ObjectAddresses *objects; + ListCell *def_item; + ListCell *oid_item; + + /* + * Collect all the constraints and indexes to drop so we can process them + * in a single call. That way we don't have to worry about dependencies + * among them. + */ + objects = new_object_addresses(); + + /* + * Re-parse the index and constraint definitions, and attach them to the + * appropriate work queue entries. We do this before dropping because in + * the case of a FOREIGN KEY constraint, we might not yet have exclusive + * lock on the table the constraint is attached to, and we need to get + * that before reparsing/dropping. + * + * We can't rely on the output of deparsing to tell us which relation to + * operate on, because concurrent activity might have made the name + * resolve differently. Instead, we've got to use the OID of the + * constraint or index we're processing to figure out which relation to + * operate on. 
+ */ + forboth(oid_item, tab->changedConstraintOids, + def_item, tab->changedConstraintDefs) + { + Oid oldId = lfirst_oid(oid_item); + HeapTuple tup; + Form_pg_constraint con; + Oid relid; + Oid confrelid; + char contype; + bool conislocal; + + tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for constraint %u", oldId); + con = (Form_pg_constraint) GETSTRUCT(tup); + if (OidIsValid(con->conrelid)) + relid = con->conrelid; + else + { + /* must be a domain constraint */ + relid = get_typ_typrelid(getBaseType(con->contypid)); + if (!OidIsValid(relid)) + elog(ERROR, "could not identify relation associated with constraint %u", oldId); + } + confrelid = con->confrelid; + contype = con->contype; + conislocal = con->conislocal; + ReleaseSysCache(tup); + + ObjectAddressSet(obj, ConstraintRelationId, oldId); + add_exact_object_address(&obj, objects); + + /* + * If the constraint is inherited (only), we don't want to inject a + * new definition here; it'll get recreated when ATAddCheckConstraint + * recurses from adding the parent table's constraint. But we had to + * carry the info this far so that we can drop the constraint below. + */ + if (!conislocal) + continue; + + /* + * When rebuilding an FK constraint that references the table we're + * modifying, we might not yet have any lock on the FK's table, so get + * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT + * step, so there's no value in asking for anything weaker. 
+ */ + if (relid != tab->relid && contype == CONSTRAINT_FOREIGN) + LockRelationOid(relid, AccessExclusiveLock); + + ATPostAlterTypeParse(oldId, relid, confrelid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + } + forboth(oid_item, tab->changedIndexOids, + def_item, tab->changedIndexDefs) + { + Oid oldId = lfirst_oid(oid_item); + Oid relid; + + relid = IndexGetRelation(oldId, false); + ATPostAlterTypeParse(oldId, relid, InvalidOid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + + ObjectAddressSet(obj, RelationRelationId, oldId); + add_exact_object_address(&obj, objects); + } + + /* add dependencies for new statistics */ + forboth(oid_item, tab->changedStatisticsOids, + def_item, tab->changedStatisticsDefs) + { + Oid oldId = lfirst_oid(oid_item); + Oid relid; + + relid = StatisticsGetRelation(oldId, false); + ATPostAlterTypeParse(oldId, relid, InvalidOid, + (char *) lfirst(def_item), + wqueue, lockmode, tab->rewrite); + + ObjectAddressSet(obj, StatisticExtRelationId, oldId); + add_exact_object_address(&obj, objects); + } + + /* + * Queue up command to restore replica identity index marking + */ + if (tab->replicaIdentityIndex) + { + AlterTableCmd *cmd = makeNode(AlterTableCmd); + ReplicaIdentityStmt *subcmd = makeNode(ReplicaIdentityStmt); + + subcmd->identity_type = REPLICA_IDENTITY_INDEX; + subcmd->name = tab->replicaIdentityIndex; + cmd->subtype = AT_ReplicaIdentity; + cmd->def = (Node *) subcmd; + + /* do it after indexes and constraints */ + tab->subcmds[AT_PASS_OLD_CONSTR] = + lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd); + } + + /* + * Queue up command to restore marking of index used for cluster. 
+ */ + if (tab->clusterOnIndex) + { + AlterTableCmd *cmd = makeNode(AlterTableCmd); + + cmd->subtype = AT_ClusterOn; + cmd->name = tab->clusterOnIndex; + + /* do it after indexes and constraints */ + tab->subcmds[AT_PASS_OLD_CONSTR] = + lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd); + } + + /* + * It should be okay to use DROP_RESTRICT here, since nothing else should + * be depending on these objects. + */ + performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL); + + free_object_addresses(objects); + + /* + * The objects will get recreated during subsequent passes over the work + * queue. + */ +} + +/* + * Parse the previously-saved definition string for a constraint, index or + * statistics object against the newly-established column data type(s), and + * queue up the resulting command parsetrees for execution. + * + * This might fail if, for example, you have a WHERE clause that uses an + * operator that's not available for the new column type. + */ +static void +ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, + List **wqueue, LOCKMODE lockmode, bool rewrite) +{ + List *raw_parsetree_list; + List *querytree_list; + ListCell *list_item; + Relation rel; + + /* + * We expect that we will get only ALTER TABLE and CREATE INDEX + * statements. Hence, there is no need to pass them through + * parse_analyze_*() or the rewriter, but instead we need to pass them + * through parse_utilcmd.c to make them ready for execution. 
	 */
	raw_parsetree_list = raw_parser(cmd, RAW_PARSE_DEFAULT);
	querytree_list = NIL;
	foreach(list_item, raw_parsetree_list)
	{
		RawStmt    *rs = lfirst_node(RawStmt, list_item);
		Node	   *stmt = rs->stmt;

		if (IsA(stmt, IndexStmt))
			querytree_list = lappend(querytree_list,
									 transformIndexStmt(oldRelId,
														(IndexStmt *) stmt,
														cmd));
		else if (IsA(stmt, AlterTableStmt))
		{
			List	   *beforeStmts;
			List	   *afterStmts;

			stmt = (Node *) transformAlterTableStmt(oldRelId,
													(AlterTableStmt *) stmt,
													cmd,
													&beforeStmts,
													&afterStmts);
			/* preserve the ordering transformAlterTableStmt established */
			querytree_list = list_concat(querytree_list, beforeStmts);
			querytree_list = lappend(querytree_list, stmt);
			querytree_list = list_concat(querytree_list, afterStmts);
		}
		else if (IsA(stmt, CreateStatsStmt))
			querytree_list = lappend(querytree_list,
									 transformStatsStmt(oldRelId,
														(CreateStatsStmt *) stmt,
														cmd));
		else
			querytree_list = lappend(querytree_list, stmt);
	}

	/* Caller should already have acquired whatever lock we need. */
	rel = relation_open(oldRelId, NoLock);

	/*
	 * Attach each generated command to the proper place in the work queue.
	 * Note this could result in creation of entirely new work-queue entries.
	 *
	 * Also note that we have to tweak the command subtypes, because it turns
	 * out that re-creation of indexes and constraints has to act a bit
	 * differently from initial creation.
	 */
	foreach(list_item, querytree_list)
	{
		Node	   *stm = (Node *) lfirst(list_item);
		AlteredTableInfo *tab;

		tab = ATGetQueueEntry(wqueue, rel);

		if (IsA(stm, IndexStmt))
		{
			IndexStmt  *stmt = (IndexStmt *) stm;
			AlterTableCmd *newcmd;

			/* only reuse the old storage if no table rewrite is happening */
			if (!rewrite)
				TryReuseIndex(oldId, stmt);
			stmt->reset_default_tblspc = true;
			/* keep the index's comment */
			stmt->idxcomment = GetComment(oldId, RelationRelationId, 0);

			newcmd = makeNode(AlterTableCmd);
			newcmd->subtype = AT_ReAddIndex;
			newcmd->def = (Node *) stmt;
			tab->subcmds[AT_PASS_OLD_INDEX] =
				lappend(tab->subcmds[AT_PASS_OLD_INDEX], newcmd);
		}
		else if (IsA(stm, AlterTableStmt))
		{
			AlterTableStmt *stmt = (AlterTableStmt *) stm;
			ListCell   *lcmd;

			foreach(lcmd, stmt->cmds)
			{
				AlterTableCmd *cmd = lfirst_node(AlterTableCmd, lcmd);

				if (cmd->subtype == AT_AddIndex)
				{
					IndexStmt  *indstmt;
					Oid			indoid;

					indstmt = castNode(IndexStmt, cmd->def);
					indoid = get_constraint_index(oldId);

					if (!rewrite)
						TryReuseIndex(indoid, indstmt);
					/* keep any comment on the index */
					indstmt->idxcomment = GetComment(indoid,
													 RelationRelationId, 0);
					indstmt->reset_default_tblspc = true;

					cmd->subtype = AT_ReAddIndex;
					tab->subcmds[AT_PASS_OLD_INDEX] =
						lappend(tab->subcmds[AT_PASS_OLD_INDEX], cmd);

					/* recreate any comment on the constraint */
					RebuildConstraintComment(tab,
											 AT_PASS_OLD_INDEX,
											 oldId,
											 rel,
											 NIL,
											 indstmt->idxname);
				}
				else if (cmd->subtype == AT_AddConstraint)
				{
					Constraint *con = castNode(Constraint, cmd->def);

					con->old_pktable_oid = refRelId;
					/* rewriting neither side of a FK */
					if (con->contype == CONSTR_FOREIGN &&
						!rewrite && tab->rewrite == 0)
						TryReuseForeignKey(oldId, con);
					con->reset_default_tblspc = true;
					cmd->subtype = AT_ReAddConstraint;
					tab->subcmds[AT_PASS_OLD_CONSTR] =
						lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);

					/* recreate any comment on the constraint */
					RebuildConstraintComment(tab,
											 AT_PASS_OLD_CONSTR,
											 oldId,
											 rel,
											 NIL,
											 con->conname);
				}
				else if (cmd->subtype == AT_SetNotNull)
				{
					/*
					 * The parser will create AT_SetNotNull subcommands for
					 * columns of PRIMARY KEY indexes/constraints, but we need
					 * not do anything with them here, because the columns'
					 * NOT NULL marks will already have been propagated into
					 * the new table definition.
					 */
				}
				else
					elog(ERROR, "unexpected statement subtype: %d",
						 (int) cmd->subtype);
			}
		}
		else if (IsA(stm, AlterDomainStmt))
		{
			AlterDomainStmt *stmt = (AlterDomainStmt *) stm;

			if (stmt->subtype == 'C')	/* ADD CONSTRAINT */
			{
				Constraint *con = castNode(Constraint, stmt->def);
				AlterTableCmd *cmd = makeNode(AlterTableCmd);

				cmd->subtype = AT_ReAddDomainConstraint;
				cmd->def = (Node *) stmt;
				tab->subcmds[AT_PASS_OLD_CONSTR] =
					lappend(tab->subcmds[AT_PASS_OLD_CONSTR], cmd);

				/* recreate any comment on the constraint */
				RebuildConstraintComment(tab,
										 AT_PASS_OLD_CONSTR,
										 oldId,
										 NULL,
										 stmt->typeName,
										 con->conname);
			}
			else
				elog(ERROR, "unexpected statement subtype: %d",
					 (int) stmt->subtype);
		}
		else if (IsA(stm, CreateStatsStmt))
		{
			CreateStatsStmt *stmt = (CreateStatsStmt *) stm;
			AlterTableCmd *newcmd;

			/* keep the statistics object's comment */
			stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);

			newcmd = makeNode(AlterTableCmd);
			newcmd->subtype = AT_ReAddStatistics;
			newcmd->def = (Node *) stmt;
			tab->subcmds[AT_PASS_MISC] =
				lappend(tab->subcmds[AT_PASS_MISC], newcmd);
		}
		else
			elog(ERROR, "unexpected statement type: %d",
				 (int) nodeTag(stm));
	}

	relation_close(rel, NoLock);
}

/*
 * Subroutine for ATPostAlterTypeParse() to recreate any existing comment
 * for a table or domain constraint that is being rebuilt.
 *
 * objid is the OID of the constraint.
 * Pass "rel" for a table constraint, or "domname" (domain's qualified name
 * as a string list) for a domain constraint.
 * (We could dig that info, as well as the conname, out of the pg_constraint
 * entry; but callers already have them so might as well pass them.)
 */
static void
RebuildConstraintComment(AlteredTableInfo *tab, int pass, Oid objid,
						 Relation rel, List *domname,
						 const char *conname)
{
	CommentStmt *cmd;
	char	   *comment_str;
	AlterTableCmd *newcmd;

	/* Look for comment for object wanted, and leave if none */
	comment_str = GetComment(objid, ConstraintRelationId, 0);
	if (comment_str == NULL)
		return;

	/* Build CommentStmt node, copying all input data for safety */
	cmd = makeNode(CommentStmt);
	if (rel)
	{
		/* table constraint: identified by schema, table name, conname */
		cmd->objtype = OBJECT_TABCONSTRAINT;
		cmd->object = (Node *)
			list_make3(makeString(get_namespace_name(RelationGetNamespace(rel))),
					   makeString(pstrdup(RelationGetRelationName(rel))),
					   makeString(pstrdup(conname)));
	}
	else
	{
		/* domain constraint: identified by domain type name, conname */
		cmd->objtype = OBJECT_DOMCONSTRAINT;
		cmd->object = (Node *)
			list_make2(makeTypeNameFromNameList(copyObject(domname)),
					   makeString(pstrdup(conname)));
	}
	cmd->comment = comment_str;

	/* Append it to list of commands */
	newcmd = makeNode(AlterTableCmd);
	newcmd->subtype = AT_ReAddComment;
	newcmd->def = (Node *) cmd;
	tab->subcmds[pass] = lappend(tab->subcmds[pass], newcmd);
}

/*
 * Subroutine for ATPostAlterTypeParse().  Calls out to CheckIndexCompatible()
 * for the real analysis, then mutates the IndexStmt based on that verdict.
 */
static void
TryReuseIndex(Oid oldId, IndexStmt *stmt)
{
	if (CheckIndexCompatible(oldId,
							 stmt->accessMethod,
							 stmt->indexParams,
							 stmt->excludeOpNames))
	{
		Relation	irel = index_open(oldId, NoLock);

		/* If it's a partitioned index, there is no storage to share. */
		if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		{
			/* record the old relfilenode so the new index can adopt it */
			stmt->oldNode = irel->rd_node.relNode;
			stmt->oldCreateSubid = irel->rd_createSubid;
			stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
		}
		index_close(irel, NoLock);
	}
}

/*
 * Subroutine for ATPostAlterTypeParse().
 *
 * Stash the old P-F equality operator into the Constraint node, for possible
 * use by ATAddForeignKeyConstraint() in determining whether revalidation of
 * this constraint can be skipped.
 */
static void
TryReuseForeignKey(Oid oldId, Constraint *con)
{
	HeapTuple	tup;
	Datum		adatum;
	bool		isNull;
	ArrayType  *arr;
	Oid		   *rawarr;
	int			numkeys;
	int			i;

	Assert(con->contype == CONSTR_FOREIGN);
	Assert(con->old_conpfeqop == NIL);	/* already prepared this node */

	tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
	if (!HeapTupleIsValid(tup)) /* should not happen */
		elog(ERROR, "cache lookup failed for constraint %u", oldId);

	adatum = SysCacheGetAttr(CONSTROID, tup,
							 Anum_pg_constraint_conpfeqop, &isNull);
	if (isNull)
		elog(ERROR, "null conpfeqop for constraint %u", oldId);
	arr = DatumGetArrayTypeP(adatum);	/* ensure not toasted */
	numkeys = ARR_DIMS(arr)[0];
	/* test follows the one in ri_FetchConstraintInfo() */
	if (ARR_NDIM(arr) != 1 ||
		ARR_HASNULL(arr) ||
		ARR_ELEMTYPE(arr) != OIDOID)
		elog(ERROR, "conpfeqop is not a 1-D Oid array");
	rawarr = (Oid *) ARR_DATA_PTR(arr);

	/* stash a List of the operator Oids in our Constraint node */
	for (i = 0; i < numkeys; i++)
		con->old_conpfeqop = lappend_oid(con->old_conpfeqop, rawarr[i]);

	ReleaseSysCache(tup);
}

/*
 * ALTER COLUMN .. OPTIONS ( ...
 )
 *
 * Returns the address of the modified column
 */
static ObjectAddress
ATExecAlterColumnGenericOptions(Relation rel,
								const char *colName,
								List *options,
								LOCKMODE lockmode)
{
	Relation	ftrel;
	Relation	attrel;
	ForeignServer *server;
	ForeignDataWrapper *fdw;
	HeapTuple	tuple;
	HeapTuple	newtuple;
	bool		isnull;
	Datum		repl_val[Natts_pg_attribute];
	bool		repl_null[Natts_pg_attribute];
	bool		repl_repl[Natts_pg_attribute];
	Datum		datum;
	Form_pg_foreign_table fttableform;
	Form_pg_attribute atttableform;
	AttrNumber	attnum;
	ObjectAddress address;

	/* No options given: nothing to do */
	if (options == NIL)
		return InvalidObjectAddress;

	/* First, determine FDW validator associated to the foreign table. */
	ftrel = table_open(ForeignTableRelationId, AccessShareLock);
	tuple = SearchSysCache1(FOREIGNTABLEREL, rel->rd_id);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("foreign table \"%s\" does not exist",
						RelationGetRelationName(rel))));
	fttableform = (Form_pg_foreign_table) GETSTRUCT(tuple);
	server = GetForeignServer(fttableform->ftserver);
	fdw = GetForeignDataWrapper(server->fdwid);

	table_close(ftrel, AccessShareLock);
	ReleaseSysCache(tuple);

	attrel = table_open(AttributeRelationId, RowExclusiveLock);
	tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName);
	if (!HeapTupleIsValid(tuple))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_COLUMN),
				 errmsg("column \"%s\" of relation \"%s\" does not exist",
						colName, RelationGetRelationName(rel))));

	/* Prevent them from altering a system attribute */
	atttableform = (Form_pg_attribute) GETSTRUCT(tuple);
	attnum = atttableform->attnum;
	if (attnum <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot alter system column \"%s\"", colName)));


	/* Initialize buffers for new tuple values */
	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	/* Extract the current options */
	datum = SysCacheGetAttr(ATTNAME,
							tuple,
							Anum_pg_attribute_attfdwoptions,
							&isnull);
	if (isnull)
		datum = PointerGetDatum(NULL);

	/* Transform the options (validated by the FDW's validator function) */
	datum = transformGenericOptions(AttributeRelationId,
									datum,
									options,
									fdw->fdwvalidator);

	if (PointerIsValid(DatumGetPointer(datum)))
		repl_val[Anum_pg_attribute_attfdwoptions - 1] = datum;
	else
		repl_null[Anum_pg_attribute_attfdwoptions - 1] = true;

	repl_repl[Anum_pg_attribute_attfdwoptions - 1] = true;

	/* Everything looks good - update the tuple */

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrel),
								 repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(attrel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId,
							  RelationGetRelid(rel),
							  atttableform->attnum);
	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);

	ReleaseSysCache(tuple);

	table_close(attrel, RowExclusiveLock);

	heap_freetuple(newtuple);

	return address;
}

/*
 * ALTER TABLE OWNER
 *
 * recursing is true if we are recursing from a table to its indexes,
 * sequences, or toast table.  We don't allow the ownership of those things to
 * be changed separately from the parent table.  Also, we can skip permission
 * checks (this is necessary not just an optimization, else we'd fail to
 * handle toast tables properly).
 *
 * recursing is also true if ALTER TYPE OWNER is calling us to fix up a
 * free-standing composite type.
 */
void
ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lockmode)
{
	Relation	target_rel;
	Relation	class_rel;
	HeapTuple	tuple;
	Form_pg_class tuple_class;

	/*
	 * Get exclusive lock till end of transaction on the target table. Use
	 * relation_open so that we can work on indexes and sequences.
	 */
	target_rel = relation_open(relationOid, lockmode);

	/* Get its pg_class tuple, too */
	class_rel = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relationOid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relationOid);
	tuple_class = (Form_pg_class) GETSTRUCT(tuple);

	/* Can we change the ownership of this tuple? */
	switch (tuple_class->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_VIEW:
		case RELKIND_MATVIEW:
		case RELKIND_FOREIGN_TABLE:
		case RELKIND_PARTITIONED_TABLE:
			/* ok to change owner */
			break;
		case RELKIND_INDEX:
			if (!recursing)
			{
				/*
				 * Because ALTER INDEX OWNER used to be allowed, and in fact
				 * is generated by old versions of pg_dump, we give a warning
				 * and do nothing rather than erroring out.  Also, to avoid
				 * unnecessary chatter while restoring those old dumps, say
				 * nothing at all if the command would be a no-op anyway.
				 */
				if (tuple_class->relowner != newOwnerId)
					ereport(WARNING,
							(errcode(ERRCODE_WRONG_OBJECT_TYPE),
							 errmsg("cannot change owner of index \"%s\"",
									NameStr(tuple_class->relname)),
							 errhint("Change the ownership of the index's table, instead.")));
				/* quick hack to exit via the no-op path */
				newOwnerId = tuple_class->relowner;
			}
			break;
		case RELKIND_PARTITIONED_INDEX:
			if (recursing)
				break;
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change owner of index \"%s\"",
							NameStr(tuple_class->relname)),
					 errhint("Change the ownership of the index's table, instead.")));
			break;
		case RELKIND_SEQUENCE:
			if (!recursing &&
				tuple_class->relowner != newOwnerId)
			{
				/* if it's an owned sequence, disallow changing it by itself */
				Oid			tableId;
				int32		colId;

				if (sequenceIsOwned(relationOid, DEPENDENCY_AUTO, &tableId, &colId) ||
					sequenceIsOwned(relationOid, DEPENDENCY_INTERNAL, &tableId, &colId))
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cannot change owner of sequence \"%s\"",
									NameStr(tuple_class->relname)),
							 errdetail("Sequence \"%s\" is linked to table \"%s\".",
									   NameStr(tuple_class->relname),
									   get_rel_name(tableId))));
			}
			break;
		case RELKIND_COMPOSITE_TYPE:
			if (recursing)
				break;
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("\"%s\" is a composite type",
							NameStr(tuple_class->relname)),
					 errhint("Use ALTER TYPE instead.")));
			break;
		case RELKIND_TOASTVALUE:
			if (recursing)
				break;
			/* FALL THRU */
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change owner of relation \"%s\"",
							NameStr(tuple_class->relname)),
					 errdetail_relkind_not_supported(tuple_class->relkind)));
	}

	/*
	 * If the new owner is the same as the existing owner, consider the
	 * command to have succeeded.  This is for dump restoration purposes.
	 */
	if (tuple_class->relowner != newOwnerId)
	{
		Datum		repl_val[Natts_pg_class];
		bool		repl_null[Natts_pg_class];
		bool		repl_repl[Natts_pg_class];
		Acl		   *newAcl;
		Datum		aclDatum;
		bool		isNull;
		HeapTuple	newtuple;

		/* skip permission checks when recursing to index or toast table */
		if (!recursing)
		{
			/* Superusers can always do it */
			if (!superuser())
			{
				Oid			namespaceOid = tuple_class->relnamespace;
				AclResult	aclresult;

				/* Otherwise, must be owner of the existing object */
				if (!pg_class_ownercheck(relationOid, GetUserId()))
					aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relationOid)),
								   RelationGetRelationName(target_rel));

				/* Must be able to become new owner */
				check_is_member_of_role(GetUserId(), newOwnerId);

				/* New owner must have CREATE privilege on namespace */
				aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId,
												  ACL_CREATE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, OBJECT_SCHEMA,
								   get_namespace_name(namespaceOid));
			}
		}

		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		repl_repl[Anum_pg_class_relowner - 1] = true;
		repl_val[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(newOwnerId);

		/*
		 * Determine the modified ACL for the new owner.  This is only
		 * necessary when the ACL is non-null.
		 */
		aclDatum = SysCacheGetAttr(RELOID, tuple,
								   Anum_pg_class_relacl,
								   &isNull);
		if (!isNull)
		{
			newAcl = aclnewowner(DatumGetAclP(aclDatum),
								 tuple_class->relowner, newOwnerId);
			repl_repl[Anum_pg_class_relacl - 1] = true;
			repl_val[Anum_pg_class_relacl - 1] = PointerGetDatum(newAcl);
		}

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple);

		heap_freetuple(newtuple);

		/*
		 * We must similarly update any per-column ACLs to reflect the new
		 * owner; for neatness reasons that's split out as a subroutine.
		 */
		change_owner_fix_column_acls(relationOid,
									 tuple_class->relowner,
									 newOwnerId);

		/*
		 * Update owner dependency reference, if any.  A composite type has
		 * none, because it's tracked for the pg_type entry instead of here;
		 * indexes and TOAST tables don't have their own entries either.
		 */
		if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE &&
			tuple_class->relkind != RELKIND_INDEX &&
			tuple_class->relkind != RELKIND_PARTITIONED_INDEX &&
			tuple_class->relkind != RELKIND_TOASTVALUE)
			changeDependencyOnOwner(RelationRelationId, relationOid,
									newOwnerId);

		/*
		 * Also change the ownership of the table's row type, if it has one
		 */
		if (OidIsValid(tuple_class->reltype))
			AlterTypeOwnerInternal(tuple_class->reltype, newOwnerId);

		/*
		 * If we are operating on a table or materialized view, also change
		 * the ownership of any indexes and sequences that belong to the
		 * relation, as well as its toast table (if it has one).
		 */
		if (tuple_class->relkind == RELKIND_RELATION ||
			tuple_class->relkind == RELKIND_PARTITIONED_TABLE ||
			tuple_class->relkind == RELKIND_MATVIEW ||
			tuple_class->relkind == RELKIND_TOASTVALUE)
		{
			List	   *index_oid_list;
			ListCell   *i;

			/* Find all the indexes belonging to this relation */
			index_oid_list = RelationGetIndexList(target_rel);

			/* For each index, recursively change its ownership */
			foreach(i, index_oid_list)
				ATExecChangeOwner(lfirst_oid(i), newOwnerId, true, lockmode);

			list_free(index_oid_list);
		}

		/* If it has a toast table, recurse to change its ownership */
		if (tuple_class->reltoastrelid != InvalidOid)
			ATExecChangeOwner(tuple_class->reltoastrelid, newOwnerId,
							  true, lockmode);

		/* If it has dependent sequences, recurse to change them too */
		change_owner_recurse_to_sequences(relationOid, newOwnerId, lockmode);
	}

	InvokeObjectPostAlterHook(RelationRelationId, relationOid, 0);

	ReleaseSysCache(tuple);
	table_close(class_rel, RowExclusiveLock);
	/* keep the lock on the target relation until end of transaction */
	relation_close(target_rel, NoLock);
}

/*
 * change_owner_fix_column_acls
 *
 * Helper function for ATExecChangeOwner.  Scan the columns of the table
 * and fix any non-null column ACLs to reflect the new owner.
 */
static void
change_owner_fix_column_acls(Oid relationOid, Oid oldOwnerId, Oid newOwnerId)
{
	Relation	attRelation;
	SysScanDesc scan;
	ScanKeyData key[1];
	HeapTuple	attributeTuple;

	attRelation = table_open(AttributeRelationId, RowExclusiveLock);
	ScanKeyInit(&key[0],
				Anum_pg_attribute_attrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationOid));
	scan = systable_beginscan(attRelation, AttributeRelidNumIndexId,
							  true, NULL, 1, key);
	while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
	{
		Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple);
		Datum		repl_val[Natts_pg_attribute];
		bool		repl_null[Natts_pg_attribute];
		bool		repl_repl[Natts_pg_attribute];
		Acl		   *newAcl;
		Datum		aclDatum;
		bool		isNull;
		HeapTuple	newtuple;

		/* Ignore dropped columns */
		if (att->attisdropped)
			continue;

		aclDatum = heap_getattr(attributeTuple,
								Anum_pg_attribute_attacl,
								RelationGetDescr(attRelation),
								&isNull);
		/* Null ACLs do not require changes */
		if (isNull)
			continue;

		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		/* Rewrite the ACL with the new owner substituted for the old */
		newAcl = aclnewowner(DatumGetAclP(aclDatum),
							 oldOwnerId, newOwnerId);
		repl_repl[Anum_pg_attribute_attacl - 1] = true;
		repl_val[Anum_pg_attribute_attacl - 1] = PointerGetDatum(newAcl);

		newtuple = heap_modify_tuple(attributeTuple,
									 RelationGetDescr(attRelation),
									 repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(attRelation, &newtuple->t_self, newtuple);

		heap_freetuple(newtuple);
	}
	systable_endscan(scan);
	table_close(attRelation, RowExclusiveLock);
}

/*
 * change_owner_recurse_to_sequences
 *
 * Helper function for ATExecChangeOwner.  Examines pg_depend searching
 * for sequences that are dependent on serial columns, and changes their
 * ownership.
 */
static void
change_owner_recurse_to_sequences(Oid relationOid, Oid newOwnerId, LOCKMODE lockmode)
{
	Relation	depRel;
	SysScanDesc scan;
	ScanKeyData key[2];
	HeapTuple	tup;

	/*
	 * SERIAL sequences are those having an auto dependency on one of the
	 * table's columns (we don't care *which* column, exactly).
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationOid));
	/* we leave refobjsubid unspecified */

	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
		Relation	seqRel;

		/* skip dependencies other than auto dependencies on columns */
		if (depForm->refobjsubid == 0 ||
			depForm->classid != RelationRelationId ||
			depForm->objsubid != 0 ||
			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
			continue;

		/* Use relation_open just in case it's an index */
		seqRel = relation_open(depForm->objid, lockmode);

		/* skip non-sequence relations */
		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
		{
			/* No need to keep the lock */
			relation_close(seqRel, lockmode);
			continue;
		}

		/* We don't need to close the sequence while we alter it. */
		ATExecChangeOwner(depForm->objid, newOwnerId, true, lockmode);

		/* Now we can close it.  Keep the lock till end of transaction. */
		relation_close(seqRel, NoLock);
	}

	systable_endscan(scan);

	relation_close(depRel, AccessShareLock);
}

/*
 * ALTER TABLE CLUSTER ON
 *
 * The only thing we have to do is to change the indisclustered bits.
 *
 * Return the address of the new clustering index.
 */
static ObjectAddress
ATExecClusterOn(Relation rel, const char *indexName, LOCKMODE lockmode)
{
	Oid			indexOid;
	ObjectAddress address;

	/* Resolve the index name within the table's own namespace */
	indexOid = get_relname_relid(indexName, rel->rd_rel->relnamespace);

	if (!OidIsValid(indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("index \"%s\" for table \"%s\" does not exist",
						indexName, RelationGetRelationName(rel))));

	/* Check index is valid to cluster on */
	check_index_is_clusterable(rel, indexOid, lockmode);

	/* And do the work */
	mark_index_clustered(rel, indexOid, false);

	ObjectAddressSet(address,
					 RelationRelationId, indexOid);

	return address;
}

/*
 * ALTER TABLE SET WITHOUT CLUSTER
 *
 * We have to find any indexes on the table that have indisclustered bit
 * set and turn it off.
 */
static void
ATExecDropCluster(Relation rel, LOCKMODE lockmode)
{
	/* InvalidOid means "clear the clustered marking from all indexes" */
	mark_index_clustered(rel, InvalidOid, false);
}

/*
 * Preparation phase for SET ACCESS METHOD
 *
 * Check that access method exists.  If it is the same as the table's current
 * access method, it is a no-op.  Otherwise, a table rewrite is necessary.
 */
static void
ATPrepSetAccessMethod(AlteredTableInfo *tab, Relation rel, const char *amname)
{
	Oid			amoid;

	/* Check that the table access method exists */
	amoid = get_table_am_oid(amname, false);

	/* Same access method as before: nothing to do */
	if (rel->rd_rel->relam == amoid)
		return;

	/* Save info for Phase 3 to do the real work */
	tab->rewrite |= AT_REWRITE_ACCESS_METHOD;
	tab->newAccessMethod = amoid;
}

/*
 * ALTER TABLE SET TABLESPACE
 */
static void
ATPrepSetTableSpace(AlteredTableInfo *tab, Relation rel, const char *tablespacename, LOCKMODE lockmode)
{
	Oid			tablespaceId;

	/* Check that the tablespace exists */
	tablespaceId = get_tablespace_oid(tablespacename, false);

	/* Check permissions except when moving to database's default */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(), ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE, tablespacename);
	}

	/* Save info for Phase 3 to do the real work */
	if (OidIsValid(tab->newTableSpace))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("cannot have multiple SET TABLESPACE subcommands")));

	tab->newTableSpace = tablespaceId;
}

/*
 * Set, reset, or replace reloptions.
 */
static void
ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
					LOCKMODE lockmode)
{
	Oid			relid;
	Relation	pgclass;
	HeapTuple	tuple;
	HeapTuple	newtuple;
	Datum		datum;
	bool		isnull;
	Datum		newOptions;
	Datum		repl_val[Natts_pg_class];
	bool		repl_null[Natts_pg_class];
	bool		repl_repl[Natts_pg_class];
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	if (defList == NIL && operation != AT_ReplaceRelOptions)
		return;					/* nothing to do */

	pgclass = table_open(RelationRelationId, RowExclusiveLock);

	/* Fetch heap tuple */
	relid = RelationGetRelid(rel);
	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	if (operation == AT_ReplaceRelOptions)
	{
		/*
		 * If we're supposed to replace the reloptions list, we just pretend
		 * there were none before.
		 */
		datum = (Datum) 0;
		isnull = true;
	}
	else
	{
		/* Get the old reloptions */
		datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
								&isnull);
	}

	/* Generate new proposed reloptions (text array) */
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 defList, NULL, validnsps, false,
									 operation == AT_ResetRelOptions);

	/* Validate the new option set against the relation's relkind */
	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_TOASTVALUE:
		case RELKIND_MATVIEW:
			(void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
			break;
		case RELKIND_PARTITIONED_TABLE:
			(void) partitioned_table_reloptions(newOptions, true);
			break;
		case RELKIND_VIEW:
			(void) view_reloptions(newOptions, true);
			break;
		case RELKIND_INDEX:
		case RELKIND_PARTITIONED_INDEX:
			/* index options are validated by the index's access method */
			(void) index_reloptions(rel->rd_indam->amoptions, newOptions, true);
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot set options for relation \"%s\"",
							RelationGetRelationName(rel)),
					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
			break;
	}

	/* Special-case validation of view options */
	if (rel->rd_rel->relkind == RELKIND_VIEW)
	{
		Query	   *view_query = get_view_query(rel);
		List	   *view_options = untransformRelOptions(newOptions);
		ListCell   *cell;
		bool		check_option = false;

		foreach(cell, view_options)
		{
			DefElem    *defel = (DefElem *) lfirst(cell);

			if (strcmp(defel->defname, "check_option") == 0)
				check_option = true;
		}

		/*
		 * If the check option is specified, look to see if the view is
		 * actually auto-updatable or not.
		 */
		if (check_option)
		{
			const char *view_updatable_error =
			view_query_is_auto_updatable(view_query, true);

			if (view_updatable_error)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
						 errhint("%s", _(view_updatable_error))));
		}
	}

	/*
	 * All we need do here is update the pg_class row; the new options will be
	 * propagated into relcaches during post-commit cache inval.
	 */
	memset(repl_val, 0, sizeof(repl_val));
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));

	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_class_reloptions - 1] = newOptions;
	else
		repl_null[Anum_pg_class_reloptions - 1] = true;

	repl_repl[Anum_pg_class_reloptions - 1] = true;

	newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
								 repl_val, repl_null, repl_repl);

	CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	heap_freetuple(newtuple);

	ReleaseSysCache(tuple);

	/* repeat the whole exercise for the toast table, if there's one */
	if (OidIsValid(rel->rd_rel->reltoastrelid))
	{
		Relation	toastrel;
		Oid			toastid = rel->rd_rel->reltoastrelid;

		toastrel = table_open(toastid, lockmode);

		/* Fetch heap tuple */
		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for relation %u", toastid);

		if (operation == AT_ReplaceRelOptions)
		{
			/*
			 * If we're supposed to replace the reloptions list, we just
			 * pretend there were none before.
			 */
			datum = (Datum) 0;
			isnull = true;
		}
		else
		{
			/* Get the old reloptions */
			datum = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
									&isnull);
		}

		/* TOAST options live in the "toast" reloptions namespace */
		newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
										 defList, "toast", validnsps, false,
										 operation == AT_ResetRelOptions);

		(void) heap_reloptions(RELKIND_TOASTVALUE, newOptions, true);

		memset(repl_val, 0, sizeof(repl_val));
		memset(repl_null, false, sizeof(repl_null));
		memset(repl_repl, false, sizeof(repl_repl));

		if (newOptions != (Datum) 0)
			repl_val[Anum_pg_class_reloptions - 1] = newOptions;
		else
			repl_null[Anum_pg_class_reloptions - 1] = true;

		repl_repl[Anum_pg_class_reloptions - 1] = true;

		newtuple = heap_modify_tuple(tuple, RelationGetDescr(pgclass),
									 repl_val, repl_null, repl_repl);

		CatalogTupleUpdate(pgclass, &newtuple->t_self, newtuple);

		InvokeObjectPostAlterHookArg(RelationRelationId,
									 RelationGetRelid(toastrel), 0,
									 InvalidOid, true);

		heap_freetuple(newtuple);

		ReleaseSysCache(tuple);

		table_close(toastrel, NoLock);
	}

	table_close(pgclass, RowExclusiveLock);
}

/*
 * Execute ALTER TABLE SET TABLESPACE for cases where there is no tuple
 * rewriting to be done, so we just want to copy the data as fast as possible.
 */
static void
ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
{
	Relation	rel;
	Oid			reltoastrelid;
	Oid			newrelfilenode;
	RelFileNode newrnode;
	List	   *reltoastidxids = NIL;
	ListCell   *lc;

	/*
	 * Need lock here in case we are recursing to toast table or index
	 */
	rel = relation_open(tableOid, lockmode);

	/* Check first if relation can be moved to new tablespace */
	if (!CheckRelationTableSpaceMove(rel, newTableSpace))
	{
		/* no-op; still fire the hook so extensions observe the command */
		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel), 0);
		relation_close(rel, NoLock);
		return;
	}

	reltoastrelid = rel->rd_rel->reltoastrelid;
	/* Fetch the list of indexes on toast relation if necessary */
	if (OidIsValid(reltoastrelid))
	{
		Relation	toastRel = relation_open(reltoastrelid, lockmode);

		reltoastidxids = RelationGetIndexList(toastRel);
		relation_close(toastRel, lockmode);
	}

	/*
	 * Relfilenodes are not unique in databases across tablespaces, so we need
	 * to allocate a new one in the new tablespace.
	 */
	newrelfilenode = GetNewRelFileNode(newTableSpace, NULL,
									   rel->rd_rel->relpersistence);

	/* Open old and new relation */
	newrnode = rel->rd_node;
	newrnode.relNode = newrelfilenode;
	newrnode.spcNode = newTableSpace;

	/* hand off to AM to actually create the new filenode and copy the data */
	if (rel->rd_rel->relkind == RELKIND_INDEX)
	{
		index_copy_data(rel, newrnode);
	}
	else
	{
		Assert(RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind));
		table_relation_copy_data(rel, &newrnode);
	}

	/*
	 * Update the pg_class row.
	 *
	 * NB: This wouldn't work if ATExecSetTableSpace() were allowed to be
	 * executed on pg_class or its indexes (the above copy wouldn't contain
	 * the updated pg_class entry), but that's forbidden with
	 * CheckRelationTableSpaceMove().
	 */
	SetRelationTableSpace(rel, newTableSpace, newrelfilenode);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	RelationAssumeNewRelfilenode(rel);

	relation_close(rel, NoLock);

	/* Make sure the reltablespace change is visible */
	CommandCounterIncrement();

	/* Move associated toast relation and/or indexes, too (recursively) */
	if (OidIsValid(reltoastrelid))
		ATExecSetTableSpace(reltoastrelid, newTableSpace, lockmode);
	foreach(lc, reltoastidxids)
		ATExecSetTableSpace(lfirst_oid(lc), newTableSpace, lockmode);

	/* Clean up */
	list_free(reltoastidxids);
}

/*
 * Special handling of ALTER TABLE SET TABLESPACE for relations with no
 * storage that have an interest in preserving tablespace.
 *
 * Since these have no storage the tablespace can be updated with a simple
 * metadata only operation to update the tablespace.
 */
static void
ATExecSetTableSpaceNoStorage(Relation rel, Oid newTableSpace)
{
	/*
	 * Shouldn't be called on relations having storage; these are processed in
	 * phase 3.
	 */
	Assert(!RELKIND_HAS_STORAGE(rel->rd_rel->relkind));

	/* check if relation can be moved to its new tablespace */
	if (!CheckRelationTableSpaceMove(rel, newTableSpace))
	{
		/* no-op; still fire the hook so extensions observe the command */
		InvokeObjectPostAlterHook(RelationRelationId,
								  RelationGetRelid(rel),
								  0);
		return;
	}

	/* Update can be done, so change reltablespace */
	SetRelationTableSpace(rel, newTableSpace, InvalidOid);

	InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), 0);

	/* Make sure the reltablespace change is visible */
	CommandCounterIncrement();
}

/*
 * Alter Table ALL ... SET TABLESPACE
 *
 * Allows a user to move all objects of some type in a given tablespace in the
 * current database to another tablespace.  Objects can be chosen based on the
 * owner of the object also, to allow users to move only their objects.
 * The user must have CREATE rights on the new tablespace, as usual.
 * The main permissions handling is done by the lower-level table move
 * function.
 *
 * All to-be-moved objects are locked first.  If NOWAIT is specified and the
 * lock can't be acquired then we ereport(ERROR).
 */
Oid
AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
{
	List	   *relations = NIL;
	ListCell   *l;
	ScanKeyData key[1];
	Relation	rel;
	TableScanDesc scan;
	HeapTuple	tuple;
	Oid			orig_tablespaceoid;
	Oid			new_tablespaceoid;
	List	   *role_oids = roleSpecsToIds(stmt->roles);

	/* Ensure we were not asked to move something we can't */
	if (stmt->objtype != OBJECT_TABLE && stmt->objtype != OBJECT_INDEX &&
		stmt->objtype != OBJECT_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("only tables, indexes, and materialized views exist in tablespaces")));

	/* Get the orig and new tablespace OIDs */
	orig_tablespaceoid = get_tablespace_oid(stmt->orig_tablespacename, false);
	new_tablespaceoid = get_tablespace_oid(stmt->new_tablespacename, false);

	/* Can't move shared relations in to or out of pg_global */
	/* This is also checked by ATExecSetTableSpace, but nice to stop earlier */
	if (orig_tablespaceoid == GLOBALTABLESPACE_OID ||
		new_tablespaceoid == GLOBALTABLESPACE_OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot move relations in to or out of pg_global tablespace")));

	/*
	 * Must have CREATE rights on the new tablespace, unless it is the
	 * database default tablespace (which all users implicitly have CREATE
	 * rights on).
	 */
	if (OidIsValid(new_tablespaceoid) && new_tablespaceoid != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(new_tablespaceoid, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_TABLESPACE,
						   get_tablespace_name(new_tablespaceoid));
	}

	/*
	 * Now that the checks are done, check if we should set either to
	 * InvalidOid because it is our database's default tablespace.
	 */
	if (orig_tablespaceoid == MyDatabaseTableSpace)
		orig_tablespaceoid = InvalidOid;

	if (new_tablespaceoid == MyDatabaseTableSpace)
		new_tablespaceoid = InvalidOid;

	/* no-op */
	if (orig_tablespaceoid == new_tablespaceoid)
		return new_tablespaceoid;

	/*
	 * Walk the list of objects in the tablespace and move them.  This will
	 * only find objects in our database, of course.
	 */
	ScanKeyInit(&key[0],
				Anum_pg_class_reltablespace,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(orig_tablespaceoid));

	rel = table_open(RelationRelationId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 1, key);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
		Oid			relOid = relForm->oid;

		/*
		 * Do not move objects in pg_catalog as part of this, if an admin
		 * really wishes to do so, they can issue the individual ALTER
		 * commands directly.
		 *
		 * Also, explicitly avoid any shared tables, temp tables, or TOAST
		 * (TOAST will be moved with the main table).
		 */
		if (IsCatalogNamespace(relForm->relnamespace) ||
			relForm->relisshared ||
			isAnyTempNamespace(relForm->relnamespace) ||
			IsToastNamespace(relForm->relnamespace))
			continue;

		/* Only move the object type requested */
		if ((stmt->objtype == OBJECT_TABLE &&
			 relForm->relkind != RELKIND_RELATION &&
			 relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
			(stmt->objtype == OBJECT_INDEX &&
			 relForm->relkind != RELKIND_INDEX &&
			 relForm->relkind != RELKIND_PARTITIONED_INDEX) ||
			(stmt->objtype == OBJECT_MATVIEW &&
			 relForm->relkind != RELKIND_MATVIEW))
			continue;

		/* Check if we are only moving objects owned by certain roles */
		if (role_oids != NIL && !list_member_oid(role_oids, relForm->relowner))
			continue;

		/*
		 * Handle permissions-checking here since we are locking the tables
		 * and also to avoid doing a bunch of work only to fail part-way.
		 * Note that permissions will also be checked by
		 * AlterTableInternal().
		 *
		 * Caller must be considered an owner on the table to move it.
		 */
		if (!pg_class_ownercheck(relOid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)),
						   NameStr(relForm->relname));

		if (stmt->nowait &&
			!ConditionalLockRelationOid(relOid, AccessExclusiveLock))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_IN_USE),
					 errmsg("aborting because lock on relation \"%s.%s\" is not available",
							get_namespace_name(relForm->relnamespace),
							NameStr(relForm->relname))));
		else
			LockRelationOid(relOid, AccessExclusiveLock);

		/* Add to our list of objects to move */
		relations = lappend_oid(relations, relOid);
	}

	table_endscan(scan);
	table_close(rel, AccessShareLock);

	if (relations == NIL)
		ereport(NOTICE,
				(errcode(ERRCODE_NO_DATA_FOUND),
				 errmsg("no matching relations in tablespace \"%s\" found",
						orig_tablespaceoid == InvalidOid ? "(database default)" :
						get_tablespace_name(orig_tablespaceoid))));

	/* Everything is locked, loop through and move all of the relations. */
	foreach(l, relations)
	{
		List	   *cmds = NIL;
		AlterTableCmd *cmd = makeNode(AlterTableCmd);

		cmd->subtype = AT_SetTableSpace;
		cmd->name = stmt->new_tablespacename;

		cmds = lappend(cmds, cmd);

		EventTriggerAlterTableStart((Node *) stmt);
		/* OID is set by AlterTableInternal */
		AlterTableInternal(lfirst_oid(l), cmds, false);
		EventTriggerAlterTableEnd();
	}

	return new_tablespaceoid;
}

/*
 * Copy an index's physical storage fork-by-fork to a new relfilenode,
 * then schedule the old storage for unlinking.  Used by
 * ATExecSetTableSpace() since indexes have no table AM to do the copy.
 */
static void
index_copy_data(Relation rel, RelFileNode newrnode)
{
	SMgrRelation dstrel;

	dstrel = smgropen(newrnode, rel->rd_backend);

	/*
	 * Since we copy the file directly without looking at the shared buffers,
	 * we'd better first flush out any pages of the source relation that are
	 * in shared buffers.  We assume no new changes will be made while we are
	 * holding exclusive lock on the rel.
	 */
	FlushRelationBuffers(rel);

	/*
	 * Create and copy all forks of the relation, and schedule unlinking of
	 * old physical files.
	 *
	 * NOTE: any conflict in relfilenode value will be caught in
	 * RelationCreateStorage().
	 */
	RelationCreateStorage(newrnode, rel->rd_rel->relpersistence, true);

	/* copy main fork */
	RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
						rel->rd_rel->relpersistence);

	/* copy those extra forks that exist */
	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
		 forkNum <= MAX_FORKNUM; forkNum++)
	{
		if (smgrexists(RelationGetSmgr(rel), forkNum))
		{
			smgrcreate(dstrel, forkNum, false);

			/*
			 * WAL log creation if the relation is persistent, or this is the
			 * init fork of an unlogged relation.
			 */
			if (RelationIsPermanent(rel) ||
				(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
				 forkNum == INIT_FORKNUM))
				log_smgrcreate(&newrnode, forkNum);
			RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
								rel->rd_rel->relpersistence);
		}
	}

	/* drop old relation, and close new one */
	RelationDropStorage(rel);
	smgrclose(dstrel);
}

/*
 * ALTER TABLE ENABLE/DISABLE TRIGGER
 *
 * We just pass this off to trigger.c.
 */
static void
ATExecEnableDisableTrigger(Relation rel, const char *trigname,
						   char fires_when, bool skip_system, bool recurse,
						   LOCKMODE lockmode)
{
	EnableDisableTriggerNew2(rel, trigname, InvalidOid,
							 fires_when, skip_system, recurse,
							 lockmode);
}

/*
 * ALTER TABLE ENABLE/DISABLE RULE
 *
 * We just pass this off to rewriteDefine.c.
 */
static void
ATExecEnableDisableRule(Relation rel, const char *rulename,
						char fires_when, LOCKMODE lockmode)
{
	EnableDisableRule(rel, rulename, fires_when);
}

/*
 * ALTER TABLE INHERIT
 *
 * Add a parent to the child's parents.  This verifies that all the columns
 * and check constraints of the parent appear in the child and that they have
 * the same data types and expressions.
 */
static void
ATPrepAddInherit(Relation child_rel)
{
	/* typed tables have their shape fixed by the composite type */
	if (child_rel->rd_rel->reloftype)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of typed table")));

	if (child_rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of a partition")));

	if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of partitioned table")));
}

/*
 * Return the address of the new parent relation.
 */
static ObjectAddress
ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
{
	Relation	parent_rel;
	List	   *children;
	ObjectAddress address;
	const char *trigger_name;

	/*
	 * A self-exclusive lock is needed here.  See the similar case in
	 * MergeAttributes() for a full explanation.
	 */
	parent_rel = table_openrv(parent, ShareUpdateExclusiveLock);

	/*
	 * Must be owner of both parent and child -- child was checked by
	 * ATSimplePermissions call in ATPrepCmd
	 */
	ATSimplePermissions(AT_AddInherit, parent_rel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* Permanent rels cannot inherit from temporary ones */
	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from temporary relation \"%s\"",
						RelationGetRelationName(parent_rel))));

	/* If parent rel is temp, it must belong to this session */
	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!parent_rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from temporary relation of another session")));

	/* Ditto for the child */
	if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!child_rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit to temporary relation of another session")));

	/* Prevent partitioned tables from becoming inheritance parents */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from partitioned table \"%s\"",
						parent->relname)));

	/* Likewise for partitions */
	if (parent_rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot inherit from a partition")));

	/*
	 * Prevent circularity by seeing if proposed parent inherits from child.
	 * (In particular, this disallows making a rel inherit from itself.)
	 *
	 * This is not completely bulletproof because of race conditions: in
	 * multi-level inheritance trees, someone else could concurrently be
	 * making another inheritance link that closes the loop but does not join
	 * either of the rels we have locked.  Preventing that seems to require
	 * exclusive locks on the entire inheritance tree, which is a cure worse
	 * than the disease.  find_all_inheritors() will cope with circularity
	 * anyway, so don't sweat it too much.
	 *
	 * We use weakest lock we can on child's children, namely AccessShareLock.
	 */
	children = find_all_inheritors(RelationGetRelid(child_rel),
								   AccessShareLock, NULL);

	if (list_member_oid(children, RelationGetRelid(parent_rel)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("circular inheritance not allowed"),
				 errdetail("\"%s\" is already a child of \"%s\".",
						   parent->relname,
						   RelationGetRelationName(child_rel))));

	/*
	 * If child_rel has row-level triggers with transition tables, we
	 * currently don't allow it to become an inheritance child.  See also
	 * prohibitions in ATExecAttachPartition() and CreateTrigger().
	 */
	trigger_name = FindTriggerIncompatibleWithInheritance(child_rel->trigdesc);
	if (trigger_name != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming an inheritance child",
						trigger_name, RelationGetRelationName(child_rel)),
				 errdetail("ROW triggers with transition tables are not supported in inheritance hierarchies.")));

	/* OK to create inheritance */
	CreateInheritance(child_rel, parent_rel);

	ObjectAddressSet(address, RelationRelationId,
					 RelationGetRelid(parent_rel));

	/* keep our lock on the parent relation until commit */
	table_close(parent_rel, NoLock);

	return address;
}

/*
 * CreateInheritance
 *		Catalog manipulation portion of creating inheritance between a child
 *		table and a parent table.
 *
 * Common to ATExecAddInherit() and ATExecAttachPartition().
 */
static void
CreateInheritance(Relation child_rel, Relation parent_rel)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	inheritsTuple;
	int32		inhseqno;

	/* Note: get RowExclusiveLock because we will write pg_inherits below. */
	catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);

	/*
	 * Check for duplicates in the list of parents, and determine the highest
	 * inhseqno already present; we'll use the next one for the new parent.
	 * Also, if proposed child is a partition, it cannot already be
	 * inheriting.
	 *
	 * Note: we do not reject the case where the child already inherits from
	 * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
	 */
	ScanKeyInit(&key,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(child_rel)));
	scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
							  true, NULL, 1, &key);

	/* inhseqno sequences start at 1 */
	inhseqno = 0;
	while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
	{
		Form_pg_inherits inh = (Form_pg_inherits) GETSTRUCT(inheritsTuple);

		if (inh->inhparent == RelationGetRelid(parent_rel))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" would be inherited from more than once",
							RelationGetRelationName(parent_rel))));

		if (inh->inhseqno > inhseqno)
			inhseqno = inh->inhseqno;
	}
	systable_endscan(scan);

	/* Match up the columns and bump attinhcount as needed */
	MergeAttributesIntoExisting(child_rel, parent_rel);

	/* Match up the constraints and bump coninhcount as needed */
	MergeConstraintsIntoExisting(child_rel, parent_rel);

	/*
	 * OK, it looks valid.  Make the catalog entries that show inheritance.
	 */
	StoreCatalogInheritance1(RelationGetRelid(child_rel),
							 RelationGetRelid(parent_rel),
							 inhseqno + 1,
							 catalogRelation,
							 parent_rel->rd_rel->relkind ==
							 RELKIND_PARTITIONED_TABLE);

	/* Now we're done with pg_inherits */
	table_close(catalogRelation, RowExclusiveLock);
}

/*
 * Obtain the source-text form of the constraint expression for a check
 * constraint, given its pg_constraint tuple
 */
static char *
decompile_conbin(HeapTuple contup, TupleDesc tupdesc)
{
	Form_pg_constraint con;
	bool		isnull;
	Datum		attr;
	Datum		expr;

	con = (Form_pg_constraint) GETSTRUCT(contup);
	attr = heap_getattr(contup, Anum_pg_constraint_conbin, tupdesc, &isnull);
	if (isnull)
		elog(ERROR, "null conbin for constraint %u", con->oid);

	/* deparse against the constraint's own relation for column names */
	expr = DirectFunctionCall2(pg_get_expr, attr,
							   ObjectIdGetDatum(con->conrelid));
	return TextDatumGetCString(expr);
}

/*
 * Determine whether two check constraints are functionally equivalent
 *
 * The test we apply is to see whether they reverse-compile to the same
 * source string.  This insulates us from issues like whether attributes
 * have the same physical column numbers in parent and child relations.
 */
static bool
constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
{
	Form_pg_constraint acon = (Form_pg_constraint) GETSTRUCT(a);
	Form_pg_constraint bcon = (Form_pg_constraint) GETSTRUCT(b);

	if (acon->condeferrable != bcon->condeferrable ||
		acon->condeferred != bcon->condeferred ||
		strcmp(decompile_conbin(a, tupleDesc),
			   decompile_conbin(b, tupleDesc)) != 0)
		return false;
	else
		return true;
}

/*
 * Check columns in child table match up with columns in parent, and increment
 * their attinhcount.
 *
 * Called by CreateInheritance
 *
 * Currently all parent columns must be found in child.  Missing columns are
 * an error.  One day we might consider creating new columns like CREATE TABLE
 * does.
 * However, that is widely unpopular --- in the common use case of partitioned
 * tables it's a foot-gun.
 *
 * The data type must match exactly.  If the parent column is NOT NULL then
 * the child must be as well.  Defaults are not compared, however.
 */
static void
MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	attrrel;
	AttrNumber	parent_attno;
	int			parent_natts;
	TupleDesc	tupleDesc;
	HeapTuple	tuple;
	bool		child_is_partition = false;

	attrrel = table_open(AttributeRelationId, RowExclusiveLock);

	tupleDesc = RelationGetDescr(parent_rel);
	parent_natts = tupleDesc->natts;

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++)
	{
		Form_pg_attribute attribute = TupleDescAttr(tupleDesc,
													parent_attno - 1);
		char	   *attributeName = NameStr(attribute->attname);

		/* Ignore dropped columns in the parent. */
		if (attribute->attisdropped)
			continue;

		/* Find same column in child (matching on column name). */
		tuple = SearchSysCacheCopyAttName(RelationGetRelid(child_rel),
										  attributeName);
		if (HeapTupleIsValid(tuple))
		{
			/* Check they are same type, typmod, and collation */
			Form_pg_attribute childatt = (Form_pg_attribute) GETSTRUCT(tuple);

			if (attribute->atttypid != childatt->atttypid ||
				attribute->atttypmod != childatt->atttypmod)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different type for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			if (attribute->attcollation != childatt->attcollation)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("child table \"%s\" has different collation for column \"%s\"",
								RelationGetRelationName(child_rel),
								attributeName)));

			/*
			 * Check child doesn't discard NOT NULL property.  (Other
			 * constraints are checked elsewhere.)
			 */
			if (attribute->attnotnull && !childatt->attnotnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be marked NOT NULL",
								attributeName)));

			/*
			 * If parent column is generated, child column must be, too.
			 */
			if (attribute->attgenerated && !childatt->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("column \"%s\" in child table must be a generated column",
								attributeName)));

			/*
			 * Check that both generation expressions match.
			 *
			 * The test we apply is to see whether they reverse-compile to the
			 * same source string.  This insulates us from issues like whether
			 * attributes have the same physical column numbers in parent and
			 * child relations.  (See also constraints_equivalent().)
			 */
			if (attribute->attgenerated && childatt->attgenerated)
			{
				TupleConstr *child_constr = child_rel->rd_att->constr;
				TupleConstr *parent_constr = parent_rel->rd_att->constr;
				char	   *child_expr = NULL;
				char	   *parent_expr = NULL;

				Assert(child_constr != NULL);
				Assert(parent_constr != NULL);

				for (int i = 0; i < child_constr->num_defval; i++)
				{
					if (child_constr->defval[i].adnum == childatt->attnum)
					{
						child_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(child_constr->defval[i].adbin),
																	ObjectIdGetDatum(child_rel->rd_id)));
						break;
					}
				}
				Assert(child_expr != NULL);

				for (int i = 0; i < parent_constr->num_defval; i++)
				{
					if (parent_constr->defval[i].adnum == attribute->attnum)
					{
						parent_expr =
							TextDatumGetCString(DirectFunctionCall2(pg_get_expr,
																	CStringGetTextDatum(parent_constr->defval[i].adbin),
																	ObjectIdGetDatum(parent_rel->rd_id)));
						break;
					}
				}
				Assert(parent_expr != NULL);

				if (strcmp(child_expr, parent_expr) != 0)
					ereport(ERROR,
							(errcode(ERRCODE_DATATYPE_MISMATCH),
							 errmsg("column \"%s\" in child table has a conflicting generation expression",
									attributeName)));
			}

			/*
			 * OK, bump the child column's inheritance count.  (If we fail
			 * later on, this change will just roll back.)
			 */
			childatt->attinhcount++;

			/*
			 * In case of partitions, we must enforce that value of attislocal
			 * is same in all partitions.  (Note: there are only inherited
			 * attributes in partitions)
			 */
			if (child_is_partition)
			{
				Assert(childatt->attinhcount == 1);
				childatt->attislocal = false;
			}

			CatalogTupleUpdate(attrrel, &tuple->t_self, tuple);
			heap_freetuple(tuple);
		}
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing column \"%s\"",
							attributeName)));
		}
	}

	table_close(attrrel, RowExclusiveLock);
}

/*
 * Check constraints in child table match up with constraints in parent,
 * and increment their coninhcount.
 *
 * Constraints that are marked ONLY in the parent are ignored.
 *
 * Called by CreateInheritance
 *
 * Currently all constraints in parent must be present in the child.  One day
 * we may consider adding new constraints like CREATE TABLE does.
 *
 * XXX This is O(N^2) which may be an issue with tables with hundreds of
 * constraints.  As long as tables have more like 10 constraints it shouldn't
 * be a problem though.  Even 100 constraints ought not be the end of the
 * world.
 *
 * XXX See MergeWithExistingConstraint too if you change this code.
 */
static void
MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
{
	Relation	catalog_relation;
	TupleDesc	tuple_desc;
	SysScanDesc parent_scan;
	ScanKeyData parent_key;
	HeapTuple	parent_tuple;
	bool		child_is_partition = false;

	catalog_relation = table_open(ConstraintRelationId, RowExclusiveLock);
	tuple_desc = RelationGetDescr(catalog_relation);

	/* If parent_rel is a partitioned table, child_rel must be a partition */
	if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		child_is_partition = true;

	/* Outer loop scans through the parent's constraint definitions */
	ScanKeyInit(&parent_key,
				Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	parent_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
									 true, NULL, 1, &parent_key);

	while (HeapTupleIsValid(parent_tuple = systable_getnext(parent_scan)))
	{
		Form_pg_constraint parent_con = (Form_pg_constraint) GETSTRUCT(parent_tuple);
		SysScanDesc child_scan;
		ScanKeyData child_key;
		HeapTuple	child_tuple;
		bool		found = false;

		/* Only CHECK constraints participate in inheritance merging */
		if (parent_con->contype != CONSTRAINT_CHECK)
			continue;

		/* if the parent's constraint is marked NO INHERIT, it's not inherited */
		if (parent_con->connoinherit)
			continue;

		/* Search for a child constraint matching this one (by name) */
		ScanKeyInit(&child_key,
					Anum_pg_constraint_conrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(RelationGetRelid(child_rel)));
		child_scan = systable_beginscan(catalog_relation, ConstraintRelidTypidNameIndexId,
										true, NULL, 1, &child_key);

		while (HeapTupleIsValid(child_tuple = systable_getnext(child_scan)))
		{
			Form_pg_constraint child_con = (Form_pg_constraint) GETSTRUCT(child_tuple);
			HeapTuple	child_copy;

			if (child_con->contype != CONSTRAINT_CHECK)
				continue;

			if (strcmp(NameStr(parent_con->conname),
					   NameStr(child_con->conname)) != 0)
				continue;

			/* Same name but a different expression is an error, not a merge */
			if (!constraints_equivalent(parent_tuple, child_tuple, tuple_desc))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("child table \"%s\" has different definition for check constraint \"%s\"",
								RelationGetRelationName(child_rel),
								NameStr(parent_con->conname))));

			/* If the child constraint is "no inherit" then cannot merge */
			if (child_con->connoinherit)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with non-inherited constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * If the child constraint is "not valid" then cannot merge with a
			 * valid parent constraint
			 */
			if (parent_con->convalidated && !child_con->convalidated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("constraint \"%s\" conflicts with NOT VALID constraint on child table \"%s\"",
								NameStr(child_con->conname),
								RelationGetRelationName(child_rel))));

			/*
			 * OK, bump the child constraint's inheritance count.  (If we fail
			 * later on, this change will just roll back.)  Work on a copy of
			 * the tuple, since the scan's tuple is read-only.
			 */
			child_copy = heap_copytuple(child_tuple);
			child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
			child_con->coninhcount++;

			/*
			 * In case of partitions, an inherited constraint must be
			 * inherited only once since it cannot have multiple parents and
			 * it is never considered local.
			 */
			if (child_is_partition)
			{
				Assert(child_con->coninhcount == 1);
				child_con->conislocal = false;
			}

			CatalogTupleUpdate(catalog_relation, &child_copy->t_self, child_copy);
			heap_freetuple(child_copy);

			found = true;
			break;
		}

		systable_endscan(child_scan);

		/* Every inheritable parent constraint must exist in the child */
		if (!found)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("child table is missing constraint \"%s\"",
							NameStr(parent_con->conname))));
	}

	systable_endscan(parent_scan);
	table_close(catalog_relation, RowExclusiveLock);
}

/*
 * ALTER TABLE NO INHERIT
 *
 * Return value is the address of the relation that is no longer parent.
 */
static ObjectAddress
ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
{
	ObjectAddress address;
	Relation	parent_rel;

	/* Partitions are detached with DETACH PARTITION, not NO INHERIT */
	if (rel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot change inheritance of a partition")));

	/*
	 * AccessShareLock on the parent is probably enough, seeing that DROP
	 * TABLE doesn't lock parent tables at all.  We need some lock since we'll
	 * be inspecting the parent's schema.
	 */
	parent_rel = table_openrv(parent, AccessShareLock);

	/*
	 * We don't bother to check ownership of the parent table --- ownership of
	 * the child is presumed enough rights.
	 */

	/* Off to RemoveInheritance() where most of the work happens */
	RemoveInheritance(rel, parent_rel, false);

	ObjectAddressSet(address, RelationRelationId,
					 RelationGetRelid(parent_rel));

	/* keep our lock on the parent relation until commit */
	table_close(parent_rel, NoLock);

	return address;
}

/*
 * MarkInheritDetached
 *
 * Set inhdetachpending for a partition, for ATExecDetachPartition
 * in concurrent mode.  While at it, verify that no other partition is
 * already pending detach.
 */
static void
MarkInheritDetached(Relation child_rel, Relation parent_rel)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key;
	HeapTuple	inheritsTuple;
	bool		found = false;

	/* Only used for partitioned tables; plain inheritance never gets here */
	Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);

	/*
	 * Find pg_inherits entries by inhparent.  (We need to scan them all in
	 * order to verify that no other partition is pending detach.)
	 */
	catalogRelation = table_open(InheritsRelationId, RowExclusiveLock);
	ScanKeyInit(&key,
				Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(parent_rel)));
	scan = systable_beginscan(catalogRelation, InheritsParentIndexId,
							  true, NULL, 1, &key);

	while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan)))
	{
		Form_pg_inherits inhForm;

		inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple);
		/* At most one partition may be pending detach at any time */
		if (inhForm->inhdetachpending)
			ereport(ERROR,
					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"",
						   get_rel_name(inhForm->inhrelid),
						   get_namespace_name(parent_rel->rd_rel->relnamespace),
						   RelationGetRelationName(parent_rel)),
					errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the pending detach operation."));

		if (inhForm->inhrelid == RelationGetRelid(child_rel))
		{
			HeapTuple	newtup;

			newtup = heap_copytuple(inheritsTuple);
			((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true;

			CatalogTupleUpdate(catalogRelation,
							   &inheritsTuple->t_self,
							   newtup);
			found = true;
			heap_freetuple(newtup);
			/* keep looking, to ensure we catch others pending detach */
		}
	}

	/* Done */
	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);

	/* The named relation was not among the parent's partitions */
	if (!found)
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_TABLE),
				 errmsg("relation \"%s\" is not a partition of relation \"%s\"",
						RelationGetRelationName(child_rel),
						RelationGetRelationName(parent_rel))));
}

/*
 * RemoveInheritance
 *
 * Drop a parent from the child's parents.  This just adjusts the attinhcount
 * and attislocal of the columns and removes the pg_inherit and pg_depend
 * entries.  expect_detached is passed down to DeleteInheritsTuple, q.v..
 *
 * If attinhcount goes to 0 then attislocal gets set to true.  If it goes back
 * up attislocal stays true, which means if a child is ever removed from a
 * parent then its columns will never be automatically dropped which may
 * surprise.  But at least we'll never surprise by dropping columns someone
 * isn't expecting to be dropped which would actually mean data loss.
 *
 * coninhcount and conislocal for inherited constraints are adjusted in
 * exactly the same way.
 *
 * Common to ATExecDropInherit() and ATExecDetachPartition().
+ */ +static void +RemoveInheritance(Relation child_rel, Relation parent_rel, bool expect_detached) +{ + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key[3]; + HeapTuple attributeTuple, + constraintTuple; + List *connames; + bool found; + bool child_is_partition = false; + + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + + found = DeleteInheritsTuple(RelationGetRelid(child_rel), + RelationGetRelid(parent_rel), + expect_detached, + RelationGetRelationName(child_rel)); + if (!found) + { + if (child_is_partition) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a partition of relation \"%s\"", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a parent of relation \"%s\"", + RelationGetRelationName(parent_rel), + RelationGetRelationName(child_rel)))); + } + + /* + * Search through child columns looking for ones matching parent rel + */ + catalogRelation = table_open(AttributeRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_attribute_attrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(child_rel))); + scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId, + true, NULL, 1, key); + while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) + { + Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple); + + /* Ignore if dropped or not inherited */ + if (att->attisdropped) + continue; + if (att->attinhcount <= 0) + continue; + + if (SearchSysCacheExistsAttName(RelationGetRelid(parent_rel), + NameStr(att->attname))) + { + /* Decrement inhcount and possibly set islocal to true */ + HeapTuple copyTuple = heap_copytuple(attributeTuple); + Form_pg_attribute copy_att = (Form_pg_attribute) GETSTRUCT(copyTuple); + 
+ copy_att->attinhcount--; + if (copy_att->attinhcount == 0) + copy_att->attislocal = true; + + CatalogTupleUpdate(catalogRelation, ©Tuple->t_self, copyTuple); + heap_freetuple(copyTuple); + } + } + systable_endscan(scan); + table_close(catalogRelation, RowExclusiveLock); + + /* + * Likewise, find inherited check constraints and disinherit them. To do + * this, we first need a list of the names of the parent's check + * constraints. (We cheat a bit by only checking for name matches, + * assuming that the expressions will match.) + */ + catalogRelation = table_open(ConstraintRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(parent_rel))); + scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId, + true, NULL, 1, key); + + connames = NIL; + + while (HeapTupleIsValid(constraintTuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple); + + if (con->contype == CONSTRAINT_CHECK) + connames = lappend(connames, pstrdup(NameStr(con->conname))); + } + + systable_endscan(scan); + + /* Now scan the child's constraints */ + ScanKeyInit(&key[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(child_rel))); + scan = systable_beginscan(catalogRelation, ConstraintRelidTypidNameIndexId, + true, NULL, 1, key); + + while (HeapTupleIsValid(constraintTuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(constraintTuple); + bool match; + ListCell *lc; + + if (con->contype != CONSTRAINT_CHECK) + continue; + + match = false; + foreach(lc, connames) + { + if (strcmp(NameStr(con->conname), (char *) lfirst(lc)) == 0) + { + match = true; + break; + } + } + + if (match) + { + /* Decrement inhcount and possibly set islocal to true */ + HeapTuple copyTuple = heap_copytuple(constraintTuple); + Form_pg_constraint copy_con = 
(Form_pg_constraint) GETSTRUCT(copyTuple); + + if (copy_con->coninhcount <= 0) /* shouldn't happen */ + elog(ERROR, "relation %u has non-inherited constraint \"%s\"", + RelationGetRelid(child_rel), NameStr(copy_con->conname)); + + copy_con->coninhcount--; + if (copy_con->coninhcount == 0) + copy_con->conislocal = true; + + CatalogTupleUpdate(catalogRelation, ©Tuple->t_self, copyTuple); + heap_freetuple(copyTuple); + } + } + + systable_endscan(scan); + table_close(catalogRelation, RowExclusiveLock); + + drop_parent_dependency(RelationGetRelid(child_rel), + RelationRelationId, + RelationGetRelid(parent_rel), + child_dependency_type(child_is_partition)); + + /* + * Post alter hook of this inherits. Since object_access_hook doesn't take + * multiple object identifiers, we relay oid of parent relation using + * auxiliary_id argument. + */ + InvokeObjectPostAlterHookArg(InheritsRelationId, + RelationGetRelid(child_rel), 0, + RelationGetRelid(parent_rel), false); +} + +/* + * Drop the dependency created by StoreCatalogInheritance1 (CREATE TABLE + * INHERITS/ALTER TABLE INHERIT -- refclassid will be RelationRelationId) or + * heap_create_with_catalog (CREATE TABLE OF/ALTER TABLE OF -- refclassid will + * be TypeRelationId). There's no convenient way to do this, so go trawling + * through pg_depend. 
 */
static void
drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid,
					   DependencyType deptype)
{
	Relation	catalogRelation;
	SysScanDesc scan;
	ScanKeyData key[3];
	HeapTuple	depTuple;

	catalogRelation = table_open(DependRelationId, RowExclusiveLock);

	/* Scan pg_depend by dependent object: (classid, objid, objsubid = 0) */
	ScanKeyInit(&key[0],
				Anum_pg_depend_classid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_objid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	ScanKeyInit(&key[2],
				Anum_pg_depend_objsubid,
				BTEqualStrategyNumber, F_INT4EQ,
				Int32GetDatum(0));

	scan = systable_beginscan(catalogRelation, DependDependerIndexId, true,
							  NULL, 3, key);

	while (HeapTupleIsValid(depTuple = systable_getnext(scan)))
	{
		Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(depTuple);

		/* Delete only entries matching the referenced object and deptype */
		if (dep->refclassid == refclassid &&
			dep->refobjid == refobjid &&
			dep->refobjsubid == 0 &&
			dep->deptype == deptype)
			CatalogTupleDelete(catalogRelation, &depTuple->t_self);
	}

	systable_endscan(scan);
	table_close(catalogRelation, RowExclusiveLock);
}

/*
 * ALTER TABLE OF
 *
 * Attach a table to a composite type, as though it had been created with CREATE
 * TABLE OF.  All attname, atttypid, atttypmod and attcollation must match.  The
 * subject table must not have inheritance parents.  These restrictions ensure
 * that you cannot create a configuration impossible with CREATE TABLE OF alone.
 *
 * The address of the type is returned.
 */
static ObjectAddress
ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode)
{
	Oid			relid = RelationGetRelid(rel);
	Type		typetuple;
	Form_pg_type typeform;
	Oid			typeid;
	Relation	inheritsRelation,
				relationRelation;
	SysScanDesc scan;
	ScanKeyData key;
	AttrNumber	table_attno,
				type_attno;
	TupleDesc	typeTupleDesc,
				tableTupleDesc;
	ObjectAddress tableobj,
				typeobj;
	HeapTuple	classtuple;

	/* Validate the type. */
	typetuple = typenameType(NULL, ofTypename, NULL);
	check_of_type(typetuple);
	typeform = (Form_pg_type) GETSTRUCT(typetuple);
	typeid = typeform->oid;

	/* Fail if the table has any inheritance parents. */
	inheritsRelation = table_open(InheritsRelationId, AccessShareLock);
	ScanKeyInit(&key,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));
	scan = systable_beginscan(inheritsRelation, InheritsRelidSeqnoIndexId,
							  true, NULL, 1, &key);
	if (HeapTupleIsValid(systable_getnext(scan)))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("typed tables cannot inherit")));
	systable_endscan(scan);
	table_close(inheritsRelation, AccessShareLock);

	/*
	 * Check the tuple descriptors for compatibility.  Unlike inheritance, we
	 * require that the order also match.  However, attnotnull need not match.
	 */
	typeTupleDesc = lookup_rowtype_tupdesc(typeid, -1);
	tableTupleDesc = RelationGetDescr(rel);
	table_attno = 1;
	for (type_attno = 1; type_attno <= typeTupleDesc->natts; type_attno++)
	{
		Form_pg_attribute type_attr,
					table_attr;
		const char *type_attname,
				   *table_attname;

		/* Get the next non-dropped type attribute. */
		type_attr = TupleDescAttr(typeTupleDesc, type_attno - 1);
		if (type_attr->attisdropped)
			continue;
		type_attname = NameStr(type_attr->attname);

		/* Get the next non-dropped table attribute. */
		do
		{
			if (table_attno > tableTupleDesc->natts)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table is missing column \"%s\"",
								type_attname)));
			table_attr = TupleDescAttr(tableTupleDesc, table_attno - 1);
			table_attno++;
		} while (table_attr->attisdropped);
		table_attname = NameStr(table_attr->attname);

		/* Compare name. */
		if (strncmp(table_attname, type_attname, NAMEDATALEN) != 0)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has column \"%s\" where type requires \"%s\"",
							table_attname, type_attname)));

		/* Compare type. */
		if (table_attr->atttypid != type_attr->atttypid ||
			table_attr->atttypmod != type_attr->atttypmod ||
			table_attr->attcollation != type_attr->attcollation)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table \"%s\" has different type for column \"%s\"",
							RelationGetRelationName(rel), type_attname)));
	}
	ReleaseTupleDesc(typeTupleDesc);

	/* Any remaining columns at the end of the table had better be dropped. */
	for (; table_attno <= tableTupleDesc->natts; table_attno++)
	{
		Form_pg_attribute table_attr = TupleDescAttr(tableTupleDesc,
													 table_attno - 1);

		if (!table_attr->attisdropped)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table has extra column \"%s\"",
							NameStr(table_attr->attname))));
	}

	/* If the table was already typed, drop the existing dependency. */
	if (rel->rd_rel->reloftype)
		drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
							   DEPENDENCY_NORMAL);

	/* Record a dependency on the new type. */
	tableobj.classId = RelationRelationId;
	tableobj.objectId = relid;
	tableobj.objectSubId = 0;
	typeobj.classId = TypeRelationId;
	typeobj.objectId = typeid;
	typeobj.objectSubId = 0;
	recordDependencyOn(&tableobj, &typeobj, DEPENDENCY_NORMAL);

	/* Update pg_class.reloftype */
	relationRelation = table_open(RelationRelationId, RowExclusiveLock);
	classtuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(classtuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);
	((Form_pg_class) GETSTRUCT(classtuple))->reloftype = typeid;
	CatalogTupleUpdate(relationRelation, &classtuple->t_self, classtuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	heap_freetuple(classtuple);
	table_close(relationRelation, RowExclusiveLock);

	ReleaseSysCache(typetuple);

	return typeobj;
}

/*
 * ALTER TABLE NOT OF
 *
 * Detach a typed table from its originating type.  Just clear reloftype and
 * remove the dependency.
 */
static void
ATExecDropOf(Relation rel, LOCKMODE lockmode)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	relationRelation;
	HeapTuple	tuple;

	if (!OidIsValid(rel->rd_rel->reloftype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a typed table",
						RelationGetRelationName(rel))));

	/*
	 * We don't bother to check ownership of the type --- ownership of the
	 * table is presumed enough rights.  No lock required on the type, either.
 */

	drop_parent_dependency(relid, TypeRelationId, rel->rd_rel->reloftype,
						   DEPENDENCY_NORMAL);

	/* Clear pg_class.reloftype */
	relationRelation = table_open(RelationRelationId, RowExclusiveLock);
	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);
	((Form_pg_class) GETSTRUCT(tuple))->reloftype = InvalidOid;
	CatalogTupleUpdate(relationRelation, &tuple->t_self, tuple);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	heap_freetuple(tuple);
	table_close(relationRelation, RowExclusiveLock);
}

/*
 * relation_mark_replica_identity: Update a table's replica identity
 *
 * Iff ri_type = REPLICA_IDENTITY_INDEX, indexOid must be the Oid of a suitable
 * index.  Otherwise, it must be InvalidOid.
 *
 * Caller had better hold an exclusive lock on the relation, as the results
 * of running two of these concurrently wouldn't be pretty.
 */
static void
relation_mark_replica_identity(Relation rel, char ri_type, Oid indexOid,
							   bool is_internal)
{
	Relation	pg_index;
	Relation	pg_class;
	HeapTuple	pg_class_tuple;
	HeapTuple	pg_index_tuple;
	Form_pg_class pg_class_form;
	Form_pg_index pg_index_form;
	ListCell   *index;

	/*
	 * Check whether relreplident has changed, and update it if so.
	 */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);
	pg_class_tuple = SearchSysCacheCopy1(RELOID,
										 ObjectIdGetDatum(RelationGetRelid(rel)));
	if (!HeapTupleIsValid(pg_class_tuple))
		elog(ERROR, "cache lookup failed for relation \"%s\"",
			 RelationGetRelationName(rel));
	pg_class_form = (Form_pg_class) GETSTRUCT(pg_class_tuple);
	if (pg_class_form->relreplident != ri_type)
	{
		pg_class_form->relreplident = ri_type;
		CatalogTupleUpdate(pg_class, &pg_class_tuple->t_self, pg_class_tuple);
	}
	table_close(pg_class, RowExclusiveLock);
	heap_freetuple(pg_class_tuple);

	/*
	 * Update the per-index indisreplident flags correctly.  Exactly one
	 * index (the one passed in, if any) may carry the flag afterwards.
	 */
	pg_index = table_open(IndexRelationId, RowExclusiveLock);
	foreach(index, RelationGetIndexList(rel))
	{
		Oid			thisIndexOid = lfirst_oid(index);
		bool		dirty = false;

		pg_index_tuple = SearchSysCacheCopy1(INDEXRELID,
											 ObjectIdGetDatum(thisIndexOid));
		if (!HeapTupleIsValid(pg_index_tuple))
			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
		pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple);

		if (thisIndexOid == indexOid)
		{
			/* Set the bit if not already set. */
			if (!pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = true;
			}
		}
		else
		{
			/* Unset the bit if set. */
			if (pg_index_form->indisreplident)
			{
				dirty = true;
				pg_index_form->indisreplident = false;
			}
		}

		if (dirty)
		{
			CatalogTupleUpdate(pg_index, &pg_index_tuple->t_self, pg_index_tuple);
			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
										 InvalidOid, is_internal);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's replica identity index
			 * before attempting any UPDATE or DELETE on the table.  (If we
			 * changed the table's pg_class row above, then a relcache inval
			 * is already queued due to that; but we might not have.)
			 */
			CacheInvalidateRelcache(rel);
		}
		heap_freetuple(pg_index_tuple);
	}

	table_close(pg_index, RowExclusiveLock);
}

/*
 * ALTER TABLE REPLICA IDENTITY ...
 */
static void
ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKMODE lockmode)
{
	Oid			indexOid;
	Relation	indexRel;
	int			key;

	/* DEFAULT/FULL/NOTHING need no index; handle them and return early */
	if (stmt->identity_type == REPLICA_IDENTITY_DEFAULT)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_FULL)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_NOTHING)
	{
		relation_mark_replica_identity(rel, stmt->identity_type, InvalidOid, true);
		return;
	}
	else if (stmt->identity_type == REPLICA_IDENTITY_INDEX)
	{
		/* fallthrough */ ;
	}
	else
		elog(ERROR, "unexpected identity type %u", stmt->identity_type);

	/* Check that the index exists */
	indexOid = get_relname_relid(stmt->name, rel->rd_rel->relnamespace);
	if (!OidIsValid(indexOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("index \"%s\" for table \"%s\" does not exist",
						stmt->name, RelationGetRelationName(rel))));

	indexRel = index_open(indexOid, ShareLock);

	/* Check that the index is on the relation we're altering. */
	if (indexRel->rd_index == NULL ||
		indexRel->rd_index->indrelid != RelationGetRelid(rel))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index for table \"%s\"",
						RelationGetRelationName(indexRel),
						RelationGetRelationName(rel))));

	/* The AM must support uniqueness, and the index must in fact be unique.
	 */
	if (!indexRel->rd_indam->amcanunique ||
		!indexRel->rd_index->indisunique)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot use non-unique index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Deferred indexes are not guaranteed to be always unique. */
	if (!indexRel->rd_index->indimmediate)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use non-immediate index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Expression indexes aren't supported. */
	if (RelationGetIndexExpressions(indexRel) != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use expression index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));
	/* Predicate indexes aren't supported. */
	if (RelationGetIndexPredicate(indexRel) != NIL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot use partial index \"%s\" as replica identity",
						RelationGetRelationName(indexRel))));

	/* Check index for nullable columns. */
	for (key = 0; key < IndexRelationGetNumberOfKeyAttributes(indexRel); key++)
	{
		int16		attno = indexRel->rd_index->indkey.values[key];
		Form_pg_attribute attr;

		/*
		 * Reject any other system columns.  (Going forward, we'll disallow
		 * indexes containing such columns in the first place, but they might
		 * exist in older branches.)
		 */
		if (attno <= 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
					 errmsg("index \"%s\" cannot be used as replica identity because column %d is a system column",
							RelationGetRelationName(indexRel), attno)));

		attr = TupleDescAttr(rel->rd_att, attno - 1);
		if (!attr->attnotnull)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("index \"%s\" cannot be used as replica identity because column \"%s\" is nullable",
							RelationGetRelationName(indexRel),
							NameStr(attr->attname))));
	}

	/* This index is suitable for use as a replica identity. Mark it. */
	relation_mark_replica_identity(rel, stmt->identity_type, indexOid, true);

	index_close(indexRel, NoLock);
}

/*
 * ALTER TABLE ENABLE/DISABLE ROW LEVEL SECURITY
 */
static void
ATExecSetRowSecurity(Relation rel, bool rls)
{
	Relation	pg_class;
	Oid			relid;
	HeapTuple	tuple;

	relid = RelationGetRelid(rel);

	/* Pull the record for this relation and update it */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	((Form_pg_class) GETSTRUCT(tuple))->relrowsecurity = rls;
	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);

	table_close(pg_class, RowExclusiveLock);
	heap_freetuple(tuple);
}

/*
 * ALTER TABLE FORCE/NO FORCE ROW LEVEL SECURITY
 */
static void
ATExecForceNoForceRowSecurity(Relation rel, bool force_rls)
{
	Relation	pg_class;
	Oid			relid;
	HeapTuple	tuple;

	relid = RelationGetRelid(rel);

	/* Update pg_class.relforcerowsecurity for this relation */
	pg_class = table_open(RelationRelationId, RowExclusiveLock);

	tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for relation %u", relid);

	((Form_pg_class) GETSTRUCT(tuple))->relforcerowsecurity = force_rls;
	CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);

table_close(pg_class, RowExclusiveLock); + heap_freetuple(tuple); +} + +/* + * ALTER FOREIGN TABLE OPTIONS (...) + */ +static void +ATExecGenericOptions(Relation rel, List *options) +{ + Relation ftrel; + ForeignServer *server; + ForeignDataWrapper *fdw; + HeapTuple tuple; + bool isnull; + Datum repl_val[Natts_pg_foreign_table]; + bool repl_null[Natts_pg_foreign_table]; + bool repl_repl[Natts_pg_foreign_table]; + Datum datum; + Form_pg_foreign_table tableform; + + if (options == NIL) + return; + + ftrel = table_open(ForeignTableRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(FOREIGNTABLEREL, rel->rd_id); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("foreign table \"%s\" does not exist", + RelationGetRelationName(rel)))); + tableform = (Form_pg_foreign_table) GETSTRUCT(tuple); + server = GetForeignServer(tableform->ftserver); + fdw = GetForeignDataWrapper(server->fdwid); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + /* Extract the current options */ + datum = SysCacheGetAttr(FOREIGNTABLEREL, + tuple, + Anum_pg_foreign_table_ftoptions, + &isnull); + if (isnull) + datum = PointerGetDatum(NULL); + + /* Transform the options */ + datum = transformGenericOptions(ForeignTableRelationId, + datum, + options, + fdw->fdwvalidator); + + if (PointerIsValid(DatumGetPointer(datum))) + repl_val[Anum_pg_foreign_table_ftoptions - 1] = datum; + else + repl_null[Anum_pg_foreign_table_ftoptions - 1] = true; + + repl_repl[Anum_pg_foreign_table_ftoptions - 1] = true; + + /* Everything looks good - update the tuple */ + + tuple = heap_modify_tuple(tuple, RelationGetDescr(ftrel), + repl_val, repl_null, repl_repl); + + CatalogTupleUpdate(ftrel, &tuple->t_self, tuple); + + /* + * Invalidate relcache so that all sessions will refresh any cached plans + * that might depend on the old options. 
+ */ + CacheInvalidateRelcache(rel); + + InvokeObjectPostAlterHook(ForeignTableRelationId, + RelationGetRelid(rel), 0); + + table_close(ftrel, RowExclusiveLock); + + heap_freetuple(tuple); +} + +/* + * ALTER TABLE ALTER COLUMN SET COMPRESSION + * + * Return value is the address of the modified column + */ +static ObjectAddress +ATExecSetCompression(AlteredTableInfo *tab, + Relation rel, + const char *column, + Node *newValue, + LOCKMODE lockmode) +{ + Relation attrel; + HeapTuple tuple; + Form_pg_attribute atttableform; + AttrNumber attnum; + char *compression; + char cmethod; + ObjectAddress address; + + Assert(IsA(newValue, String)); + compression = strVal(newValue); + + attrel = table_open(AttributeRelationId, RowExclusiveLock); + + /* copy the cache entry so we can scribble on it below */ + tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), column); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + column, RelationGetRelationName(rel)))); + + /* prevent them from altering a system attribute */ + atttableform = (Form_pg_attribute) GETSTRUCT(tuple); + attnum = atttableform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter system column \"%s\"", column))); + + /* + * Check that column type is compressible, then get the attribute + * compression method code + */ + cmethod = GetAttributeCompression(atttableform->atttypid, compression); + + /* update pg_attribute entry */ + atttableform->attcompression = cmethod; + CatalogTupleUpdate(attrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + attnum); + + /* + * Apply the change to indexes as well (only for simple index columns, + * matching behavior of index.c ConstructTupleDescriptor()). 
	 */
	SetIndexStorageProperties(rel, attrel, attnum,
							  false, 0,
							  true, cmethod,
							  lockmode);

	heap_freetuple(tuple);

	table_close(attrel, RowExclusiveLock);

	/* make changes visible */
	CommandCounterIncrement();

	ObjectAddressSubSet(address, RelationRelationId,
						RelationGetRelid(rel), attnum);
	return address;
}


/*
 * Preparation phase for SET LOGGED/UNLOGGED
 *
 * This verifies that we're not trying to change a temp table.  Also,
 * existing foreign key constraints are checked to avoid ending up with
 * permanent tables referencing unlogged tables.
 *
 * Return value is false if the operation is a no-op (in which case the
 * checks are skipped), otherwise true.
 */
static bool
ATPrepChangePersistence(Relation rel, bool toLogged)
{
	Relation	pg_constraint;
	HeapTuple	tuple;
	SysScanDesc scan;
	ScanKeyData skey[1];

	/*
	 * Disallow changing status for a temp table.  Also verify whether we can
	 * get away with doing nothing; in such cases we don't need to run the
	 * checks below, either.
	 */
	switch (rel->rd_rel->relpersistence)
	{
		case RELPERSISTENCE_TEMP:
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot change logged status of table \"%s\" because it is temporary",
							RelationGetRelationName(rel)),
					 errtable(rel)));
			break;
		case RELPERSISTENCE_PERMANENT:
			if (toLogged)
				/* nothing to do */
				return false;
			break;
		case RELPERSISTENCE_UNLOGGED:
			if (!toLogged)
				/* nothing to do */
				return false;
			break;
	}

	/*
	 * Check that the table is not part of any publication when changing to
	 * UNLOGGED, as UNLOGGED tables can't be published.
	 */
	if (!toLogged &&
		list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
						RelationGetRelationName(rel)),
				 errdetail("Unlogged relations cannot be replicated.")));

	/*
	 * Check existing foreign key constraints to preserve the invariant that
	 * permanent tables cannot reference unlogged ones.  Self-referencing
	 * foreign keys can safely be ignored.
	 */
	pg_constraint = table_open(ConstraintRelationId, AccessShareLock);

	/*
	 * Scan conrelid if changing to permanent, else confrelid.  This also
	 * determines whether a useful index exists: passing InvalidOid as the
	 * index OID makes systable_beginscan fall back to a heap scan.
	 */
	ScanKeyInit(&skey[0],
				toLogged ? Anum_pg_constraint_conrelid :
				Anum_pg_constraint_confrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	scan = systable_beginscan(pg_constraint,
							  toLogged ? ConstraintRelidTypidNameIndexId : InvalidOid,
							  true, NULL, 1, skey);

	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);

		if (con->contype == CONSTRAINT_FOREIGN)
		{
			Oid			foreignrelid;
			Relation	foreignrel;

			/* the opposite end of what we used as scankey */
			foreignrelid = toLogged ?
				con->confrelid : con->conrelid;

			/* ignore if self-referencing */
			if (RelationGetRelid(rel) == foreignrelid)
				continue;

			foreignrel = relation_open(foreignrelid, AccessShareLock);

			if (toLogged)
			{
				if (!RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to logged because it references unlogged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}
			else
			{
				if (RelationIsPermanent(foreignrel))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
							 errmsg("could not change table \"%s\" to unlogged because it references logged table \"%s\"",
									RelationGetRelationName(rel),
									RelationGetRelationName(foreignrel)),
							 errtableconstraint(rel, NameStr(con->conname))));
			}

			relation_close(foreignrel, AccessShareLock);
		}
	}

	systable_endscan(scan);

	table_close(pg_constraint, AccessShareLock);

	return true;
}

/*
 * Execute ALTER TABLE SET SCHEMA
 *
 * On success, returns the address of the relocated relation; if oldschema
 * is not NULL, *oldschema receives the OID of the namespace it came from.
 */
ObjectAddress
AlterTableNamespace(AlterObjectSchemaStmt *stmt, Oid *oldschema)
{
	Relation	rel;
	Oid			relid;
	Oid			oldNspOid;
	Oid			nspOid;
	RangeVar   *newrv;
	ObjectAddresses *objsMoved;
	ObjectAddress myself;

	relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock,
									 stmt->missing_ok ? RVR_MISSING_OK : 0,
									 RangeVarCallbackForAlterRelation,
									 (void *) stmt);

	if (!OidIsValid(relid))
	{
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->relation->relname)));
		return InvalidObjectAddress;
	}

	rel = relation_open(relid, NoLock);

	oldNspOid = RelationGetNamespace(rel);

	/* If it's an owned sequence, disallow moving it by itself.
	 */
	if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
	{
		Oid			tableId;
		int32		colId;

		if (sequenceIsOwned(relid, DEPENDENCY_AUTO, &tableId, &colId) ||
			sequenceIsOwned(relid, DEPENDENCY_INTERNAL, &tableId, &colId))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot move an owned sequence into another schema"),
					 errdetail("Sequence \"%s\" is linked to table \"%s\".",
							   RelationGetRelationName(rel),
							   get_rel_name(tableId))));
	}

	/* Get and lock schema OID and check its permissions. */
	newrv = makeRangeVar(stmt->newschema, RelationGetRelationName(rel), -1);
	nspOid = RangeVarGetAndCheckCreationNamespace(newrv, NoLock, NULL);

	/* common checks on switching namespaces */
	CheckSetNamespace(oldNspOid, nspOid);

	objsMoved = new_object_addresses();
	AlterTableNamespaceInternal(rel, oldNspOid, nspOid, objsMoved);
	free_object_addresses(objsMoved);

	ObjectAddressSet(myself, RelationRelationId, relid);

	if (oldschema)
		*oldschema = oldNspOid;

	/* close rel, but keep lock until commit */
	relation_close(rel, NoLock);

	return myself;
}

/*
 * The guts of relocating a table or materialized view to another namespace:
 * besides moving the relation itself, its dependent objects are relocated to
 * the new schema.
 *
 * objsMoved tracks objects already relocated, so that shared dependents are
 * not moved (or hooked) twice.
 */
void
AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
							ObjectAddresses *objsMoved)
{
	Relation	classRel;

	Assert(objsMoved != NULL);

	/* OK, modify the pg_class row and pg_depend entry */
	classRel = table_open(RelationRelationId, RowExclusiveLock);

	AlterRelationNamespaceInternal(classRel, RelationGetRelid(rel), oldNspOid,
								   nspOid, true, objsMoved);

	/* Fix the table's row type too, if it has one */
	if (OidIsValid(rel->rd_rel->reltype))
		AlterTypeNamespaceInternal(rel->rd_rel->reltype,
								   nspOid, false, false, objsMoved);

	/* Fix other dependent stuff */
	if (rel->rd_rel->relkind == RELKIND_RELATION ||
		rel->rd_rel->relkind == RELKIND_MATVIEW ||
		rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved);
		AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid,
						   objsMoved, AccessExclusiveLock);
		AlterConstraintNamespaces(RelationGetRelid(rel), oldNspOid, nspOid,
								  false, objsMoved);
	}

	table_close(classRel, RowExclusiveLock);
}

/*
 * The guts of relocating a relation to another namespace: fix the pg_class
 * entry, and the pg_depend entry if any.  Caller must already have
 * opened and write-locked pg_class.
 */
void
AlterRelationNamespaceInternal(Relation classRel, Oid relOid,
							   Oid oldNspOid, Oid newNspOid,
							   bool hasDependEntry,
							   ObjectAddresses *objsMoved)
{
	HeapTuple	classTup;
	Form_pg_class classForm;
	ObjectAddress thisobj;
	bool		already_done = false;

	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(classTup))
		elog(ERROR, "cache lookup failed for relation %u", relOid);
	classForm = (Form_pg_class) GETSTRUCT(classTup);

	Assert(classForm->relnamespace == oldNspOid);

	thisobj.classId = RelationRelationId;
	thisobj.objectId = relOid;
	thisobj.objectSubId = 0;

	/*
	 * If the object has already been moved, don't move it again.  If it's
	 * already in the right place, don't move it, but still fire the object
	 * access hook.
	 */
	already_done = object_address_present(&thisobj, objsMoved);
	if (!already_done && oldNspOid != newNspOid)
	{
		/* check for duplicate name (more friendly than unique-index failure) */
		if (get_relname_relid(NameStr(classForm->relname),
							  newNspOid) != InvalidOid)
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_TABLE),
					 errmsg("relation \"%s\" already exists in schema \"%s\"",
							NameStr(classForm->relname),
							get_namespace_name(newNspOid))));

		/* classTup is a copy, so OK to scribble on */
		classForm->relnamespace = newNspOid;

		CatalogTupleUpdate(classRel, &classTup->t_self, classTup);

		/* Update dependency on schema if caller said so */
		if (hasDependEntry &&
			changeDependencyFor(RelationRelationId,
								relOid,
								NamespaceRelationId,
								oldNspOid,
								newNspOid) != 1)
			elog(ERROR, "failed to change schema dependency for relation \"%s\"",
				 NameStr(classForm->relname));
	}
	if (!already_done)
	{
		add_exact_object_address(&thisobj, objsMoved);

		InvokeObjectPostAlterHook(RelationRelationId, relOid, 0);
	}

	heap_freetuple(classTup);
}

/*
 * Move all indexes for the specified relation to another namespace.
 *
 * Note: we assume adequate permission checking was done by the caller,
 * and that the caller has a suitable lock on the owning relation.
 */
static void
AlterIndexNamespaces(Relation classRel, Relation rel,
					 Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved)
{
	List	   *indexList;
	ListCell   *l;

	indexList = RelationGetIndexList(rel);

	foreach(l, indexList)
	{
		Oid			indexOid = lfirst_oid(l);
		ObjectAddress thisobj;

		thisobj.classId = RelationRelationId;
		thisobj.objectId = indexOid;
		thisobj.objectSubId = 0;

		/*
		 * Note: currently, the index will not have its own dependency on the
		 * namespace, so we don't need to do changeDependencyFor().  There's no
		 * row type in pg_type, either.
		 *
		 * XXX this objsMoved test may be pointless -- surely we have a single
		 * dependency link from a relation to each index?
		 */
		if (!object_address_present(&thisobj, objsMoved))
		{
			AlterRelationNamespaceInternal(classRel, indexOid,
										   oldNspOid, newNspOid,
										   false, objsMoved);
			add_exact_object_address(&thisobj, objsMoved);
		}
	}

	list_free(indexList);
}

/*
 * Move all identity and SERIAL-column sequences of the specified relation to another
 * namespace.
 *
 * Note: we assume adequate permission checking was done by the caller,
 * and that the caller has a suitable lock on the owning relation.
 */
static void
AlterSeqNamespaces(Relation classRel, Relation rel,
				   Oid oldNspOid, Oid newNspOid, ObjectAddresses *objsMoved,
				   LOCKMODE lockmode)
{
	Relation	depRel;
	SysScanDesc scan;
	ScanKeyData key[2];
	HeapTuple	tup;

	/*
	 * SERIAL sequences are those having an auto dependency on one of the
	 * table's columns (we don't care *which* column, exactly).  Identity
	 * sequences carry an internal dependency instead; both are accepted
	 * by the deptype test below.
	 */
	depRel = table_open(DependRelationId, AccessShareLock);

	ScanKeyInit(&key[0],
				Anum_pg_depend_refclassid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationRelationId));
	ScanKeyInit(&key[1],
				Anum_pg_depend_refobjid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(rel)));
	/* we leave refobjsubid unspecified */

	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
							  NULL, 2, key);

	while (HeapTupleIsValid(tup = systable_getnext(scan)))
	{
		Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup);
		Relation	seqRel;

		/* skip dependencies other than auto dependencies on columns */
		if (depForm->refobjsubid == 0 ||
			depForm->classid != RelationRelationId ||
			depForm->objsubid != 0 ||
			!(depForm->deptype == DEPENDENCY_AUTO || depForm->deptype == DEPENDENCY_INTERNAL))
			continue;

		/* Use relation_open just in case it's an index */
		seqRel = relation_open(depForm->objid, lockmode);

		/* skip non-sequence relations */
		if (RelationGetForm(seqRel)->relkind != RELKIND_SEQUENCE)
		{
			/* No need to keep the lock */
			relation_close(seqRel, lockmode);
			continue;
		}

		/* Fix the pg_class and pg_depend entries */
		AlterRelationNamespaceInternal(classRel, depForm->objid,
									   oldNspOid, newNspOid,
									   true, objsMoved);

		/*
		 * Sequences used to have entries in pg_type, but no longer do.  If we
		 * ever re-instate that, we'll need to move the pg_type entry to the
		 * new namespace, too (using AlterTypeNamespaceInternal).
		 */
		Assert(RelationGetForm(seqRel)->reltype == InvalidOid);

		/* Now we can close it.  Keep the lock till end of transaction. */
		relation_close(seqRel, NoLock);
	}

	systable_endscan(scan);

	relation_close(depRel, AccessShareLock);
}


/*
 * This code supports
 *	CREATE TEMP TABLE ... ON COMMIT { DROP | PRESERVE ROWS | DELETE ROWS }
 *
 * Because we only support this for TEMP tables, it's sufficient to remember
 * the state in a backend-local data structure.
 */

/*
 * Register a newly-created relation's ON COMMIT action.
 */
void
register_on_commit_action(Oid relid, OnCommitAction action)
{
	OnCommitItem *oc;
	MemoryContext oldcxt;

	/*
	 * We needn't bother registering the relation unless there is an ON COMMIT
	 * action we need to take.
	 */
	if (action == ONCOMMIT_NOOP || action == ONCOMMIT_PRESERVE_ROWS)
		return;

	/* Entries must outlive the current transaction, hence CacheMemoryContext */
	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);

	oc = (OnCommitItem *) palloc(sizeof(OnCommitItem));
	oc->relid = relid;
	oc->oncommit = action;
	oc->creating_subid = GetCurrentSubTransactionId();
	oc->deleting_subid = InvalidSubTransactionId;

	/*
	 * We use lcons() here so that ON COMMIT actions are processed in reverse
	 * order of registration.  That might not be essential but it seems
	 * reasonable.
	 */
	on_commits = lcons(oc, on_commits);

	MemoryContextSwitchTo(oldcxt);
}

/*
 * Unregister any ON COMMIT action when a relation is deleted.
 *
 * Actually, we only mark the OnCommitItem entry as to be deleted after commit.
 */
void
remove_on_commit_action(Oid relid)
{
	ListCell   *l;

	foreach(l, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

		if (oc->relid == relid)
		{
			oc->deleting_subid = GetCurrentSubTransactionId();
			break;
		}
	}
}

/*
 * Perform ON COMMIT actions.
 *
 * This is invoked just before actually committing, since it's possible
 * to encounter errors.
 */
void
PreCommit_on_commit_actions(void)
{
	ListCell   *l;
	List	   *oids_to_truncate = NIL;
	List	   *oids_to_drop = NIL;

	foreach(l, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(l);

		/* Ignore entry if already dropped in this xact */
		if (oc->deleting_subid != InvalidSubTransactionId)
			continue;

		switch (oc->oncommit)
		{
			case ONCOMMIT_NOOP:
			case ONCOMMIT_PRESERVE_ROWS:
				/* Do nothing (there shouldn't be such entries, actually) */
				break;
			case ONCOMMIT_DELETE_ROWS:

				/*
				 * If this transaction hasn't accessed any temporary
				 * relations, we can skip truncating ON COMMIT DELETE ROWS
				 * tables, as they must still be empty.
				 */
				if ((MyXactFlags & XACT_FLAGS_ACCESSEDTEMPNAMESPACE))
					oids_to_truncate = lappend_oid(oids_to_truncate, oc->relid);
				break;
			case ONCOMMIT_DROP:
				oids_to_drop = lappend_oid(oids_to_drop, oc->relid);
				break;
		}
	}

	/*
	 * Truncate relations before dropping so that all dependencies between
	 * relations are removed after they are worked on.  Doing it like this
	 * might be a waste as it is possible that a relation being truncated will
	 * be dropped anyway due to its parent being dropped, but this makes the
	 * code more robust because of not having to re-check that the relation
	 * exists at truncation time.
	 */
	if (oids_to_truncate != NIL)
		heap_truncate(oids_to_truncate);

	if (oids_to_drop != NIL)
	{
		ObjectAddresses *targetObjects = new_object_addresses();
		ListCell   *l;

		foreach(l, oids_to_drop)
		{
			ObjectAddress object;

			object.classId = RelationRelationId;
			object.objectId = lfirst_oid(l);
			object.objectSubId = 0;

			Assert(!object_address_present(&object, targetObjects));

			add_exact_object_address(&object, targetObjects);
		}

		/*
		 * Object deletion might involve toast table access (to clean up
		 * toasted catalog entries), so ensure we have a valid snapshot.
		 */
		PushActiveSnapshot(GetTransactionSnapshot());

		/*
		 * Since this is an automatic drop, rather than one directly initiated
		 * by the user, we pass the PERFORM_DELETION_INTERNAL flag.
		 */
		performMultipleDeletions(targetObjects, DROP_CASCADE,
								 PERFORM_DELETION_INTERNAL | PERFORM_DELETION_QUIETLY);

		PopActiveSnapshot();

#ifdef USE_ASSERT_CHECKING

		/*
		 * Note that table deletion will call remove_on_commit_action, so the
		 * entry should get marked as deleted.
		 */
		foreach(l, on_commits)
		{
			OnCommitItem *oc = (OnCommitItem *) lfirst(l);

			if (oc->oncommit != ONCOMMIT_DROP)
				continue;

			Assert(oc->deleting_subid != InvalidSubTransactionId);
		}
#endif
	}
}

/*
 * Post-commit or post-abort cleanup for ON COMMIT management.
 *
 * All we do here is remove no-longer-needed OnCommitItem entries.
 *
 * During commit, remove entries that were deleted during this transaction;
 * during abort, remove those created during this transaction.
 */
void
AtEOXact_on_commit_actions(bool isCommit)
{
	ListCell   *cur_item;

	foreach(cur_item, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

		if (isCommit ? oc->deleting_subid != InvalidSubTransactionId :
			oc->creating_subid != InvalidSubTransactionId)
		{
			/* cur_item must be removed */
			on_commits = foreach_delete_current(on_commits, cur_item);
			pfree(oc);
		}
		else
		{
			/* cur_item must be preserved; reset subxact tracking fields */
			oc->creating_subid = InvalidSubTransactionId;
			oc->deleting_subid = InvalidSubTransactionId;
		}
	}
}

/*
 * Post-subcommit or post-subabort cleanup for ON COMMIT management.
 *
 * During subabort, we can immediately remove entries created during this
 * subtransaction.  During subcommit, just relabel entries marked during
 * this subtransaction as being the parent's responsibility.
 */
void
AtEOSubXact_on_commit_actions(bool isCommit, SubTransactionId mySubid,
							  SubTransactionId parentSubid)
{
	ListCell   *cur_item;

	foreach(cur_item, on_commits)
	{
		OnCommitItem *oc = (OnCommitItem *) lfirst(cur_item);

		if (!isCommit && oc->creating_subid == mySubid)
		{
			/* cur_item must be removed */
			on_commits = foreach_delete_current(on_commits, cur_item);
			pfree(oc);
		}
		else
		{
			/* cur_item must be preserved */
			if (oc->creating_subid == mySubid)
				oc->creating_subid = parentSubid;
			if (oc->deleting_subid == mySubid)
				oc->deleting_subid = isCommit ? parentSubid : InvalidSubTransactionId;
		}
	}
}

/*
 * This is intended as a callback for RangeVarGetRelidExtended().  It allows
 * the relation to be locked only if (1) it's a plain or partitioned table,
 * materialized view, or TOAST table and (2) the current user is the owner (or
 * the superuser).  This meets the permission-checking needs of CLUSTER,
 * REINDEX TABLE, and REFRESH MATERIALIZED VIEW; we expose it here so that it
 * can be used by all.
 */
void
RangeVarCallbackOwnsTable(const RangeVar *relation,
						  Oid relId, Oid oldRelId, void *arg)
{
	char		relkind;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	/*
	 * If the relation does exist, check whether it's an index.
	 * But note that
	 * the relation might have been dropped between the time we did the name
	 * lookup and now.  In that case, there's nothing to do.
	 */
	relkind = get_rel_relkind(relId);
	if (!relkind)
		return;
	if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
		relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table or materialized view", relation->relname)));

	/* Check permissions */
	if (!pg_class_ownercheck(relId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)), relation->relname);
}

/*
 * Callback to RangeVarGetRelidExtended() for TRUNCATE processing.
 */
static void
RangeVarCallbackForTruncate(const RangeVar *relation,
							Oid relId, Oid oldRelId, void *arg)
{
	HeapTuple	tuple;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
	if (!HeapTupleIsValid(tuple))	/* should not happen */
		elog(ERROR, "cache lookup failed for relation %u", relId);

	truncate_check_rel(relId, (Form_pg_class) GETSTRUCT(tuple));
	truncate_check_perms(relId, (Form_pg_class) GETSTRUCT(tuple));

	ReleaseSysCache(tuple);
}

/*
 * Callback to RangeVarGetRelidExtended(), similar to
 * RangeVarCallbackOwnsTable() but without checks on the type of the relation.
 */
void
RangeVarCallbackOwnsRelation(const RangeVar *relation,
							 Oid relId, Oid oldRelId, void *arg)
{
	HeapTuple	tuple;

	/* Nothing to do if the relation was not found. */
	if (!OidIsValid(relId))
		return;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relId));
	if (!HeapTupleIsValid(tuple))	/* should not happen */
		elog(ERROR, "cache lookup failed for relation %u", relId);

	if (!pg_class_ownercheck(relId, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relId)),
					   relation->relname);

	if (!allowSystemTableMods &&
		IsSystemClass(relId, (Form_pg_class) GETSTRUCT(tuple)))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						relation->relname)));

	ReleaseSysCache(tuple);
}

/*
 * Common RangeVarGetRelid callback for rename, set schema, and alter table
 * processing.
 *
 * arg is the parse tree of the originating statement; it tells us which
 * object type the user named, so we can complain if the relation's actual
 * kind doesn't match.
 */
static void
RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
								 void *arg)
{
	Node	   *stmt = (Node *) arg;
	ObjectType	reltype;
	HeapTuple	tuple;
	Form_pg_class classform;
	AclResult	aclresult;
	char		relkind;

	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(tuple))
		return;					/* concurrently dropped */
	classform = (Form_pg_class) GETSTRUCT(tuple);
	relkind = classform->relkind;

	/* Must own relation. */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname);

	/* No system table modifications unless explicitly allowed. */
	if (!allowSystemTableMods && IsSystemClass(relid, classform))
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied: \"%s\" is a system catalog",
						rv->relname)));

	/*
	 * Extract the specified relation type from the statement parse tree.
	 *
	 * Also, for ALTER .. RENAME, check permissions: the user must (still)
	 * have CREATE rights on the containing namespace.
	 */
	if (IsA(stmt, RenameStmt))
	{
		aclresult = pg_namespace_aclcheck(classform->relnamespace,
										  GetUserId(), ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_SCHEMA,
						   get_namespace_name(classform->relnamespace));
		reltype = ((RenameStmt *) stmt)->renameType;
	}
	else if (IsA(stmt, AlterObjectSchemaStmt))
		reltype = ((AlterObjectSchemaStmt *) stmt)->objectType;

	else if (IsA(stmt, AlterTableStmt))
		reltype = ((AlterTableStmt *) stmt)->objtype;
	else
	{
		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(stmt));
		reltype = OBJECT_TABLE; /* placate compiler */
	}

	/*
	 * For compatibility with prior releases, we allow ALTER TABLE to be used
	 * with most other types of relations (but not composite types). We allow
	 * similar flexibility for ALTER INDEX in the case of RENAME, but not
	 * otherwise.  Otherwise, the user must select the correct form of the
	 * command for the relation at issue.
	 */
	if (reltype == OBJECT_SEQUENCE && relkind != RELKIND_SEQUENCE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a sequence", rv->relname)));

	if (reltype == OBJECT_VIEW && relkind != RELKIND_VIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a view", rv->relname)));

	if (reltype == OBJECT_MATVIEW && relkind != RELKIND_MATVIEW)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a materialized view", rv->relname)));

	if (reltype == OBJECT_FOREIGN_TABLE && relkind != RELKIND_FOREIGN_TABLE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a foreign table", rv->relname)));

	if (reltype == OBJECT_TYPE && relkind != RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a composite type", rv->relname)));

	if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX &&
		relkind != RELKIND_PARTITIONED_INDEX
		&& !IsA(stmt, RenameStmt))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not an index", rv->relname)));

	/*
	 * Don't allow ALTER TABLE on composite types. We want people to use ALTER
	 * TYPE for that.
	 */
	if (reltype != OBJECT_TYPE && relkind == RELKIND_COMPOSITE_TYPE)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is a composite type", rv->relname),
				 errhint("Use ALTER TYPE instead.")));

	/*
	 * Don't allow ALTER TABLE .. SET SCHEMA on relations that can't be moved
	 * to a different schema, such as indexes and TOAST tables.
	 */
	if (IsA(stmt, AlterObjectSchemaStmt))
	{
		if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of index \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
		else if (relkind == RELKIND_COMPOSITE_TYPE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of composite type \"%s\"",
							rv->relname),
					 errhint("Use ALTER TYPE instead.")));
		else if (relkind == RELKIND_TOASTVALUE)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change schema of TOAST table \"%s\"",
							rv->relname),
					 errhint("Change the schema of the table instead.")));
	}

	ReleaseSysCache(tuple);
}

/*
 * Transform any expressions present in the partition key
 *
 * Returns a transformed PartitionSpec, as well as the strategy code
 * (one of the PARTITION_STRATEGY_* values) via *strategy.
 */
static PartitionSpec *
transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
{
	PartitionSpec *newspec;
	ParseState *pstate;
	ParseNamespaceItem *nsitem;
	ListCell   *l;

	newspec = makeNode(PartitionSpec);

	newspec->strategy = partspec->strategy;
	newspec->partParams = NIL;
	newspec->location = partspec->location;

	/* Parse partitioning strategy name */
	if (pg_strcasecmp(partspec->strategy, "hash") == 0)
		*strategy = PARTITION_STRATEGY_HASH;
	else if (pg_strcasecmp(partspec->strategy, "list") == 0)
		*strategy = PARTITION_STRATEGY_LIST;
	else if (pg_strcasecmp(partspec->strategy, "range") == 0)
		*strategy = PARTITION_STRATEGY_RANGE;
	else
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("unrecognized partitioning strategy \"%s\"",
						partspec->strategy)));

	/* Check valid number of columns for strategy: LIST allows only one */
	if (*strategy == PARTITION_STRATEGY_LIST &&
		list_length(partspec->partParams) != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("cannot use \"list\" partition strategy with more than one column")));

	/*
	 * Create a dummy ParseState and insert the target relation as its sole
	 * rangetable entry.  We need a ParseState for transformExpr.
	 */
	pstate = make_parsestate(NULL);
	nsitem = addRangeTableEntryForRelation(pstate, rel, AccessShareLock,
										   NULL, false, true);
	addNSItemToQuery(pstate, nsitem, true, true, true);

	/* take care of any partition expressions */
	foreach(l, partspec->partParams)
	{
		PartitionElem *pelem = lfirst_node(PartitionElem, l);

		if (pelem->expr)
		{
			/* Copy, to avoid scribbling on the input */
			pelem = copyObject(pelem);

			/* Now do parse transformation of the expression */
			pelem->expr = transformExpr(pstate, pelem->expr,
										EXPR_KIND_PARTITION_EXPRESSION);

			/* we have to fix its collations too */
			assign_expr_collations(pstate, pelem->expr);
		}

		newspec->partParams = lappend(newspec->partParams, pelem);
	}

	return newspec;
}

/*
 * Compute per-partition-column information from a list of PartitionElems.
 * Expressions in the PartitionElems must be parse-analyzed already.
 */
static void
ComputePartitionAttrs(ParseState *pstate, Relation rel, List *partParams, AttrNumber *partattrs,
					  List **partexprs, Oid *partopclass, Oid *partcollation,
					  char strategy)
{
	int			attn;
	ListCell   *lc;
	Oid			am_oid;

	attn = 0;
	foreach(lc, partParams)
	{
		PartitionElem *pelem = lfirst_node(PartitionElem, lc);
		Oid			atttype;
		Oid			attcollation;

		if (pelem->name != NULL)
		{
			/* Simple attribute reference */
			HeapTuple	atttuple;
			Form_pg_attribute attform;

			atttuple = SearchSysCacheAttName(RelationGetRelid(rel),
											 pelem->name);
			if (!HeapTupleIsValid(atttuple))
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" named in partition key does not exist",
								pelem->name),
						 parser_errposition(pstate, pelem->location)));
			attform = (Form_pg_attribute) GETSTRUCT(atttuple);

			if (attform->attnum <= 0)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("cannot use system column \"%s\" in partition key",
								pelem->name),
						 parser_errposition(pstate, pelem->location)));

			/*
			 * Generated columns cannot work: They are computed after BEFORE
			 * triggers, but partition routing is done before all triggers.
			 */
			if (attform->attgenerated)
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
						 errmsg("cannot use generated column in partition key"),
						 errdetail("Column \"%s\" is a generated column.",
								   pelem->name),
						 parser_errposition(pstate, pelem->location)));

			partattrs[attn] = attform->attnum;
			atttype = attform->atttypid;
			attcollation = attform->attcollation;
			ReleaseSysCache(atttuple);
		}
		else
		{
			/* Expression */
			Node	   *expr = pelem->expr;
			char		partattname[16];

			Assert(expr != NULL);
			atttype = exprType(expr);
			attcollation = exprCollation(expr);

			/*
			 * The expression must be of a storable type (e.g., not RECORD).
			 * The test is the same as for whether a table column is of a safe
			 * type (which is why we needn't check for the non-expression
			 * case).
			 */
			snprintf(partattname, sizeof(partattname), "%d", attn + 1);
			CheckAttributeType(partattname,
							   atttype, attcollation,
							   NIL, CHKATYPE_IS_PARTKEY);

			/*
			 * Strip any top-level COLLATE clause.  This ensures that we treat
			 * "x COLLATE y" and "(x COLLATE y)" alike.
			 */
			while (IsA(expr, CollateExpr))
				expr = (Node *) ((CollateExpr *) expr)->arg;

			if (IsA(expr, Var) &&
				((Var *) expr)->varattno > 0)
			{
				/*
				 * User wrote "(column)" or "(column COLLATE something)".
				 * Treat it like simple attribute anyway.
				 */
				partattrs[attn] = ((Var *) expr)->varattno;
			}
			else
			{
				Bitmapset  *expr_attrs = NULL;
				int			i;

				partattrs[attn] = 0;	/* marks the column as expression */
				*partexprs = lappend(*partexprs, expr);

				/*
				 * Try to simplify the expression before checking for
				 * mutability.  The main practical value of doing it in this
				 * order is that an inline-able SQL-language function will be
				 * accepted if its expansion is immutable, whether or not the
				 * function itself is marked immutable.
				 *
				 * Note that expression_planner does not change the passed in
				 * expression destructively and we have already saved the
				 * expression to be stored into the catalog above.
				 */
				expr = (Node *) expression_planner((Expr *) expr);

				/*
				 * Partition expression cannot contain mutable functions,
				 * because a given row must always map to the same partition
				 * as long as there is no change in the partition boundary
				 * structure.
				 */
				if (contain_mutable_functions(expr))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("functions in partition key expression must be marked IMMUTABLE")));

				/*
				 * transformPartitionSpec() should have already rejected
				 * subqueries, aggregates, window functions, and SRFs, based
				 * on the EXPR_KIND_ for partition expressions.
				 */

				/*
				 * Cannot allow system column references, since that would
				 * make partition routing impossible: their values won't be
				 * known yet when we need to do that.
				 */
				pull_varattnos(expr, 1, &expr_attrs);
				for (i = FirstLowInvalidHeapAttributeNumber; i < 0; i++)
				{
					if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
									  expr_attrs))
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
								 errmsg("partition key expressions cannot contain system column references")));
				}

				/*
				 * Generated columns cannot work: They are computed after
				 * BEFORE triggers, but partition routing is done before all
				 * triggers.
				 */
				i = -1;
				while ((i = bms_next_member(expr_attrs, i)) >= 0)
				{
					AttrNumber	attno = i + FirstLowInvalidHeapAttributeNumber;

					if (attno > 0 &&
						TupleDescAttr(RelationGetDescr(rel), attno - 1)->attgenerated)
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
								 errmsg("cannot use generated column in partition key"),
								 errdetail("Column \"%s\" is a generated column.",
										   get_attname(RelationGetRelid(rel), attno, false)),
								 parser_errposition(pstate, pelem->location)));
				}

				/*
				 * While it is not exactly *wrong* for a partition expression
				 * to be a constant, it seems better to reject such keys.
				 */
				if (IsA(expr, Const))
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
							 errmsg("cannot use constant expression as partition key")));
			}
		}

		/*
		 * Apply collation override if any
		 */
		if (pelem->collation)
			attcollation = get_collation_oid(pelem->collation, false);

		/*
		 * Check we have a collation iff it's a collatable type.  The only
		 * expected failures here are (1) COLLATE applied to a noncollatable
		 * type, or (2) partition expression had an unresolved collation.  But
		 * we might as well code this to be a complete consistency check.
		 */
		if (type_is_collatable(atttype))
		{
			if (!OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for partition expression"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
		}
		else
		{
			if (OidIsValid(attcollation))
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("collations are not supported by type %s",
								format_type_be(atttype))));
		}

		partcollation[attn] = attcollation;

		/*
		 * Identify the appropriate operator class.  For list and range
		 * partitioning, we use a btree operator class; hash partitioning uses
		 * a hash operator class.
		 */
		if (strategy == PARTITION_STRATEGY_HASH)
			am_oid = HASH_AM_OID;
		else
			am_oid = BTREE_AM_OID;

		if (!pelem->opclass)
		{
			partopclass[attn] = GetDefaultOpClass(atttype, am_oid);

			if (!OidIsValid(partopclass[attn]))
			{
				if (strategy == PARTITION_STRATEGY_HASH)
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("data type %s has no default operator class for access method \"%s\"",
									format_type_be(atttype), "hash"),
							 errhint("You must specify a hash operator class or define a default hash operator class for the data type.")));
				else
					ereport(ERROR,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("data type %s has no default operator class for access method \"%s\"",
									format_type_be(atttype), "btree"),
							 errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
			}
		}
		else
			partopclass[attn] = ResolveOpClass(pelem->opclass,
											   atttype,
											   am_oid == HASH_AM_OID ? "hash" : "btree",
											   am_oid);

		attn++;
	}
}

/*
 * PartConstraintImpliedByRelConstraint
 *		Do scanrel's existing constraints imply the partition constraint?
 *
 * "Existing constraints" include its check constraints and column-level
 * NOT NULL constraints.  partConstraint describes the partition constraint,
 * in implicit-AND form.
+ */ +bool +PartConstraintImpliedByRelConstraint(Relation scanrel, + List *partConstraint) +{ + List *existConstraint = NIL; + TupleConstr *constr = RelationGetDescr(scanrel)->constr; + int i; + + if (constr && constr->has_not_null) + { + int natts = scanrel->rd_att->natts; + + for (i = 1; i <= natts; i++) + { + Form_pg_attribute att = TupleDescAttr(scanrel->rd_att, i - 1); + + if (att->attnotnull && !att->attisdropped) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = (Expr *) makeVar(1, + i, + att->atttypid, + att->atttypmod, + att->attcollation, + 0); + ntest->nulltesttype = IS_NOT_NULL; + + /* + * argisrow=false is correct even for a composite column, + * because attnotnull does not represent a SQL-spec IS NOT + * NULL test in such a case, just IS DISTINCT FROM NULL. + */ + ntest->argisrow = false; + ntest->location = -1; + existConstraint = lappend(existConstraint, ntest); + } + } + } + + return ConstraintImpliedByRelConstraint(scanrel, partConstraint, existConstraint); +} + +/* + * ConstraintImpliedByRelConstraint + * Do scanrel's existing constraints imply the given constraint? + * + * testConstraint is the constraint to validate. provenConstraint is a + * caller-provided list of conditions which this function may assume + * to be true. Both provenConstraint and testConstraint must be in + * implicit-AND form, must only contain immutable clauses, and must + * contain only Vars with varno = 1. + */ +bool +ConstraintImpliedByRelConstraint(Relation scanrel, List *testConstraint, List *provenConstraint) +{ + List *existConstraint = list_copy(provenConstraint); + TupleConstr *constr = RelationGetDescr(scanrel)->constr; + int num_check, + i; + + num_check = (constr != NULL) ? constr->num_check : 0; + for (i = 0; i < num_check; i++) + { + Node *cexpr; + + /* + * If this constraint hasn't been fully validated yet, we must ignore + * it here. 
+ */ + if (!constr->check[i].ccvalid) + continue; + + cexpr = stringToNode(constr->check[i].ccbin); + + /* + * Run each expression through const-simplification and + * canonicalization. It is necessary, because we will be comparing it + * to similarly-processed partition constraint expressions, and may + * fail to detect valid matches without this. + */ + cexpr = eval_const_expressions(NULL, cexpr); + cexpr = (Node *) canonicalize_qual((Expr *) cexpr, true); + + existConstraint = list_concat(existConstraint, + make_ands_implicit((Expr *) cexpr)); + } + + /* + * Try to make the proof. Since we are comparing CHECK constraints, we + * need to use weak implication, i.e., we assume existConstraint is + * not-false and try to prove the same for testConstraint. + * + * Note that predicate_implied_by assumes its first argument is known + * immutable. That should always be true for both NOT NULL and partition + * constraints, so we don't test it here. + */ + return predicate_implied_by(testConstraint, existConstraint, true); +} + +/* + * QueuePartitionConstraintValidation + * + * Add an entry to wqueue to have the given partition constraint validated by + * Phase 3, for the given relation, and all its children. + * + * We first verify whether the given constraint is implied by pre-existing + * relation constraints; if it is, there's no need to scan the table to + * validate, so don't queue in that case. + */ +static void +QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, + List *partConstraint, + bool validate_default) +{ + /* + * Based on the table's existing constraints, determine whether or not we + * may skip scanning the table. 
+ */ + if (PartConstraintImpliedByRelConstraint(scanrel, partConstraint)) + { + if (!validate_default) + ereport(DEBUG1, + (errmsg_internal("partition constraint for table \"%s\" is implied by existing constraints", + RelationGetRelationName(scanrel)))); + else + ereport(DEBUG1, + (errmsg_internal("updated partition constraint for default partition \"%s\" is implied by existing constraints", + RelationGetRelationName(scanrel)))); + return; + } + + /* + * Constraints proved insufficient. For plain relations, queue a + * validation item now; for partitioned tables, recurse to process each + * partition. + */ + if (scanrel->rd_rel->relkind == RELKIND_RELATION) + { + AlteredTableInfo *tab; + + /* Grab a work queue entry. */ + tab = ATGetQueueEntry(wqueue, scanrel); + Assert(tab->partition_constraint == NULL); + tab->partition_constraint = (Expr *) linitial(partConstraint); + tab->validate_default = validate_default; + } + else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, true); + int i; + + for (i = 0; i < partdesc->nparts; i++) + { + Relation part_rel; + List *thisPartConstraint; + + /* + * This is the minimum lock we need to prevent deadlocks. + */ + part_rel = table_open(partdesc->oids[i], AccessExclusiveLock); + + /* + * Adjust the constraint for scanrel so that it matches this + * partition's attribute numbers. + */ + thisPartConstraint = + map_partition_varattnos(partConstraint, 1, + part_rel, scanrel); + + QueuePartitionConstraintValidation(wqueue, part_rel, + thisPartConstraint, + validate_default); + table_close(part_rel, NoLock); /* keep lock till commit */ + } + } +} + +/* + * ALTER TABLE ATTACH PARTITION FOR VALUES + * + * Return the address of the newly attached partition. 
 */
static ObjectAddress
ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
					  AlterTableUtilityContext *context)
{
	Relation	attachrel,
				catalog;
	List	   *attachrel_children;
	List	   *partConstraint;
	SysScanDesc scan;
	ScanKeyData skey;
	AttrNumber	attno;
	int			natts;
	TupleDesc	tupleDesc;
	ObjectAddress address;
	const char *trigger_name;
	Oid			defaultPartOid;
	List	   *partBoundConstraint;
	ParseState *pstate = make_parsestate(NULL);

	/* supply the query text so error cursors can point into the command */
	pstate->p_sourcetext = context->queryString;

	/*
	 * We must lock the default partition if one exists, because attaching a
	 * new partition will change its partition constraint.
	 */
	defaultPartOid =
		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
	if (OidIsValid(defaultPartOid))
		LockRelationOid(defaultPartOid, AccessExclusiveLock);

	attachrel = table_openrv(cmd->name, AccessExclusiveLock);

	/*
	 * XXX I think it'd be a good idea to grab locks on all tables referenced
	 * by FKs at this point also.
	 */

	/*
	 * Must be owner of both parent and source table -- parent was checked by
	 * ATSimplePermissions call in ATPrepCmd
	 */
	ATSimplePermissions(AT_AttachPartition, attachrel, ATT_TABLE | ATT_FOREIGN_TABLE);

	/* A partition can only have one parent */
	if (attachrel->rd_rel->relispartition)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is already a partition",
						RelationGetRelationName(attachrel))));

	if (OidIsValid(attachrel->rd_rel->reloftype))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a typed table as partition")));

	/*
	 * Table being attached should not already be part of inheritance; either
	 * as a child table...
	 */
	catalog = table_open(InheritsRelationId, AccessShareLock);
	ScanKeyInit(&skey,
				Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(attachrel)));
	scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
							  NULL, 1, &skey);
	if (HeapTupleIsValid(systable_getnext(scan)))
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach inheritance child as partition")));
	systable_endscan(scan);

	/* ...or as a parent table (except the case when it is partitioned) */
	ScanKeyInit(&skey,
				Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(RelationGetRelid(attachrel)));
	scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
							  1, &skey);
	if (HeapTupleIsValid(systable_getnext(scan)) &&
		attachrel->rd_rel->relkind == RELKIND_RELATION)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach inheritance parent as partition")));
	systable_endscan(scan);
	table_close(catalog, AccessShareLock);

	/*
	 * Prevent circularity by seeing if rel is a partition of attachrel. (In
	 * particular, this disallows making a rel a partition of itself.)
	 *
	 * We do that by checking if rel is a member of the list of attachrel's
	 * partitions provided the latter is partitioned at all.  We want to avoid
	 * having to construct this list again, so we request the strongest lock
	 * on all partitions.  We need the strongest lock, because we may decide
	 * to scan them if we find out that the table being attached (or its leaf
	 * partitions) may contain rows that violate the partition constraint. If
	 * the table has a constraint that would prevent such rows, which by
	 * definition is present in all the partitions, we need not scan the
	 * table, nor its partitions.  But we cannot risk a deadlock by taking a
	 * weaker lock now and the stronger one only when needed.
	 */
	attachrel_children = find_all_inheritors(RelationGetRelid(attachrel),
											 AccessExclusiveLock, NULL);
	if (list_member_oid(attachrel_children, RelationGetRelid(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("circular inheritance not allowed"),
				 errdetail("\"%s\" is already a child of \"%s\".",
						   RelationGetRelationName(rel),
						   RelationGetRelationName(attachrel))));

	/* If the parent is permanent, so must be all of its partitions. */
	if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP &&
		attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a temporary relation as partition of permanent relation \"%s\"",
						RelationGetRelationName(rel))));

	/* Temp parent cannot have a partition that is itself not a temp */
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		attachrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
						RelationGetRelationName(rel))));

	/* If the parent is temp, it must belong to this session */
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!rel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach as partition of temporary relation of another session")));

	/* Ditto for the partition */
	if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
		!attachrel->rd_islocaltemp)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("cannot attach temporary relation of another session as partition")));

	/* Check if there are any columns in attachrel that aren't in the parent */
	tupleDesc = RelationGetDescr(attachrel);
	natts = tupleDesc->natts;
	for (attno = 1; attno <= natts; attno++)
	{
		Form_pg_attribute attribute = TupleDescAttr(tupleDesc, attno - 1);
		char	   *attributeName = NameStr(attribute->attname);

		/* Ignore dropped */
		if (attribute->attisdropped)
			continue;

		/* Try to find the column in parent (matching on column name) */
		if (!SearchSysCacheExists2(ATTNAME,
								   ObjectIdGetDatum(RelationGetRelid(rel)),
								   CStringGetDatum(attributeName)))
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
							RelationGetRelationName(attachrel), attributeName,
							RelationGetRelationName(rel)),
					 errdetail("The new partition may contain only the columns present in parent.")));
	}

	/*
	 * If child_rel has row-level triggers with transition tables, we
	 * currently don't allow it to become a partition.  See also prohibitions
	 * in ATExecAddInherit() and CreateTrigger().
	 */
	trigger_name = FindTriggerIncompatibleWithInheritance(attachrel->trigdesc);
	if (trigger_name != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("trigger \"%s\" prevents table \"%s\" from becoming a partition",
						trigger_name, RelationGetRelationName(attachrel)),
				 errdetail("ROW triggers with transition tables are not supported on partitions.")));

	/*
	 * Check that the new partition's bound is valid and does not overlap any
	 * of existing partitions of the parent - note that it does not return on
	 * error.
	 */
	check_new_partition_bound(RelationGetRelationName(attachrel), rel,
							  cmd->bound, pstate);

	/* OK to create inheritance.  Rest of the checks performed there */
	CreateInheritance(attachrel, rel);

	/* Update the pg_class entry. */
	StorePartitionBound(attachrel, rel, cmd->bound);

	/* Ensure there exists a correct set of indexes in the partition. */
	AttachPartitionEnsureIndexes(rel, attachrel);

	/* and triggers */
	CloneRowTriggersToPartition(rel, attachrel);

	/*
	 * Clone foreign key constraints.  Callee is responsible for setting up
	 * for phase 3 constraint verification.
	 */
	CloneForeignKeyConstraints(wqueue, rel, attachrel);

	/*
	 * Generate partition constraint from the partition bound specification.
	 * If the parent itself is a partition, make sure to include its
	 * constraint as well.
	 */
	partBoundConstraint = get_qual_from_partbound(rel, cmd->bound);
	partConstraint = list_concat(partBoundConstraint,
								 RelationGetPartitionQual(rel));

	/* Skip validation if there are no constraints to validate. */
	if (partConstraint)
	{
		/*
		 * Run the partition quals through const-simplification similar to
		 * check constraints.  We skip canonicalize_qual, though, because
		 * partition quals should be in canonical form already.
		 */
		partConstraint =
			(List *) eval_const_expressions(NULL,
											(Node *) partConstraint);

		/* XXX this sure looks wrong */
		partConstraint = list_make1(make_ands_explicit(partConstraint));

		/*
		 * Adjust the generated constraint to match this partition's attribute
		 * numbers.
		 */
		partConstraint = map_partition_varattnos(partConstraint, 1, attachrel,
												 rel);

		/* Validate partition constraints against the table being attached. */
		QueuePartitionConstraintValidation(wqueue, attachrel, partConstraint,
										   false);
	}

	/*
	 * If we're attaching a partition other than the default partition and a
	 * default one exists, then that partition's partition constraint changes,
	 * so add an entry to the work queue to validate it, too.  (We must not do
	 * this when the partition being attached is the default one; we already
	 * did it above!)
	 */
	if (OidIsValid(defaultPartOid))
	{
		Relation	defaultrel;
		List	   *defPartConstraint;

		Assert(!cmd->bound->is_default);

		/* we already hold a lock on the default partition */
		defaultrel = table_open(defaultPartOid, NoLock);
		defPartConstraint =
			get_proposed_default_constraint(partBoundConstraint);

		/*
		 * Map the Vars in the constraint expression from rel's attnos to
		 * defaultrel's.
		 */
		defPartConstraint =
			map_partition_varattnos(defPartConstraint,
									1, defaultrel, rel);
		QueuePartitionConstraintValidation(wqueue, defaultrel,
										   defPartConstraint, true);

		/* keep our lock until commit. */
		table_close(defaultrel, NoLock);
	}

	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel));

	/*
	 * If the partition we just attached is partitioned itself, invalidate
	 * relcache for all descendent partitions too to ensure that their
	 * rd_partcheck expression trees are rebuilt; partitions already locked at
	 * the beginning of this function.
	 */
	if (attachrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		ListCell   *l;

		foreach(l, attachrel_children)
		{
			CacheInvalidateRelcacheByRelid(lfirst_oid(l));
		}
	}

	/* keep our lock until commit */
	table_close(attachrel, NoLock);

	return address;
}

/*
 * AttachPartitionEnsureIndexes
 *		subroutine for ATExecAttachPartition to create/match indexes
 *
 * Enforce the indexing rule for partitioned tables during ALTER TABLE / ATTACH
 * PARTITION: every partition must have an index attached to each index on the
 * partitioned table.
 */
static void
AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
{
	List	   *idxes;
	List	   *attachRelIdxs;
	Relation   *attachrelIdxRels;
	IndexInfo **attachInfos;
	int			i;
	ListCell   *cell;
	MemoryContext cxt;
	MemoryContext oldcxt;

	/* Work in a throwaway context so all transient data is freed at the end */
	cxt = AllocSetContextCreate(CurrentMemoryContext,
								"AttachPartitionEnsureIndexes",
								ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(cxt);

	idxes = RelationGetIndexList(rel);
	attachRelIdxs = RelationGetIndexList(attachrel);
	attachrelIdxRels = palloc(sizeof(Relation) * list_length(attachRelIdxs));
	attachInfos = palloc(sizeof(IndexInfo *) * list_length(attachRelIdxs));

	/* Build arrays of all existing indexes and their IndexInfos */
	i = 0;
	foreach(cell, attachRelIdxs)
	{
		Oid			cldIdxId = lfirst_oid(cell);

		attachrelIdxRels[i] = index_open(cldIdxId, AccessShareLock);
		attachInfos[i] = BuildIndexInfo(attachrelIdxRels[i]);
		i++;
	}

	/*
	 * If we're attaching a foreign table, we must fail if any of the indexes
	 * is a constraint index; otherwise, there's nothing to do here.  Do this
	 * before starting work, to avoid wasting the effort of building a few
	 * non-unique indexes before coming across a unique one.
	 */
	if (attachrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
	{
		foreach(cell, idxes)
		{
			Oid			idx = lfirst_oid(cell);
			Relation	idxRel = index_open(idx, AccessShareLock);

			if (idxRel->rd_index->indisunique ||
				idxRel->rd_index->indisprimary)
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot attach foreign table \"%s\" as partition of partitioned table \"%s\"",
								RelationGetRelationName(attachrel),
								RelationGetRelationName(rel)),
						 errdetail("Partitioned table \"%s\" contains unique indexes.",
								   RelationGetRelationName(rel))));
			index_close(idxRel, AccessShareLock);
		}

		/* foreign tables have no storage, so no indexes to create/match */
		goto out;
	}

	/*
	 * For each index on the partitioned table, find a matching one in the
	 * partition-to-be; if one is not found, create one.
	 */
	foreach(cell, idxes)
	{
		Oid			idx = lfirst_oid(cell);
		Relation	idxRel = index_open(idx, AccessShareLock);
		IndexInfo  *info;
		AttrMap    *attmap;
		bool		found = false;
		Oid			constraintOid;

		/*
		 * Ignore indexes in the partitioned table other than partitioned
		 * indexes.
		 */
		if (idxRel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		{
			index_close(idxRel, AccessShareLock);
			continue;
		}

		/* construct an indexinfo to compare existing indexes against */
		info = BuildIndexInfo(idxRel);
		attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
									   RelationGetDescr(rel));
		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);

		/*
		 * Scan the list of existing indexes in the partition-to-be, and mark
		 * the first matching, valid, unattached one we find, if any, as
		 * partition of the parent index.  If we find one, we're done.
		 */
		for (i = 0; i < list_length(attachRelIdxs); i++)
		{
			Oid			cldIdxId = RelationGetRelid(attachrelIdxRels[i]);
			Oid			cldConstrOid = InvalidOid;

			/* does this index have a parent?  if so, can't use it */
			if (attachrelIdxRels[i]->rd_rel->relispartition)
				continue;

			/* If this index is invalid, can't use it */
			if (!attachrelIdxRels[i]->rd_index->indisvalid)
				continue;

			if (CompareIndexInfo(attachInfos[i], info,
								 attachrelIdxRels[i]->rd_indcollation,
								 idxRel->rd_indcollation,
								 attachrelIdxRels[i]->rd_opfamily,
								 idxRel->rd_opfamily,
								 attmap))
			{
				/*
				 * If this index is being created in the parent because of a
				 * constraint, then the child needs to have a constraint also,
				 * so look for one.  If there is no such constraint, this
				 * index is no good, so keep looking.
				 */
				if (OidIsValid(constraintOid))
				{
					cldConstrOid =
						get_relation_idx_constraint_oid(RelationGetRelid(attachrel),
														cldIdxId);
					/* no dice */
					if (!OidIsValid(cldConstrOid))
						continue;
				}

				/* bingo. */
				IndexSetParentIndex(attachrelIdxRels[i], idx);
				if (OidIsValid(constraintOid))
					ConstraintSetParentConstraint(cldConstrOid, constraintOid,
												  RelationGetRelid(attachrel));
				found = true;

				CommandCounterIncrement();
				break;
			}
		}

		/*
		 * If no suitable index was found in the partition-to-be, create one
		 * now.
		 */
		if (!found)
		{
			IndexStmt  *stmt;
			Oid			constraintOid;

			stmt = generateClonedIndexStmt(NULL,
										   idxRel, attmap,
										   &constraintOid);
			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
						RelationGetRelid(idxRel),
						constraintOid,
						true, false, false, false, false);
		}

		index_close(idxRel, AccessShareLock);
	}

out:
	/* Clean up. */
	for (i = 0; i < list_length(attachRelIdxs); i++)
		index_close(attachrelIdxRels[i], AccessShareLock);
	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(cxt);
}

/*
 * CloneRowTriggersToPartition
 *		subroutine for ATExecAttachPartition/DefineRelation to create row
 *		triggers on partitions
 */
static void
CloneRowTriggersToPartition(Relation parent, Relation partition)
{
	Relation	pg_trigger;
	ScanKeyData key;
	SysScanDesc scan;
	HeapTuple	tuple;
	MemoryContext perTupCxt;

	/* Scan pg_trigger for all triggers belonging to the parent */
	ScanKeyInit(&key, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber,
				F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent)));
	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
	scan = systable_beginscan(pg_trigger, TriggerRelidNameIndexId,
							  true, NULL, 1, &key);

	perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
									  "clone trig", ALLOCSET_SMALL_SIZES);

	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
	{
		Form_pg_trigger trigForm = (Form_pg_trigger) GETSTRUCT(tuple);
		CreateTrigStmt *trigStmt;
		Node	   *qual = NULL;
		Datum		value;
		bool		isnull;
		List	   *cols = NIL;
		List	   *trigargs = NIL;
		MemoryContext oldcxt;

		/*
		 * Ignore statement-level triggers; those are not cloned.
		 */
		if (!TRIGGER_FOR_ROW(trigForm->tgtype))
			continue;

		/*
		 * Don't clone internal triggers, because the constraint cloning code
		 * will.
		 */
		if (trigForm->tgisinternal)
			continue;

		/*
		 * Complain if we find an unexpected trigger type.
		 */
		if (!TRIGGER_FOR_BEFORE(trigForm->tgtype) &&
			!TRIGGER_FOR_AFTER(trigForm->tgtype))
			elog(ERROR, "unexpected trigger \"%s\" found",
				 NameStr(trigForm->tgname));

		/* Use short-lived context for CREATE TRIGGER */
		oldcxt = MemoryContextSwitchTo(perTupCxt);

		/*
		 * If there is a WHEN clause, generate a 'cooked' version of it that's
		 * appropriate for the partition.
		 */
		value = heap_getattr(tuple, Anum_pg_trigger_tgqual,
							 RelationGetDescr(pg_trigger), &isnull);
		if (!isnull)
		{
			qual = stringToNode(TextDatumGetCString(value));
			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_OLD_VARNO,
													partition, parent);
			qual = (Node *) map_partition_varattnos((List *) qual, PRS2_NEW_VARNO,
													partition, parent);
		}

		/*
		 * If there is a column list, transform it to a list of column names.
		 * Note we don't need to map this list in any way ...
		 */
		if (trigForm->tgattr.dim1 > 0)
		{
			int			i;

			for (i = 0; i < trigForm->tgattr.dim1; i++)
			{
				Form_pg_attribute col;

				col = TupleDescAttr(parent->rd_att,
									trigForm->tgattr.values[i] - 1);
				cols = lappend(cols,
							   makeString(pstrdup(NameStr(col->attname))));
			}
		}

		/* Reconstruct trigger arguments list. */
		if (trigForm->tgnargs > 0)
		{
			char	   *p;

			value = heap_getattr(tuple, Anum_pg_trigger_tgargs,
								 RelationGetDescr(pg_trigger), &isnull);
			if (isnull)
				elog(ERROR, "tgargs is null for trigger \"%s\" in partition \"%s\"",
					 NameStr(trigForm->tgname), RelationGetRelationName(partition));

			p = (char *) VARDATA_ANY(DatumGetByteaPP(value));

			/* tgargs is a sequence of NUL-terminated strings, back to back */
			for (int i = 0; i < trigForm->tgnargs; i++)
			{
				trigargs = lappend(trigargs, makeString(pstrdup(p)));
				p += strlen(p) + 1;
			}
		}

		trigStmt = makeNode(CreateTrigStmt);
		trigStmt->replace = false;
		trigStmt->isconstraint = OidIsValid(trigForm->tgconstraint);
		trigStmt->trigname = NameStr(trigForm->tgname);
		trigStmt->relation = NULL;
		trigStmt->funcname = NULL;	/* passed separately */
		trigStmt->args = trigargs;
		trigStmt->row = true;
		trigStmt->timing = trigForm->tgtype & TRIGGER_TYPE_TIMING_MASK;
		trigStmt->events = trigForm->tgtype & TRIGGER_TYPE_EVENT_MASK;
		trigStmt->columns = cols;
		trigStmt->whenClause = NULL;	/* passed separately */
		trigStmt->transitionRels = NIL; /* not supported at present */
		trigStmt->deferrable = trigForm->tgdeferrable;
		trigStmt->initdeferred = trigForm->tginitdeferred;
		trigStmt->constrrel = NULL; /* passed separately */

		CreateTriggerFiringOn(trigStmt, NULL, RelationGetRelid(partition),
							  trigForm->tgconstrrelid, InvalidOid, InvalidOid,
							  trigForm->tgfoid, trigForm->oid, qual,
							  false, true, trigForm->tgenabled);

		MemoryContextSwitchTo(oldcxt);
		MemoryContextReset(perTupCxt);
	}

	MemoryContextDelete(perTupCxt);

	systable_endscan(scan);
	table_close(pg_trigger, RowExclusiveLock);
}

/*
 * ALTER TABLE DETACH PARTITION
 *
 * Return the address of the relation that is no longer a partition of rel.
 *
 * If concurrent mode is requested, we run in two transactions.  A side-
 * effect is that this command cannot run in a multi-part ALTER TABLE.
 * Currently, that's enforced by the grammar.
 *
 * The strategy for concurrency is to first modify the partition's
 * pg_inherits catalog row to make it visible to everyone that the
 * partition is detached, lock the partition against writes, and commit
 * the transaction; anyone who requests the partition descriptor from
 * that point onwards has to ignore such a partition.  In a second
 * transaction, we wait until all transactions that could have seen the
 * partition as attached are gone, then we remove the rest of partition
 * metadata (pg_inherits and pg_class.relpartbound).
 */
static ObjectAddress
ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
					  RangeVar *name, bool concurrent)
{
	Relation	partRel;
	ObjectAddress address;
	Oid			defaultPartOid;

	/*
	 * We must lock the default partition, because detaching this partition
	 * will change its partition constraint.
	 */
	defaultPartOid =
		get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
	if (OidIsValid(defaultPartOid))
	{
		/*
		 * Concurrent detaching when a default partition exists is not
		 * supported. The main problem is that the default partition
		 * constraint would change.  And there's a definitional problem: what
		 * should happen to the tuples that are being inserted that belong to
		 * the partition being detached?  Putting them on the partition being
		 * detached would be wrong, since they'd become "lost" after the
		 * detaching completes but we cannot put them in the default partition
		 * either until we alter its partition constraint.
		 *
		 * I think we could solve this problem if we effected the constraint
		 * change before committing the first transaction.  But the lock would
		 * have to remain AEL and it would cause concurrent query planning to
		 * be blocked, so changing it that way would be even worse.
		 */
		if (concurrent)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("cannot detach partitions concurrently when a default partition exists")));
		LockRelationOid(defaultPartOid, AccessExclusiveLock);
	}

	/*
	 * In concurrent mode, the partition is locked with share-update-exclusive
	 * in the first transaction.  This allows concurrent transactions to be
	 * doing DML to the partition.
	 */
	partRel = table_openrv(name, concurrent ? ShareUpdateExclusiveLock :
						   AccessExclusiveLock);

	/*
	 * Check inheritance conditions and either delete the pg_inherits row (in
	 * non-concurrent mode) or just set the inhdetachpending flag.
	 */
	if (!concurrent)
		RemoveInheritance(partRel, rel, false);
	else
		MarkInheritDetached(partRel, rel);

	/*
	 * Ensure that foreign keys still hold after this detach.  This keeps
	 * locks on the referencing tables, which prevents concurrent transactions
	 * from adding rows that we wouldn't see.  For this to work in concurrent
	 * mode, it is critical that the partition appears as no longer attached
	 * for the RI queries as soon as the first transaction commits.
	 */
	ATDetachCheckNoForeignKeyRefs(partRel);

	/*
	 * Concurrent mode has to work harder; first we add a new constraint to
	 * the partition that matches the partition constraint.  Then we close our
	 * existing transaction, and in a new one wait for all processes to catch
	 * up on the catalog updates we've done so far; at that point we can
	 * complete the operation.
	 */
	if (concurrent)
	{
		Oid			partrelid,
					parentrelid;
		LOCKTAG		tag;
		char	   *parentrelname;
		char	   *partrelname;

		/*
		 * Add a new constraint to the partition being detached, which
		 * supplants the partition constraint (unless there is one already).
		 */
		DetachAddConstraintIfNeeded(wqueue, partRel);

		/*
		 * We're almost done now; the only traces that remain are the
		 * pg_inherits tuple and the partition's relpartbound.  Before we can
		 * remove those, we need to wait until all transactions that know that
		 * this is a partition are gone.
		 */

		/*
		 * Remember relation OIDs to re-acquire them later; and relation names
		 * too, for error messages if something is dropped in between.
		 * (PortalContext survives the transaction boundary below.)
		 */
		partrelid = RelationGetRelid(partRel);
		parentrelid = RelationGetRelid(rel);
		parentrelname = MemoryContextStrdup(PortalContext,
											RelationGetRelationName(rel));
		partrelname = MemoryContextStrdup(PortalContext,
										  RelationGetRelationName(partRel));

		/* Invalidate relcache entries for the parent -- must be before close */
		CacheInvalidateRelcache(rel);

		table_close(partRel, NoLock);
		table_close(rel, NoLock);
		tab->rel = NULL;

		/* Make updated catalog entry visible */
		PopActiveSnapshot();
		CommitTransactionCommand();

		StartTransactionCommand();

		/*
		 * Now wait.  This ensures that all queries that were planned
		 * including the partition are finished before we remove the rest of
		 * catalog entries.  We don't need or indeed want to acquire this
		 * lock, though -- that would block later queries.
		 *
		 * We don't need to concern ourselves with waiting for a lock on the
		 * partition itself, since we will acquire AccessExclusiveLock below.
		 */
		SET_LOCKTAG_RELATION(tag, MyDatabaseId, parentrelid);
		WaitForLockersMultiple(list_make1(&tag), AccessExclusiveLock, false);

		/*
		 * Now acquire locks in both relations again.  Note they may have been
		 * removed in the meantime, so care is required.
		 */
		rel = try_relation_open(parentrelid, ShareUpdateExclusiveLock);
		partRel = try_relation_open(partrelid, AccessExclusiveLock);

		/* If the relations aren't there, something bad happened; bail out */
		if (rel == NULL)
		{
			if (partRel != NULL)	/* shouldn't happen */
				elog(WARNING, "dangling partition \"%s\" remains, can't fix",
					 partrelname);
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("partitioned table \"%s\" was removed concurrently",
							parentrelname)));
		}
		if (partRel == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("partition \"%s\" was removed concurrently", partrelname)));

		tab->rel = rel;
	}

	/* Do the final part of detaching */
	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);

	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));

	/* keep our lock until commit */
	table_close(partRel, NoLock);

	return address;
}

/*
 * Second part of ALTER TABLE .. DETACH.
 *
 * This is separate so that it can be run independently when the second
 * transaction of the concurrent algorithm fails (crash or abort).
 */
static void
DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent,
						Oid defaultPartOid)
{
	Relation	classRel;
	List	   *fks;
	ListCell   *cell;
	List	   *indexes;
	Datum		new_val[Natts_pg_class];
	bool		new_null[Natts_pg_class],
				new_repl[Natts_pg_class];
	HeapTuple	tuple,
				newtuple;
	Relation	trigrel = NULL;

	if (concurrent)
	{
		/*
		 * We can remove the pg_inherits row now. (In the non-concurrent case,
		 * this was already done).
		 */
		RemoveInheritance(partRel, rel, true);
	}

	/* Drop any triggers that were cloned on creation/attach. */
	DropClonedTriggersFromPartition(RelationGetRelid(partRel));

	/*
	 * Detach any foreign keys that are inherited.  This includes creating
	 * additional action triggers.
+ */ + fks = copyObject(RelationGetFKeyList(partRel)); + if (fks != NIL) + trigrel = table_open(TriggerRelationId, RowExclusiveLock); + foreach(cell, fks) + { + ForeignKeyCacheInfo *fk = lfirst(cell); + HeapTuple contup; + Form_pg_constraint conform; + Constraint *fkconstraint; + Oid insertTriggerOid, + updateTriggerOid; + + contup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fk->conoid)); + if (!HeapTupleIsValid(contup)) + elog(ERROR, "cache lookup failed for constraint %u", fk->conoid); + conform = (Form_pg_constraint) GETSTRUCT(contup); + + /* consider only the inherited foreign keys */ + if (conform->contype != CONSTRAINT_FOREIGN || + !OidIsValid(conform->conparentid)) + { + ReleaseSysCache(contup); + continue; + } + + /* unset conparentid and adjust conislocal, coninhcount, etc. */ + ConstraintSetParentConstraint(fk->conoid, InvalidOid, InvalidOid); + + /* + * Also, look up the partition's "check" triggers corresponding to the + * constraint being detached and detach them from the parent triggers. + */ + GetForeignKeyCheckTriggers(trigrel, + fk->conoid, fk->confrelid, fk->conrelid, + &insertTriggerOid, &updateTriggerOid); + Assert(OidIsValid(insertTriggerOid)); + TriggerSetParentTrigger(trigrel, insertTriggerOid, InvalidOid, + RelationGetRelid(partRel)); + Assert(OidIsValid(updateTriggerOid)); + TriggerSetParentTrigger(trigrel, updateTriggerOid, InvalidOid, + RelationGetRelid(partRel)); + + /* + * Make the action triggers on the referenced relation. When this was + * a partition the action triggers pointed to the parent rel (they + * still do), but now we need separate ones of our own. 
+ */ + fkconstraint = makeNode(Constraint); + fkconstraint->contype = CONSTRAINT_FOREIGN; + fkconstraint->conname = pstrdup(NameStr(conform->conname)); + fkconstraint->deferrable = conform->condeferrable; + fkconstraint->initdeferred = conform->condeferred; + fkconstraint->location = -1; + fkconstraint->pktable = NULL; + fkconstraint->fk_attrs = NIL; + fkconstraint->pk_attrs = NIL; + fkconstraint->fk_matchtype = conform->confmatchtype; + fkconstraint->fk_upd_action = conform->confupdtype; + fkconstraint->fk_del_action = conform->confdeltype; + fkconstraint->fk_del_set_cols = NIL; + fkconstraint->old_conpfeqop = NIL; + fkconstraint->old_pktable_oid = InvalidOid; + fkconstraint->skip_validation = false; + fkconstraint->initially_valid = true; + + createForeignKeyActionTriggers(partRel, conform->confrelid, + fkconstraint, fk->conoid, + conform->conindid, + InvalidOid, InvalidOid, + NULL, NULL); + + ReleaseSysCache(contup); + } + list_free_deep(fks); + if (trigrel) + table_close(trigrel, RowExclusiveLock); + + /* + * Any sub-constraints that are in the referenced-side of a larger + * constraint have to be removed. This partition is no longer part of the + * key space of the constraint. 
+ */ + foreach(cell, GetParentedForeignKeyRefs(partRel)) + { + Oid constrOid = lfirst_oid(cell); + ObjectAddress constraint; + + ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid); + deleteDependencyRecordsForClass(ConstraintRelationId, + constrOid, + ConstraintRelationId, + DEPENDENCY_INTERNAL); + CommandCounterIncrement(); + + ObjectAddressSet(constraint, ConstraintRelationId, constrOid); + performDeletion(&constraint, DROP_RESTRICT, 0); + } + + /* Now we can detach indexes */ + indexes = RelationGetIndexList(partRel); + foreach(cell, indexes) + { + Oid idxid = lfirst_oid(cell); + Relation idx; + Oid constrOid; + + if (!has_superclass(idxid)) + continue; + + Assert((IndexGetRelation(get_partition_parent(idxid, false), false) == + RelationGetRelid(rel))); + + idx = index_open(idxid, AccessExclusiveLock); + IndexSetParentIndex(idx, InvalidOid); + + /* If there's a constraint associated with the index, detach it too */ + constrOid = get_relation_idx_constraint_oid(RelationGetRelid(partRel), + idxid); + if (OidIsValid(constrOid)) + ConstraintSetParentConstraint(constrOid, InvalidOid, InvalidOid); + + index_close(idx, NoLock); + } + + /* Update pg_class tuple */ + classRel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(partRel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(partRel)); + Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition); + + /* Clear relpartbound and reset relispartition */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0; + new_null[Anum_pg_class_relpartbound - 1] = true; + new_repl[Anum_pg_class_relpartbound - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + + ((Form_pg_class) 
GETSTRUCT(newtuple))->relispartition = false; + CatalogTupleUpdate(classRel, &newtuple->t_self, newtuple); + heap_freetuple(newtuple); + table_close(classRel, RowExclusiveLock); + + if (OidIsValid(defaultPartOid)) + { + /* + * If the relation being detached is the default partition itself, + * remove it from the parent's pg_partitioned_table entry. + * + * If not, we must invalidate default partition's relcache entry, as + * in StorePartitionBound: its partition constraint depends on every + * other partition's partition constraint. + */ + if (RelationGetRelid(partRel) == defaultPartOid) + update_default_partition_oid(RelationGetRelid(rel), InvalidOid); + else + CacheInvalidateRelcacheByRelid(defaultPartOid); + } + + /* + * Invalidate the parent's relcache so that the partition is no longer + * included in its partition descriptor. + */ + CacheInvalidateRelcache(rel); + + /* + * If the partition we just detached is partitioned itself, invalidate + * relcache for all descendent partitions too to ensure that their + * rd_partcheck expression trees are rebuilt; must lock partitions before + * doing so, using the same lockmode as what partRel has been locked with + * by the caller. + */ + if (partRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + List *children; + + children = find_all_inheritors(RelationGetRelid(partRel), + AccessExclusiveLock, NULL); + foreach(cell, children) + { + CacheInvalidateRelcacheByRelid(lfirst_oid(cell)); + } + } +} + +/* + * ALTER TABLE ... DETACH PARTITION ... FINALIZE + * + * To use when a DETACH PARTITION command previously did not run to + * completion; this completes the detaching process. + */ +static ObjectAddress +ATExecDetachPartitionFinalize(Relation rel, RangeVar *name) +{ + Relation partRel; + ObjectAddress address; + Snapshot snap = GetActiveSnapshot(); + + partRel = table_openrv(name, AccessExclusiveLock); + + /* + * Wait until existing snapshots are gone. 
This is important if the + * second transaction of DETACH PARTITION CONCURRENTLY is canceled: the + * user could immediately run DETACH FINALIZE without actually waiting for + * existing transactions. We must not complete the detach action until + * all such queries are complete (otherwise we would present them with an + * inconsistent view of catalogs). + */ + WaitForOlderSnapshots(snap->xmin, false); + + DetachPartitionFinalize(rel, partRel, true, InvalidOid); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel)); + + table_close(partRel, NoLock); + + return address; +} + +/* + * DetachAddConstraintIfNeeded + * Subroutine for ATExecDetachPartition. Create a constraint that + * takes the place of the partition constraint, but avoid creating + * a dupe if an constraint already exists which implies the needed + * constraint. + */ +static void +DetachAddConstraintIfNeeded(List **wqueue, Relation partRel) +{ + List *constraintExpr; + + constraintExpr = RelationGetPartitionQual(partRel); + constraintExpr = (List *) eval_const_expressions(NULL, (Node *) constraintExpr); + + /* + * Avoid adding a new constraint if the needed constraint is implied by an + * existing constraint + */ + if (!PartConstraintImpliedByRelConstraint(partRel, constraintExpr)) + { + AlteredTableInfo *tab; + Constraint *n; + + tab = ATGetQueueEntry(wqueue, partRel); + + /* Add constraint on partition, equivalent to the partition constraint */ + n = makeNode(Constraint); + n->contype = CONSTR_CHECK; + n->conname = NULL; + n->location = -1; + n->is_no_inherit = false; + n->raw_expr = NULL; + n->cooked_expr = nodeToString(make_ands_explicit(constraintExpr)); + n->initially_valid = true; + n->skip_validation = true; + /* It's a re-add, since it nominally already exists */ + ATAddCheckConstraint(wqueue, tab, partRel, n, + true, false, true, ShareUpdateExclusiveLock); + } +} + +/* + * DropClonedTriggersFromPartition + * subroutine for ATExecDetachPartition to remove any triggers 
that were + * cloned to the partition when it was created-as-partition or attached. + * This undoes what CloneRowTriggersToPartition did. + */ +static void +DropClonedTriggersFromPartition(Oid partitionId) +{ + ScanKeyData skey; + SysScanDesc scan; + HeapTuple trigtup; + Relation tgrel; + ObjectAddresses *objects; + + objects = new_object_addresses(); + + /* + * Scan pg_trigger to search for all triggers on this rel. + */ + ScanKeyInit(&skey, Anum_pg_trigger_tgrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(partitionId)); + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + scan = systable_beginscan(tgrel, TriggerRelidNameIndexId, + true, NULL, 1, &skey); + while (HeapTupleIsValid(trigtup = systable_getnext(scan))) + { + Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(trigtup); + ObjectAddress trig; + + /* Ignore triggers that weren't cloned */ + if (!OidIsValid(pg_trigger->tgparentid)) + continue; + + /* + * Ignore internal triggers that are implementation objects of foreign + * keys, because these will be detached when the foreign keys + * themselves are. + */ + if (OidIsValid(pg_trigger->tgconstrrelid)) + continue; + + /* + * This is ugly, but necessary: remove the dependency markings on the + * trigger so that it can be removed. 
+ */ + deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid, + TriggerRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(TriggerRelationId, pg_trigger->oid, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + + /* remember this trigger to remove it below */ + ObjectAddressSet(trig, TriggerRelationId, pg_trigger->oid); + add_exact_object_address(&trig, objects); + } + + /* make the dependency removal visible to the deletion below */ + CommandCounterIncrement(); + performMultipleDeletions(objects, DROP_RESTRICT, PERFORM_DELETION_INTERNAL); + + /* done */ + free_object_addresses(objects); + systable_endscan(scan); + table_close(tgrel, RowExclusiveLock); +} + +/* + * Before acquiring lock on an index, acquire the same lock on the owning + * table. + */ +struct AttachIndexCallbackState +{ + Oid partitionOid; + Oid parentTblOid; + bool lockedParentTbl; +}; + +static void +RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid, + void *arg) +{ + struct AttachIndexCallbackState *state; + Form_pg_class classform; + HeapTuple tuple; + + state = (struct AttachIndexCallbackState *) arg; + + if (!state->lockedParentTbl) + { + LockRelationOid(state->parentTblOid, AccessShareLock); + state->lockedParentTbl = true; + } + + /* + * If we previously locked some other heap, and the name we're looking up + * no longer refers to an index on that relation, release the now-useless + * lock. XXX maybe we should do *after* we verify whether the index does + * not actually belong to the same relation ... + */ + if (relOid != oldRelOid && OidIsValid(state->partitionOid)) + { + UnlockRelationOid(state->partitionOid, AccessShareLock); + state->partitionOid = InvalidOid; + } + + /* Didn't find a relation, so no need for locking or permission checks. 
*/ + if (!OidIsValid(relOid)) + return; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped, so nothing to do */ + classform = (Form_pg_class) GETSTRUCT(tuple); + if (classform->relkind != RELKIND_PARTITIONED_INDEX && + classform->relkind != RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not an index", rv->relname))); + ReleaseSysCache(tuple); + + /* + * Since we need only examine the heap's tupledesc, an access share lock + * on it (preventing any DDL) is sufficient. + */ + state->partitionOid = IndexGetRelation(relOid, false); + LockRelationOid(state->partitionOid, AccessShareLock); +} + +/* + * ALTER INDEX i1 ATTACH PARTITION i2 + */ +static ObjectAddress +ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) +{ + Relation partIdx; + Relation partTbl; + Relation parentTbl; + ObjectAddress address; + Oid partIdxId; + Oid currParent; + struct AttachIndexCallbackState state; + + /* + * We need to obtain lock on the index 'name' to modify it, but we also + * need to read its owning table's tuple descriptor -- so we need to lock + * both. To avoid deadlocks, obtain lock on the table before doing so on + * the index. Furthermore, we need to examine the parent table of the + * partition, so lock that one too. + */ + state.partitionOid = InvalidOid; + state.parentTblOid = parentIdx->rd_index->indrelid; + state.lockedParentTbl = false; + partIdxId = + RangeVarGetRelidExtended(name, AccessExclusiveLock, 0, + RangeVarCallbackForAttachIndex, + (void *) &state); + /* Not there? 
*/ + if (!OidIsValid(partIdxId)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("index \"%s\" does not exist", name->relname))); + + /* no deadlock risk: RangeVarGetRelidExtended already acquired the lock */ + partIdx = relation_open(partIdxId, AccessExclusiveLock); + + /* we already hold locks on both tables, so this is safe: */ + parentTbl = relation_open(parentIdx->rd_index->indrelid, AccessShareLock); + partTbl = relation_open(partIdx->rd_index->indrelid, NoLock); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx)); + + /* Silently do nothing if already in the right state */ + currParent = partIdx->rd_rel->relispartition ? + get_partition_parent(partIdxId, false) : InvalidOid; + if (currParent != RelationGetRelid(parentIdx)) + { + IndexInfo *childInfo; + IndexInfo *parentInfo; + AttrMap *attmap; + bool found; + int i; + PartitionDesc partDesc; + Oid constraintOid, + cldConstrId = InvalidOid; + + /* + * If this partition already has an index attached, refuse the + * operation. 
+ */ + refuseDupeIndexAttach(parentIdx, partIdx, partTbl); + + if (OidIsValid(currParent)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is already attached to another index.", + RelationGetRelationName(partIdx)))); + + /* Make sure it indexes a partition of the other index's table */ + partDesc = RelationGetPartitionDesc(parentTbl, true); + found = false; + for (i = 0; i < partDesc->nparts; i++) + { + if (partDesc->oids[i] == state.partitionOid) + { + found = true; + break; + } + } + if (!found) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is not an index on any partition of table \"%s\".", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentTbl)))); + + /* Ensure the indexes are compatible */ + childInfo = BuildIndexInfo(partIdx); + parentInfo = BuildIndexInfo(parentIdx); + attmap = build_attrmap_by_name(RelationGetDescr(partTbl), + RelationGetDescr(parentTbl)); + if (!CompareIndexInfo(childInfo, parentInfo, + partIdx->rd_indcollation, + parentIdx->rd_indcollation, + partIdx->rd_opfamily, + parentIdx->rd_opfamily, + attmap)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("The index definitions do not match."))); + + /* + * If there is a constraint in the parent, make sure there is one in + * the child too. 
+ */ + constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(parentTbl), + RelationGetRelid(parentIdx)); + + if (OidIsValid(constraintOid)) + { + cldConstrId = get_relation_idx_constraint_oid(RelationGetRelid(partTbl), + partIdxId); + if (!OidIsValid(cldConstrId)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("The index \"%s\" belongs to a constraint in table \"%s\" but no constraint exists for index \"%s\".", + RelationGetRelationName(parentIdx), + RelationGetRelationName(parentTbl), + RelationGetRelationName(partIdx)))); + } + + /* All good -- do it */ + IndexSetParentIndex(partIdx, RelationGetRelid(parentIdx)); + if (OidIsValid(constraintOid)) + ConstraintSetParentConstraint(cldConstrId, constraintOid, + RelationGetRelid(partTbl)); + + free_attrmap(attmap); + + validatePartitionedIndex(parentIdx, parentTbl); + } + + relation_close(parentTbl, AccessShareLock); + /* keep these locks till commit */ + relation_close(partTbl, NoLock); + relation_close(partIdx, NoLock); + + return address; +} + +/* + * Verify whether the given partition already contains an index attached + * to the given partitioned index. If so, raise an error. 
+ */ +static void +refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl) +{ + Oid existingIdx; + + existingIdx = index_get_partition(partitionTbl, + RelationGetRelid(parentIdx)); + if (OidIsValid(existingIdx)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Another index is already attached for partition \"%s\".", + RelationGetRelationName(partitionTbl)))); +} + +/* + * Verify whether the set of attached partition indexes to a parent index on + * a partitioned table is complete. If it is, mark the parent index valid. + * + * This should be called each time a partition index is attached. + */ +static void +validatePartitionedIndex(Relation partedIdx, Relation partedTbl) +{ + Relation inheritsRel; + SysScanDesc scan; + ScanKeyData key; + int tuples = 0; + HeapTuple inhTup; + bool updated = false; + + Assert(partedIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* + * Scan pg_inherits for this parent index. Count each valid index we find + * (verifying the pg_index entry for each), and if we reach the total + * amount we expect, we can mark this parent index as valid. 
+ */ + inheritsRel = table_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&key, Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(partedIdx))); + scan = systable_beginscan(inheritsRel, InheritsParentIndexId, true, + NULL, 1, &key); + while ((inhTup = systable_getnext(scan)) != NULL) + { + Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(inhTup); + HeapTuple indTup; + Form_pg_index indexForm; + + indTup = SearchSysCache1(INDEXRELID, + ObjectIdGetDatum(inhForm->inhrelid)); + if (!HeapTupleIsValid(indTup)) + elog(ERROR, "cache lookup failed for index %u", inhForm->inhrelid); + indexForm = (Form_pg_index) GETSTRUCT(indTup); + if (indexForm->indisvalid) + tuples += 1; + ReleaseSysCache(indTup); + } + + /* Done with pg_inherits */ + systable_endscan(scan); + table_close(inheritsRel, AccessShareLock); + + /* + * If we found as many inherited indexes as the partitioned table has + * partitions, we're good; update pg_index to set indisvalid. + */ + if (tuples == RelationGetPartitionDesc(partedTbl, true)->nparts) + { + Relation idxRel; + HeapTuple indTup; + Form_pg_index indexForm; + + idxRel = table_open(IndexRelationId, RowExclusiveLock); + indTup = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(RelationGetRelid(partedIdx))); + if (!HeapTupleIsValid(indTup)) + elog(ERROR, "cache lookup failed for index %u", + RelationGetRelid(partedIdx)); + indexForm = (Form_pg_index) GETSTRUCT(indTup); + + indexForm->indisvalid = true; + updated = true; + + CatalogTupleUpdate(idxRel, &indTup->t_self, indTup); + + table_close(idxRel, RowExclusiveLock); + heap_freetuple(indTup); + } + + /* + * If this index is in turn a partition of a larger index, validating it + * might cause the parent to become valid also. Try that. 
+ */ + if (updated && partedIdx->rd_rel->relispartition) + { + Oid parentIdxId, + parentTblId; + Relation parentIdx, + parentTbl; + + /* make sure we see the validation we just did */ + CommandCounterIncrement(); + + parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), false); + parentTblId = get_partition_parent(RelationGetRelid(partedTbl), false); + parentIdx = relation_open(parentIdxId, AccessExclusiveLock); + parentTbl = relation_open(parentTblId, AccessExclusiveLock); + Assert(!parentIdx->rd_index->indisvalid); + + validatePartitionedIndex(parentIdx, parentTbl); + + relation_close(parentIdx, AccessExclusiveLock); + relation_close(parentTbl, AccessExclusiveLock); + } +} + +/* + * Return an OID list of constraints that reference the given relation + * that are marked as having a parent constraints. + */ +static List * +GetParentedForeignKeyRefs(Relation partition) +{ + Relation pg_constraint; + HeapTuple tuple; + SysScanDesc scan; + ScanKeyData key[2]; + List *constraints = NIL; + + /* + * If no indexes, or no columns are referenceable by FKs, we can avoid the + * scan. + */ + if (RelationGetIndexList(partition) == NIL || + bms_is_empty(RelationGetIndexAttrBitmap(partition, + INDEX_ATTR_BITMAP_KEY))) + return NIL; + + /* Search for constraints referencing this table */ + pg_constraint = table_open(ConstraintRelationId, AccessShareLock); + ScanKeyInit(&key[0], + Anum_pg_constraint_confrelid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(partition))); + ScanKeyInit(&key[1], + Anum_pg_constraint_contype, BTEqualStrategyNumber, + F_CHAREQ, CharGetDatum(CONSTRAINT_FOREIGN)); + + /* XXX This is a seqscan, as we don't have a usable index */ + scan = systable_beginscan(pg_constraint, InvalidOid, true, NULL, 2, key); + while ((tuple = systable_getnext(scan)) != NULL) + { + Form_pg_constraint constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* + * We only need to process constraints that are part of larger ones. 
+ */ + if (!OidIsValid(constrForm->conparentid)) + continue; + + constraints = lappend_oid(constraints, constrForm->oid); + } + + systable_endscan(scan); + table_close(pg_constraint, AccessShareLock); + + return constraints; +} + +/* + * During DETACH PARTITION, verify that any foreign keys pointing to the + * partitioned table would not become invalid. An error is raised if any + * referenced values exist. + */ +static void +ATDetachCheckNoForeignKeyRefs(Relation partition) +{ + List *constraints; + ListCell *cell; + + constraints = GetParentedForeignKeyRefs(partition); + + foreach(cell, constraints) + { + Oid constrOid = lfirst_oid(cell); + HeapTuple tuple; + Form_pg_constraint constrForm; + Relation rel; + Trigger trig; + + tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for constraint %u", constrOid); + constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + Assert(OidIsValid(constrForm->conparentid)); + Assert(constrForm->confrelid == RelationGetRelid(partition)); + + /* prevent data changes into the referencing table until commit */ + rel = table_open(constrForm->conrelid, ShareLock); + + MemSet(&trig, 0, sizeof(trig)); + trig.tgoid = InvalidOid; + trig.tgname = NameStr(constrForm->conname); + trig.tgenabled = TRIGGER_FIRES_ON_ORIGIN; + trig.tgisinternal = true; + trig.tgconstrrelid = RelationGetRelid(partition); + trig.tgconstrindid = constrForm->conindid; + trig.tgconstraint = constrForm->oid; + trig.tgdeferrable = false; + trig.tginitdeferred = false; + /* we needn't fill in remaining fields */ + + RI_PartitionRemove_Check(&trig, rel, partition); + + ReleaseSysCache(tuple); + + table_close(rel, NoLock); + } +} + +/* + * resolve column compression specification to compression method. 
+ */ +static char +GetAttributeCompression(Oid atttypid, char *compression) +{ + char cmethod; + + if (compression == NULL || strcmp(compression, "default") == 0) + return InvalidCompressionMethod; + + /* + * To specify a nondefault method, the column data type must be toastable. + * Note this says nothing about whether the column's attstorage setting + * permits compression; we intentionally allow attstorage and + * attcompression to be independent. But with a non-toastable type, + * attstorage could not be set to a value that would permit compression. + * + * We don't actually need to enforce this, since nothing bad would happen + * if attcompression were non-default; it would never be consulted. But + * it seems more user-friendly to complain about a certainly-useless + * attempt to set the property. + */ + if (!TypeIsToastable(atttypid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column data type %s does not support compression", + format_type_be(atttypid)))); + + cmethod = CompressionNameToMethod(compression); + if (!CompressionMethodIsValid(cmethod)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid compression method \"%s\"", compression))); + + return cmethod; +} diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c new file mode 100644 index 0000000..9bdfef9 --- /dev/null +++ b/src/backend/commands/tablespace.c @@ -0,0 +1,1595 @@ +/*------------------------------------------------------------------------- + * + * tablespace.c + * Commands to manipulate table spaces + * + * Tablespaces in PostgreSQL are designed to allow users to determine + * where the data file(s) for a given database object reside on the file + * system. + * + * A tablespace represents a directory on the file system. At tablespace + * creation time, the directory must be empty. 
To simplify things and + * remove the possibility of having file name conflicts, we isolate + * files within a tablespace into database-specific subdirectories. + * + * To support file access via the information given in RelFileNode, we + * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are + * named by tablespace OIDs and point to the actual tablespace directories. + * There is also a per-cluster version directory in each tablespace. + * Thus the full path to an arbitrary file is + * $PGDATA/pg_tblspc/spcoid/PG_MAJORVER_CATVER/dboid/relfilenode + * e.g. + * $PGDATA/pg_tblspc/20981/PG_9.0_201002161/719849/83292814 + * + * There are two tablespaces created at initdb time: pg_global (for shared + * tables) and pg_default (for everything else). For backwards compatibility + * and to remain functional on platforms without symlinks, these tablespaces + * are accessed specially: they are respectively + * $PGDATA/global/relfilenode + * $PGDATA/base/dboid/relfilenode + * + * To allow CREATE DATABASE to give a new database a default tablespace + * that's different from the template database's default, we make the + * provision that a zero in pg_class.reltablespace means the database's + * default tablespace. Without this, CREATE DATABASE would have to go in + * and munge the system catalogs of the new database. 
+ * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/tablespace.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/binary_upgrade.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_tablespace.h" +#include "commands/comment.h" +#include "commands/seclabel.h" +#include "commands/tablecmds.h" +#include "commands/tablespace.h" +#include "common/file_perm.h" +#include "miscadmin.h" +#include "postmaster/bgwriter.h" +#include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/standby.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/varlena.h" + +/* GUC variables */ +char *default_tablespace = NULL; +char *temp_tablespaces = NULL; +bool allow_in_place_tablespaces = false; + +Oid binary_upgrade_next_pg_tablespace_oid = InvalidOid; + +static void create_tablespace_directories(const char *location, + const Oid tablespaceoid); +static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo); + + +/* + * Each database using a table space is isolated into its own name space + * by a subdirectory named for the database OID. On first creation of an + * object in the tablespace, create the subdirectory. 
If the subdirectory
 * already exists, fall through quietly.
 *
 * isRedo indicates that we are creating an object during WAL replay.
 * In this case we will cope with the possibility of the tablespace
 * directory not being there either --- this could happen if we are
 * replaying an operation on a table in a subsequently-dropped tablespace.
 * We handle this by making a directory in the place where the tablespace
 * symlink would normally be.  This isn't an exact replay of course, but
 * it's the best we can do given the available information.
 *
 * If tablespaces are not supported, we still need it in case we have to
 * re-create a database subdirectory (of $PGDATA/base) during WAL replay.
 *
 * spcNode: tablespace OID; dbNode: database OID.  Both must be valid
 * (asserted below), except that the global tablespace is a no-op.
 */
void
TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo)
{
	struct stat st;
	char	   *dir;

	/*
	 * The global tablespace doesn't have per-database subdirectories, so
	 * nothing to do for it.
	 */
	if (spcNode == GLOBALTABLESPACE_OID)
		return;

	Assert(OidIsValid(spcNode));
	Assert(OidIsValid(dbNode));

	dir = GetDatabasePath(dbNode, spcNode);

	if (stat(dir, &st) < 0)
	{
		/* Directory does not exist? */
		if (errno == ENOENT)
		{
			/*
			 * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE
			 * or TablespaceCreateDbspace is running concurrently.
			 */
			LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

			/*
			 * Recheck to see if someone created the directory while we were
			 * waiting for lock.
			 */
			if (stat(dir, &st) == 0 && S_ISDIR(st.st_mode))
			{
				/* Directory was created by a concurrent caller; nothing to do */
			}
			else
			{
				/* Directory creation failed? */
				if (MakePGDirectory(dir) < 0)
				{
					/* Failure other than not exists or not in WAL replay? */
					if (errno != ENOENT || !isRedo)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not create directory \"%s\": %m",
										dir)));

					/*
					 * During WAL replay, it's conceivable that several levels
					 * of directories are missing if tablespaces are dropped
					 * further ahead of the WAL stream than we're currently
					 * replaying.  An easy way forward is to create them as
					 * plain directories and hope they are removed by further
					 * WAL replay if necessary.  If this also fails, there is
					 * trouble we cannot get out of, so just report that and
					 * bail out.
					 */
					if (pg_mkdir_p(dir, pg_dir_create_mode) < 0)
						ereport(ERROR,
								(errcode_for_file_access(),
								 errmsg("could not create directory \"%s\": %m",
										dir)));
				}
			}

			LWLockRelease(TablespaceCreateLock);
		}
		else
		{
			/* stat() failed for some reason other than nonexistence */
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not stat directory \"%s\": %m", dir)));
		}
	}
	else
	{
		/* Path exists: it must already be a directory, else complain */
		if (!S_ISDIR(st.st_mode))
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("\"%s\" exists but is not a directory",
							dir)));
	}

	pfree(dir);
}

/*
 * Create a table space
 *
 * Only superusers can create a tablespace.
This seems a reasonable restriction
 * since we're determining the system layout and, anyway, we probably have
 * root if we're doing this kind of activity
 *
 * Returns the OID of the new pg_tablespace row.
 */
Oid
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
	Relation	rel;
	Datum		values[Natts_pg_tablespace];
	bool		nulls[Natts_pg_tablespace];
	HeapTuple	tuple;
	Oid			tablespaceoid;
	char	   *location;
	Oid			ownerId;
	Datum		newOptions;
	bool		in_place;

	/* Must be superuser */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create tablespace \"%s\"",
						stmt->tablespacename),
				 errhint("Must be superuser to create a tablespace.")));

	/* However, the eventual owner of the tablespace need not be */
	if (stmt->owner)
		ownerId = get_rolespec_oid(stmt->owner, false);
	else
		ownerId = GetUserId();

	/* Unix-ify the offered path, and strip any trailing slashes */
	location = pstrdup(stmt->location);
	canonicalize_path(location);

	/* disallow quotes, else CREATE DATABASE would be at risk */
	if (strchr(location, '\''))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_NAME),
				 errmsg("tablespace location cannot contain single quotes")));

	/* Empty location plus the GUC means a developer-only in-place tablespace */
	in_place = allow_in_place_tablespaces && strlen(location) == 0;

	/*
	 * Allowing relative paths seems risky
	 *
	 * This also helps us ensure that location is not empty or whitespace,
	 * unless specifying a developer-only in-place tablespace.
	 */
	if (!in_place && !is_absolute_path(location))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location must be an absolute path")));

	/*
	 * Check that location isn't too long.  Remember that we're going to
	 * append 'PG_XXX/<dboid>/<relid>_<fork>.<nnn>'.  (The angle-bracketed
	 * template was lost in extraction; restored from the length terms below.)
	 * FYI, we never actually reference the whole path here, but
	 * MakePGDirectory() uses the first two parts.
	 */
	if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
		OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location \"%s\" is too long",
						location)));

	/* Warn if the tablespace is in the data directory. */
	if (path_is_prefix_of_path(DataDir, location))
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("tablespace location should not be inside the data directory")));

	/*
	 * Disallow creation of tablespaces named "pg_xxx"; we reserve this
	 * namespace for system purposes.
	 */
	if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable tablespace name \"%s\"",
						stmt->tablespacename),
				 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for tablespace names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(stmt->tablespacename, "regress_", 8) != 0)
		elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
#endif

	/*
	 * Check that there is no other tablespace by this name.  (The unique
	 * index would catch this anyway, but might as well give a friendlier
	 * message.)
	 */
	if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("tablespace \"%s\" already exists",
						stmt->tablespacename)));

	/*
	 * Insert tuple into pg_tablespace.  The purpose of doing this first is to
	 * lock the proposed tablename against other would-be creators.  The
	 * insertion will roll back if we find problems below.
	 */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	MemSet(nulls, false, sizeof(nulls));

	if (IsBinaryUpgrade)
	{
		/* Use binary-upgrade override for tablespace oid */
		if (!OidIsValid(binary_upgrade_next_pg_tablespace_oid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("pg_tablespace OID value not set when in binary upgrade mode")));

		tablespaceoid = binary_upgrade_next_pg_tablespace_oid;
		/* consume the override so it can't be reused accidentally */
		binary_upgrade_next_pg_tablespace_oid = InvalidOid;
	}
	else
		tablespaceoid = GetNewOidWithIndex(rel, TablespaceOidIndexId,
										   Anum_pg_tablespace_oid);
	values[Anum_pg_tablespace_oid - 1] = ObjectIdGetDatum(tablespaceoid);
	values[Anum_pg_tablespace_spcname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
	values[Anum_pg_tablespace_spcowner - 1] =
		ObjectIdGetDatum(ownerId);
	nulls[Anum_pg_tablespace_spcacl - 1] = true;

	/* Generate new proposed spcoptions (text array) */
	newOptions = transformRelOptions((Datum) 0,
									 stmt->options,
									 NULL, NULL, false, false);
	/* validate options, discarding the parsed result */
	(void) tablespace_reloptions(newOptions, true);
	if (newOptions != (Datum) 0)
		values[Anum_pg_tablespace_spcoptions - 1] = newOptions;
	else
		nulls[Anum_pg_tablespace_spcoptions - 1] = true;

	tuple = heap_form_tuple(rel->rd_att, values, nulls);

	CatalogTupleInsert(rel, tuple);

	heap_freetuple(tuple);

	/* Record dependency on owner */
	recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);

	/* Post creation hook for new tablespace */
	InvokeObjectPostCreateHook(TableSpaceRelationId, tablespaceoid, 0);

	create_tablespace_directories(location, tablespaceoid);

	/* Record the filesystem change in XLOG */
	{
		xl_tblspc_create_rec xlrec;

		xlrec.ts_id = tablespaceoid;

		XLogBeginInsert();
		/* register the fixed-size prefix, then the path as a separate chunk */
		XLogRegisterData((char *) &xlrec,
						 offsetof(xl_tblspc_create_rec, ts_path));
		XLogRegisterData((char *) location, strlen(location) + 1);

		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
	}

	/*
	 * Force synchronous commit, to minimize the window between creating the
	 * symlink on-disk and marking the transaction committed.  It's not great
	 * that there is any window at all, but definitely we don't want to make
	 * it larger than necessary.
	 */
	ForceSyncCommit();

	pfree(location);

	/* We keep the lock on pg_tablespace until commit */
	table_close(rel, NoLock);

	return tablespaceoid;
#else							/* !HAVE_SYMLINK */
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("tablespaces are not supported on this platform")));
	return InvalidOid;			/* keep compiler quiet */
#endif							/* HAVE_SYMLINK */
}

/*
 * Drop a table space
 *
 * Be careful to check that the tablespace is empty.
 */
void
DropTableSpace(DropTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
	char	   *tablespacename = stmt->tablespacename;
	TableScanDesc scandesc;
	Relation	rel;
	HeapTuple	tuple;
	Form_pg_tablespace spcform;
	ScanKeyData entry[1];
	Oid			tablespaceoid;
	char	   *detail;
	char	   *detail_log;

	/*
	 * Find the target tuple
	 */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(tablespacename));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tuple = heap_getnext(scandesc, ForwardScanDirection);

	if (!HeapTupleIsValid(tuple))
	{
		if (!stmt->missing_ok)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							tablespacename)));
		}
		else
		{
			/* IF EXISTS: notice, release scan and lock, and bail out */
			ereport(NOTICE,
					(errmsg("tablespace \"%s\" does not exist, skipping",
							tablespacename)));
			table_endscan(scandesc);
			table_close(rel, NoLock);
		}
		return;
	}

	spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
	tablespaceoid = spcform->oid;

	/* Must be tablespace owner */
	if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
					   tablespacename);

	/* Disallow drop of the standard tablespaces, even by superuser */
	if (IsPinnedObject(TableSpaceRelationId, tablespaceoid))
		aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE,
					   tablespacename);

	/* Check for pg_shdepend entries depending on this tablespace */
	if (checkSharedDependencies(TableSpaceRelationId, tablespaceoid,
								&detail, &detail_log))
		ereport(ERROR,
				(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
				 errmsg("tablespace \"%s\" cannot be dropped because some objects depend on it",
						tablespacename),
				 errdetail_internal("%s", detail),
				 errdetail_log("%s", detail_log)));

	/* DROP hook for the tablespace being removed */
	InvokeObjectDropHook(TableSpaceRelationId, tablespaceoid, 0);

	/*
	 * Remove the pg_tablespace tuple (this will roll back if we fail below)
	 */
	CatalogTupleDelete(rel, &tuple->t_self);

	table_endscan(scandesc);

	/*
	 * Remove any comments or security labels on this tablespace.
	 */
	DeleteSharedComments(tablespaceoid, TableSpaceRelationId);
	DeleteSharedSecurityLabel(tablespaceoid, TableSpaceRelationId);

	/*
	 * Remove dependency on owner.
	 */
	deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid, 0);

	/*
	 * Acquire TablespaceCreateLock to ensure that no TablespaceCreateDbspace
	 * is running concurrently.
	 */
	LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

	/*
	 * Try to remove the physical infrastructure.
	 */
	if (!destroy_tablespace_directories(tablespaceoid, false))
	{
		/*
		 * Not all files deleted?  However, there can be lingering empty files
		 * in the directories, left behind by for example DROP TABLE, that
		 * have been scheduled for deletion at next checkpoint (see comments
		 * in mdunlink() for details).  We could just delete them immediately,
		 * but we can't tell them apart from important data files that we
		 * mustn't delete.  So instead, we force a checkpoint which will clean
		 * out any lingering files, and try again.
		 */
		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);

		/*
		 * On Windows, an unlinked file persists in the directory listing
		 * until no process retains an open handle for the file.  The DDL
		 * commands that schedule files for unlink send invalidation messages
		 * directing other PostgreSQL processes to close the files, but
		 * nothing guarantees they'll be processed in time.  So, we'll also
		 * use a global barrier to ask all backends to close all files, and
		 * wait until they're finished.
		 */
		LWLockRelease(TablespaceCreateLock);
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
		LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);

		/* And now try again. */
		if (!destroy_tablespace_directories(tablespaceoid, false))
		{
			/* Still not empty, the files must be important then */
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("tablespace \"%s\" is not empty",
							tablespacename)));
		}
	}

	/* Record the filesystem change in XLOG */
	{
		xl_tblspc_drop_rec xlrec;

		xlrec.ts_id = tablespaceoid;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));

		(void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
	}

	/*
	 * Note: because we checked that the tablespace was empty, there should be
	 * no need to worry about flushing shared buffers or free space map
	 * entries for relations in the tablespace.
	 */

	/*
	 * Force synchronous commit, to minimize the window between removing the
	 * files on-disk and marking the transaction committed.  It's not great
	 * that there is any window at all, but definitely we don't want to make
	 * it larger than necessary.
	 */
	ForceSyncCommit();

	/*
	 * Allow TablespaceCreateDbspace again.
+ */ + LWLockRelease(TablespaceCreateLock); + + /* We keep the lock on pg_tablespace until commit */ + table_close(rel, NoLock); +#else /* !HAVE_SYMLINK */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("tablespaces are not supported on this platform"))); +#endif /* HAVE_SYMLINK */ +} + + +/* + * create_tablespace_directories + * + * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/ + * to the specified directory + */ +static void +create_tablespace_directories(const char *location, const Oid tablespaceoid) +{ + char *linkloc; + char *location_with_version_dir; + struct stat st; + bool in_place; + + linkloc = psprintf("pg_tblspc/%u", tablespaceoid); + + /* + * If we're asked to make an 'in place' tablespace, create the directory + * directly where the symlink would normally go. This is a developer-only + * option for now, to facilitate regression testing. + */ + in_place = strlen(location) == 0; + + if (in_place) + { + if (MakePGDirectory(linkloc) < 0 && errno != EEXIST) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + linkloc))); + } + + location_with_version_dir = psprintf("%s/%s", in_place ? linkloc : location, + TABLESPACE_VERSION_DIRECTORY); + + /* + * Attempt to coerce target directory to safe permissions. If this fails, + * it doesn't exist or has the wrong owner. Not needed for in-place mode, + * because in that case we created the directory with the desired + * permissions. + */ + if (!in_place && chmod(location, pg_dir_create_mode) != 0) + { + if (errno == ENOENT) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), + errmsg("directory \"%s\" does not exist", location), + InRecovery ? 
errhint("Create this directory for the tablespace before " + "restarting the server.") : 0)); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not set permissions on directory \"%s\": %m", + location))); + } + + /* + * The creation of the version directory prevents more than one tablespace + * in a single location. This imitates TablespaceCreateDbspace(), but it + * ignores concurrency and missing parent directories. The chmod() would + * have failed in the absence of a parent. pg_tablespace_spcname_index + * prevents concurrency. + */ + if (stat(location_with_version_dir, &st) < 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + location_with_version_dir))); + else if (MakePGDirectory(location_with_version_dir) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + location_with_version_dir))); + } + else if (!S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" exists but is not a directory", + location_with_version_dir))); + else if (!InRecovery) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("directory \"%s\" already in use as a tablespace", + location_with_version_dir))); + + /* + * In recovery, remove old symlink, in case it points to the wrong place. + */ + if (!in_place && InRecovery) + remove_tablespace_symlink(linkloc); + + /* + * Create the symlink under PGDATA + */ + if (!in_place && symlink(location, linkloc) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create symbolic link \"%s\": %m", + linkloc))); + + pfree(linkloc); + pfree(location_with_version_dir); +} + + +/* + * destroy_tablespace_directories + * + * Attempt to remove filesystem infrastructure for the tablespace. + * + * 'redo' indicates we are redoing a drop from XLOG; in that case we should + * not throw an ERROR for problems, just LOG them. 
The worst consequence of
 * not removing files here would be failure to release some disk space, which
 * does not justify throwing an error that would require manual intervention
 * to get the database running again.
 *
 * Returns true if successful, false if some subdirectory is not empty
 */
static bool
destroy_tablespace_directories(Oid tablespaceoid, bool redo)
{
	char	   *linkloc;
	char	   *linkloc_with_version_dir;
	DIR		   *dirdesc;
	struct dirent *de;
	char	   *subfile;
	struct stat st;

	linkloc_with_version_dir = psprintf("pg_tblspc/%u/%s", tablespaceoid,
										TABLESPACE_VERSION_DIRECTORY);

	/*
	 * Check if the tablespace still contains any files.  We try to rmdir each
	 * per-database directory we find in it.  rmdir failure implies there are
	 * still files in that subdirectory, so give up.  (We do not have to worry
	 * about undoing any already completed rmdirs, since the next attempt to
	 * use the tablespace from that database will simply recreate the
	 * subdirectory via TablespaceCreateDbspace.)
	 *
	 * Since we hold TablespaceCreateLock, no one else should be creating any
	 * fresh subdirectories in parallel.  It is possible that new files are
	 * being created within subdirectories, though, so the rmdir call could
	 * fail.  Worst consequence is a less friendly error message.
	 *
	 * If redo is true then ENOENT is a likely outcome here, and we allow it
	 * to pass without comment.  In normal operation we still allow it, but
	 * with a warning.  This is because even though ProcessUtility disallows
	 * DROP TABLESPACE in a transaction block, it's possible that a previous
	 * DROP failed and rolled back after removing the tablespace directories
	 * and/or symlink.  We want to allow a new DROP attempt to succeed at
	 * removing the catalog entries (and symlink if still present), so we
	 * should not give a hard error here.
	 */
	dirdesc = AllocateDir(linkloc_with_version_dir);
	if (dirdesc == NULL)
	{
		if (errno == ENOENT)
		{
			if (!redo)
				ereport(WARNING,
						(errcode_for_file_access(),
						 errmsg("could not open directory \"%s\": %m",
								linkloc_with_version_dir)));
			/* The symlink might still exist, so go try to remove it */
			goto remove_symlink;
		}
		else if (redo)
		{
			/* in redo, just log other types of error */
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not open directory \"%s\": %m",
							linkloc_with_version_dir)));
			pfree(linkloc_with_version_dir);
			return false;
		}
		/* else let ReadDir report the error */
	}

	while ((de = ReadDir(dirdesc, linkloc_with_version_dir)) != NULL)
	{
		/* skip the self and parent entries */
		if (strcmp(de->d_name, ".") == 0 ||
			strcmp(de->d_name, "..") == 0)
			continue;

		subfile = psprintf("%s/%s", linkloc_with_version_dir, de->d_name);

		/* This check is just to deliver a friendlier error message */
		if (!redo && !directory_is_empty(subfile))
		{
			FreeDir(dirdesc);
			pfree(subfile);
			pfree(linkloc_with_version_dir);
			return false;
		}

		/* remove empty directory */
		if (rmdir(subfile) < 0)
			ereport(redo ? LOG : ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							subfile)));

		pfree(subfile);
	}

	FreeDir(dirdesc);

	/* remove version directory */
	if (rmdir(linkloc_with_version_dir) < 0)
	{
		ereport(redo ? LOG : ERROR,
				(errcode_for_file_access(),
				 errmsg("could not remove directory \"%s\": %m",
						linkloc_with_version_dir)));
		pfree(linkloc_with_version_dir);
		return false;
	}

	/*
	 * Try to remove the symlink.  We must however deal with the possibility
	 * that it's a directory instead of a symlink --- this could happen during
	 * WAL replay (see TablespaceCreateDbspace), and it is also the case on
	 * Windows where junction points lstat() as directories.
	 *
	 * Note: in the redo case, we'll return true if this final step fails;
	 * there's no point in retrying it.  Also, ENOENT should provoke no more
	 * than a warning.
	 */
remove_symlink:
	linkloc = pstrdup(linkloc_with_version_dir);
	get_parent_directory(linkloc);
	if (lstat(linkloc, &st) < 0)
	{
		int			saved_errno = errno;

		ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m",
						linkloc)));
	}
	else if (S_ISDIR(st.st_mode))
	{
		if (rmdir(linkloc) < 0)
		{
			int			saved_errno = errno;

			ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							linkloc)));
		}
	}
#ifdef S_ISLNK
	else if (S_ISLNK(st.st_mode))
	{
		if (unlink(linkloc) < 0)
		{
			int			saved_errno = errno;

			ereport(redo ? LOG : (saved_errno == ENOENT ? WARNING : ERROR),
					(errcode_for_file_access(),
					 errmsg("could not remove symbolic link \"%s\": %m",
							linkloc)));
		}
	}
#endif
	else
	{
		/* Refuse to remove anything that's not a directory or symlink */
		ereport(redo ? LOG : ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("\"%s\" is not a directory or symbolic link",
						linkloc)));
	}

	pfree(linkloc_with_version_dir);
	pfree(linkloc);

	return true;
}


/*
 * Check if a directory is empty.
 *
 * This probably belongs somewhere else, but not sure where...
 */
bool
directory_is_empty(const char *path)
{
	DIR		   *dirdesc;
	struct dirent *de;

	dirdesc = AllocateDir(path);

	while ((de = ReadDir(dirdesc, path)) != NULL)
	{
		if (strcmp(de->d_name, ".") == 0 ||
			strcmp(de->d_name, "..") == 0)
			continue;
		/* found a real entry: not empty */
		FreeDir(dirdesc);
		return false;
	}

	FreeDir(dirdesc);
	return true;
}

/*
 * remove_tablespace_symlink
 *
 * This function removes symlinks in pg_tblspc.  On Windows, junction points
 * act like directories so we must be able to apply rmdir.
This function
 * works like the symlink removal code in destroy_tablespace_directories,
 * except that failure to remove is always an ERROR.  But if the file doesn't
 * exist at all, that's OK.
 */
void
remove_tablespace_symlink(const char *linkloc)
{
	struct stat st;

	if (lstat(linkloc, &st) < 0)
	{
		/* nonexistence is fine; anything else is an error */
		if (errno == ENOENT)
			return;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not stat file \"%s\": %m", linkloc)));
	}

	if (S_ISDIR(st.st_mode))
	{
		/*
		 * This will fail if the directory isn't empty, but not if it's a
		 * junction point.
		 */
		if (rmdir(linkloc) < 0 && errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove directory \"%s\": %m",
							linkloc)));
	}
#ifdef S_ISLNK
	else if (S_ISLNK(st.st_mode))
	{
		if (unlink(linkloc) < 0 && errno != ENOENT)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not remove symbolic link \"%s\": %m",
							linkloc)));
	}
#endif
	else
	{
		/* Refuse to remove anything that's not a directory or symlink */
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("\"%s\" is not a directory or symbolic link",
						linkloc)));
	}
}

/*
 * Rename a tablespace
 *
 * Returns the ObjectAddress of the renamed tablespace.
 */
ObjectAddress
RenameTableSpace(const char *oldname, const char *newname)
{
	Oid			tspId;
	Relation	rel;
	ScanKeyData entry[1];
	TableScanDesc scan;
	HeapTuple	tup;
	HeapTuple	newtuple;
	Form_pg_tablespace newform;
	ObjectAddress address;

	/* Search pg_tablespace */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(oldname));
	scan = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scan, ForwardScanDirection);
	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist",
						oldname)));

	/* work on a copy so we can modify spcname in place below */
	newtuple = heap_copytuple(tup);
	newform = (Form_pg_tablespace)
GETSTRUCT(newtuple);
	tspId = newform->oid;

	table_endscan(scan);

	/* Must be owner */
	if (!pg_tablespace_ownercheck(tspId, GetUserId()))
		aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_TABLESPACE, oldname);

	/* Validate new name */
	if (!allowSystemTableMods && IsReservedName(newname))
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable tablespace name \"%s\"", newname),
				 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

	/*
	 * If built with appropriate switch, whine when regression-testing
	 * conventions for tablespace names are violated.
	 */
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
	if (strncmp(newname, "regress_", 8) != 0)
		elog(WARNING, "tablespaces created by regression test cases should have names starting with \"regress_\"");
#endif

	/* Make sure the new name doesn't exist */
	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(newname));
	scan = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scan, ForwardScanDirection);
	if (HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("tablespace \"%s\" already exists",
						newname)));

	table_endscan(scan);

	/* OK, update the entry */
	namestrcpy(&(newform->spcname), newname);

	CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(TableSpaceRelationId, tspId, 0);

	ObjectAddressSet(address, TableSpaceRelationId, tspId);

	/* hold the lock until commit */
	table_close(rel, NoLock);

	return address;
}

/*
 * Alter table space options
 *
 * Returns the OID of the tablespace whose options were changed.
 */
Oid
AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
{
	Relation	rel;
	ScanKeyData entry[1];
	TableScanDesc scandesc;
	HeapTuple	tup;
	Oid			tablespaceoid;
	Datum		datum;
	Datum		newOptions;
	Datum		repl_val[Natts_pg_tablespace];
	bool		isnull;
	bool		repl_null[Natts_pg_tablespace];
	bool		repl_repl[Natts_pg_tablespace];
	HeapTuple	newtuple;

	/* Search pg_tablespace */
	rel = table_open(TableSpaceRelationId, RowExclusiveLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_spcname,
				BTEqualStrategyNumber, F_NAMEEQ,
				CStringGetDatum(stmt->tablespacename));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tup = heap_getnext(scandesc, ForwardScanDirection);
	if (!HeapTupleIsValid(tup))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("tablespace \"%s\" does not exist",
						stmt->tablespacename)));

	tablespaceoid = ((Form_pg_tablespace) GETSTRUCT(tup))->oid;

	/* Must be owner of the existing object */
	if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLESPACE,
					   stmt->tablespacename);

	/* Generate new proposed spcoptions (text array) */
	datum = heap_getattr(tup, Anum_pg_tablespace_spcoptions,
						 RelationGetDescr(rel), &isnull);
	newOptions = transformRelOptions(isnull ? (Datum) 0 : datum,
									 stmt->options, NULL, NULL, false,
									 stmt->isReset);
	/* validate options, discarding the parsed result */
	(void) tablespace_reloptions(newOptions, true);

	/* Build new tuple. */
	memset(repl_null, false, sizeof(repl_null));
	memset(repl_repl, false, sizeof(repl_repl));
	if (newOptions != (Datum) 0)
		repl_val[Anum_pg_tablespace_spcoptions - 1] = newOptions;
	else
		repl_null[Anum_pg_tablespace_spcoptions - 1] = true;
	repl_repl[Anum_pg_tablespace_spcoptions - 1] = true;
	newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val,
								 repl_null, repl_repl);

	/* Update system catalog. */
	CatalogTupleUpdate(rel, &newtuple->t_self, newtuple);

	InvokeObjectPostAlterHook(TableSpaceRelationId, tablespaceoid, 0);

	heap_freetuple(newtuple);

	/* Conclude heap scan. */
	table_endscan(scandesc);
	table_close(rel, NoLock);

	return tablespaceoid;
}

/*
 * Routines for handling the GUC variable 'default_tablespace'.
 */

/* check_hook: validate new default_tablespace */
bool
check_default_tablespace(char **newval, void **extra, GucSource source)
{
	/*
	 * If we aren't inside a transaction, or connected to a database, we
	 * cannot do the catalog accesses necessary to verify the name.  Must
	 * accept the value on faith.
	 */
	if (IsTransactionState() && MyDatabaseId != InvalidOid)
	{
		if (**newval != '\0' &&
			!OidIsValid(get_tablespace_oid(*newval, true)))
		{
			/*
			 * When source == PGC_S_TEST, don't throw a hard error for a
			 * nonexistent tablespace, only a NOTICE.  See comments in guc.h.
			 */
			if (source == PGC_S_TEST)
			{
				ereport(NOTICE,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("tablespace \"%s\" does not exist",
								*newval)));
			}
			else
			{
				GUC_check_errdetail("Tablespace \"%s\" does not exist.",
									*newval);
				return false;
			}
		}
	}

	return true;
}

/*
 * GetDefaultTablespace -- get the OID of the current default tablespace
 *
 * Temporary objects have different default tablespaces, hence the
 * relpersistence parameter must be specified.  Also, for partitioned tables,
 * we disallow specifying the database default, so that needs to be specified
 * too.
 *
 * May return InvalidOid to indicate "use the database's default tablespace".
 *
 * Note that caller is expected to check appropriate permissions for any
 * result other than InvalidOid.
 *
 * This exists to hide (and possibly optimize the use of) the
 * default_tablespace GUC variable.
 */
Oid
GetDefaultTablespace(char relpersistence, bool partitioned)
{
	Oid			result;

	/* The temp-table case is handled elsewhere */
	if (relpersistence == RELPERSISTENCE_TEMP)
	{
		PrepareTempTablespaces();
		return GetNextTempTableSpace();
	}

	/* Fast path for default_tablespace == "" */
	if (default_tablespace == NULL || default_tablespace[0] == '\0')
		return InvalidOid;

	/*
	 * It is tempting to cache this lookup for more speed, but then we would
	 * fail to detect the case where the tablespace was dropped since the GUC
	 * variable was set.  Note also that we don't complain if the value fails
	 * to refer to an existing tablespace; we just silently return InvalidOid,
	 * causing the new object to be created in the database's tablespace.
	 */
	result = get_tablespace_oid(default_tablespace, true);

	/*
	 * Allow explicit specification of database's default tablespace in
	 * default_tablespace without triggering permissions checks.  Don't allow
	 * specifying that when creating a partitioned table, however, since the
	 * result is confusing.
	 */
	if (result == MyDatabaseTableSpace)
	{
		if (partitioned)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot specify default tablespace for partitioned relations")));
		result = InvalidOid;
	}
	return result;
}


/*
 * Routines for handling the GUC variable 'temp_tablespaces'.
 */

/* Workspace handed from check_temp_tablespaces to assign_temp_tablespaces */
typedef struct
{
	/* Array of OIDs to be passed to SetTempTablespaces() */
	int			numSpcs;
	Oid			tblSpcs[FLEXIBLE_ARRAY_MEMBER];
} temp_tablespaces_extra;

/* check_hook: validate new temp_tablespaces */
bool
check_temp_tablespaces(char **newval, void **extra, GucSource source)
{
	char	   *rawname;
	List	   *namelist;

	/* Need a modifiable copy of string */
	rawname = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawname, ',', &namelist))
	{
		/* syntax error in name list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawname);
		list_free(namelist);
		return false;
	}

	/*
	 * If we aren't inside a transaction, or connected to a database, we
	 * cannot do the catalog accesses necessary to verify the name.  Must
	 * accept the value on faith.  Fortunately, there's then also no need to
	 * pass the data to fd.c.
	 */
	if (IsTransactionState() && MyDatabaseId != InvalidOid)
	{
		temp_tablespaces_extra *myextra;
		Oid		   *tblSpcs;
		int			numSpcs;
		ListCell   *l;

		/* temporary workspace until we are done verifying the list */
		tblSpcs = (Oid *) palloc(list_length(namelist) * sizeof(Oid));
		numSpcs = 0;
		foreach(l, namelist)
		{
			char	   *curname = (char *) lfirst(l);
			Oid			curoid;
			AclResult	aclresult;

			/* Allow an empty string (signifying database default) */
			if (curname[0] == '\0')
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/*
			 * In an interactive SET command, we ereport for bad info.  When
			 * source == PGC_S_TEST, don't throw a hard error for a
			 * nonexistent tablespace, only a NOTICE.  See comments in guc.h.
			 */
			/* missing_ok for non-interactive sources, so we can handle it here */
			curoid = get_tablespace_oid(curname, source <= PGC_S_TEST);
			if (curoid == InvalidOid)
			{
				if (source == PGC_S_TEST)
					ereport(NOTICE,
							(errcode(ERRCODE_UNDEFINED_OBJECT),
							 errmsg("tablespace \"%s\" does not exist",
									curname)));
				continue;
			}

			/*
			 * Allow explicit specification of database's default tablespace
			 * in temp_tablespaces without triggering permissions checks.
			 */
			if (curoid == MyDatabaseTableSpace)
			{
				/* InvalidOid signifies database's default tablespace */
				tblSpcs[numSpcs++] = InvalidOid;
				continue;
			}

			/* Check permissions, similarly complaining only if interactive */
			aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
											   ACL_CREATE);
			if (aclresult != ACLCHECK_OK)
			{
				if (source >= PGC_S_INTERACTIVE)
					aclcheck_error(aclresult, OBJECT_TABLESPACE, curname);
				continue;
			}

			tblSpcs[numSpcs++] = curoid;
		}

		/*
		 * Now prepare an "extra" struct for assign_temp_tablespaces.  Note
		 * this must be raw malloc, not palloc: the GUC machinery keeps it
		 * outside any memory context (see guc.h).
		 */
		myextra = malloc(offsetof(temp_tablespaces_extra, tblSpcs) +
						 numSpcs * sizeof(Oid));
		if (!myextra)
			return false;
		myextra->numSpcs = numSpcs;
		memcpy(myextra->tblSpcs, tblSpcs, numSpcs * sizeof(Oid));
		*extra = (void *) myextra;

		pfree(tblSpcs);
	}

	pfree(rawname);
	list_free(namelist);

	return true;
}

/* assign_hook: do extra actions as needed */
void
assign_temp_tablespaces(const char *newval, void *extra)
{
	temp_tablespaces_extra *myextra = (temp_tablespaces_extra *) extra;

	/*
	 * If check_temp_tablespaces was executed inside a transaction, then pass
	 * the list it made to fd.c.  Otherwise, clear fd.c's list; we must be
	 * still outside a transaction, or else restoring during transaction exit,
	 * and in either case we can just let the next PrepareTempTablespaces call
	 * make things sane.
	 */
	if (myextra)
		SetTempTablespaces(myextra->tblSpcs, myextra->numSpcs);
	else
		SetTempTablespaces(NULL, 0);
}

/*
 * PrepareTempTablespaces -- prepare to use temp tablespaces
 *
 * If we have not already done so in the current transaction, parse the
 * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
 * for temp files.
 */
void
PrepareTempTablespaces(void)
{
	char	   *rawname;
	List	   *namelist;
	Oid		   *tblSpcs;
	int			numSpcs;
	ListCell   *l;

	/* No work if already done in current transaction */
	if (TempTablespacesAreSet())
		return;

	/*
	 * Can't do catalog access unless within a transaction.  This is just a
	 * safety check in case this function is called by low-level code that
	 * could conceivably execute outside a transaction.  Note that in such a
	 * scenario, fd.c will fall back to using the current database's default
	 * tablespace, which should always be OK.
	 */
	if (!IsTransactionState())
		return;

	/* Need a modifiable copy of string */
	rawname = pstrdup(temp_tablespaces);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawname, ',', &namelist))
	{
		/* syntax error in name list; tell fd.c to use the default instead */
		SetTempTablespaces(NULL, 0);
		pfree(rawname);
		list_free(namelist);
		return;
	}

	/*
	 * Store tablespace OIDs in an array in TopTransactionContext, so that
	 * the array stays valid for as long as fd.c may reference it (the
	 * "already set" state above is per-transaction).
	 */
	tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
										 list_length(namelist) * sizeof(Oid));
	numSpcs = 0;
	foreach(l, namelist)
	{
		char	   *curname = (char *) lfirst(l);
		Oid			curoid;
		AclResult	aclresult;

		/* Allow an empty string (signifying database default) */
		if (curname[0] == '\0')
		{
			/* InvalidOid signifies database's default tablespace */
			tblSpcs[numSpcs++] = InvalidOid;
			continue;
		}

		/* Else verify that name is a valid tablespace name */
		curoid = get_tablespace_oid(curname, true);
		if (curoid == InvalidOid)
		{
			/* Skip any bad list elements */
			continue;
		}

		/*
		 * Allow explicit specification of
database's default tablespace in + * temp_tablespaces without triggering permissions checks. + */ + if (curoid == MyDatabaseTableSpace) + { + /* InvalidOid signifies database's default tablespace */ + tblSpcs[numSpcs++] = InvalidOid; + continue; + } + + /* Check permissions similarly */ + aclresult = pg_tablespace_aclcheck(curoid, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + continue; + + tblSpcs[numSpcs++] = curoid; + } + + SetTempTablespaces(tblSpcs, numSpcs); + + pfree(rawname); + list_free(namelist); +} + + +/* + * get_tablespace_oid - given a tablespace name, look up the OID + * + * If missing_ok is false, throw an error if tablespace name not found. If + * true, just return InvalidOid. + */ +Oid +get_tablespace_oid(const char *tablespacename, bool missing_ok) +{ + Oid result; + Relation rel; + TableScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + /* + * Search pg_tablespace. We use a heapscan here even though there is an + * index on name, on the theory that pg_tablespace will usually have just + * a few entries and so an indexed lookup is a waste of effort. + */ + rel = table_open(TableSpaceRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_tablespace_spcname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(tablespacename)); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_getnext(scandesc, ForwardScanDirection); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_tablespace) GETSTRUCT(tuple))->oid; + else + result = InvalidOid; + + table_endscan(scandesc); + table_close(rel, AccessShareLock); + + if (!OidIsValid(result) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("tablespace \"%s\" does not exist", + tablespacename))); + + return result; +} + +/* + * get_tablespace_name - given a tablespace OID, look up the name + * + * Returns a palloc'd string, or NULL if no such tablespace. 
 */
char *
get_tablespace_name(Oid spc_oid)
{
	char	   *result;
	Relation	rel;
	TableScanDesc scandesc;
	HeapTuple	tuple;
	ScanKeyData entry[1];

	/*
	 * Search pg_tablespace.  We use a heapscan here even though there is an
	 * index on oid, on the theory that pg_tablespace will usually have just a
	 * few entries and so an indexed lookup is a waste of effort.
	 */
	rel = table_open(TableSpaceRelationId, AccessShareLock);

	ScanKeyInit(&entry[0],
				Anum_pg_tablespace_oid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(spc_oid));
	scandesc = table_beginscan_catalog(rel, 1, entry);
	tuple = heap_getnext(scandesc, ForwardScanDirection);

	/* We assume that there can be at most one matching tuple */
	if (HeapTupleIsValid(tuple))
		result = pstrdup(NameStr(((Form_pg_tablespace) GETSTRUCT(tuple))->spcname));
	else
		result = NULL;

	table_endscan(scandesc);
	table_close(rel, AccessShareLock);

	return result;
}


/*
 * TABLESPACE resource manager's routines
 *
 * Replays XLOG_TBLSPC_CREATE and XLOG_TBLSPC_DROP records during crash
 * recovery or on a standby.
 */
void
tblspc_redo(XLogReaderState *record)
{
	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	/* Backup blocks are not used in tblspc records */
	Assert(!XLogRecHasAnyBlockRefs(record));

	if (info == XLOG_TBLSPC_CREATE)
	{
		xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record);
		char	   *location = xlrec->ts_path;

		/* re-create the on-disk directories and symlink for the tablespace */
		create_tablespace_directories(location, xlrec->ts_id);
	}
	else if (info == XLOG_TBLSPC_DROP)
	{
		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);

		/* Close all smgr fds in all backends. */
		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));

		/*
		 * If we issued a WAL record for a drop tablespace it implies that
		 * there were no files in it at all when the DROP was done. That means
		 * that no permanent objects can exist in it at this point.
		 *
		 * It is possible for standby users to be using this tablespace as a
		 * location for their temporary files, so if we fail to remove all
		 * files then do conflict processing and try again, if currently
		 * enabled.
		 *
		 * Other possible reasons for failure include bollixed file
		 * permissions on a standby server when they were okay on the primary,
		 * etc etc. There's not much we can do about that, so just remove what
		 * we can and press on.
		 */
		if (!destroy_tablespace_directories(xlrec->ts_id, true))
		{
			ResolveRecoveryConflictWithTablespace(xlrec->ts_id);

			/*
			 * If we did recovery processing then hopefully the backends who
			 * wrote temp files should have cleaned up and exited by now.  So
			 * retry before complaining.  If we fail again, this is just a LOG
			 * condition, because it's not worth throwing an ERROR for (as
			 * that would crash the database and require manual intervention
			 * before we could get past this WAL record on restart).
			 */
			if (!destroy_tablespace_directories(xlrec->ts_id, true))
				ereport(LOG,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("directories for tablespace %u could not be removed",
								xlrec->ts_id),
						 errhint("You can remove the directories manually if necessary.")));
		}
	}
	else
		/* no other tblspc record types exist */
		elog(PANIC, "tblspc_redo: unknown op code %u", info);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
new file mode 100644
index 0000000..0769ae3
--- /dev/null
+++ b/src/backend/commands/trigger.c
@@ -0,0 +1,6664 @@
/*-------------------------------------------------------------------------
 *
 * trigger.c
 *	  PostgreSQL TRIGGERs support code.
+ * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/trigger.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/partition.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "executor/execPartition.h" +#include "miscadmin.h" +#include "nodes/bitmapset.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_func.h" +#include "parser/parse_relation.h" +#include "parser/parsetree.h" +#include "partitioning/partdesc.h" +#include "pgstat.h" +#include "rewrite/rewriteManip.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "tcop/utility.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/tuplestore.h" + + +/* GUC variables */ +int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN; + +/* How many levels deep into trigger execution are we? 
*/ +static int MyTriggerDepth = 0; + +/* Local function prototypes */ +static void renametrig_internal(Relation tgrel, Relation targetrel, + HeapTuple trigtup, const char *newname, + const char *expected_name); +static void renametrig_partition(Relation tgrel, Oid partitionId, + Oid parentTriggerOid, const char *newname, + const char *expected_name); +static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger); +static bool GetTupleForTrigger(EState *estate, + EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tid, + LockTupleMode lockmode, + TupleTableSlot *oldslot, + TupleTableSlot **epqslot, + TM_Result *tmresultp, + TM_FailureData *tmfdp); +static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo, + Trigger *trigger, TriggerEvent event, + Bitmapset *modifiedCols, + TupleTableSlot *oldslot, TupleTableSlot *newslot); +static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, + int tgindx, + FmgrInfo *finfo, + Instrumentation *instr, + MemoryContext per_tuple_context); +static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, + int event, bool row_trigger, + TupleTableSlot *oldtup, TupleTableSlot *newtup, + List *recheckIndexes, Bitmapset *modifiedCols, + TransitionCaptureState *transition_capture, + bool is_crosspart_update); +static void AfterTriggerEnlargeQueryState(void); +static bool before_stmt_triggers_fired(Oid relid, CmdType cmdType); + + +/* + * Create a trigger. Returns the address of the created trigger. + * + * queryString is the source text of the CREATE TRIGGER command. + * This must be supplied if a whenClause is specified, else it can be NULL. + * + * relOid, if nonzero, is the relation on which the trigger should be + * created. If zero, the name provided in the statement will be looked up. + * + * refRelOid, if nonzero, is the relation to which the constraint trigger + * refers. 
If zero, the constraint relation name provided in the statement + * will be looked up as needed. + * + * constraintOid, if nonzero, says that this trigger is being created + * internally to implement that constraint. A suitable pg_depend entry will + * be made to link the trigger to that constraint. constraintOid is zero when + * executing a user-entered CREATE TRIGGER command. (For CREATE CONSTRAINT + * TRIGGER, we build a pg_constraint entry internally.) + * + * indexOid, if nonzero, is the OID of an index associated with the constraint. + * We do nothing with this except store it into pg_trigger.tgconstrindid; + * but when creating a trigger for a deferrable unique constraint on a + * partitioned table, its children are looked up. Note we don't cope with + * invalid indexes in that case. + * + * funcoid, if nonzero, is the OID of the function to invoke. When this is + * given, stmt->funcname is ignored. + * + * parentTriggerOid, if nonzero, is a trigger that begets this one; so that + * if that trigger is dropped, this one should be too. There are two cases + * when a nonzero value is passed for this: 1) when this function recurses to + * create the trigger on partitions, 2) when creating child foreign key + * triggers; see CreateFKCheckTrigger() and createForeignKeyActionTriggers(). + * + * If whenClause is passed, it is an already-transformed expression for + * WHEN. In this case, we ignore any that may come in stmt->whenClause. + * + * If isInternal is true then this is an internally-generated trigger. + * This argument sets the tgisinternal field of the pg_trigger entry, and + * if true causes us to modify the given trigger name to ensure uniqueness. + * + * When isInternal is not true we require ACL_TRIGGER permissions on the + * relation, as well as ACL_EXECUTE on the trigger function. For internal + * triggers the caller must apply any required permission checks. 
 *
 * When called on partitioned tables, this function recurses to create the
 * trigger on all the partitions, except if isInternal is true, in which
 * case caller is expected to execute recursion on its own.  in_partition
 * indicates such a recursive call; outside callers should pass "false"
 * (but see CloneRowTriggersToPartition).
 */
ObjectAddress
CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
			  Oid relOid, Oid refRelOid, Oid constraintOid, Oid indexOid,
			  Oid funcoid, Oid parentTriggerOid, Node *whenClause,
			  bool isInternal, bool in_partition)
{
	/* Convenience wrapper: uses the default firing condition (origin) */
	return
		CreateTriggerFiringOn(stmt, queryString, relOid, refRelOid,
							  constraintOid, indexOid, funcoid,
							  parentTriggerOid, whenClause, isInternal,
							  in_partition, TRIGGER_FIRES_ON_ORIGIN);
}

/*
 * Like the above; additionally the firing condition
 * (always/origin/replica/disabled) can be specified.
 */
ObjectAddress
CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString,
					  Oid relOid, Oid refRelOid, Oid constraintOid,
					  Oid indexOid, Oid funcoid, Oid parentTriggerOid,
					  Node *whenClause, bool isInternal, bool in_partition,
					  char trigger_fires_when)
{
	int16		tgtype;
	int			ncolumns;
	int16	   *columns;
	int2vector *tgattr;
	List	   *whenRtable;
	char	   *qual;
	Datum		values[Natts_pg_trigger];
	bool		nulls[Natts_pg_trigger];
	Relation	rel;
	AclResult	aclresult;
	Relation	tgrel;
	Relation	pgrel;
	HeapTuple	tuple = NULL;
	Oid			funcrettype;
	Oid			trigoid = InvalidOid;
	char		internaltrigname[NAMEDATALEN];
	char	   *trigname;
	Oid			constrrelid = InvalidOid;
	ObjectAddress myself,
				referenced;
	char	   *oldtablename = NULL;
	char	   *newtablename = NULL;
	bool		partition_recurse;
	bool		trigger_exists = false;
	Oid			existing_constraint_oid = InvalidOid;
	bool		existing_isInternal = false;
	bool		existing_isClone = false;

	/*
	 * Open and lock the target relation.  ShareRowExclusiveLock keeps the
	 * relation's trigger set from changing underneath us (relied on by the
	 * duplicate-name scan further below).
	 */
	if (OidIsValid(relOid))
		rel = table_open(relOid, ShareRowExclusiveLock);
	else
		rel = table_openrv(stmt->relation, ShareRowExclusiveLock);

	/*
* Triggers must be on tables or views, and there are additional + * relation-type-specific restrictions. + */ + if (rel->rd_rel->relkind == RELKIND_RELATION) + { + /* Tables can't have INSTEAD OF triggers */ + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a table", + RelationGetRelationName(rel)), + errdetail("Tables cannot have INSTEAD OF triggers."))); + } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* Partitioned tables can't have INSTEAD OF triggers */ + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a table", + RelationGetRelationName(rel)), + errdetail("Tables cannot have INSTEAD OF triggers."))); + + /* + * FOR EACH ROW triggers have further restrictions + */ + if (stmt->row) + { + /* + * Disallow use of transition tables. + * + * Note that we have another restriction about transition tables + * in partitions; search for 'has_superclass' below for an + * explanation. The check here is just to protect from the fact + * that if we allowed it here, the creation would succeed for a + * partitioned table with no partitions, but would be blocked by + * the other restriction when the first partition was created, + * which is very unfriendly behavior. + */ + if (stmt->transitionRels != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is a partitioned table", + RelationGetRelationName(rel)), + errdetail("ROW triggers with transition tables are not supported on partitioned tables."))); + } + } + else if (rel->rd_rel->relkind == RELKIND_VIEW) + { + /* + * Views can have INSTEAD OF triggers (which we check below are + * row-level), or statement-level BEFORE/AFTER triggers. 
+ */ + if (stmt->timing != TRIGGER_TYPE_INSTEAD && stmt->row) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Views cannot have row-level BEFORE or AFTER triggers."))); + /* Disallow TRUNCATE triggers on VIEWs */ + if (TRIGGER_FOR_TRUNCATE(stmt->events)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Views cannot have TRUNCATE triggers."))); + } + else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + { + if (stmt->timing != TRIGGER_TYPE_BEFORE && + stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have INSTEAD OF triggers."))); + + if (TRIGGER_FOR_TRUNCATE(stmt->events)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have TRUNCATE triggers."))); + + /* + * We disallow constraint triggers to protect the assumption that + * triggers on FKs can't be deferred. See notes with AfterTriggers + * data structures, below. 
+ */ + if (stmt->isconstraint) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Foreign tables cannot have constraint triggers."))); + } + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + + if (stmt->isconstraint) + { + /* + * We must take a lock on the target relation to protect against + * concurrent drop. It's not clear that AccessShareLock is strong + * enough, but we certainly need at least that much... otherwise, we + * might end up creating a pg_constraint entry referencing a + * nonexistent table. + */ + if (OidIsValid(refRelOid)) + { + LockRelationOid(refRelOid, AccessShareLock); + constrrelid = refRelOid; + } + else if (stmt->constrrel != NULL) + constrrelid = RangeVarGetRelid(stmt->constrrel, AccessShareLock, + false); + } + + /* permission checks */ + if (!isInternal) + { + aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(), + ACL_TRIGGER); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), + RelationGetRelationName(rel)); + + if (OidIsValid(constrrelid)) + { + aclresult = pg_class_aclcheck(constrrelid, GetUserId(), + ACL_TRIGGER); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, get_relkind_objtype(get_rel_relkind(constrrelid)), + get_rel_name(constrrelid)); + } + } + + /* + * When called on a partitioned table to create a FOR EACH ROW trigger + * that's not internal, we create one trigger for each partition, too. + * + * For that, we'd better hold lock on all of them ahead of time. 
+ */ + partition_recurse = !isInternal && stmt->row && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + if (partition_recurse) + list_free(find_all_inheritors(RelationGetRelid(rel), + ShareRowExclusiveLock, NULL)); + + /* Compute tgtype */ + TRIGGER_CLEAR_TYPE(tgtype); + if (stmt->row) + TRIGGER_SETT_ROW(tgtype); + tgtype |= stmt->timing; + tgtype |= stmt->events; + + /* Disallow ROW-level TRUNCATE triggers */ + if (TRIGGER_FOR_ROW(tgtype) && TRIGGER_FOR_TRUNCATE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TRUNCATE FOR EACH ROW triggers are not supported"))); + + /* INSTEAD triggers must be row-level, and can't have WHEN or columns */ + if (TRIGGER_FOR_INSTEAD(tgtype)) + { + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers must be FOR EACH ROW"))); + if (stmt->whenClause) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers cannot have WHEN conditions"))); + if (stmt->columns != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("INSTEAD OF triggers cannot have column lists"))); + } + + /* + * We don't yet support naming ROW transition variables, but the parser + * recognizes the syntax so we can give a nicer message here. + * + * Per standard, REFERENCING TABLE names are only allowed on AFTER + * triggers. Per standard, REFERENCING ROW names are not allowed with FOR + * EACH STATEMENT. Per standard, each OLD/NEW, ROW/TABLE permutation is + * only allowed once. Per standard, OLD may not be specified when + * creating a trigger only for INSERT, and NEW may not be specified when + * creating a trigger only for DELETE. + * + * Notice that the standard allows an AFTER ... FOR EACH ROW trigger to + * reference both ROW and TABLE transition data. 
+ */ + if (stmt->transitionRels != NIL) + { + List *varList = stmt->transitionRels; + ListCell *lc; + + foreach(lc, varList) + { + TriggerTransition *tt = lfirst_node(TriggerTransition, lc); + + if (!(tt->isTable)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW variable naming in the REFERENCING clause is not supported"), + errhint("Use OLD TABLE or NEW TABLE for naming transition tables."))); + + /* + * Because of the above test, we omit further ROW-related testing + * below. If we later allow naming OLD and NEW ROW variables, + * adjustments will be needed below. + */ + + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a foreign table", + RelationGetRelationName(rel)), + errdetail("Triggers on foreign tables cannot have transition tables."))); + + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a view", + RelationGetRelationName(rel)), + errdetail("Triggers on views cannot have transition tables."))); + + /* + * We currently don't allow row-level triggers with transition + * tables on partition or inheritance children. Such triggers + * would somehow need to see tuples converted to the format of the + * table they're attached to, and it's not clear which subset of + * tuples each child should see. See also the prohibitions in + * ATExecAttachPartition() and ATExecAddInherit(). + */ + if (TRIGGER_FOR_ROW(tgtype) && has_superclass(rel->rd_id)) + { + /* Use appropriate error message. 
*/ + if (rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW triggers with transition tables are not supported on partitions"))); + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROW triggers with transition tables are not supported on inheritance children"))); + } + + if (stmt->timing != TRIGGER_TYPE_AFTER) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("transition table name can only be specified for an AFTER trigger"))); + + if (TRIGGER_FOR_TRUNCATE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("TRUNCATE triggers with transition tables are not supported"))); + + /* + * We currently don't allow multi-event triggers ("INSERT OR + * UPDATE") with transition tables, because it's not clear how to + * handle INSERT ... ON CONFLICT statements which can fire both + * INSERT and UPDATE triggers. We show the inserted tuples to + * INSERT triggers and the updated tuples to UPDATE triggers, but + * it's not yet clear what INSERT OR UPDATE trigger should see. + * This restriction could be lifted if we can decide on the right + * semantics in a later release. + */ + if (((TRIGGER_FOR_INSERT(tgtype) ? 1 : 0) + + (TRIGGER_FOR_UPDATE(tgtype) ? 1 : 0) + + (TRIGGER_FOR_DELETE(tgtype) ? 1 : 0)) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transition tables cannot be specified for triggers with more than one event"))); + + /* + * We currently don't allow column-specific triggers with + * transition tables. Per spec, that seems to require + * accumulating separate transition tables for each combination of + * columns, which is a lot of work for a rather marginal feature. 
+ */ + if (stmt->columns != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("transition tables cannot be specified for triggers with column lists"))); + + /* + * We disallow constraint triggers with transition tables, to + * protect the assumption that such triggers can't be deferred. + * See notes with AfterTriggers data structures, below. + * + * Currently this is enforced by the grammar, so just Assert here. + */ + Assert(!stmt->isconstraint); + + if (tt->isNew) + { + if (!(TRIGGER_FOR_INSERT(tgtype) || + TRIGGER_FOR_UPDATE(tgtype))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("NEW TABLE can only be specified for an INSERT or UPDATE trigger"))); + + if (newtablename != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("NEW TABLE cannot be specified multiple times"))); + + newtablename = tt->name; + } + else + { + if (!(TRIGGER_FOR_DELETE(tgtype) || + TRIGGER_FOR_UPDATE(tgtype))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE can only be specified for a DELETE or UPDATE trigger"))); + + if (oldtablename != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE cannot be specified multiple times"))); + + oldtablename = tt->name; + } + } + + if (newtablename != NULL && oldtablename != NULL && + strcmp(newtablename, oldtablename) == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("OLD TABLE name and NEW TABLE name cannot be the same"))); + } + + /* + * Parse the WHEN clause, if any and we weren't passed an already + * transformed one. + * + * Note that as a side effect, we fill whenRtable when parsing. If we got + * an already parsed clause, this does not occur, which is what we want -- + * no point in adding redundant dependencies below. 
+ */ + if (!whenClause && stmt->whenClause) + { + ParseState *pstate; + ParseNamespaceItem *nsitem; + List *varList; + ListCell *lc; + + /* Set up a pstate to parse with */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + /* + * Set up nsitems for OLD and NEW references. + * + * 'OLD' must always have varno equal to 1 and 'NEW' equal to 2. + */ + nsitem = addRangeTableEntryForRelation(pstate, rel, + AccessShareLock, + makeAlias("old", NIL), + false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + nsitem = addRangeTableEntryForRelation(pstate, rel, + AccessShareLock, + makeAlias("new", NIL), + false, false); + addNSItemToQuery(pstate, nsitem, false, true, true); + + /* Transform expression. Copy to be sure we don't modify original */ + whenClause = transformWhereClause(pstate, + copyObject(stmt->whenClause), + EXPR_KIND_TRIGGER_WHEN, + "WHEN"); + /* we have to fix its collations too */ + assign_expr_collations(pstate, whenClause); + + /* + * Check for disallowed references to OLD/NEW. + * + * NB: pull_var_clause is okay here only because we don't allow + * subselects in WHEN clauses; it would fail to examine the contents + * of subselects. 
+ */ + varList = pull_var_clause(whenClause, 0); + foreach(lc, varList) + { + Var *var = (Var *) lfirst(lc); + + switch (var->varno) + { + case PRS2_OLD_VARNO: + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("statement trigger's WHEN condition cannot reference column values"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_INSERT(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("INSERT trigger's WHEN condition cannot reference OLD values"), + parser_errposition(pstate, var->location))); + /* system columns are okay here */ + break; + case PRS2_NEW_VARNO: + if (!TRIGGER_FOR_ROW(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("statement trigger's WHEN condition cannot reference column values"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_DELETE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("DELETE trigger's WHEN condition cannot reference NEW values"), + parser_errposition(pstate, var->location))); + if (var->varattno < 0 && TRIGGER_FOR_BEFORE(tgtype)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("BEFORE trigger's WHEN condition cannot reference NEW system columns"), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_BEFORE(tgtype) && + var->varattno == 0 && + RelationGetDescr(rel)->constr && + RelationGetDescr(rel)->constr->has_generated_stored) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("BEFORE trigger's WHEN condition cannot reference NEW generated columns"), + errdetail("A whole-row reference is used and the table contains generated columns."), + parser_errposition(pstate, var->location))); + if (TRIGGER_FOR_BEFORE(tgtype) && + var->varattno > 0 && + TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attgenerated) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("BEFORE 
trigger's WHEN condition cannot reference NEW generated columns"), + errdetail("Column \"%s\" is a generated column.", + NameStr(TupleDescAttr(RelationGetDescr(rel), var->varattno - 1)->attname)), + parser_errposition(pstate, var->location))); + break; + default: + /* can't happen without add_missing_from, so just elog */ + elog(ERROR, "trigger WHEN condition cannot contain references to other relations"); + break; + } + } + + /* we'll need the rtable for recordDependencyOnExpr */ + whenRtable = pstate->p_rtable; + + qual = nodeToString(whenClause); + + free_parsestate(pstate); + } + else if (!whenClause) + { + whenClause = NULL; + whenRtable = NIL; + qual = NULL; + } + else + { + qual = nodeToString(whenClause); + whenRtable = NIL; + } + + /* + * Find and validate the trigger function. + */ + if (!OidIsValid(funcoid)) + funcoid = LookupFuncName(stmt->funcname, 0, NULL, false); + if (!isInternal) + { + aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(stmt->funcname)); + } + funcrettype = get_func_rettype(funcoid); + if (funcrettype != TRIGGEROID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function %s must return type %s", + NameListToString(stmt->funcname), "trigger"))); + + /* + * Scan pg_trigger to see if there is already a trigger of the same name. + * Skip this for internally generated triggers, since we'll modify the + * name to be unique below. + * + * NOTE that this is cool only because we have ShareRowExclusiveLock on + * the relation, so the trigger set won't be changing underneath us. 
+ */ + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + if (!isInternal) + { + ScanKeyData skeys[2]; + SysScanDesc tgscan; + + ScanKeyInit(&skeys[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + + ScanKeyInit(&skeys[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(stmt->trigname)); + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, skeys); + + /* There should be at most one matching tuple */ + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger oldtrigger = (Form_pg_trigger) GETSTRUCT(tuple); + + trigoid = oldtrigger->oid; + existing_constraint_oid = oldtrigger->tgconstraint; + existing_isInternal = oldtrigger->tgisinternal; + existing_isClone = OidIsValid(oldtrigger->tgparentid); + trigger_exists = true; + /* copy the tuple to use in CatalogTupleUpdate() */ + tuple = heap_copytuple(tuple); + } + systable_endscan(tgscan); + } + + if (!trigger_exists) + { + /* Generate the OID for the new trigger. */ + trigoid = GetNewOidWithIndex(tgrel, TriggerOidIndexId, + Anum_pg_trigger_oid); + } + else + { + /* + * If OR REPLACE was specified, we'll replace the old trigger; + * otherwise complain about the duplicate name. + */ + if (!stmt->replace) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" already exists", + stmt->trigname, RelationGetRelationName(rel)))); + + /* + * An internal trigger or a child trigger (isClone) cannot be replaced + * by a user-defined trigger. However, skip this test when + * in_partition, because then we're recursing from a partitioned table + * and the check was made at the parent level. 
+ */ + if ((existing_isInternal || existing_isClone) && + !isInternal && !in_partition) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" is an internal or a child trigger", + stmt->trigname, RelationGetRelationName(rel)))); + + /* + * It is not allowed to replace with a constraint trigger; gram.y + * should have enforced this already. + */ + Assert(!stmt->isconstraint); + + /* + * It is not allowed to replace an existing constraint trigger, + * either. (The reason for these restrictions is partly that it seems + * difficult to deal with pending trigger events in such cases, and + * partly that the command might imply changing the constraint's + * properties as well, which doesn't seem nice.) + */ + if (OidIsValid(existing_constraint_oid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" is a constraint trigger", + stmt->trigname, RelationGetRelationName(rel)))); + } + + /* + * If it's a user-entered CREATE CONSTRAINT TRIGGER command, make a + * corresponding pg_constraint entry. + */ + if (stmt->isconstraint && !OidIsValid(constraintOid)) + { + /* Internal callers should have made their own constraints */ + Assert(!isInternal); + constraintOid = CreateConstraintEntry(stmt->trigname, + RelationGetNamespace(rel), + CONSTRAINT_TRIGGER, + stmt->deferrable, + stmt->initdeferred, + true, + InvalidOid, /* no parent */ + RelationGetRelid(rel), + NULL, /* no conkey */ + 0, + 0, + InvalidOid, /* no domain */ + InvalidOid, /* no index */ + InvalidOid, /* no foreign key */ + NULL, + NULL, + NULL, + NULL, + 0, + ' ', + ' ', + NULL, + 0, + ' ', + NULL, /* no exclusion */ + NULL, /* no check constraint */ + NULL, + true, /* islocal */ + 0, /* inhcount */ + true, /* noinherit */ + isInternal); /* is_internal */ + } + + /* + * If trigger is internally generated, modify the provided trigger name to + * ensure uniqueness by appending the trigger OID. 
(Callers will usually + * supply a simple constant trigger name in these cases.) + */ + if (isInternal) + { + snprintf(internaltrigname, sizeof(internaltrigname), + "%s_%u", stmt->trigname, trigoid); + trigname = internaltrigname; + } + else + { + /* user-defined trigger; use the specified trigger name as-is */ + trigname = stmt->trigname; + } + + /* + * Build the new pg_trigger tuple. + */ + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_trigger_oid - 1] = ObjectIdGetDatum(trigoid); + values[Anum_pg_trigger_tgrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel)); + values[Anum_pg_trigger_tgparentid - 1] = ObjectIdGetDatum(parentTriggerOid); + values[Anum_pg_trigger_tgname - 1] = DirectFunctionCall1(namein, + CStringGetDatum(trigname)); + values[Anum_pg_trigger_tgfoid - 1] = ObjectIdGetDatum(funcoid); + values[Anum_pg_trigger_tgtype - 1] = Int16GetDatum(tgtype); + values[Anum_pg_trigger_tgenabled - 1] = trigger_fires_when; + values[Anum_pg_trigger_tgisinternal - 1] = BoolGetDatum(isInternal); + values[Anum_pg_trigger_tgconstrrelid - 1] = ObjectIdGetDatum(constrrelid); + values[Anum_pg_trigger_tgconstrindid - 1] = ObjectIdGetDatum(indexOid); + values[Anum_pg_trigger_tgconstraint - 1] = ObjectIdGetDatum(constraintOid); + values[Anum_pg_trigger_tgdeferrable - 1] = BoolGetDatum(stmt->deferrable); + values[Anum_pg_trigger_tginitdeferred - 1] = BoolGetDatum(stmt->initdeferred); + + if (stmt->args) + { + ListCell *le; + char *args; + int16 nargs = list_length(stmt->args); + int len = 0; + + foreach(le, stmt->args) + { + char *ar = strVal(lfirst(le)); + + len += strlen(ar) + 4; + for (; *ar; ar++) + { + if (*ar == '\\') + len++; + } + } + args = (char *) palloc(len + 1); + args[0] = '\0'; + foreach(le, stmt->args) + { + char *s = strVal(lfirst(le)); + char *d = args + strlen(args); + + while (*s) + { + if (*s == '\\') + *d++ = '\\'; + *d++ = *s++; + } + strcpy(d, "\\000"); + } + values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(nargs); + 
values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain, + CStringGetDatum(args)); + } + else + { + values[Anum_pg_trigger_tgnargs - 1] = Int16GetDatum(0); + values[Anum_pg_trigger_tgargs - 1] = DirectFunctionCall1(byteain, + CStringGetDatum("")); + } + + /* build column number array if it's a column-specific trigger */ + ncolumns = list_length(stmt->columns); + if (ncolumns == 0) + columns = NULL; + else + { + ListCell *cell; + int i = 0; + + columns = (int16 *) palloc(ncolumns * sizeof(int16)); + foreach(cell, stmt->columns) + { + char *name = strVal(lfirst(cell)); + int16 attnum; + int j; + + /* Lookup column name. System columns are not allowed */ + attnum = attnameAttNum(rel, name, false); + if (attnum == InvalidAttrNumber) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + name, RelationGetRelationName(rel)))); + + /* Check for duplicates */ + for (j = i - 1; j >= 0; j--) + { + if (columns[j] == attnum) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + name))); + } + + columns[i++] = attnum; + } + } + tgattr = buildint2vector(columns, ncolumns); + values[Anum_pg_trigger_tgattr - 1] = PointerGetDatum(tgattr); + + /* set tgqual if trigger has WHEN clause */ + if (qual) + values[Anum_pg_trigger_tgqual - 1] = CStringGetTextDatum(qual); + else + nulls[Anum_pg_trigger_tgqual - 1] = true; + + if (oldtablename) + values[Anum_pg_trigger_tgoldtable - 1] = DirectFunctionCall1(namein, + CStringGetDatum(oldtablename)); + else + nulls[Anum_pg_trigger_tgoldtable - 1] = true; + if (newtablename) + values[Anum_pg_trigger_tgnewtable - 1] = DirectFunctionCall1(namein, + CStringGetDatum(newtablename)); + else + nulls[Anum_pg_trigger_tgnewtable - 1] = true; + + /* + * Insert or replace tuple in pg_trigger. 
+ */ + if (!trigger_exists) + { + tuple = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleInsert(tgrel, tuple); + } + else + { + HeapTuple newtup; + + newtup = heap_form_tuple(tgrel->rd_att, values, nulls); + CatalogTupleUpdate(tgrel, &tuple->t_self, newtup); + heap_freetuple(newtup); + } + + heap_freetuple(tuple); /* free either original or new tuple */ + table_close(tgrel, RowExclusiveLock); + + pfree(DatumGetPointer(values[Anum_pg_trigger_tgname - 1])); + pfree(DatumGetPointer(values[Anum_pg_trigger_tgargs - 1])); + pfree(DatumGetPointer(values[Anum_pg_trigger_tgattr - 1])); + if (oldtablename) + pfree(DatumGetPointer(values[Anum_pg_trigger_tgoldtable - 1])); + if (newtablename) + pfree(DatumGetPointer(values[Anum_pg_trigger_tgnewtable - 1])); + + /* + * Update relation's pg_class entry; if necessary; and if not, send an SI + * message to make other backends (and this one) rebuild relcache entries. + */ + pgrel = table_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + if (!((Form_pg_class) GETSTRUCT(tuple))->relhastriggers) + { + ((Form_pg_class) GETSTRUCT(tuple))->relhastriggers = true; + + CatalogTupleUpdate(pgrel, &tuple->t_self, tuple); + + CommandCounterIncrement(); + } + else + CacheInvalidateRelcacheByTuple(tuple); + + heap_freetuple(tuple); + table_close(pgrel, RowExclusiveLock); + + /* + * If we're replacing a trigger, flush all the old dependencies before + * recording new ones. + */ + if (trigger_exists) + deleteDependencyRecordsFor(TriggerRelationId, trigoid, true); + + /* + * Record dependencies for trigger. Always place a normal dependency on + * the function. 
+ */ + myself.classId = TriggerRelationId; + myself.objectId = trigoid; + myself.objectSubId = 0; + + referenced.classId = ProcedureRelationId; + referenced.objectId = funcoid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (isInternal && OidIsValid(constraintOid)) + { + /* + * Internally-generated trigger for a constraint, so make it an + * internal dependency of the constraint. We can skip depending on + * the relation(s), as there'll be an indirect dependency via the + * constraint. + */ + referenced.classId = ConstraintRelationId; + referenced.objectId = constraintOid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); + } + else + { + /* + * User CREATE TRIGGER, so place dependencies. We make trigger be + * auto-dropped if its relation is dropped or if the FK relation is + * dropped. (Auto drop is compatible with our pre-7.3 behavior.) + */ + referenced.classId = RelationRelationId; + referenced.objectId = RelationGetRelid(rel); + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + if (OidIsValid(constrrelid)) + { + referenced.classId = RelationRelationId; + referenced.objectId = constrrelid; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + } + /* Not possible to have an index dependency in this case */ + Assert(!OidIsValid(indexOid)); + + /* + * If it's a user-specified constraint trigger, make the constraint + * internally dependent on the trigger instead of vice versa. + */ + if (OidIsValid(constraintOid)) + { + referenced.classId = ConstraintRelationId; + referenced.objectId = constraintOid; + referenced.objectSubId = 0; + recordDependencyOn(&referenced, &myself, DEPENDENCY_INTERNAL); + } + + /* + * If it's a partition trigger, create the partition dependencies. 
+ */ + if (OidIsValid(parentTriggerOid)) + { + ObjectAddressSet(referenced, TriggerRelationId, parentTriggerOid); + recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI); + ObjectAddressSet(referenced, RelationRelationId, RelationGetRelid(rel)); + recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC); + } + } + + /* If column-specific trigger, add normal dependencies on columns */ + if (columns != NULL) + { + int i; + + referenced.classId = RelationRelationId; + referenced.objectId = RelationGetRelid(rel); + for (i = 0; i < ncolumns; i++) + { + referenced.objectSubId = columns[i]; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + } + + /* + * If it has a WHEN clause, add dependencies on objects mentioned in the + * expression (eg, functions, as well as any columns used). + */ + if (whenRtable != NIL) + recordDependencyOnExpr(&myself, whenClause, whenRtable, + DEPENDENCY_NORMAL); + + /* Post creation hook for new trigger */ + InvokeObjectPostCreateHookArg(TriggerRelationId, trigoid, 0, + isInternal); + + /* + * Lastly, create the trigger on child relations, if needed. + */ + if (partition_recurse) + { + PartitionDesc partdesc = RelationGetPartitionDesc(rel, true); + List *idxs = NIL; + List *childTbls = NIL; + ListCell *l; + int i; + MemoryContext oldcxt, + perChildCxt; + + perChildCxt = AllocSetContextCreate(CurrentMemoryContext, + "part trig clone", + ALLOCSET_SMALL_SIZES); + + /* + * When a trigger is being created associated with an index, we'll + * need to associate the trigger in each child partition with the + * corresponding index on it. 
+ */ + if (OidIsValid(indexOid)) + { + ListCell *l; + List *idxs = NIL; + + idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock); + foreach(l, idxs) + childTbls = lappend_oid(childTbls, + IndexGetRelation(lfirst_oid(l), + false)); + } + + oldcxt = MemoryContextSwitchTo(perChildCxt); + + /* Iterate to create the trigger on each existing partition */ + for (i = 0; i < partdesc->nparts; i++) + { + Oid indexOnChild = InvalidOid; + ListCell *l2; + CreateTrigStmt *childStmt; + Relation childTbl; + Node *qual; + + childTbl = table_open(partdesc->oids[i], ShareRowExclusiveLock); + + /* Find which of the child indexes is the one on this partition */ + if (OidIsValid(indexOid)) + { + forboth(l, idxs, l2, childTbls) + { + if (lfirst_oid(l2) == partdesc->oids[i]) + { + indexOnChild = lfirst_oid(l); + break; + } + } + if (!OidIsValid(indexOnChild)) + elog(ERROR, "failed to find index matching index \"%s\" in partition \"%s\"", + get_rel_name(indexOid), + get_rel_name(partdesc->oids[i])); + } + + /* + * Initialize our fabricated parse node by copying the original + * one, then resetting fields that we pass separately. 
+ */ + childStmt = (CreateTrigStmt *) copyObject(stmt); + childStmt->funcname = NIL; + childStmt->whenClause = NULL; + + /* If there is a WHEN clause, create a modified copy of it */ + qual = copyObject(whenClause); + qual = (Node *) + map_partition_varattnos((List *) qual, PRS2_OLD_VARNO, + childTbl, rel); + qual = (Node *) + map_partition_varattnos((List *) qual, PRS2_NEW_VARNO, + childTbl, rel); + + CreateTriggerFiringOn(childStmt, queryString, + partdesc->oids[i], refRelOid, + InvalidOid, indexOnChild, + funcoid, trigoid, qual, + isInternal, true, trigger_fires_when); + + table_close(childTbl, NoLock); + + MemoryContextReset(perChildCxt); + } + + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(perChildCxt); + list_free(idxs); + list_free(childTbls); + } + + /* Keep lock on target rel until end of xact */ + table_close(rel, NoLock); + + return myself; +} + +/* + * TriggerSetParentTrigger + * Set a partition's trigger as child of its parent trigger, + * or remove the linkage if parentTrigId is InvalidOid. + * + * This updates the constraint's pg_trigger row to show it as inherited, and + * adds PARTITION dependencies to prevent the trigger from being deleted + * on its own. Alternatively, reverse that. + */ +void +TriggerSetParentTrigger(Relation trigRel, + Oid childTrigId, + Oid parentTrigId, + Oid childTableId) +{ + SysScanDesc tgscan; + ScanKeyData skey[1]; + Form_pg_trigger trigForm; + HeapTuple tuple, + newtup; + ObjectAddress depender; + ObjectAddress referenced; + + /* + * Find the trigger to delete. 
+ */ + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(childTrigId)); + + tgscan = systable_beginscan(trigRel, TriggerOidIndexId, true, + NULL, 1, skey); + + tuple = systable_getnext(tgscan); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for trigger %u", childTrigId); + newtup = heap_copytuple(tuple); + trigForm = (Form_pg_trigger) GETSTRUCT(newtup); + if (OidIsValid(parentTrigId)) + { + /* don't allow setting parent for a constraint that already has one */ + if (OidIsValid(trigForm->tgparentid)) + elog(ERROR, "trigger %u already has a parent trigger", + childTrigId); + + trigForm->tgparentid = parentTrigId; + + CatalogTupleUpdate(trigRel, &tuple->t_self, newtup); + + ObjectAddressSet(depender, TriggerRelationId, childTrigId); + + ObjectAddressSet(referenced, TriggerRelationId, parentTrigId); + recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_PRI); + + ObjectAddressSet(referenced, RelationRelationId, childTableId); + recordDependencyOn(&depender, &referenced, DEPENDENCY_PARTITION_SEC); + } + else + { + trigForm->tgparentid = InvalidOid; + + CatalogTupleUpdate(trigRel, &tuple->t_self, newtup); + + deleteDependencyRecordsForClass(TriggerRelationId, childTrigId, + TriggerRelationId, + DEPENDENCY_PARTITION_PRI); + deleteDependencyRecordsForClass(TriggerRelationId, childTrigId, + RelationRelationId, + DEPENDENCY_PARTITION_SEC); + } + + heap_freetuple(newtup); + systable_endscan(tgscan); +} + + +/* + * Guts of trigger deletion. + */ +void +RemoveTriggerById(Oid trigOid) +{ + Relation tgrel; + SysScanDesc tgscan; + ScanKeyData skey[1]; + HeapTuple tup; + Oid relid; + Relation rel; + + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Find the trigger to delete. 
+ */ + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(trigOid)); + + tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true, + NULL, 1, skey); + + tup = systable_getnext(tgscan); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "could not find tuple for trigger %u", trigOid); + + /* + * Open and exclusive-lock the relation the trigger belongs to. + */ + relid = ((Form_pg_trigger) GETSTRUCT(tup))->tgrelid; + + rel = table_open(relid, AccessExclusiveLock); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_VIEW && + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + + if (!allowSystemTableMods && IsSystemRelation(rel)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + RelationGetRelationName(rel)))); + + /* + * Delete the pg_trigger tuple. + */ + CatalogTupleDelete(tgrel, &tup->t_self); + + systable_endscan(tgscan); + table_close(tgrel, RowExclusiveLock); + + /* + * We do not bother to try to determine whether any other triggers remain, + * which would be needed in order to decide whether it's safe to clear the + * relation's relhastriggers. (In any case, there might be a concurrent + * process adding new triggers.) Instead, just force a relcache inval to + * make other backends (and this one too!) rebuild their relcache entries. + * There's no great harm in leaving relhastriggers true even if there are + * no triggers left. + */ + CacheInvalidateRelcache(rel); + + /* Keep lock on trigger's rel until end of xact */ + table_close(rel, NoLock); +} + +/* + * get_trigger_oid - Look up a trigger by name to find its OID. 
+ * + * If missing_ok is false, throw an error if trigger not found. If + * true, just return InvalidOid. + */ +Oid +get_trigger_oid(Oid relid, const char *trigname, bool missing_ok) +{ + Relation tgrel; + ScanKeyData skey[2]; + SysScanDesc tgscan; + HeapTuple tup; + Oid oid; + + /* + * Find the trigger, verify permissions, set up object address + */ + tgrel = table_open(TriggerRelationId, AccessShareLock); + + ScanKeyInit(&skey[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&skey[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(trigname)); + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, skey); + + tup = systable_getnext(tgscan); + + if (!HeapTupleIsValid(tup)) + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + trigname, get_rel_name(relid)))); + oid = InvalidOid; + } + else + { + oid = ((Form_pg_trigger) GETSTRUCT(tup))->oid; + } + + systable_endscan(tgscan); + table_close(tgrel, AccessShareLock); + return oid; +} + +/* + * Perform permissions and integrity checks before acquiring a relation lock. 
+ */ +static void +RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid, + void *arg) +{ + HeapTuple tuple; + Form_pg_class form; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped */ + form = (Form_pg_class) GETSTRUCT(tuple); + + /* only tables and views can have triggers */ + if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW && + form->relkind != RELKIND_FOREIGN_TABLE && + form->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" cannot have triggers", + rv->relname), + errdetail_relkind_not_supported(form->relkind))); + + /* you must own the table to rename one of its triggers */ + if (!pg_class_ownercheck(relid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); + if (!allowSystemTableMods && IsSystemClass(relid, form)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system catalog", + rv->relname))); + + ReleaseSysCache(tuple); +} + +/* + * renametrig - changes the name of a trigger on a relation + * + * trigger name is changed in trigger catalog. + * No record of the previous name is kept. + * + * get proper relrelation from relation catalog (if not arg) + * scan trigger catalog + * for name conflict (within rel) + * for original trigger (if not arg) + * modify tgname in trigger tuple + * update row in catalog + */ +ObjectAddress +renametrig(RenameStmt *stmt) +{ + Oid tgoid; + Relation targetrel; + Relation tgrel; + HeapTuple tuple; + SysScanDesc tgscan; + ScanKeyData key[2]; + Oid relid; + ObjectAddress address; + + /* + * Look up name, check permissions, and acquire lock (which we will NOT + * release until end of transaction). 
+ */ + relid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, + 0, + RangeVarCallbackForRenameTrigger, + NULL); + + /* Have lock already, so just need to build relcache entry. */ + targetrel = relation_open(relid, NoLock); + + /* + * On partitioned tables, this operation recurses to partitions. Lock all + * tables upfront. + */ + if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + (void) find_all_inheritors(relid, AccessExclusiveLock, NULL); + + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + /* + * Search for the trigger to modify. + */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&key[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + PointerGetDatum(stmt->subname)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, key); + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger trigform; + + trigform = (Form_pg_trigger) GETSTRUCT(tuple); + tgoid = trigform->oid; + + /* + * If the trigger descends from a trigger on a parent partitioned + * table, reject the rename. We don't allow a trigger in a partition + * to differ in name from that of its parent: that would lead to an + * inconsistency that pg_dump would not reproduce. + */ + if (OidIsValid(trigform->tgparentid)) + ereport(ERROR, + errmsg("cannot rename trigger \"%s\" on table \"%s\"", + stmt->subname, RelationGetRelationName(targetrel)), + errhint("Rename the trigger on the partitioned table \"%s\" instead.", + get_rel_name(get_partition_parent(relid, false)))); + + + /* Rename the trigger on this relation ... */ + renametrig_internal(tgrel, targetrel, tuple, stmt->newname, + stmt->subname); + + /* ... 
and if it is partitioned, recurse to its partitions */ + if (targetrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(targetrel, true); + + for (int i = 0; i < partdesc->nparts; i++) + { + Oid partitionId = partdesc->oids[i]; + + renametrig_partition(tgrel, partitionId, trigform->oid, + stmt->newname, stmt->subname); + } + } + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + stmt->subname, RelationGetRelationName(targetrel)))); + } + + ObjectAddressSet(address, TriggerRelationId, tgoid); + + systable_endscan(tgscan); + + table_close(tgrel, RowExclusiveLock); + + /* + * Close rel, but keep exclusive lock! + */ + relation_close(targetrel, NoLock); + + return address; +} + +/* + * Subroutine for renametrig -- perform the actual work of renaming one + * trigger on one table. + * + * If the trigger has a name different from the expected one, raise a + * NOTICE about it. + */ +static void +renametrig_internal(Relation tgrel, Relation targetrel, HeapTuple trigtup, + const char *newname, const char *expected_name) +{ + HeapTuple tuple; + Form_pg_trigger tgform; + ScanKeyData key[2]; + SysScanDesc tgscan; + + /* If the trigger already has the new name, nothing to do. */ + tgform = (Form_pg_trigger) GETSTRUCT(trigtup); + if (strcmp(NameStr(tgform->tgname), newname) == 0) + return; + + /* + * Before actually trying the rename, search for triggers with the same + * name. The update would fail with an ugly message in that case, and it + * is better to throw a nicer error. 
+ */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(targetrel))); + ScanKeyInit(&key[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + PointerGetDatum(newname)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 2, key); + if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("trigger \"%s\" for relation \"%s\" already exists", + newname, RelationGetRelationName(targetrel)))); + systable_endscan(tgscan); + + /* + * The target name is free; update the existing pg_trigger tuple with it. + */ + tuple = heap_copytuple(trigtup); /* need a modifiable copy */ + tgform = (Form_pg_trigger) GETSTRUCT(tuple); + + /* + * If the trigger has a name different from what we expected, let the user + * know. (We can proceed anyway, since we must have reached here following + * a tgparentid link.) + */ + if (strcmp(NameStr(tgform->tgname), expected_name) != 0) + ereport(NOTICE, + errmsg("renamed trigger \"%s\" on relation \"%s\"", + NameStr(tgform->tgname), + RelationGetRelationName(targetrel))); + + namestrcpy(&tgform->tgname, newname); + + CatalogTupleUpdate(tgrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(TriggerRelationId, tgform->oid, 0); + + /* + * Invalidate relation's relcache entry so that other backends (and this + * one too!) are sent SI message to make them rebuild relcache entries. + * (Ideally this should happen automatically...) + */ + CacheInvalidateRelcache(targetrel); +} + +/* + * Subroutine for renametrig -- Helper for recursing to partitions when + * renaming triggers on a partitioned table. 
+ */ +static void +renametrig_partition(Relation tgrel, Oid partitionId, Oid parentTriggerOid, + const char *newname, const char *expected_name) +{ + SysScanDesc tgscan; + ScanKeyData key; + HeapTuple tuple; + + /* + * Given a relation and the OID of a trigger on parent relation, find the + * corresponding trigger in the child and rename that trigger to the given + * name. + */ + ScanKeyInit(&key, + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partitionId)); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 1, &key); + while (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger tgform = (Form_pg_trigger) GETSTRUCT(tuple); + Relation partitionRel; + + if (tgform->tgparentid != parentTriggerOid) + continue; /* not our trigger */ + + partitionRel = table_open(partitionId, NoLock); + + /* Rename the trigger on this partition */ + renametrig_internal(tgrel, partitionRel, tuple, newname, expected_name); + + /* And if this relation is partitioned, recurse to its partitions */ + if (partitionRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDesc partdesc = RelationGetPartitionDesc(partitionRel, + true); + + for (int i = 0; i < partdesc->nparts; i++) + { + Oid partitionId = partdesc->oids[i]; + + renametrig_partition(tgrel, partitionId, tgform->oid, newname, + NameStr(tgform->tgname)); + } + } + table_close(partitionRel, NoLock); + + /* There should be at most one matching tuple */ + break; + } + systable_endscan(tgscan); +} + +/* + * EnableDisableTrigger() + * + * Called by ALTER TABLE ENABLE/DISABLE [ REPLICA | ALWAYS ] TRIGGER + * to change 'tgenabled' field for the specified trigger(s) + * + * rel: relation to process (caller must hold suitable lock on it) + * tgname: name of trigger to process, or NULL to scan all triggers + * tgparent: if not zero, process only triggers with this tgparentid + * fires_when: new value for tgenabled field. 
In addition to generic + * enablement/disablement, this also defines when the trigger + * should be fired in session replication roles. + * skip_system: if true, skip "system" triggers (constraint triggers) + * recurse: if true, recurse to partitions + * + * Caller should have checked permissions for the table; here we also + * enforce that superuser privilege is required to alter the state of + * system triggers + */ +void +EnableDisableTriggerNew2(Relation rel, const char *tgname, Oid tgparent, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode) +{ + Relation tgrel; + int nkeys; + ScanKeyData keys[2]; + SysScanDesc tgscan; + HeapTuple tuple; + bool found; + bool changed; + + /* Scan the relevant entries in pg_triggers */ + tgrel = table_open(TriggerRelationId, RowExclusiveLock); + + ScanKeyInit(&keys[0], + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + if (tgname) + { + ScanKeyInit(&keys[1], + Anum_pg_trigger_tgname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(tgname)); + nkeys = 2; + } + else + nkeys = 1; + + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, nkeys, keys); + + found = changed = false; + + while (HeapTupleIsValid(tuple = systable_getnext(tgscan))) + { + Form_pg_trigger oldtrig = (Form_pg_trigger) GETSTRUCT(tuple); + + if (OidIsValid(tgparent) && tgparent != oldtrig->tgparentid) + continue; + + if (oldtrig->tgisinternal) + { + /* system trigger ... ok to process? */ + if (skip_system) + continue; + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied: \"%s\" is a system trigger", + NameStr(oldtrig->tgname)))); + } + + found = true; + + if (oldtrig->tgenabled != fires_when) + { + /* need to change this one ... 
make a copy to scribble on */ + HeapTuple newtup = heap_copytuple(tuple); + Form_pg_trigger newtrig = (Form_pg_trigger) GETSTRUCT(newtup); + + newtrig->tgenabled = fires_when; + + CatalogTupleUpdate(tgrel, &newtup->t_self, newtup); + + heap_freetuple(newtup); + + changed = true; + } + + /* + * When altering FOR EACH ROW triggers on a partitioned table, do the + * same on the partitions as well, unless ONLY is specified. + * + * Note that we recurse even if we didn't change the trigger above, + * because the partitions' copy of the trigger may have a different + * value of tgenabled than the parent's trigger and thus might need to + * be changed. + */ + if (recurse && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + (TRIGGER_FOR_ROW(oldtrig->tgtype))) + { + PartitionDesc partdesc = RelationGetPartitionDesc(rel, true); + int i; + + for (i = 0; i < partdesc->nparts; i++) + { + Relation part; + + part = relation_open(partdesc->oids[i], lockmode); + /* Match on child triggers' tgparentid, not their name */ + EnableDisableTriggerNew2(part, NULL, oldtrig->oid, + fires_when, skip_system, recurse, + lockmode); + table_close(part, NoLock); /* keep lock till commit */ + } + } + + InvokeObjectPostAlterHook(TriggerRelationId, + oldtrig->oid, 0); + } + + systable_endscan(tgscan); + + table_close(tgrel, RowExclusiveLock); + + if (tgname && !found) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("trigger \"%s\" for table \"%s\" does not exist", + tgname, RelationGetRelationName(rel)))); + + /* + * If we changed anything, broadcast a SI inval message to force each + * backend (including our own!) to rebuild relation's relcache entry. + * Otherwise they will fail to apply the change promptly. + */ + if (changed) + CacheInvalidateRelcache(rel); +} + +/* + * ABI-compatible wrappers to emulate old versions of the above function. + * Do not call these versions in new code. 
+ */ +void +EnableDisableTriggerNew(Relation rel, const char *tgname, + char fires_when, bool skip_system, bool recurse, + LOCKMODE lockmode) +{ + EnableDisableTriggerNew2(rel, tgname, InvalidOid, + fires_when, skip_system, + recurse, lockmode); +} + +void +EnableDisableTrigger(Relation rel, const char *tgname, + char fires_when, bool skip_system, + LOCKMODE lockmode) +{ + EnableDisableTriggerNew2(rel, tgname, InvalidOid, + fires_when, skip_system, + true, lockmode); +} + + +/* + * Build trigger data to attach to the given relcache entry. + * + * Note that trigger data attached to a relcache entry must be stored in + * CacheMemoryContext to ensure it survives as long as the relcache entry. + * But we should be running in a less long-lived working context. To avoid + * leaking cache memory if this routine fails partway through, we build a + * temporary TriggerDesc in working memory and then copy the completed + * structure into cache memory. + */ +void +RelationBuildTriggers(Relation relation) +{ + TriggerDesc *trigdesc; + int numtrigs; + int maxtrigs; + Trigger *triggers; + Relation tgrel; + ScanKeyData skey; + SysScanDesc tgscan; + HeapTuple htup; + MemoryContext oldContext; + int i; + + /* + * Allocate a working array to hold the triggers (the array is extended if + * necessary) + */ + maxtrigs = 16; + triggers = (Trigger *) palloc(maxtrigs * sizeof(Trigger)); + numtrigs = 0; + + /* + * Note: since we scan the triggers using TriggerRelidNameIndexId, we will + * be reading the triggers in name order, except possibly during + * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn + * ensures that triggers will be fired in name order. 
+ */ + ScanKeyInit(&skey, + Anum_pg_trigger_tgrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + tgrel = table_open(TriggerRelationId, AccessShareLock); + tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(tgscan))) + { + Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup); + Trigger *build; + Datum datum; + bool isnull; + + if (numtrigs >= maxtrigs) + { + maxtrigs *= 2; + triggers = (Trigger *) repalloc(triggers, maxtrigs * sizeof(Trigger)); + } + build = &(triggers[numtrigs]); + + build->tgoid = pg_trigger->oid; + build->tgname = DatumGetCString(DirectFunctionCall1(nameout, + NameGetDatum(&pg_trigger->tgname))); + build->tgfoid = pg_trigger->tgfoid; + build->tgtype = pg_trigger->tgtype; + build->tgenabled = pg_trigger->tgenabled; + build->tgisinternal = pg_trigger->tgisinternal; + build->tgisclone = OidIsValid(pg_trigger->tgparentid); + build->tgconstrrelid = pg_trigger->tgconstrrelid; + build->tgconstrindid = pg_trigger->tgconstrindid; + build->tgconstraint = pg_trigger->tgconstraint; + build->tgdeferrable = pg_trigger->tgdeferrable; + build->tginitdeferred = pg_trigger->tginitdeferred; + build->tgnargs = pg_trigger->tgnargs; + /* tgattr is first var-width field, so OK to access directly */ + build->tgnattr = pg_trigger->tgattr.dim1; + if (build->tgnattr > 0) + { + build->tgattr = (int16 *) palloc(build->tgnattr * sizeof(int16)); + memcpy(build->tgattr, &(pg_trigger->tgattr.values), + build->tgnattr * sizeof(int16)); + } + else + build->tgattr = NULL; + if (build->tgnargs > 0) + { + bytea *val; + char *p; + + val = DatumGetByteaPP(fastgetattr(htup, + Anum_pg_trigger_tgargs, + tgrel->rd_att, &isnull)); + if (isnull) + elog(ERROR, "tgargs is null in trigger for relation \"%s\"", + RelationGetRelationName(relation)); + p = (char *) VARDATA_ANY(val); + build->tgargs = (char **) palloc(build->tgnargs * sizeof(char *)); + for (i = 
0; i < build->tgnargs; i++) + { + build->tgargs[i] = pstrdup(p); + p += strlen(p) + 1; + } + } + else + build->tgargs = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgoldtable, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgoldtable = + DatumGetCString(DirectFunctionCall1(nameout, datum)); + else + build->tgoldtable = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgnewtable, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgnewtable = + DatumGetCString(DirectFunctionCall1(nameout, datum)); + else + build->tgnewtable = NULL; + + datum = fastgetattr(htup, Anum_pg_trigger_tgqual, + tgrel->rd_att, &isnull); + if (!isnull) + build->tgqual = TextDatumGetCString(datum); + else + build->tgqual = NULL; + + numtrigs++; + } + + systable_endscan(tgscan); + table_close(tgrel, AccessShareLock); + + /* There might not be any triggers */ + if (numtrigs == 0) + { + pfree(triggers); + return; + } + + /* Build trigdesc */ + trigdesc = (TriggerDesc *) palloc0(sizeof(TriggerDesc)); + trigdesc->triggers = triggers; + trigdesc->numtriggers = numtrigs; + for (i = 0; i < numtrigs; i++) + SetTriggerFlags(trigdesc, &(triggers[i])); + + /* Copy completed trigdesc into cache storage */ + oldContext = MemoryContextSwitchTo(CacheMemoryContext); + relation->trigdesc = CopyTriggerDesc(trigdesc); + MemoryContextSwitchTo(oldContext); + + /* Release working memory */ + FreeTriggerDesc(trigdesc); +} + +/* + * Update the TriggerDesc's hint flags to include the specified trigger + */ +static void +SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger) +{ + int16 tgtype = trigger->tgtype; + + trigdesc->trig_insert_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_INSERT); + 
trigdesc->trig_insert_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_INSERT); + trigdesc->trig_insert_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_INSERT); + trigdesc->trig_update_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_UPDATE); + trigdesc->trig_update_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_UPDATE); + trigdesc->trig_delete_before_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_after_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_instead_row |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_DELETE); + trigdesc->trig_delete_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_DELETE); + /* there are no row-level truncate triggers */ + trigdesc->trig_truncate_before_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, TRIGGER_TYPE_TRUNCATE); + trigdesc->trig_truncate_after_statement |= + TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_AFTER, TRIGGER_TYPE_TRUNCATE); + + 
trigdesc->trig_insert_new_table |= + (TRIGGER_FOR_INSERT(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable)); + trigdesc->trig_update_old_table |= + (TRIGGER_FOR_UPDATE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable)); + trigdesc->trig_update_new_table |= + (TRIGGER_FOR_UPDATE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable)); + trigdesc->trig_delete_old_table |= + (TRIGGER_FOR_DELETE(tgtype) && + TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable)); +} + +/* + * Copy a TriggerDesc data structure. + * + * The copy is allocated in the current memory context. + */ +TriggerDesc * +CopyTriggerDesc(TriggerDesc *trigdesc) +{ + TriggerDesc *newdesc; + Trigger *trigger; + int i; + + if (trigdesc == NULL || trigdesc->numtriggers <= 0) + return NULL; + + newdesc = (TriggerDesc *) palloc(sizeof(TriggerDesc)); + memcpy(newdesc, trigdesc, sizeof(TriggerDesc)); + + trigger = (Trigger *) palloc(trigdesc->numtriggers * sizeof(Trigger)); + memcpy(trigger, trigdesc->triggers, + trigdesc->numtriggers * sizeof(Trigger)); + newdesc->triggers = trigger; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + trigger->tgname = pstrdup(trigger->tgname); + if (trigger->tgnattr > 0) + { + int16 *newattr; + + newattr = (int16 *) palloc(trigger->tgnattr * sizeof(int16)); + memcpy(newattr, trigger->tgattr, + trigger->tgnattr * sizeof(int16)); + trigger->tgattr = newattr; + } + if (trigger->tgnargs > 0) + { + char **newargs; + int16 j; + + newargs = (char **) palloc(trigger->tgnargs * sizeof(char *)); + for (j = 0; j < trigger->tgnargs; j++) + newargs[j] = pstrdup(trigger->tgargs[j]); + trigger->tgargs = newargs; + } + if (trigger->tgqual) + trigger->tgqual = pstrdup(trigger->tgqual); + if (trigger->tgoldtable) + trigger->tgoldtable = pstrdup(trigger->tgoldtable); + if (trigger->tgnewtable) + trigger->tgnewtable = pstrdup(trigger->tgnewtable); + trigger++; + } + + return newdesc; +} + +/* + * Free a TriggerDesc data structure. 
+ */ +void +FreeTriggerDesc(TriggerDesc *trigdesc) +{ + Trigger *trigger; + int i; + + if (trigdesc == NULL) + return; + + trigger = trigdesc->triggers; + for (i = 0; i < trigdesc->numtriggers; i++) + { + pfree(trigger->tgname); + if (trigger->tgnattr > 0) + pfree(trigger->tgattr); + if (trigger->tgnargs > 0) + { + while (--(trigger->tgnargs) >= 0) + pfree(trigger->tgargs[trigger->tgnargs]); + pfree(trigger->tgargs); + } + if (trigger->tgqual) + pfree(trigger->tgqual); + if (trigger->tgoldtable) + pfree(trigger->tgoldtable); + if (trigger->tgnewtable) + pfree(trigger->tgnewtable); + trigger++; + } + pfree(trigdesc->triggers); + pfree(trigdesc); +} + +/* + * Compare two TriggerDesc structures for logical equality. + */ +#ifdef NOT_USED +bool +equalTriggerDescs(TriggerDesc *trigdesc1, TriggerDesc *trigdesc2) +{ + int i, + j; + + /* + * We need not examine the hint flags, just the trigger array itself; if + * we have the same triggers with the same types, the flags should match. + * + * As of 7.3 we assume trigger set ordering is significant in the + * comparison; so we just compare corresponding slots of the two sets. + * + * Note: comparing the stringToNode forms of the WHEN clauses means that + * parse column locations will affect the result. This is okay as long as + * this function is only used for detecting exact equality, as for example + * in checking for staleness of a cache entry. 
+ */ + if (trigdesc1 != NULL) + { + if (trigdesc2 == NULL) + return false; + if (trigdesc1->numtriggers != trigdesc2->numtriggers) + return false; + for (i = 0; i < trigdesc1->numtriggers; i++) + { + Trigger *trig1 = trigdesc1->triggers + i; + Trigger *trig2 = trigdesc2->triggers + i; + + if (trig1->tgoid != trig2->tgoid) + return false; + if (strcmp(trig1->tgname, trig2->tgname) != 0) + return false; + if (trig1->tgfoid != trig2->tgfoid) + return false; + if (trig1->tgtype != trig2->tgtype) + return false; + if (trig1->tgenabled != trig2->tgenabled) + return false; + if (trig1->tgisinternal != trig2->tgisinternal) + return false; + if (trig1->tgisclone != trig2->tgisclone) + return false; + if (trig1->tgconstrrelid != trig2->tgconstrrelid) + return false; + if (trig1->tgconstrindid != trig2->tgconstrindid) + return false; + if (trig1->tgconstraint != trig2->tgconstraint) + return false; + if (trig1->tgdeferrable != trig2->tgdeferrable) + return false; + if (trig1->tginitdeferred != trig2->tginitdeferred) + return false; + if (trig1->tgnargs != trig2->tgnargs) + return false; + if (trig1->tgnattr != trig2->tgnattr) + return false; + if (trig1->tgnattr > 0 && + memcmp(trig1->tgattr, trig2->tgattr, + trig1->tgnattr * sizeof(int16)) != 0) + return false; + for (j = 0; j < trig1->tgnargs; j++) + if (strcmp(trig1->tgargs[j], trig2->tgargs[j]) != 0) + return false; + if (trig1->tgqual == NULL && trig2->tgqual == NULL) + /* ok */ ; + else if (trig1->tgqual == NULL || trig2->tgqual == NULL) + return false; + else if (strcmp(trig1->tgqual, trig2->tgqual) != 0) + return false; + if (trig1->tgoldtable == NULL && trig2->tgoldtable == NULL) + /* ok */ ; + else if (trig1->tgoldtable == NULL || trig2->tgoldtable == NULL) + return false; + else if (strcmp(trig1->tgoldtable, trig2->tgoldtable) != 0) + return false; + if (trig1->tgnewtable == NULL && trig2->tgnewtable == NULL) + /* ok */ ; + else if (trig1->tgnewtable == NULL || trig2->tgnewtable == NULL) + return false; + else if 
(strcmp(trig1->tgnewtable, trig2->tgnewtable) != 0) + return false; + } + } + else if (trigdesc2 != NULL) + return false; + return true; +} +#endif /* NOT_USED */ + +/* + * Check if there is a row-level trigger with transition tables that prevents + * a table from becoming an inheritance child or partition. Return the name + * of the first such incompatible trigger, or NULL if there is none. + */ +const char * +FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc) +{ + if (trigdesc != NULL) + { + int i; + + for (i = 0; i < trigdesc->numtriggers; ++i) + { + Trigger *trigger = &trigdesc->triggers[i]; + + if (trigger->tgoldtable != NULL || trigger->tgnewtable != NULL) + return trigger->tgname; + } + } + + return NULL; +} + +/* + * Call a trigger function. + * + * trigdata: trigger descriptor. + * tgindx: trigger's index in finfo and instr arrays. + * finfo: array of cached trigger function call information. + * instr: optional array of EXPLAIN ANALYZE instrumentation state. + * per_tuple_context: memory context to execute the function in. + * + * Returns the tuple (or NULL) as returned by the function. + */ +static HeapTuple +ExecCallTriggerFunc(TriggerData *trigdata, + int tgindx, + FmgrInfo *finfo, + Instrumentation *instr, + MemoryContext per_tuple_context) +{ + LOCAL_FCINFO(fcinfo, 0); + PgStat_FunctionCallUsage fcusage; + Datum result; + MemoryContext oldContext; + + /* + * Protect against code paths that may fail to initialize transition table + * info. + */ + Assert(((TRIGGER_FIRED_BY_INSERT(trigdata->tg_event) || + TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) || + TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) && + TRIGGER_FIRED_AFTER(trigdata->tg_event) && + !(trigdata->tg_event & AFTER_TRIGGER_DEFERRABLE) && + !(trigdata->tg_event & AFTER_TRIGGER_INITDEFERRED)) || + (trigdata->tg_oldtable == NULL && trigdata->tg_newtable == NULL)); + + finfo += tgindx; + + /* + * We cache fmgr lookup info, to avoid making the lookup again on each + * call. 
+ */ + if (finfo->fn_oid == InvalidOid) + fmgr_info(trigdata->tg_trigger->tgfoid, finfo); + + Assert(finfo->fn_oid == trigdata->tg_trigger->tgfoid); + + /* + * If doing EXPLAIN ANALYZE, start charging time to this trigger. + */ + if (instr) + InstrStartNode(instr + tgindx); + + /* + * Do the function evaluation in the per-tuple memory context, so that + * leaked memory will be reclaimed once per tuple. Note in particular that + * any new tuple created by the trigger function will live till the end of + * the tuple cycle. + */ + oldContext = MemoryContextSwitchTo(per_tuple_context); + + /* + * Call the function, passing no arguments but setting a context. + */ + InitFunctionCallInfoData(*fcinfo, finfo, 0, + InvalidOid, (Node *) trigdata, NULL); + + pgstat_init_function_usage(fcinfo, &fcusage); + + MyTriggerDepth++; + PG_TRY(); + { + result = FunctionCallInvoke(fcinfo); + } + PG_FINALLY(); + { + MyTriggerDepth--; + } + PG_END_TRY(); + + pgstat_end_function_usage(&fcusage, true); + + MemoryContextSwitchTo(oldContext); + + /* + * Trigger protocol allows function to return a null pointer, but NOT to + * set the isnull result flag. + */ + if (fcinfo->isnull) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("trigger function %u returned null value", + fcinfo->flinfo->fn_oid))); + + /* + * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count + * one "tuple returned" (really the number of firings). 
+ */ + if (instr) + InstrStopNode(instr + tgindx, 1); + + return (HeapTuple) DatumGetPointer(result); +} + +void +ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_insert_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_INSERT)) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_insert_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_INSERT, + false, NULL, NULL, NIL, NULL, transition_capture, + false); +} + +bool +ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + HeapTuple newtuple = NULL; + bool should_free; + TriggerData 
LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, slot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = oldtuple = newtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + if (should_free) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, slot, false); + + /* + * After a tuple in a partition goes through a trigger, the user + * could have changed the partition key enough that the tuple no + * longer fits the partition. Verify that. 
+ */ + if (trigger->tgisclone && + !ExecPartitionCheck(relinfo, slot, estate, false)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("moving row to another partition during a BEFORE FOR EACH ROW trigger is not supported"), + errdetail("Before executing trigger \"%s\", the row was to be in partition \"%s.%s\".", + trigger->tgname, + get_namespace_name(RelationGetNamespace(relinfo->ri_RelationDesc)), + RelationGetRelationName(relinfo->ri_RelationDesc)))); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot, List *recheckIndexes, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_insert_after_row) || + (transition_capture && transition_capture->tcs_insert_new_table)) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_INSERT, + true, NULL, slot, + recheckIndexes, NULL, + transition_capture, + false); +} + +bool +ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, + TupleTableSlot *slot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + HeapTuple newtuple = NULL; + bool should_free; + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_INSERT | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_INSERT)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, slot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + + 
LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = oldtuple = newtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + if (should_free) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, slot, false); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_delete_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_DELETE)) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASDeleteTriggers(EState *estate, 
ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_delete_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_DELETE, + false, NULL, NULL, NIL, NULL, transition_capture, + false); +} + +/* + * Execute BEFORE ROW DELETE triggers. + * + * True indicates caller can proceed with the delete. False indicates caller + * need to suppress the delete and additionally if requested, we need to pass + * back the concurrently updated tuple if any. + */ +bool +ExecBRDeleteTriggersNew(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot **epqslot, + TM_Result *tmresult, + TM_FailureData *tmfd) +{ + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + bool result = true; + TriggerData LocTriggerData = {0}; + HeapTuple trigtuple; + bool should_free = false; + int i; + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + { + TupleTableSlot *epqslot_candidate = NULL; + + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + LockTupleExclusive, slot, &epqslot_candidate, + tmresult, tmfd)) + return false; + + /* + * If the tuple was concurrently updated and the caller of this + * function requested for the updated tuple, skip the trigger + * execution. 
+ */ + if (epqslot_candidate != NULL && epqslot != NULL) + { + *epqslot = epqslot_candidate; + return false; + } + + trigtuple = ExecFetchSlotHeapTuple(slot, true, &should_free); + } + else + { + trigtuple = fdw_trigtuple; + ExecForceStoreHeapTuple(trigtuple, slot, false); + } + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + for (i = 0; i < trigdesc->numtriggers; i++) + { + HeapTuple newtuple; + Trigger *trigger = &trigdesc->triggers[i]; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, slot, NULL)) + continue; + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + result = false; /* tell caller to suppress delete */ + break; + } + if (newtuple != trigtuple) + heap_freetuple(newtuple); + } + if (should_free) + heap_freetuple(trigtuple); + + return result; +} + +/* + * ABI-compatible wrapper to emulate old version of the above function. + * Do not call this version in new code. + */ +bool +ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot **epqslot) +{ + return ExecBRDeleteTriggersNew(estate, epqstate, relinfo, tupleid, + fdw_trigtuple, epqslot, NULL, NULL); +} + +/* + * Note: is_crosspart_update must be true if the DELETE is being performed + * as part of a cross-partition update. 
+ */ +void +ExecARDeleteTriggers(EState *estate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TransitionCaptureState *transition_capture, + bool is_crosspart_update) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_delete_after_row) || + (transition_capture && transition_capture->tcs_delete_old_table)) + { + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + GetTupleForTrigger(estate, + NULL, + relinfo, + tupleid, + LockTupleExclusive, + slot, + NULL, + NULL, + NULL); + else + ExecForceStoreHeapTuple(fdw_trigtuple, slot, false); + + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_DELETE, + true, slot, NULL, NIL, NULL, + transition_capture, + is_crosspart_update); + } +} + +bool +ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, + HeapTuple trigtuple) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + ExecForceStoreHeapTuple(trigtuple, slot, false); + + for (i = 0; i < trigdesc->numtriggers; i++) + { + HeapTuple rettuple; + Trigger *trigger = &trigdesc->triggers[i]; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_DELETE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, slot, NULL)) + continue; + + LocTriggerData.tg_trigslot = slot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_trigger = trigger; + rettuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + 
GetPerTupleMemoryContext(estate)); + if (rettuple == NULL) + return false; /* Delete was suppressed */ + if (rettuple != trigtuple) + heap_freetuple(rettuple); + } + return true; +} + +void +ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + Bitmapset *updatedCols; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_update_before_statement) + return; + + /* no-op if we already fired BS triggers in this context */ + if (before_stmt_triggers_fired(RelationGetRelid(relinfo->ri_RelationDesc), + CMD_UPDATE)) + return; + + /* statement-level triggers operate on the parent table */ + Assert(relinfo->ri_RootResultRelInfo == NULL); + + updatedCols = ExecGetAllUpdatedCols(relinfo, estate); + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_updatedcols = updatedCols; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + updatedCols, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + TransitionCaptureState *transition_capture) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + /* statement-level triggers operate on the parent table */ + 
Assert(relinfo->ri_RootResultRelInfo == NULL); + + if (trigdesc && trigdesc->trig_update_after_statement) + AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, + TRIGGER_EVENT_UPDATE, + false, NULL, NULL, NIL, + ExecGetAllUpdatedCols(relinfo, estate), + transition_capture, + false); +} + +bool +ExecBRUpdateTriggersNew(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + TM_Result *tmresult, + TM_FailureData *tmfd) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); + HeapTuple newtuple = NULL; + HeapTuple trigtuple; + bool should_free_trig = false; + bool should_free_new = false; + TriggerData LocTriggerData = {0}; + int i; + Bitmapset *updatedCols; + LockTupleMode lockmode; + + /* Determine lock mode to use */ + lockmode = ExecUpdateLockMode(estate, relinfo); + + Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + if (fdw_trigtuple == NULL) + { + TupleTableSlot *epqslot_candidate = NULL; + + /* get a copy of the on-disk tuple we are planning to update */ + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + lockmode, oldslot, &epqslot_candidate, + tmresult, tmfd)) + return false; /* cancel the update action */ + + /* + * In READ COMMITTED isolation level it's possible that target tuple + * was changed due to concurrent update. In that case we have a raw + * subplan output tuple in epqslot_candidate, and need to form a new + * insertable tuple using ExecGetUpdateNewTuple to replace the one we + * received in newslot. Neither we nor our callers have any further + * interest in the passed-in tuple, so it's okay to overwrite newslot + * with the newer data. + * + * (Typically, newslot was also generated by ExecGetUpdateNewTuple, so + * that epqslot_clean will be that same slot and the copy step below + * is not needed.) 
+ */ + if (epqslot_candidate != NULL) + { + TupleTableSlot *epqslot_clean; + + epqslot_clean = ExecGetUpdateNewTuple(relinfo, epqslot_candidate, + oldslot); + + if (newslot != epqslot_clean) + ExecCopySlot(newslot, epqslot_clean); + } + + trigtuple = ExecFetchSlotHeapTuple(oldslot, true, &should_free_trig); + } + else + { + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); + trigtuple = fdw_trigtuple; + } + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + updatedCols = ExecGetAllUpdatedCols(relinfo, estate); + LocTriggerData.tg_updatedcols = updatedCols; + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + updatedCols, oldslot, newslot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free_new); + + LocTriggerData.tg_trigslot = oldslot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_newtuple = oldtuple = newtuple; + LocTriggerData.tg_newslot = newslot; + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple == NULL) + { + if (should_free_trig) + heap_freetuple(trigtuple); + if (should_free_new) + heap_freetuple(oldtuple); + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, newslot, false); + + /* + * If the tuple returned by the trigger / being stored, is the old + * row version, and the heap tuple passed to the trigger was + * allocated locally, materialize the slot. 
Otherwise we might + * free it while still referenced by the slot. + */ + if (should_free_trig && newtuple == trigtuple) + ExecMaterializeSlot(newslot); + + if (should_free_new) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + if (should_free_trig) + heap_freetuple(trigtuple); + + return true; +} + +/* + * ABI-compatible wrapper to emulate old version of the above function. + * Do not call this version in new code. + */ +bool +ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + TM_FailureData *tmfd) +{ + return ExecBRUpdateTriggersNew(estate, epqstate, relinfo, tupleid, + fdw_trigtuple, newslot, NULL, tmfd); +} + +/* + * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source + * and destination partitions, respectively, of a cross-partition update of + * the root partitioned table mentioned in the query, given by 'relinfo'. + * 'tupleid' in that case refers to the ctid of the "old" tuple in the source + * partition, and 'newslot' contains the "new" tuple in the destination + * partition. This interface allows to support the requirements of + * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in + * that case. 
+ */ +void +ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, + ItemPointer tupleid, + HeapTuple fdw_trigtuple, + TupleTableSlot *newslot, + List *recheckIndexes, + TransitionCaptureState *transition_capture, + bool is_crosspart_update) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if ((trigdesc && trigdesc->trig_update_after_row) || + (transition_capture && + (transition_capture->tcs_update_old_table || + transition_capture->tcs_update_new_table))) + { + /* + * Note: if the UPDATE is converted into a DELETE+INSERT as part of + * update-partition-key operation, then this function is also called + * separately for DELETE and INSERT to capture transition table rows. + * In such case, either old tuple or new tuple can be NULL. + */ + TupleTableSlot *oldslot; + ResultRelInfo *tupsrc; + + Assert((src_partinfo != NULL && dst_partinfo != NULL) || + !is_crosspart_update); + + tupsrc = src_partinfo ? src_partinfo : relinfo; + oldslot = ExecGetTriggerOldSlot(estate, tupsrc); + + if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid)) + GetTupleForTrigger(estate, + NULL, + tupsrc, + tupleid, + LockTupleExclusive, + oldslot, + NULL, + NULL, + NULL); + else if (fdw_trigtuple != NULL) + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false); + else + ExecClearTuple(oldslot); + + AfterTriggerSaveEvent(estate, relinfo, + src_partinfo, dst_partinfo, + TRIGGER_EVENT_UPDATE, + true, + oldslot, newslot, recheckIndexes, + ExecGetAllUpdatedCols(relinfo, estate), + transition_capture, + is_crosspart_update); + } +} + +bool +ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, + HeapTuple trigtuple, TupleTableSlot *newslot) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); + HeapTuple newtuple = NULL; + bool should_free; + TriggerData LocTriggerData = {0}; + int i; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event 
= TRIGGER_EVENT_UPDATE | + TRIGGER_EVENT_ROW | + TRIGGER_EVENT_INSTEAD; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + ExecForceStoreHeapTuple(trigtuple, oldslot, false); + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_ROW, + TRIGGER_TYPE_INSTEAD, + TRIGGER_TYPE_UPDATE)) + continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, oldslot, newslot)) + continue; + + if (!newtuple) + newtuple = ExecFetchSlotHeapTuple(newslot, true, &should_free); + + LocTriggerData.tg_trigslot = oldslot; + LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_newslot = newslot; + LocTriggerData.tg_newtuple = oldtuple = newtuple; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + if (newtuple == NULL) + { + return false; /* "do nothing" */ + } + else if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, newslot, false); + + if (should_free) + heap_freetuple(oldtuple); + + /* signal tuple should be re-fetched if used */ + newtuple = NULL; + } + } + + return true; +} + +void +ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc; + int i; + TriggerData LocTriggerData = {0}; + + trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc == NULL) + return; + if (!trigdesc->trig_truncate_before_statement) + return; + + LocTriggerData.type = T_TriggerData; + LocTriggerData.tg_event = TRIGGER_EVENT_TRUNCATE | + TRIGGER_EVENT_BEFORE; + LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple newtuple; + + if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, + TRIGGER_TYPE_STATEMENT, + TRIGGER_TYPE_BEFORE, + TRIGGER_TYPE_TRUNCATE)) + 
continue; + if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, + NULL, NULL, NULL)) + continue; + + LocTriggerData.tg_trigger = trigger; + newtuple = ExecCallTriggerFunc(&LocTriggerData, + i, + relinfo->ri_TrigFunctions, + relinfo->ri_TrigInstrument, + GetPerTupleMemoryContext(estate)); + + if (newtuple) + ereport(ERROR, + (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED), + errmsg("BEFORE STATEMENT trigger cannot return a value"))); + } +} + +void +ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo) +{ + TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + + if (trigdesc && trigdesc->trig_truncate_after_statement) + AfterTriggerSaveEvent(estate, relinfo, + NULL, NULL, + TRIGGER_EVENT_TRUNCATE, + false, NULL, NULL, NIL, NULL, NULL, + false); +} + + +/* + * Fetch tuple into "oldslot", dealing with locking and EPQ if necessary + */ +static bool +GetTupleForTrigger(EState *estate, + EPQState *epqstate, + ResultRelInfo *relinfo, + ItemPointer tid, + LockTupleMode lockmode, + TupleTableSlot *oldslot, + TupleTableSlot **epqslot, + TM_Result *tmresultp, + TM_FailureData *tmfdp) +{ + Relation relation = relinfo->ri_RelationDesc; + + if (epqslot != NULL) + { + TM_Result test; + TM_FailureData tmfd; + int lockflags = 0; + + *epqslot = NULL; + + /* caller must pass an epqstate if EvalPlanQual is possible */ + Assert(epqstate != NULL); + + /* + * lock tuple for update + */ + if (!IsolationUsesXactSnapshot()) + lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; + test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot, + estate->es_output_cid, + lockmode, LockWaitBlock, + lockflags, + &tmfd); + + /* Let the caller know about the status of this operation */ + if (tmresultp) + *tmresultp = test; + if (tmfdp) + *tmfdp = tmfd; + + switch (test) + { + case TM_SelfModified: + + /* + * The target tuple was already updated or deleted by the + * current command, or by a later command in the current + * transaction. 
We ignore the tuple in the former case, and + * throw error in the latter case, for the same reasons + * enumerated in ExecUpdate and ExecDelete in + * nodeModifyTable.c. + */ + if (tmfd.cmax != estate->es_output_cid) + ereport(ERROR, + (errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION), + errmsg("tuple to be updated was already modified by an operation triggered by the current command"), + errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); + + /* treat it as deleted; do not process */ + return false; + + case TM_Ok: + if (tmfd.traversed) + { + /* + * Recheck the tuple using EPQ. For MERGE, we leave this + * to the caller (it must do additional rechecking, and + * might end up executing a different action entirely). + */ + if (estate->es_plannedstmt->commandType == CMD_MERGE) + { + if (tmresultp) + *tmresultp = TM_Updated; + return false; + } + + *epqslot = EvalPlanQual(epqstate, + relation, + relinfo->ri_RangeTableIndex, + oldslot); + + /* + * If PlanQual failed for updated tuple - we must not + * process this tuple! + */ + if (TupIsNull(*epqslot)) + { + *epqslot = NULL; + return false; + } + } + break; + + case TM_Updated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + elog(ERROR, "unexpected table_tuple_lock status: %u", test); + break; + + case TM_Deleted: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent delete"))); + /* tuple was deleted */ + return false; + + case TM_Invisible: + elog(ERROR, "attempted to lock invisible tuple"); + break; + + default: + elog(ERROR, "unrecognized table_tuple_lock status: %u", test); + return false; /* keep compiler quiet */ + } + } + else + { + /* + * We expect the tuple to be present, thus very simple error handling + * suffices. 
+ */ + if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, + oldslot)) + elog(ERROR, "failed to fetch tuple for trigger"); + } + + return true; +} + +/* + * Is trigger enabled to fire? + */ +static bool +TriggerEnabled(EState *estate, ResultRelInfo *relinfo, + Trigger *trigger, TriggerEvent event, + Bitmapset *modifiedCols, + TupleTableSlot *oldslot, TupleTableSlot *newslot) +{ + /* Check replication-role-dependent enable state */ + if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA) + { + if (trigger->tgenabled == TRIGGER_FIRES_ON_ORIGIN || + trigger->tgenabled == TRIGGER_DISABLED) + return false; + } + else /* ORIGIN or LOCAL role */ + { + if (trigger->tgenabled == TRIGGER_FIRES_ON_REPLICA || + trigger->tgenabled == TRIGGER_DISABLED) + return false; + } + + /* + * Check for column-specific trigger (only possible for UPDATE, and in + * fact we *must* ignore tgattr for other event types) + */ + if (trigger->tgnattr > 0 && TRIGGER_FIRED_BY_UPDATE(event)) + { + int i; + bool modified; + + modified = false; + for (i = 0; i < trigger->tgnattr; i++) + { + if (bms_is_member(trigger->tgattr[i] - FirstLowInvalidHeapAttributeNumber, + modifiedCols)) + { + modified = true; + break; + } + } + if (!modified) + return false; + } + + /* Check for WHEN clause */ + if (trigger->tgqual) + { + ExprState **predicate; + ExprContext *econtext; + MemoryContext oldContext; + int i; + + Assert(estate != NULL); + + /* + * trigger is an element of relinfo->ri_TrigDesc->triggers[]; find the + * matching element of relinfo->ri_TrigWhenExprs[] + */ + i = trigger - relinfo->ri_TrigDesc->triggers; + predicate = &relinfo->ri_TrigWhenExprs[i]; + + /* + * If first time through for this WHEN expression, build expression + * nodetrees for it. Keep them in the per-query memory context so + * they'll survive throughout the query. 
+ */ + if (*predicate == NULL) + { + Node *tgqual; + + oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + tgqual = stringToNode(trigger->tgqual); + /* Change references to OLD and NEW to INNER_VAR and OUTER_VAR */ + ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER_VAR, 0); + ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER_VAR, 0); + /* ExecPrepareQual wants implicit-AND form */ + tgqual = (Node *) make_ands_implicit((Expr *) tgqual); + *predicate = ExecPrepareQual((List *) tgqual, estate); + MemoryContextSwitchTo(oldContext); + } + + /* + * We will use the EState's per-tuple context for evaluating WHEN + * expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* + * Finally evaluate the expression, making the old and/or new tuples + * available as INNER_VAR/OUTER_VAR respectively. + */ + econtext->ecxt_innertuple = oldslot; + econtext->ecxt_outertuple = newslot; + if (!ExecQual(*predicate, econtext)) + return false; + } + + return true; +} + + +/* ---------- + * After-trigger stuff + * + * The AfterTriggersData struct holds data about pending AFTER trigger events + * during the current transaction tree. (BEFORE triggers are fired + * immediately so we don't need any persistent state about them.) The struct + * and most of its subsidiary data are kept in TopTransactionContext; however + * some data that can be discarded sooner appears in the CurTransactionContext + * of the relevant subtransaction. Also, the individual event records are + * kept in a separate sub-context of TopTransactionContext. This is done + * mainly so that it's easy to tell from a memory context dump how much space + * is being eaten by trigger events. + * + * Because the list of pending events can grow large, we go to some + * considerable effort to minimize per-event memory consumption. The event + * records are grouped into chunks and common data for similar events in the + * same chunk is only stored once. 
 *
 * XXX We need to be able to save the per-event data in a file if it grows too
 * large.
 * ----------
 */

/* Per-trigger SET CONSTRAINT status */
typedef struct SetConstraintTriggerData
{
	Oid			sct_tgoid;		/* constraint trigger's OID */
	bool		sct_tgisdeferred;	/* is it currently set DEFERRED? */
} SetConstraintTriggerData;

typedef struct SetConstraintTriggerData *SetConstraintTrigger;

/*
 * SET CONSTRAINT intra-transaction status.
 *
 * We make this a single palloc'd object so it can be copied and freed easily.
 *
 * all_isset and all_isdeferred are used to keep track
 * of SET CONSTRAINTS ALL {DEFERRED, IMMEDIATE}.
 *
 * trigstates[] stores per-trigger tgisdeferred settings.
 */
typedef struct SetConstraintStateData
{
	bool		all_isset;		/* has SET CONSTRAINTS ALL been done? */
	bool		all_isdeferred; /* if so, was it ALL DEFERRED? */
	int			numstates;		/* number of trigstates[] entries in use */
	int			numalloc;		/* allocated size of trigstates[] */
	SetConstraintTriggerData trigstates[FLEXIBLE_ARRAY_MEMBER];
} SetConstraintStateData;

typedef SetConstraintStateData *SetConstraintState;


/*
 * Per-trigger-event data
 *
 * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
 * status bits, up to two tuple CTIDs, and optionally two OIDs of partitions.
 * Each event record also has an associated AfterTriggerSharedData that is
 * shared across all instances of similar events within a "chunk".
 *
 * For row-level triggers, we arrange not to waste storage on unneeded ctid
 * fields.  Updates of regular tables use two; inserts and deletes of regular
 * tables use one; foreign tables always use zero and save the tuple(s) to a
 * tuplestore.  AFTER_TRIGGER_FDW_FETCH directs AfterTriggerExecute() to
 * retrieve a fresh tuple or pair of tuples from that tuplestore, while
 * AFTER_TRIGGER_FDW_REUSE directs it to use the most-recently-retrieved
 * tuple(s).  This permits storing tuples once regardless of the number of
 * row-level triggers on a foreign table.
 *
 * When updates on partitioned tables cause rows to move between partitions,
 * the OIDs of both partitions are stored too, so that the tuples can be
 * fetched; such entries are marked AFTER_TRIGGER_CP_UPDATE (for "cross-
 * partition update").
 *
 * Note that we need triggers on foreign tables to be fired in exactly the
 * order they were queued, so that the tuples come out of the tuplestore in
 * the right order.  To ensure that, we forbid deferrable (constraint)
 * triggers on foreign tables.  This also ensures that such triggers do not
 * get deferred into outer trigger query levels, meaning that it's okay to
 * destroy the tuplestore at the end of the query level.
 *
 * Statement-level triggers always bear AFTER_TRIGGER_1CTID, though they
 * require no ctid field.  We lack the flag bit space to neatly represent that
 * distinct case, and it seems unlikely to be worth much trouble.
 *
 * Note: ats_firing_id is initially zero and is set to something else when
 * AFTER_TRIGGER_IN_PROGRESS is set.  It indicates which trigger firing
 * cycle the trigger will be fired in (or was fired in, if DONE is set).
 * Although this is mutable state, we can keep it in AfterTriggerSharedData
 * because all instances of the same type of event in a given event list will
 * be fired at the same time, if they were queued between the same firing
 * cycles.  So we need only ensure that ats_firing_id is zero when attaching
 * a new event to an existing AfterTriggerSharedData record.
 */
typedef uint32 TriggerFlags;

#define AFTER_TRIGGER_OFFSET			0x07FFFFFF	/* must be low-order bits */
#define AFTER_TRIGGER_DONE				0x80000000	/* status bit: event was fired */
#define AFTER_TRIGGER_IN_PROGRESS		0x40000000	/* status bit: scheduled in a firing cycle */
/* bits describing the size and tuple sources of this event */
#define AFTER_TRIGGER_FDW_REUSE			0x00000000
#define AFTER_TRIGGER_FDW_FETCH			0x20000000
#define AFTER_TRIGGER_1CTID				0x10000000
#define AFTER_TRIGGER_2CTID				0x30000000
#define AFTER_TRIGGER_CP_UPDATE			0x08000000
#define AFTER_TRIGGER_TUP_BITS			0x38000000	/* mask of the FDW/CTID bits above */
typedef struct AfterTriggerSharedData *AfterTriggerShared;

typedef struct AfterTriggerSharedData
{
	TriggerEvent ats_event;		/* event type indicator, see trigger.h */
	Oid			ats_tgoid;		/* the trigger's ID */
	Oid			ats_relid;		/* the relation it's on */
	CommandId	ats_firing_id;	/* ID for firing cycle */
	struct AfterTriggersTableData *ats_table;	/* transition table access */
	Bitmapset  *ats_modifiedcols;	/* modified columns */
} AfterTriggerSharedData;

typedef struct AfterTriggerEventData *AfterTriggerEvent;

typedef struct AfterTriggerEventData
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
	ItemPointerData ate_ctid1;	/* inserted, deleted, or old updated tuple */
	ItemPointerData ate_ctid2;	/* new updated tuple */

	/*
	 * During a cross-partition update of a partitioned table, we also store
	 * the OIDs of source and destination partitions that are needed to fetch
	 * the old (ctid1) and the new tuple (ctid2) from, respectively.
	 */
	Oid			ate_src_part;
	Oid			ate_dst_part;
} AfterTriggerEventData;

/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */
typedef struct AfterTriggerEventDataNoOids
{
	TriggerFlags ate_flags;
	ItemPointerData ate_ctid1;
	ItemPointerData ate_ctid2;
} AfterTriggerEventDataNoOids;

/* AfterTriggerEventData, minus ate_*_part and ate_ctid2 */
typedef struct AfterTriggerEventDataOneCtid
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
	ItemPointerData ate_ctid1;	/* inserted, deleted, or old updated tuple */
} AfterTriggerEventDataOneCtid;

/* AfterTriggerEventData, minus ate_*_part, ate_ctid1 and ate_ctid2 */
typedef struct AfterTriggerEventDataZeroCtids
{
	TriggerFlags ate_flags;		/* status bits and offset to shared data */
} AfterTriggerEventDataZeroCtids;

/* Actual size of an event record, as selected by its TUP_BITS flags */
#define SizeofTriggerEvent(evt) \
	(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
	 sizeof(AfterTriggerEventData) : \
	 (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
	  sizeof(AfterTriggerEventDataNoOids) : \
	  (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
	   sizeof(AfterTriggerEventDataOneCtid) : \
	   sizeof(AfterTriggerEventDataZeroCtids))))

/* Locate the shared record via the offset stored in the event's flags */
#define GetTriggerSharedData(evt) \
	((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))

/*
 * To avoid palloc overhead, we keep trigger events in arrays in successively-
 * larger chunks (a slightly more sophisticated version of an expansible
 * array).  The space between CHUNK_DATA_START and freeptr is occupied by
 * AfterTriggerEventData records; the space between endfree and endptr is
 * occupied by AfterTriggerSharedData records.
 */
typedef struct AfterTriggerEventChunk
{
	struct AfterTriggerEventChunk *next;	/* list link */
	char	   *freeptr;		/* start of free space in chunk */
	char	   *endfree;		/* end of free space in chunk */
	char	   *endptr;			/* end of chunk */
	/* event data follows here */
} AfterTriggerEventChunk;

/* Events begin at the first MAXALIGN'd offset past the chunk header */
#define CHUNK_DATA_START(cptr) ((char *) (cptr) + MAXALIGN(sizeof(AfterTriggerEventChunk)))

/* A list of events */
typedef struct AfterTriggerEventList
{
	AfterTriggerEventChunk *head;	/* first chunk, or NULL if empty */
	AfterTriggerEventChunk *tail;	/* last chunk */
	char	   *tailfree;		/* freeptr of tail chunk */
} AfterTriggerEventList;

/* Macros to help in iterating over a list of events */
#define for_each_chunk(cptr, evtlist) \
	for (cptr = (evtlist).head; cptr != NULL; cptr = cptr->next)
#define for_each_event(eptr, cptr) \
	for (eptr = (AfterTriggerEvent) CHUNK_DATA_START(cptr); \
		 (char *) eptr < (cptr)->freeptr; \
		 eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
/* Use this if no special per-chunk processing is needed */
#define for_each_event_chunk(eptr, cptr, evtlist) \
	for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)

/* Macros for iterating from a start point that might not be list start */
#define for_each_chunk_from(cptr) \
	for (; cptr != NULL; cptr = cptr->next)
#define for_each_event_from(eptr, cptr) \
	for (; \
		 (char *) eptr < (cptr)->freeptr; \
		 eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))


/*
 * All per-transaction data for the AFTER TRIGGERS module.
 *
 * AfterTriggersData has the following fields:
 *
 * firing_counter is incremented for each call of afterTriggerInvokeEvents.
 * We mark firable events with the current firing cycle's ID so that we can
 * tell which ones to work on.  This ensures sane behavior if a trigger
 * function chooses to do SET CONSTRAINTS: the inner SET CONSTRAINTS will
 * only fire those events that weren't already scheduled for firing.
+ * + * state keeps track of the transaction-local effects of SET CONSTRAINTS. + * This is saved and restored across failed subtransactions. + * + * events is the current list of deferred events. This is global across + * all subtransactions of the current transaction. In a subtransaction + * abort, we know that the events added by the subtransaction are at the + * end of the list, so it is relatively easy to discard them. The event + * list chunks themselves are stored in event_cxt. + * + * query_depth is the current depth of nested AfterTriggerBeginQuery calls + * (-1 when the stack is empty). + * + * query_stack[query_depth] is the per-query-level data, including these fields: + * + * events is a list of AFTER trigger events queued by the current query. + * None of these are valid until the matching AfterTriggerEndQuery call + * occurs. At that point we fire immediate-mode triggers, and append any + * deferred events to the main events list. + * + * fdw_tuplestore is a tuplestore containing the foreign-table tuples + * needed by events queued by the current query. (Note: we use just one + * tuplestore even though more than one foreign table might be involved. + * This is okay because tuplestores don't really care what's in the tuples + * they store; but it's possible that someday it'd break.) + * + * tables is a List of AfterTriggersTableData structs for target tables + * of the current query (see below). + * + * maxquerydepth is just the allocated length of query_stack. + * + * trans_stack holds per-subtransaction data, including these fields: + * + * state is NULL or a pointer to a saved copy of the SET CONSTRAINTS + * state data. Each subtransaction level that modifies that state first + * saves a copy, which we use to restore the state if we abort. + * + * events is a copy of the events head/tail pointers, + * which we use to restore those values during subtransaction abort. 
+ * + * query_depth is the subtransaction-start-time value of query_depth, + * which we similarly use to clean up at subtransaction abort. + * + * firing_counter is the subtransaction-start-time value of firing_counter. + * We use this to recognize which deferred triggers were fired (or marked + * for firing) within an aborted subtransaction. + * + * We use GetCurrentTransactionNestLevel() to determine the correct array + * index in trans_stack. maxtransdepth is the number of allocated entries in + * trans_stack. (By not keeping our own stack pointer, we can avoid trouble + * in cases where errors during subxact abort cause multiple invocations + * of AfterTriggerEndSubXact() at the same nesting depth.) + * + * We create an AfterTriggersTableData struct for each target table of the + * current query, and each operation mode (INSERT/UPDATE/DELETE), that has + * either transition tables or statement-level triggers. This is used to + * hold the relevant transition tables, as well as info tracking whether + * we already queued the statement triggers. (We use that info to prevent + * firing the same statement triggers more than once per statement, or really + * once per transition table set.) These structs, along with the transition + * table tuplestores, live in the (sub)transaction's CurTransactionContext. + * That's sufficient lifespan because we don't allow transition tables to be + * used by deferrable triggers, so they only need to survive until + * AfterTriggerEndQuery. 
 */
typedef struct AfterTriggersQueryData AfterTriggersQueryData;
typedef struct AfterTriggersTransData AfterTriggersTransData;
typedef struct AfterTriggersTableData AfterTriggersTableData;

typedef struct AfterTriggersData
{
	CommandId	firing_counter; /* next firing ID to assign */
	SetConstraintState state;	/* the active S C state */
	AfterTriggerEventList events;	/* deferred-event list */
	MemoryContext event_cxt;	/* memory context for events, if any */

	/* per-query-level data: */
	AfterTriggersQueryData *query_stack;	/* array of structs shown below */
	int			query_depth;	/* current index in above array */
	int			maxquerydepth;	/* allocated len of above array */

	/* per-subtransaction-level data: */
	AfterTriggersTransData *trans_stack;	/* array of structs shown below */
	int			maxtransdepth;	/* allocated len of above array */
} AfterTriggersData;

struct AfterTriggersQueryData
{
	AfterTriggerEventList events;	/* events pending from this query */
	Tuplestorestate *fdw_tuplestore;	/* foreign tuples for said events */
	List	   *tables;			/* list of AfterTriggersTableData, see below */
};

struct AfterTriggersTransData
{
	/* these fields are just for resetting at subtrans abort: */
	SetConstraintState state;	/* saved S C state, or NULL if not yet saved */
	AfterTriggerEventList events;	/* saved list pointer */
	int			query_depth;	/* saved query_depth */
	CommandId	firing_counter; /* saved firing_counter */
};

struct AfterTriggersTableData
{
	/* relid + cmdType form the lookup key for these structs: */
	Oid			relid;			/* target table's OID */
	CmdType		cmdType;		/* event type, CMD_INSERT/UPDATE/DELETE */
	bool		closed;			/* true when no longer OK to add tuples */
	bool		before_trig_done;	/* did we already queue BS triggers? */
	bool		after_trig_done;	/* did we already queue AS triggers? */
	AfterTriggerEventList after_trig_events;	/* if so, saved list pointer */

	/*
	 * We maintain separate transition tables for UPDATE/INSERT/DELETE since
	 * MERGE can run all three actions in a single statement.  Note that
	 * UPDATE needs both old and new transition tables whereas INSERT needs
	 * only new, and DELETE needs only old.
	 */

	/* "old" transition table for UPDATE, if any */
	Tuplestorestate *old_upd_tuplestore;
	/* "new" transition table for UPDATE, if any */
	Tuplestorestate *new_upd_tuplestore;
	/* "old" transition table for DELETE, if any */
	Tuplestorestate *old_del_tuplestore;
	/* "new" transition table for INSERT, if any */
	Tuplestorestate *new_ins_tuplestore;

	TupleTableSlot *storeslot;	/* for converting to tuplestore's format */
};

/* Module-global AFTER-trigger state for the current transaction */
static AfterTriggersData afterTriggers;

/* Forward declarations of local routines */
static void AfterTriggerExecute(EState *estate,
								AfterTriggerEvent event,
								ResultRelInfo *relInfo,
								ResultRelInfo *src_relInfo,
								ResultRelInfo *dst_relInfo,
								TriggerDesc *trigdesc,
								FmgrInfo *finfo,
								Instrumentation *instr,
								MemoryContext per_tuple_context,
								TupleTableSlot *trig_tuple_slot1,
								TupleTableSlot *trig_tuple_slot2);
static AfterTriggersTableData *GetAfterTriggersTableData(Oid relid,
														 CmdType cmdType);
static TupleTableSlot *GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
												 TupleDesc tupdesc);
static Tuplestorestate *GetAfterTriggersTransitionTable(int event,
														TupleTableSlot *oldslot,
														TupleTableSlot *newslot,
														TransitionCaptureState *transition_capture);
static void TransitionTableAddTuple(EState *estate,
									TransitionCaptureState *transition_capture,
									ResultRelInfo *relinfo,
									TupleTableSlot *slot,
									TupleTableSlot *original_insert_tuple,
									Tuplestorestate *tuplestore);
static void AfterTriggerFreeQuery(AfterTriggersQueryData *qs);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState
SetConstraintStateAddItem(SetConstraintState state,
                          Oid tgoid, bool tgisdeferred);
static void cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent);


/*
 * Get the FDW tuplestore for the current trigger query level, creating it
 * if necessary.
 *
 * This tuplestore holds foreign-table tuples queued for AFTER triggers;
 * AfterTriggerExecute reads it back in the AFTER_TRIGGER_FDW_FETCH case.
 */
static Tuplestorestate *
GetCurrentFDWTuplestore(void)
{
    Tuplestorestate *ret;

    ret = afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore;
    if (ret == NULL)
    {
        MemoryContext oldcxt;
        ResourceOwner saveResourceOwner;

        /*
         * Make the tuplestore valid until end of subtransaction.  We really
         * only need it until AfterTriggerEndQuery().
         */
        oldcxt = MemoryContextSwitchTo(CurTransactionContext);
        saveResourceOwner = CurrentResourceOwner;
        CurrentResourceOwner = CurTransactionResourceOwner;

        ret = tuplestore_begin_heap(false, false, work_mem);

        CurrentResourceOwner = saveResourceOwner;
        MemoryContextSwitchTo(oldcxt);

        afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore = ret;
    }

    return ret;
}

/* ----------
 * afterTriggerCheckState()
 *
 *  Returns true if the trigger event is actually in state DEFERRED.
 * ----------
 */
static bool
afterTriggerCheckState(AfterTriggerShared evtshared)
{
    Oid         tgoid = evtshared->ats_tgoid;
    SetConstraintState state = afterTriggers.state;
    int         i;

    /*
     * For not-deferrable triggers (i.e. normal AFTER ROW triggers and
     * constraints declared NOT DEFERRABLE), the state is always false.
     */
    if ((evtshared->ats_event & AFTER_TRIGGER_DEFERRABLE) == 0)
        return false;

    /*
     * If constraint state exists, SET CONSTRAINTS might have been executed
     * either for this trigger or for all triggers.
     */
    if (state != NULL)
    {
        /* Check for SET CONSTRAINTS for this specific trigger. */
        for (i = 0; i < state->numstates; i++)
        {
            if (state->trigstates[i].sct_tgoid == tgoid)
                return state->trigstates[i].sct_tgisdeferred;
        }

        /* Check for SET CONSTRAINTS ALL. */
        if (state->all_isset)
            return state->all_isdeferred;
    }

    /*
     * Otherwise return the default state for the trigger.
     */
    return ((evtshared->ats_event & AFTER_TRIGGER_INITDEFERRED) != 0);
}

/* ----------
 * afterTriggerCopyBitmap()
 *
 *  Copy bitmap into AfterTriggerEvents memory context, which is where the after
 *  trigger events are kept.
 * ----------
 */
static Bitmapset *
afterTriggerCopyBitmap(Bitmapset *src)
{
    Bitmapset  *dst;
    MemoryContext oldcxt;

    /* An empty bitmap needs no copy. */
    if (src == NULL)
        return NULL;

    /* Create event context if we didn't already */
    if (afterTriggers.event_cxt == NULL)
        afterTriggers.event_cxt =
            AllocSetContextCreate(TopTransactionContext,
                                  "AfterTriggerEvents",
                                  ALLOCSET_DEFAULT_SIZES);

    oldcxt = MemoryContextSwitchTo(afterTriggers.event_cxt);

    dst = bms_copy(src);

    MemoryContextSwitchTo(oldcxt);

    return dst;
}

/* ----------
 * afterTriggerAddEvent()
 *
 *  Add a new trigger event to the specified queue.
 *  The passed-in event data is copied.
 * ----------
 */
static void
afterTriggerAddEvent(AfterTriggerEventList *events,
                     AfterTriggerEvent event, AfterTriggerShared evtshared)
{
    Size        eventsize = SizeofTriggerEvent(event);
    Size        needed = eventsize + sizeof(AfterTriggerSharedData);
    AfterTriggerEventChunk *chunk;
    AfterTriggerShared newshared;
    AfterTriggerEvent newevent;

    /*
     * If empty list or not enough room in the tail chunk, make a new chunk.
     * We assume here that a new shared record will always be needed.
     */
    chunk = events->tail;
    if (chunk == NULL ||
        chunk->endfree - chunk->freeptr < needed)
    {
        Size        chunksize;

        /* Create event context if we didn't already */
        if (afterTriggers.event_cxt == NULL)
            afterTriggers.event_cxt =
                AllocSetContextCreate(TopTransactionContext,
                                      "AfterTriggerEvents",
                                      ALLOCSET_DEFAULT_SIZES);

        /*
         * Chunk size starts at 1KB and is allowed to increase up to 1MB.
         * These numbers are fairly arbitrary, though there is a hard limit at
         * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their
         * shared records using the available space in ate_flags.  Another
         * constraint is that if the chunk size gets too huge, the search loop
         * below would get slow given a (not too common) usage pattern with
         * many distinct event types in a chunk.  Therefore, we double the
         * preceding chunk size only if there weren't too many shared records
         * in the preceding chunk; otherwise we halve it.  This gives us some
         * ability to adapt to the actual usage pattern of the current query
         * while still having large chunk sizes in typical usage.  All chunk
         * sizes used should be MAXALIGN multiples, to ensure that the shared
         * records will be aligned safely.
         */
#define MIN_CHUNK_SIZE 1024
#define MAX_CHUNK_SIZE (1024*1024)

#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
#error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET
#endif

        if (chunk == NULL)
            chunksize = MIN_CHUNK_SIZE;
        else
        {
            /* preceding chunk size... */
            chunksize = chunk->endptr - (char *) chunk;
            /* check number of shared records in preceding chunk */
            if ((chunk->endptr - chunk->endfree) <=
                (100 * sizeof(AfterTriggerSharedData)))
                chunksize *= 2;     /* okay, double it */
            else
                chunksize /= 2;     /* too many shared records */
            chunksize = Min(chunksize, MAX_CHUNK_SIZE);
        }
        chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
        chunk->next = NULL;
        chunk->freeptr = CHUNK_DATA_START(chunk);
        chunk->endptr = chunk->endfree = (char *) chunk + chunksize;
        Assert(chunk->endfree - chunk->freeptr >= needed);

        if (events->head == NULL)
            events->head = chunk;
        else
            events->tail->next = chunk;
        events->tail = chunk;
        /* events->tailfree is now out of sync, but we'll fix it below */
    }

    /*
     * Try to locate a matching shared-data record already in the chunk.  If
     * none, make a new one.  (Shared records are allocated backwards from the
     * chunk's end, so the search scans from endptr down to endfree.)
     */
    for (newshared = ((AfterTriggerShared) chunk->endptr) - 1;
         (char *) newshared >= chunk->endfree;
         newshared--)
    {
        if (newshared->ats_tgoid == evtshared->ats_tgoid &&
            newshared->ats_relid == evtshared->ats_relid &&
            newshared->ats_event == evtshared->ats_event &&
            newshared->ats_table == evtshared->ats_table &&
            newshared->ats_firing_id == 0)
            break;
    }
    if ((char *) newshared < chunk->endfree)
    {
        *newshared = *evtshared;
        newshared->ats_firing_id = 0;   /* just to be sure */
        chunk->endfree = (char *) newshared;
    }

    /* Insert the data */
    newevent = (AfterTriggerEvent) chunk->freeptr;
    memcpy(newevent, event, eventsize);
    /* ... and link the new event to its shared record */
    newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
    newevent->ate_flags |= (char *) newshared - (char *) newevent;

    chunk->freeptr += eventsize;
    events->tailfree = chunk->freeptr;
}

/* ----------
 * afterTriggerFreeEventList()
 *
 *  Free all the event storage in the given list.
 * ----------
 */
static void
afterTriggerFreeEventList(AfterTriggerEventList *events)
{
    AfterTriggerEventChunk *chunk;

    while ((chunk = events->head) != NULL)
    {
        events->head = chunk->next;
        pfree(chunk);
    }
    events->tail = NULL;
    events->tailfree = NULL;
}

/* ----------
 * afterTriggerRestoreEventList()
 *
 *  Restore an event list to its prior length, removing all the events
 *  added since it had the value old_events.
 * ----------
 */
static void
afterTriggerRestoreEventList(AfterTriggerEventList *events,
                             const AfterTriggerEventList *old_events)
{
    AfterTriggerEventChunk *chunk;
    AfterTriggerEventChunk *next_chunk;

    if (old_events->tail == NULL)
    {
        /* restoring to a completely empty state, so free everything */
        afterTriggerFreeEventList(events);
    }
    else
    {
        *events = *old_events;
        /* free any chunks after the last one we want to keep */
        for (chunk = events->tail->next; chunk != NULL; chunk = next_chunk)
        {
            next_chunk = chunk->next;
            pfree(chunk);
        }
        /* and clean up the tail chunk to be the right length */
        events->tail->next = NULL;
        events->tail->freeptr = events->tailfree;

        /*
         * We don't make any effort to remove now-unused shared data records.
         * They might still be useful, anyway.
         */
    }
}

/* ----------
 * afterTriggerDeleteHeadEventChunk()
 *
 *  Remove the first chunk of events from the query level's event list.
 *  Keep any event list pointers elsewhere in the query level's data
 *  structures in sync.
 * ----------
 */
static void
afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
{
    AfterTriggerEventChunk *target = qs->events.head;
    ListCell   *lc;

    /* Caller must only invoke this when at least two chunks exist. */
    Assert(target && target->next);

    /*
     * First, update any pointers in the per-table data, so that they won't be
     * dangling.  Resetting obsoleted pointers to NULL will make
     * cancel_prior_stmt_triggers start from the list head, which is fine.
     */
    foreach(lc, qs->tables)
    {
        AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);

        if (table->after_trig_done &&
            table->after_trig_events.tail == target)
        {
            table->after_trig_events.head = NULL;
            table->after_trig_events.tail = NULL;
            table->after_trig_events.tailfree = NULL;
        }
    }

    /* Now we can flush the head chunk */
    qs->events.head = target->next;
    pfree(target);
}


/* ----------
 * AfterTriggerExecute()
 *
 *  Fetch the required tuples back from the heap and fire one
 *  single trigger function.
 *
 *  Frequently, this will be fired many times in a row for triggers of
 *  a single relation.  Therefore, we cache the open relation and provide
 *  fmgr lookup cache space at the caller level.  (For triggers fired at
 *  the end of a query, we can even piggyback on the executor's state.)
 *
 *  When fired for a cross-partition update of a partitioned table, the old
 *  tuple is fetched using 'src_relInfo' (the source leaf partition) and
 *  the new tuple using 'dst_relInfo' (the destination leaf partition), though
 *  both are converted into the root partitioned table's format before passing
 *  to the trigger function.
 *
 *  event: event currently being fired.
 *  relInfo: result relation for event.
 *  src_relInfo: source partition of a cross-partition update
 *  dst_relInfo: its destination partition
 *  trigdesc: working copy of rel's trigger info.
 *  finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
 *  instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
 *      or NULL if no instrumentation is wanted.
 *  per_tuple_context: memory context to call trigger function in.
 *  trig_tuple_slot1: scratch slot for tg_trigtuple (foreign tables only)
 *  trig_tuple_slot2: scratch slot for tg_newtuple (foreign tables only)
 * ----------
 */
static void
AfterTriggerExecute(EState *estate,
                    AfterTriggerEvent event,
                    ResultRelInfo *relInfo,
                    ResultRelInfo *src_relInfo,
                    ResultRelInfo *dst_relInfo,
                    TriggerDesc *trigdesc,
                    FmgrInfo *finfo, Instrumentation *instr,
                    MemoryContext per_tuple_context,
                    TupleTableSlot *trig_tuple_slot1,
                    TupleTableSlot *trig_tuple_slot2)
{
    Relation    rel = relInfo->ri_RelationDesc;
    Relation    src_rel = src_relInfo->ri_RelationDesc;
    Relation    dst_rel = dst_relInfo->ri_RelationDesc;
    AfterTriggerShared evtshared = GetTriggerSharedData(event);
    Oid         tgoid = evtshared->ats_tgoid;
    TriggerData LocTriggerData = {0};
    HeapTuple   rettuple;
    int         tgindx;
    bool        should_free_trig = false;
    bool        should_free_new = false;

    /*
     * Locate trigger in trigdesc.  (tgindx also indexes the parallel finfo
     * and instr arrays.)
     */
    for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++)
    {
        if (trigdesc->triggers[tgindx].tgoid == tgoid)
        {
            LocTriggerData.tg_trigger = &(trigdesc->triggers[tgindx]);
            break;
        }
    }
    if (LocTriggerData.tg_trigger == NULL)
        elog(ERROR, "could not find trigger %u", tgoid);

    /*
     * If doing EXPLAIN ANALYZE, start charging time to this trigger.  We want
     * to include time spent re-fetching tuples in the trigger cost.
     */
    if (instr)
        InstrStartNode(instr + tgindx);

    /*
     * Fetch the required tuple(s).
     */
    switch (event->ate_flags & AFTER_TRIGGER_TUP_BITS)
    {
        case AFTER_TRIGGER_FDW_FETCH:
            {
                Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();

                if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
                                             trig_tuple_slot1))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

                if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                    TRIGGER_EVENT_UPDATE &&
                    !tuplestore_gettupleslot(fdw_tuplestore, true, false,
                                             trig_tuple_slot2))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
            }
            /* fall through */
        case AFTER_TRIGGER_FDW_REUSE:

            /*
             * Store tuple in the slot so that tg_trigtuple does not reference
             * tuplestore memory.  (It is formally possible for the trigger
             * function to queue trigger events that add to the same
             * tuplestore, which can push other tuples out of memory.)  The
             * distinction is academic, because we start with a minimal tuple
             * that is stored as a heap tuple, constructed in different memory
             * context, in the slot anyway.
             */
            LocTriggerData.tg_trigslot = trig_tuple_slot1;
            LocTriggerData.tg_trigtuple =
                ExecFetchSlotHeapTuple(trig_tuple_slot1, true, &should_free_trig);

            if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                TRIGGER_EVENT_UPDATE)
            {
                LocTriggerData.tg_newslot = trig_tuple_slot2;
                LocTriggerData.tg_newtuple =
                    ExecFetchSlotHeapTuple(trig_tuple_slot2, true, &should_free_new);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
            }
            break;

        default:
            if (ItemPointerIsValid(&(event->ate_ctid1)))
            {
                TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
                                                                 src_relInfo);

                if (!table_tuple_fetch_row_version(src_rel,
                                                   &(event->ate_ctid1),
                                                   SnapshotAny,
                                                   src_slot))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");

                /*
                 * Store the tuple fetched from the source partition into the
                 * target (root partitioned) table slot, converting if needed.
                 */
                if (src_relInfo != relInfo)
                {
                    TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo);

                    LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
                    if (map)
                    {
                        execute_attr_map_slot(map->attrMap,
                                              src_slot,
                                              LocTriggerData.tg_trigslot);
                    }
                    else
                        ExecCopySlot(LocTriggerData.tg_trigslot, src_slot);
                }
                else
                    LocTriggerData.tg_trigslot = src_slot;
                LocTriggerData.tg_trigtuple =
                    ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
            }
            else
            {
                LocTriggerData.tg_trigtuple = NULL;
            }

            /* don't touch ctid2 if not there */
            if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
                 (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
                ItemPointerIsValid(&(event->ate_ctid2)))
            {
                TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
                                                                 dst_relInfo);

                if (!table_tuple_fetch_row_version(dst_rel,
                                                   &(event->ate_ctid2),
                                                   SnapshotAny,
                                                   dst_slot))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");

                /*
                 * Store the tuple fetched from the destination partition into
                 * the target (root partitioned) table slot, converting if
                 * needed.
                 */
                if (dst_relInfo != relInfo)
                {
                    TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo);

                    LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
                    if (map)
                    {
                        execute_attr_map_slot(map->attrMap,
                                              dst_slot,
                                              LocTriggerData.tg_newslot);
                    }
                    else
                        ExecCopySlot(LocTriggerData.tg_newslot, dst_slot);
                }
                else
                    LocTriggerData.tg_newslot = dst_slot;
                LocTriggerData.tg_newtuple =
                    ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
            }
    }

    /*
     * Set up the tuplestore information to let the trigger have access to
     * transition tables.  When we first make a transition table available to
     * a trigger, mark it "closed" so that it cannot change anymore.  If any
     * additional events of the same type get queued in the current trigger
     * query level, they'll go into new transition tables.
     */
    LocTriggerData.tg_oldtable = LocTriggerData.tg_newtable = NULL;
    if (evtshared->ats_table)
    {
        if (LocTriggerData.tg_trigger->tgoldtable)
        {
            if (TRIGGER_FIRED_BY_UPDATE(evtshared->ats_event))
                LocTriggerData.tg_oldtable = evtshared->ats_table->old_upd_tuplestore;
            else
                LocTriggerData.tg_oldtable = evtshared->ats_table->old_del_tuplestore;
            evtshared->ats_table->closed = true;
        }

        if (LocTriggerData.tg_trigger->tgnewtable)
        {
            if (TRIGGER_FIRED_BY_INSERT(evtshared->ats_event))
                LocTriggerData.tg_newtable = evtshared->ats_table->new_ins_tuplestore;
            else
                LocTriggerData.tg_newtable = evtshared->ats_table->new_upd_tuplestore;
            evtshared->ats_table->closed = true;
        }
    }

    /*
     * Setup the remaining trigger information
     */
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event =
        evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
    LocTriggerData.tg_relation = rel;
    if (TRIGGER_FOR_UPDATE(LocTriggerData.tg_trigger->tgtype))
        LocTriggerData.tg_updatedcols = evtshared->ats_modifiedcols;

    MemoryContextReset(per_tuple_context);

    /*
     * Call the trigger and throw away any possibly returned updated tuple.
     * (Don't let ExecCallTriggerFunc measure EXPLAIN time.)
     */
    rettuple = ExecCallTriggerFunc(&LocTriggerData,
                                   tgindx,
                                   finfo,
                                   NULL,
                                   per_tuple_context);
    if (rettuple != NULL &&
        rettuple != LocTriggerData.tg_trigtuple &&
        rettuple != LocTriggerData.tg_newtuple)
        heap_freetuple(rettuple);

    /*
     * Release resources
     */
    if (should_free_trig)
        heap_freetuple(LocTriggerData.tg_trigtuple);
    if (should_free_new)
        heap_freetuple(LocTriggerData.tg_newtuple);

    /* don't clear slots' contents if foreign table */
    if (trig_tuple_slot1 == NULL)
    {
        if (LocTriggerData.tg_trigslot)
            ExecClearTuple(LocTriggerData.tg_trigslot);
        if (LocTriggerData.tg_newslot)
            ExecClearTuple(LocTriggerData.tg_newslot);
    }

    /*
     * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
     * one "tuple returned" (really the number of firings).
     */
    if (instr)
        InstrStopNode(instr + tgindx, 1);
}


/*
 * afterTriggerMarkEvents()
 *
 *  Scan the given event list for not yet invoked events.  Mark the ones
 *  that can be invoked now with the current firing ID.
 *
 *  If move_list isn't NULL, events that are not to be invoked now are
 *  transferred to move_list.
 *
 *  When immediate_only is true, do not invoke currently-deferred triggers.
 *  (This will be false only at main transaction exit.)
 *
 *  Returns true if any invokable events were found.
 */
static bool
afterTriggerMarkEvents(AfterTriggerEventList *events,
                       AfterTriggerEventList *move_list,
                       bool immediate_only)
{
    bool        found = false;
    bool        deferred_found = false;
    AfterTriggerEvent event;
    AfterTriggerEventChunk *chunk;

    for_each_event_chunk(event, chunk, *events)
    {
        AfterTriggerShared evtshared = GetTriggerSharedData(event);
        bool        defer_it = false;

        if (!(event->ate_flags &
              (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
        {
            /*
             * This trigger hasn't been called or scheduled yet. Check if we
             * should call it now.
             */
            if (immediate_only && afterTriggerCheckState(evtshared))
            {
                defer_it = true;
            }
            else
            {
                /*
                 * Mark it as to be fired in this firing cycle.
                 */
                evtshared->ats_firing_id = afterTriggers.firing_counter;
                event->ate_flags |= AFTER_TRIGGER_IN_PROGRESS;
                found = true;
            }
        }

        /*
         * If it's deferred, move it to move_list, if requested.
         */
        if (defer_it && move_list != NULL)
        {
            deferred_found = true;
            /* add it to move_list */
            afterTriggerAddEvent(move_list, event, evtshared);
            /* mark original copy "done" so we don't do it again */
            event->ate_flags |= AFTER_TRIGGER_DONE;
        }
    }

    /*
     * We could allow deferred triggers if, before the end of the
     * security-restricted operation, we were to verify that a SET CONSTRAINTS
     * ... IMMEDIATE has fired all such triggers.  For now, don't bother.
     */
    if (deferred_found && InSecurityRestrictedOperation())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("cannot fire deferred trigger within security-restricted operation")));

    return found;
}

/*
 * afterTriggerInvokeEvents()
 *
 *  Scan the given event list for events that are marked as to be fired
 *  in the current firing cycle, and fire them.
 *
 *  If estate isn't NULL, we use its result relation info to avoid repeated
 *  openings and closing of trigger target relations.  If it is NULL, we
 *  make one locally to cache the info in case there are multiple trigger
 *  events per rel.
 *
 *  When delete_ok is true, it's safe to delete fully-processed events.
 *  (We are not very tense about that: we simply reset a chunk to be empty
 *  if all its events got fired.  The objective here is just to avoid useless
 *  rescanning of events when a trigger queues new events during transaction
 *  end, so it's not necessary to worry much about the case where only
 *  some events are fired.)
 *
 *  Returns true if no unfired events remain in the list (this allows us
 *  to avoid repeating afterTriggerMarkEvents).
 */
static bool
afterTriggerInvokeEvents(AfterTriggerEventList *events,
                         CommandId firing_id,
                         EState *estate,
                         bool delete_ok)
{
    bool        all_fired = true;
    AfterTriggerEventChunk *chunk;
    MemoryContext per_tuple_context;
    bool        local_estate = false;
    ResultRelInfo *rInfo = NULL;
    Relation    rel = NULL;
    TriggerDesc *trigdesc = NULL;
    FmgrInfo   *finfo = NULL;
    Instrumentation *instr = NULL;
    /* scratch slots, created only for foreign-table relations */
    TupleTableSlot *slot1 = NULL,
               *slot2 = NULL;

    /* Make a local EState if need be */
    if (estate == NULL)
    {
        estate = CreateExecutorState();
        local_estate = true;
    }

    /* Make a per-tuple memory context for trigger function calls */
    per_tuple_context =
        AllocSetContextCreate(CurrentMemoryContext,
                              "AfterTriggerTupleContext",
                              ALLOCSET_DEFAULT_SIZES);

    for_each_chunk(chunk, *events)
    {
        AfterTriggerEvent event;
        bool        all_fired_in_chunk = true;

        for_each_event(event, chunk)
        {
            AfterTriggerShared evtshared = GetTriggerSharedData(event);

            /*
             * Is it one for me to fire?
             */
            if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
                evtshared->ats_firing_id == firing_id)
            {
                ResultRelInfo *src_rInfo,
                           *dst_rInfo;

                /*
                 * So let's fire it... but first, find the correct relation if
                 * this is not the same relation as before.
                 */
                if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
                {
                    rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid,
                                                    NULL);
                    rel = rInfo->ri_RelationDesc;
                    /* Catch calls with insufficient relcache refcounting */
                    Assert(!RelationHasReferenceCountZero(rel));
                    trigdesc = rInfo->ri_TrigDesc;
                    finfo = rInfo->ri_TrigFunctions;
                    instr = rInfo->ri_TrigInstrument;
                    if (slot1 != NULL)
                    {
                        ExecDropSingleTupleTableSlot(slot1);
                        ExecDropSingleTupleTableSlot(slot2);
                        slot1 = slot2 = NULL;
                    }
                    if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
                    {
                        slot1 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                        slot2 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                    }
                    if (trigdesc == NULL)   /* should not happen */
                        elog(ERROR, "relation %u has no triggers",
                             evtshared->ats_relid);
                }

                /*
                 * Look up source and destination partition result rels of a
                 * cross-partition update event.
                 */
                if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
                    AFTER_TRIGGER_CP_UPDATE)
                {
                    Assert(OidIsValid(event->ate_src_part) &&
                           OidIsValid(event->ate_dst_part));
                    src_rInfo = ExecGetTriggerResultRel(estate,
                                                        event->ate_src_part,
                                                        rInfo);
                    dst_rInfo = ExecGetTriggerResultRel(estate,
                                                        event->ate_dst_part,
                                                        rInfo);
                }
                else
                    src_rInfo = dst_rInfo = rInfo;

                /*
                 * Fire it.  Note that the AFTER_TRIGGER_IN_PROGRESS flag is
                 * still set, so recursive examinations of the event list
                 * won't try to re-fire it.
                 */
                AfterTriggerExecute(estate, event, rInfo,
                                    src_rInfo, dst_rInfo,
                                    trigdesc, finfo, instr,
                                    per_tuple_context, slot1, slot2);

                /*
                 * Mark the event as done.
                 */
                event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
                event->ate_flags |= AFTER_TRIGGER_DONE;
            }
            else if (!(event->ate_flags & AFTER_TRIGGER_DONE))
            {
                /* something remains to be done */
                all_fired = all_fired_in_chunk = false;
            }
        }

        /* Clear the chunk if delete_ok and nothing left of interest */
        if (delete_ok && all_fired_in_chunk)
        {
            chunk->freeptr = CHUNK_DATA_START(chunk);
            chunk->endfree = chunk->endptr;

            /*
             * If it's last chunk, must sync event list's tailfree too.  Note
             * that delete_ok must NOT be passed as true if there could be
             * additional AfterTriggerEventList values pointing at this event
             * list, since we'd fail to fix their copies of tailfree.
             */
            if (chunk == events->tail)
                events->tailfree = chunk->freeptr;
        }
    }
    if (slot1 != NULL)
    {
        ExecDropSingleTupleTableSlot(slot1);
        ExecDropSingleTupleTableSlot(slot2);
    }

    /* Release working resources */
    MemoryContextDelete(per_tuple_context);

    if (local_estate)
    {
        ExecCloseResultRelations(estate);
        ExecResetTupleTable(estate->es_tupleTable, false);
        FreeExecutorState(estate);
    }

    return all_fired;
}


/*
 * GetAfterTriggersTableData
 *
 *  Find or create an AfterTriggersTableData struct for the specified
 *  trigger event (relation + operation type).  Ignore existing structs
 *  marked "closed"; we don't want to put any additional tuples into them,
 *  nor change their stmt-triggers-fired state.
 *
 *  Note: the AfterTriggersTableData list is allocated in the current
 *  (sub)transaction's CurTransactionContext.  This is OK because
 *  we don't need it to live past AfterTriggerEndQuery.
 */
static AfterTriggersTableData *
GetAfterTriggersTableData(Oid relid, CmdType cmdType)
{
    AfterTriggersTableData *table;
    AfterTriggersQueryData *qs;
    MemoryContext oldcxt;
    ListCell   *lc;

    /* Caller should have ensured query_depth is OK. */
    Assert(afterTriggers.query_depth >= 0 &&
           afterTriggers.query_depth < afterTriggers.maxquerydepth);
    qs = &afterTriggers.query_stack[afterTriggers.query_depth];

    foreach(lc, qs->tables)
    {
        table = (AfterTriggersTableData *) lfirst(lc);
        if (table->relid == relid && table->cmdType == cmdType &&
            !table->closed)
            return table;
    }

    oldcxt = MemoryContextSwitchTo(CurTransactionContext);

    table = (AfterTriggersTableData *) palloc0(sizeof(AfterTriggersTableData));
    table->relid = relid;
    table->cmdType = cmdType;
    qs->tables = lappend(qs->tables, table);

    MemoryContextSwitchTo(oldcxt);

    return table;
}

/*
 * Returns a TupleTableSlot suitable for holding the tuples to be put
 * into AfterTriggersTableData's transition table tuplestores.
 */
static TupleTableSlot *
GetAfterTriggersStoreSlot(AfterTriggersTableData *table,
                          TupleDesc tupdesc)
{
    /* Create it if not already done. */
    if (!table->storeslot)
    {
        MemoryContext oldcxt;

        /*
         * We need this slot only until AfterTriggerEndQuery, but making it
         * last till end-of-subxact is good enough.  It'll be freed by
         * AfterTriggerFreeQuery().  However, the passed-in tupdesc might have
         * a different lifespan, so we'd better make a copy of that.
         */
        oldcxt = MemoryContextSwitchTo(CurTransactionContext);
        tupdesc = CreateTupleDescCopy(tupdesc);
        table->storeslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
        MemoryContextSwitchTo(oldcxt);
    }

    return table->storeslot;
}

/*
 * MakeTransitionCaptureState
 *
 *  Make a TransitionCaptureState object for the given TriggerDesc, target
 *  relation, and operation type.  The TCS object holds all the state needed
 *  to decide whether to capture tuples in transition tables.
 *
 *  If there are no triggers in 'trigdesc' that request relevant transition
 *  tables, then return NULL.
 *
 *  The resulting object can be passed to the ExecAR* functions.
 *  When dealing with child tables, the caller can set
 *  tcs_original_insert_tuple to avoid having to reconstruct the original
 *  tuple in the root table's format.
 *
 *  Note that we copy the flags from a parent table into this struct (rather
 *  than subsequently using the relation's TriggerDesc directly) so that we can
 *  use it to control collection of transition tuples from child tables.
 *
 *  Per SQL spec, all operations of the same kind (INSERT/UPDATE/DELETE)
 *  on the same table during one query should share one transition table.
 *  Therefore, the Tuplestores are owned by an AfterTriggersTableData struct
 *  looked up using the table OID + CmdType, and are merely referenced by
 *  the TransitionCaptureState objects we hand out to callers.
 */
TransitionCaptureState *
MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType)
{
    TransitionCaptureState *state;
    bool        need_old_upd,
                need_new_upd,
                need_old_del,
                need_new_ins;
    AfterTriggersTableData *table;
    MemoryContext oldcxt;
    ResourceOwner saveResourceOwner;

    if (trigdesc == NULL)
        return NULL;

    /* Detect which table(s) we need. */
    switch (cmdType)
    {
        case CMD_INSERT:
            need_old_upd = need_old_del = need_new_upd = false;
            need_new_ins = trigdesc->trig_insert_new_table;
            break;
        case CMD_UPDATE:
            need_old_upd = trigdesc->trig_update_old_table;
            need_new_upd = trigdesc->trig_update_new_table;
            need_old_del = need_new_ins = false;
            break;
        case CMD_DELETE:
            need_old_del = trigdesc->trig_delete_old_table;
            need_old_upd = need_new_upd = need_new_ins = false;
            break;
        case CMD_MERGE:
            /* MERGE may run any of the three actions in one statement */
            need_old_upd = trigdesc->trig_update_old_table;
            need_new_upd = trigdesc->trig_update_new_table;
            need_old_del = trigdesc->trig_delete_old_table;
            need_new_ins = trigdesc->trig_insert_new_table;
            break;
        default:
            elog(ERROR, "unexpected CmdType: %d", (int) cmdType);
            /* keep compiler quiet */
            need_old_upd = need_new_upd = need_old_del = need_new_ins = false;
            break;
    }
    if (!need_old_upd && !need_new_upd && !need_new_ins && !need_old_del)
        return NULL;

    /* Check state, like AfterTriggerSaveEvent. */
    if (afterTriggers.query_depth < 0)
        elog(ERROR, "MakeTransitionCaptureState() called outside of query");

    /* Be sure we have enough space to record events at this query depth. */
    if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
        AfterTriggerEnlargeQueryState();

    /*
     * Find or create an AfterTriggersTableData struct to hold the
     * tuplestore(s).  If there's a matching struct but it's marked closed,
     * ignore it; we need a newer one.
     *
     * Note: the AfterTriggersTableData list, as well as the tuplestores, are
     * allocated in the current (sub)transaction's CurTransactionContext, and
     * the tuplestores are managed by the (sub)transaction's resource owner.
     * This is sufficient lifespan because we do not allow triggers using
     * transition tables to be deferrable; they will be fired during
     * AfterTriggerEndQuery, after which it's okay to delete the data.
     */
    table = GetAfterTriggersTableData(relid, cmdType);

    /* Now create required tuplestore(s), if we don't have them already. */
    oldcxt = MemoryContextSwitchTo(CurTransactionContext);
    saveResourceOwner = CurrentResourceOwner;
    CurrentResourceOwner = CurTransactionResourceOwner;

    if (need_old_upd && table->old_upd_tuplestore == NULL)
        table->old_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_new_upd && table->new_upd_tuplestore == NULL)
        table->new_upd_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_old_del && table->old_del_tuplestore == NULL)
        table->old_del_tuplestore = tuplestore_begin_heap(false, false, work_mem);
    if (need_new_ins && table->new_ins_tuplestore == NULL)
        table->new_ins_tuplestore = tuplestore_begin_heap(false, false, work_mem);

    CurrentResourceOwner = saveResourceOwner;
    MemoryContextSwitchTo(oldcxt);

    /* Now build the TransitionCaptureState struct, in caller's context */
    state = (TransitionCaptureState *) palloc0(sizeof(TransitionCaptureState));
    state->tcs_delete_old_table = trigdesc->trig_delete_old_table;
    state->tcs_update_old_table = trigdesc->trig_update_old_table;
    state->tcs_update_new_table = trigdesc->trig_update_new_table;
    state->tcs_insert_new_table = trigdesc->trig_insert_new_table;
    state->tcs_private = table;

    return state;
}


/* ----------
 * AfterTriggerBeginXact()
 *
 *  Called at transaction start (either BEGIN or implicit for single
 *  statement outside of transaction block).
 * ----------
 */
void
AfterTriggerBeginXact(void)
{
    /*
     * Initialize after-trigger state structure to empty
     */
    afterTriggers.firing_counter = (CommandId) 1;   /* mustn't be 0 */
    afterTriggers.query_depth = -1;

    /*
     * Verify that there is no leftover state remaining.  If these assertions
     * trip, it means that AfterTriggerEndXact wasn't called or didn't clean
     * up properly.
     */
    Assert(afterTriggers.state == NULL);
    Assert(afterTriggers.query_stack == NULL);
    Assert(afterTriggers.maxquerydepth == 0);
    Assert(afterTriggers.event_cxt == NULL);
    Assert(afterTriggers.events.head == NULL);
    Assert(afterTriggers.trans_stack == NULL);
    Assert(afterTriggers.maxtransdepth == 0);
}


/* ----------
 * AfterTriggerBeginQuery()
 *
 *  Called just before we start processing a single query within a
 *  transaction (or subtransaction).  Most of the real work gets deferred
 *  until somebody actually tries to queue a trigger event.
 * ----------
 */
void
AfterTriggerBeginQuery(void)
{
    /* Increase the query stack depth */
    afterTriggers.query_depth++;
}


/* ----------
 * AfterTriggerEndQuery()
 *
 *  Called after one query has been completely processed.  At this time
 *  we invoke all AFTER IMMEDIATE trigger events queued by the query, and
 *  transfer deferred trigger events to the global deferred-trigger list.
 *
 *  Note that this must be called BEFORE closing down the executor
 *  with ExecutorEnd, because we make use of the EState's info about
 *  target relations.  Normally it is called from ExecutorFinish.
 * ----------
 */
void
AfterTriggerEndQuery(EState *estate)
{
    AfterTriggersQueryData *qs;

    /* Must be inside a query, too */
    Assert(afterTriggers.query_depth >= 0);

    /*
     * If we never even got as far as initializing the event stack, there
     * certainly won't be any events, so exit quickly.
     */
    if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
    {
        afterTriggers.query_depth--;
        return;
    }

    /*
     * Process all immediate-mode triggers queued by the query, and move the
     * deferred ones to the main list of deferred events.
     *
     * Notice that we decide which ones will be fired, and put the deferred
     * ones on the main list, before anything is actually fired.  This ensures
     * reasonably sane behavior if a trigger function does SET CONSTRAINTS ...
	 * IMMEDIATE: all events we have decided to defer will be available for it
	 * to fire.
	 *
	 * We loop in case a trigger queues more events at the same query level.
	 * Ordinary trigger functions, including all PL/pgSQL trigger functions,
	 * will instead fire any triggers in a dedicated query level.  Foreign key
	 * enforcement triggers do add to the current query level, thanks to their
	 * passing fire_triggers = false to SPI_execute_snapshot().  Other
	 * C-language triggers might do likewise.
	 *
	 * If we find no firable events, we don't have to increment
	 * firing_counter.
	 */
	qs = &afterTriggers.query_stack[afterTriggers.query_depth];

	for (;;)
	{
		if (afterTriggerMarkEvents(&qs->events, &afterTriggers.events, true))
		{
			CommandId	firing_id = afterTriggers.firing_counter++;
			AfterTriggerEventChunk *oldtail = qs->events.tail;

			if (afterTriggerInvokeEvents(&qs->events, firing_id, estate, false))
				break;			/* all fired */

			/*
			 * Firing a trigger could result in query_stack being repalloc'd,
			 * so we must recalculate qs after each afterTriggerInvokeEvents
			 * call.  Furthermore, it's unsafe to pass delete_ok = true here,
			 * because that could cause afterTriggerInvokeEvents to try to
			 * access qs->events after the stack has been repalloc'd.
			 */
			qs = &afterTriggers.query_stack[afterTriggers.query_depth];

			/*
			 * We'll need to scan the events list again.  To reduce the cost
			 * of doing so, get rid of completely-fired chunks.  We know that
			 * all events were marked IN_PROGRESS or DONE at the conclusion of
			 * afterTriggerMarkEvents, so any still-interesting events must
			 * have been added after that, and so must be in the chunk that
			 * was then the tail chunk, or in later chunks.  So, zap all
			 * chunks before oldtail.  This is approximately the same set of
			 * events we would have gotten rid of by passing delete_ok = true.
			 */
			Assert(oldtail != NULL);
			while (qs->events.head != oldtail)
				afterTriggerDeleteHeadEventChunk(qs);
		}
		else
			break;
	}

	/* Release query-level-local storage, including tuplestores if any */
	AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);

	afterTriggers.query_depth--;
}


/*
 * AfterTriggerFreeQuery
 *	Release subsidiary storage for a trigger query level.
 *	This includes closing down tuplestores.
 *	Note: it's important for this to be safe if interrupted by an error
 *	and then called again for the same query level.
 *
 * To achieve that error-reentrancy, each pointer is cleared in the struct
 * BEFORE the resource it references is released; a repeat call then simply
 * sees NULL and skips the already-freed resource.
 */
static void
AfterTriggerFreeQuery(AfterTriggersQueryData *qs)
{
	Tuplestorestate *ts;
	List	   *tables;
	ListCell   *lc;

	/* Drop the trigger events */
	afterTriggerFreeEventList(&qs->events);

	/* Drop FDW tuplestore if any */
	ts = qs->fdw_tuplestore;
	qs->fdw_tuplestore = NULL;
	if (ts)
		tuplestore_end(ts);

	/* Release per-table subsidiary storage */
	tables = qs->tables;
	foreach(lc, tables)
	{
		AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);

		ts = table->old_upd_tuplestore;
		table->old_upd_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->new_upd_tuplestore;
		table->new_upd_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->old_del_tuplestore;
		table->old_del_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		ts = table->new_ins_tuplestore;
		table->new_ins_tuplestore = NULL;
		if (ts)
			tuplestore_end(ts);
		if (table->storeslot)
		{
			TupleTableSlot *slot = table->storeslot;

			table->storeslot = NULL;
			ExecDropSingleTupleTableSlot(slot);
		}
	}

	/*
	 * Now free the AfterTriggersTableData structs and list cells.  Reset list
	 * pointer first; if list_free_deep somehow gets an error, better to leak
	 * that storage than have an infinite loop.
	 */
	qs->tables = NIL;
	list_free_deep(tables);
}


/* ----------
 * AfterTriggerFireDeferred()
 *
 *	Called just before the current transaction is committed. At this
 *	time we invoke all pending DEFERRED triggers.
 *
 *	It is possible for other modules to queue additional deferred triggers
 *	during pre-commit processing; therefore xact.c may have to call this
 *	multiple times.
 * ----------
 */
void
AfterTriggerFireDeferred(void)
{
	AfterTriggerEventList *events;
	bool		snap_pushed = false;

	/* Must not be inside a query */
	Assert(afterTriggers.query_depth == -1);

	/*
	 * If there are any triggers to fire, make sure we have set a snapshot for
	 * them to use.  (Since PortalRunUtility doesn't set a snap for COMMIT, we
	 * can't assume ActiveSnapshot is valid on entry.)
	 */
	events = &afterTriggers.events;
	if (events->head != NULL)
	{
		PushActiveSnapshot(GetTransactionSnapshot());
		snap_pushed = true;
	}

	/*
	 * Run all the remaining triggers.  Loop until they are all gone, in case
	 * some trigger queues more for us to do.
	 */
	while (afterTriggerMarkEvents(events, NULL, false))
	{
		CommandId	firing_id = afterTriggers.firing_counter++;

		if (afterTriggerInvokeEvents(events, firing_id, NULL, true))
			break;				/* all fired */
	}

	/*
	 * We don't bother freeing the event list, since it will go away anyway
	 * (and more efficiently than via pfree) in AfterTriggerEndXact.
	 */

	if (snap_pushed)
		PopActiveSnapshot();
}


/* ----------
 * AfterTriggerEndXact()
 *
 *	The current transaction is finishing.
 *
 *	Any unfired triggers are canceled so we simply throw
 *	away anything we know.
 *
 *	Note: it is possible for this to be called repeatedly in case of
 *	error during transaction abort; therefore, do not complain if
 *	already closed down.
 * ----------
 */
void
AfterTriggerEndXact(bool isCommit)
{
	/*
	 * Forget the pending-events list.
	 *
	 * Since all the info is in TopTransactionContext or children thereof, we
	 * don't really need to do anything to reclaim memory.  However, the
	 * pending-events list could be large, and so it's useful to discard it as
	 * soon as possible --- especially if we are aborting because we ran out
	 * of memory for the list!
	 */
	if (afterTriggers.event_cxt)
	{
		MemoryContextDelete(afterTriggers.event_cxt);
		afterTriggers.event_cxt = NULL;
		afterTriggers.events.head = NULL;
		afterTriggers.events.tail = NULL;
		afterTriggers.events.tailfree = NULL;
	}

	/*
	 * Forget any subtransaction state as well.  Since this can't be very
	 * large, we let the eventual reset of TopTransactionContext free the
	 * memory instead of doing it here.
	 */
	afterTriggers.trans_stack = NULL;
	afterTriggers.maxtransdepth = 0;


	/*
	 * Forget the query stack and constraint-related state information.  As
	 * with the subtransaction state information, we don't bother freeing the
	 * memory here.
	 */
	afterTriggers.query_stack = NULL;
	afterTriggers.maxquerydepth = 0;
	afterTriggers.state = NULL;

	/* No more afterTriggers manipulation until next transaction starts. */
	afterTriggers.query_depth = -1;
}

/*
 * AfterTriggerBeginSubXact()
 *
 *	Start a subtransaction.
 */
void
AfterTriggerBeginSubXact(void)
{
	int			my_level = GetCurrentTransactionNestLevel();

	/*
	 * Allocate more space in the trans_stack if needed.  (Note: because the
	 * minimum nest level of a subtransaction is 2, we waste the first couple
	 * entries of the array; not worth the notational effort to avoid it.)
	 */
	while (my_level >= afterTriggers.maxtransdepth)
	{
		if (afterTriggers.maxtransdepth == 0)
		{
			/* Arbitrarily initialize for max of 8 subtransaction levels */
			afterTriggers.trans_stack = (AfterTriggersTransData *)
				MemoryContextAlloc(TopTransactionContext,
								   8 * sizeof(AfterTriggersTransData));
			afterTriggers.maxtransdepth = 8;
		}
		else
		{
			/* repalloc will keep the stack in the same context */
			int			new_alloc = afterTriggers.maxtransdepth * 2;

			afterTriggers.trans_stack = (AfterTriggersTransData *)
				repalloc(afterTriggers.trans_stack,
						 new_alloc * sizeof(AfterTriggersTransData));
			afterTriggers.maxtransdepth = new_alloc;
		}
	}

	/*
	 * Push the current information into the stack.  The SET CONSTRAINTS state
	 * is not saved until/unless changed.  Likewise, we don't make a
	 * per-subtransaction event context until needed.
	 */
	afterTriggers.trans_stack[my_level].state = NULL;
	afterTriggers.trans_stack[my_level].events = afterTriggers.events;
	afterTriggers.trans_stack[my_level].query_depth = afterTriggers.query_depth;
	afterTriggers.trans_stack[my_level].firing_counter = afterTriggers.firing_counter;
}

/*
 * AfterTriggerEndSubXact()
 *
 *	The current subtransaction is ending.  On commit, simply discard the
 *	saved state; on abort, restore the trigger machinery to what it was
 *	when AfterTriggerBeginSubXact ran for this nest level.
 */
void
AfterTriggerEndSubXact(bool isCommit)
{
	int			my_level = GetCurrentTransactionNestLevel();
	SetConstraintState state;
	AfterTriggerEvent event;
	AfterTriggerEventChunk *chunk;
	CommandId	subxact_firing_id;

	/*
	 * Pop the prior state if needed.
	 */
	if (isCommit)
	{
		Assert(my_level < afterTriggers.maxtransdepth);
		/* If we saved a prior state, we don't need it anymore */
		state = afterTriggers.trans_stack[my_level].state;
		if (state != NULL)
			pfree(state);
		/* this avoids double pfree if error later: */
		afterTriggers.trans_stack[my_level].state = NULL;
		Assert(afterTriggers.query_depth ==
			   afterTriggers.trans_stack[my_level].query_depth);
	}
	else
	{
		/*
		 * Aborting.  It is possible subxact start failed before calling
		 * AfterTriggerBeginSubXact, in which case we mustn't risk touching
		 * trans_stack levels that aren't there.
		 */
		if (my_level >= afterTriggers.maxtransdepth)
			return;

		/*
		 * Release query-level storage for queries being aborted, and restore
		 * query_depth to its pre-subxact value.  This assumes that a
		 * subtransaction will not add events to query levels started in a
		 * earlier transaction state.
		 */
		while (afterTriggers.query_depth > afterTriggers.trans_stack[my_level].query_depth)
		{
			if (afterTriggers.query_depth < afterTriggers.maxquerydepth)
				AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
			afterTriggers.query_depth--;
		}
		Assert(afterTriggers.query_depth ==
			   afterTriggers.trans_stack[my_level].query_depth);

		/*
		 * Restore the global deferred-event list to its former length,
		 * discarding any events queued by the subxact.
		 */
		afterTriggerRestoreEventList(&afterTriggers.events,
									 &afterTriggers.trans_stack[my_level].events);

		/*
		 * Restore the trigger state.  If the saved state is NULL, then this
		 * subxact didn't save it, so it doesn't need restoring.
		 */
		state = afterTriggers.trans_stack[my_level].state;
		if (state != NULL)
		{
			pfree(afterTriggers.state);
			afterTriggers.state = state;
		}
		/* this avoids double pfree if error later: */
		afterTriggers.trans_stack[my_level].state = NULL;

		/*
		 * Scan for any remaining deferred events that were marked DONE or IN
		 * PROGRESS by this subxact or a child, and un-mark them.  We can
		 * recognize such events because they have a firing ID greater than or
		 * equal to the firing_counter value we saved at subtransaction start.
		 * (This essentially assumes that the current subxact includes all
		 * subxacts started after it.)
		 */
		subxact_firing_id = afterTriggers.trans_stack[my_level].firing_counter;
		for_each_event_chunk(event, chunk, afterTriggers.events)
		{
			AfterTriggerShared evtshared = GetTriggerSharedData(event);

			if (event->ate_flags &
				(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
			{
				if (evtshared->ats_firing_id >= subxact_firing_id)
					event->ate_flags &=
						~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS);
			}
		}
	}
}

/*
 * Get the transition table for the given event and depending on whether we are
 * processing the old or the new tuple.
 */
static Tuplestorestate *
GetAfterTriggersTransitionTable(int event,
								TupleTableSlot *oldslot,
								TupleTableSlot *newslot,
								TransitionCaptureState *transition_capture)
{
	Tuplestorestate *tuplestore = NULL;
	bool		delete_old_table = transition_capture->tcs_delete_old_table;
	bool		update_old_table = transition_capture->tcs_update_old_table;
	bool		update_new_table = transition_capture->tcs_update_new_table;
	bool		insert_new_table = transition_capture->tcs_insert_new_table;

	/*
	 * For INSERT events NEW should be non-NULL, for DELETE events OLD should
	 * be non-NULL, whereas for UPDATE events normally both OLD and NEW are
	 * non-NULL.  But for UPDATE events fired for capturing transition tuples
	 * during UPDATE partition-key row movement, OLD is NULL when the event is
	 * for a row being inserted, whereas NEW is NULL when the event is for a
	 * row being deleted.
	 */
	Assert(!(event == TRIGGER_EVENT_DELETE && delete_old_table &&
			 TupIsNull(oldslot)));
	Assert(!(event == TRIGGER_EVENT_INSERT && insert_new_table &&
			 TupIsNull(newslot)));

	/*
	 * Exactly one of the slots is expected to be set per call; pick the
	 * tuplestore matching the event type and the side (OLD/NEW) supplied.
	 */
	if (!TupIsNull(oldslot))
	{
		Assert(TupIsNull(newslot));
		if (event == TRIGGER_EVENT_DELETE && delete_old_table)
			tuplestore = transition_capture->tcs_private->old_del_tuplestore;
		else if (event == TRIGGER_EVENT_UPDATE && update_old_table)
			tuplestore = transition_capture->tcs_private->old_upd_tuplestore;
	}
	else if (!TupIsNull(newslot))
	{
		Assert(TupIsNull(oldslot));
		if (event == TRIGGER_EVENT_INSERT && insert_new_table)
			tuplestore = transition_capture->tcs_private->new_ins_tuplestore;
		else if (event == TRIGGER_EVENT_UPDATE && update_new_table)
			tuplestore = transition_capture->tcs_private->new_upd_tuplestore;
	}

	/* May be NULL if this event/side combination isn't being captured. */
	return tuplestore;
}

/*
 * Add the given heap tuple to the given tuplestore, applying the conversion
 * map if necessary.
 *
 * If original_insert_tuple is given, we can add that tuple without conversion.
 */
static void
TransitionTableAddTuple(EState *estate,
						TransitionCaptureState *transition_capture,
						ResultRelInfo *relinfo,
						TupleTableSlot *slot,
						TupleTableSlot *original_insert_tuple,
						Tuplestorestate *tuplestore)
{
	TupleConversionMap *map;

	/*
	 * Nothing needs to be done if we don't have a tuplestore.
	 */
	if (tuplestore == NULL)
		return;

	if (original_insert_tuple)
		tuplestore_puttupleslot(tuplestore, original_insert_tuple);
	else if ((map = ExecGetChildToRootMap(relinfo)) != NULL)
	{
		/*
		 * The tuple is in a child table's format; convert it to the root
		 * table's rowtype before storing, using a per-table work slot.
		 */
		AfterTriggersTableData *table = transition_capture->tcs_private;
		TupleTableSlot *storeslot;

		storeslot = GetAfterTriggersStoreSlot(table, map->outdesc);
		execute_attr_map_slot(map->attrMap, slot, storeslot);
		tuplestore_puttupleslot(tuplestore, storeslot);
	}
	else
		tuplestore_puttupleslot(tuplestore, slot);
}

/* ----------
 * AfterTriggerEnlargeQueryState()
 *
 *	Prepare the necessary state so that we can record AFTER trigger events
 *	queued by a query.  It is allowed to have nested queries within a
 *	(sub)transaction, so we need to have separate state for each query
 *	nesting level.
 * ----------
 */
static void
AfterTriggerEnlargeQueryState(void)
{
	int			init_depth = afterTriggers.maxquerydepth;

	Assert(afterTriggers.query_depth >= afterTriggers.maxquerydepth);

	if (afterTriggers.maxquerydepth == 0)
	{
		int			new_alloc = Max(afterTriggers.query_depth + 1, 8);

		afterTriggers.query_stack = (AfterTriggersQueryData *)
			MemoryContextAlloc(TopTransactionContext,
							   new_alloc * sizeof(AfterTriggersQueryData));
		afterTriggers.maxquerydepth = new_alloc;
	}
	else
	{
		/* repalloc will keep the stack in the same context */
		int			old_alloc = afterTriggers.maxquerydepth;
		int			new_alloc = Max(afterTriggers.query_depth + 1,
									old_alloc * 2);

		afterTriggers.query_stack = (AfterTriggersQueryData *)
			repalloc(afterTriggers.query_stack,
					 new_alloc * sizeof(AfterTriggersQueryData));
		afterTriggers.maxquerydepth = new_alloc;
	}

	/* Initialize new array entries to empty */
	while (init_depth < afterTriggers.maxquerydepth)
	{
		AfterTriggersQueryData *qs = &afterTriggers.query_stack[init_depth];

		qs->events.head = NULL;
		qs->events.tail = NULL;
		qs->events.tailfree = NULL;
		qs->fdw_tuplestore = NULL;
		qs->tables = NIL;

++init_depth; + } +} + +/* + * Create an empty SetConstraintState with room for numalloc trigstates + */ +static SetConstraintState +SetConstraintStateCreate(int numalloc) +{ + SetConstraintState state; + + /* Behave sanely with numalloc == 0 */ + if (numalloc <= 0) + numalloc = 1; + + /* + * We assume that zeroing will correctly initialize the state values. + */ + state = (SetConstraintState) + MemoryContextAllocZero(TopTransactionContext, + offsetof(SetConstraintStateData, trigstates) + + numalloc * sizeof(SetConstraintTriggerData)); + + state->numalloc = numalloc; + + return state; +} + +/* + * Copy a SetConstraintState + */ +static SetConstraintState +SetConstraintStateCopy(SetConstraintState origstate) +{ + SetConstraintState state; + + state = SetConstraintStateCreate(origstate->numstates); + + state->all_isset = origstate->all_isset; + state->all_isdeferred = origstate->all_isdeferred; + state->numstates = origstate->numstates; + memcpy(state->trigstates, origstate->trigstates, + origstate->numstates * sizeof(SetConstraintTriggerData)); + + return state; +} + +/* + * Add a per-trigger item to a SetConstraintState. Returns possibly-changed + * pointer to the state object (it will change if we have to repalloc). + */ +static SetConstraintState +SetConstraintStateAddItem(SetConstraintState state, + Oid tgoid, bool tgisdeferred) +{ + if (state->numstates >= state->numalloc) + { + int newalloc = state->numalloc * 2; + + newalloc = Max(newalloc, 8); /* in case original has size 0 */ + state = (SetConstraintState) + repalloc(state, + offsetof(SetConstraintStateData, trigstates) + + newalloc * sizeof(SetConstraintTriggerData)); + state->numalloc = newalloc; + Assert(state->numstates < state->numalloc); + } + + state->trigstates[state->numstates].sct_tgoid = tgoid; + state->trigstates[state->numstates].sct_tgisdeferred = tgisdeferred; + state->numstates++; + + return state; +} + +/* ---------- + * AfterTriggerSetState() + * + * Execute the SET CONSTRAINTS ... 
utility command.
 * ----------
 */
void
AfterTriggerSetState(ConstraintsSetStmt *stmt)
{
	int			my_level = GetCurrentTransactionNestLevel();

	/* If we haven't already done so, initialize our state. */
	if (afterTriggers.state == NULL)
		afterTriggers.state = SetConstraintStateCreate(8);

	/*
	 * If in a subtransaction, and we didn't save the current state already,
	 * save it so it can be restored if the subtransaction aborts.
	 */
	if (my_level > 1 &&
		afterTriggers.trans_stack[my_level].state == NULL)
	{
		afterTriggers.trans_stack[my_level].state =
			SetConstraintStateCopy(afterTriggers.state);
	}

	/*
	 * Handle SET CONSTRAINTS ALL ...
	 */
	if (stmt->constraints == NIL)
	{
		/*
		 * Forget any previous SET CONSTRAINTS commands in this transaction.
		 */
		afterTriggers.state->numstates = 0;

		/*
		 * Set the per-transaction ALL state to known.
		 */
		afterTriggers.state->all_isset = true;
		afterTriggers.state->all_isdeferred = stmt->deferred;
	}
	else
	{
		Relation	conrel;
		Relation	tgrel;
		List	   *conoidlist = NIL;
		List	   *tgoidlist = NIL;
		ListCell   *lc;

		/*
		 * Handle SET CONSTRAINTS constraint-name [, ...]
		 *
		 * First, identify all the named constraints and make a list of their
		 * OIDs.  Since, unlike the SQL spec, we allow multiple constraints of
		 * the same name within a schema, the specifications are not
		 * necessarily unique.  Our strategy is to target all matching
		 * constraints within the first search-path schema that has any
		 * matches, but disregard matches in schemas beyond the first match.
		 * (This is a bit odd but it's the historical behavior.)
		 *
		 * A constraint in a partitioned table may have corresponding
		 * constraints in the partitions.  Grab those too.
		 */
		conrel = table_open(ConstraintRelationId, AccessShareLock);

		foreach(lc, stmt->constraints)
		{
			RangeVar   *constraint = lfirst(lc);
			bool		found;
			List	   *namespacelist;
			ListCell   *nslc;

			if (constraint->catalogname)
			{
				if (strcmp(constraint->catalogname, get_database_name(MyDatabaseId)) != 0)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cross-database references are not implemented: \"%s.%s.%s\"",
									constraint->catalogname, constraint->schemaname,
									constraint->relname)));
			}

			/*
			 * If we're given the schema name with the constraint, look only
			 * in that schema.  If given a bare constraint name, use the
			 * search path to find the first matching constraint.
			 */
			if (constraint->schemaname)
			{
				Oid			namespaceId = LookupExplicitNamespace(constraint->schemaname,
																  false);

				namespacelist = list_make1_oid(namespaceId);
			}
			else
			{
				namespacelist = fetch_search_path(true);
			}

			found = false;
			foreach(nslc, namespacelist)
			{
				Oid			namespaceId = lfirst_oid(nslc);
				SysScanDesc conscan;
				ScanKeyData skey[2];
				HeapTuple	tup;

				ScanKeyInit(&skey[0],
							Anum_pg_constraint_conname,
							BTEqualStrategyNumber, F_NAMEEQ,
							CStringGetDatum(constraint->relname));
				ScanKeyInit(&skey[1],
							Anum_pg_constraint_connamespace,
							BTEqualStrategyNumber, F_OIDEQ,
							ObjectIdGetDatum(namespaceId));

				conscan = systable_beginscan(conrel, ConstraintNameNspIndexId,
											 true, NULL, 2, skey);

				while (HeapTupleIsValid(tup = systable_getnext(conscan)))
				{
					Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tup);

					if (con->condeferrable)
						conoidlist = lappend_oid(conoidlist, con->oid);
					else if (stmt->deferred)
						ereport(ERROR,
								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
								 errmsg("constraint \"%s\" is not deferrable",
										constraint->relname)));
					found = true;
				}

				systable_endscan(conscan);

				/*
				 * Once we've found a matching constraint we do not search
				 * later parts of the search path.
				 */
				if (found)
					break;
			}

			list_free(namespacelist);

			/*
			 * Not found ?
			 */
			if (!found)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_OBJECT),
						 errmsg("constraint \"%s\" does not exist",
								constraint->relname)));
		}

		/*
		 * Scan for any possible descendants of the constraints.  We append
		 * whatever we find to the same list that we're scanning; this has the
		 * effect that we create new scans for those, too, so if there are
		 * further descendents, we'll also catch them.
		 */
		foreach(lc, conoidlist)
		{
			Oid			parent = lfirst_oid(lc);
			ScanKeyData key;
			SysScanDesc scan;
			HeapTuple	tuple;

			ScanKeyInit(&key,
						Anum_pg_constraint_conparentid,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(parent));

			scan = systable_beginscan(conrel, ConstraintParentIndexId, true, NULL, 1, &key);

			while (HeapTupleIsValid(tuple = systable_getnext(scan)))
			{
				Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);

				conoidlist = lappend_oid(conoidlist, con->oid);
			}

			systable_endscan(scan);
		}

		table_close(conrel, AccessShareLock);

		/*
		 * Now, locate the trigger(s) implementing each of these constraints,
		 * and make a list of their OIDs.
		 */
		tgrel = table_open(TriggerRelationId, AccessShareLock);

		foreach(lc, conoidlist)
		{
			Oid			conoid = lfirst_oid(lc);
			ScanKeyData skey;
			SysScanDesc tgscan;
			HeapTuple	htup;

			ScanKeyInit(&skey,
						Anum_pg_trigger_tgconstraint,
						BTEqualStrategyNumber, F_OIDEQ,
						ObjectIdGetDatum(conoid));

			tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true,
										NULL, 1, &skey);

			while (HeapTupleIsValid(htup = systable_getnext(tgscan)))
			{
				Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(htup);

				/*
				 * Silently skip triggers that are marked as non-deferrable in
				 * pg_trigger.  This is not an error condition, since a
				 * deferrable RI constraint may have some non-deferrable
				 * actions.
				 */
				if (pg_trigger->tgdeferrable)
					tgoidlist = lappend_oid(tgoidlist, pg_trigger->oid);
			}

			systable_endscan(tgscan);
		}

		table_close(tgrel, AccessShareLock);

		/*
		 * Now we can set the trigger states of individual triggers for this
		 * xact.
		 */
		foreach(lc, tgoidlist)
		{
			Oid			tgoid = lfirst_oid(lc);
			SetConstraintState state = afterTriggers.state;
			bool		found = false;
			int			i;

			/* Update in place if this trigger already has an entry. */
			for (i = 0; i < state->numstates; i++)
			{
				if (state->trigstates[i].sct_tgoid == tgoid)
				{
					state->trigstates[i].sct_tgisdeferred = stmt->deferred;
					found = true;
					break;
				}
			}
			if (!found)
			{
				afterTriggers.state =
					SetConstraintStateAddItem(state, tgoid, stmt->deferred);
			}
		}
	}

	/*
	 * SQL99 requires that when a constraint is set to IMMEDIATE, any deferred
	 * checks against that constraint must be made when the SET CONSTRAINTS
	 * command is executed -- i.e. the effects of the SET CONSTRAINTS command
	 * apply retroactively.  We've updated the constraints state, so scan the
	 * list of previously deferred events to fire any that have now become
	 * immediate.
	 *
	 * Obviously, if this was SET ... DEFERRED then it can't have converted
	 * any unfired events to immediate, so we need do nothing in that case.
	 */
	if (!stmt->deferred)
	{
		AfterTriggerEventList *events = &afterTriggers.events;
		bool		snapshot_set = false;

		while (afterTriggerMarkEvents(events, NULL, true))
		{
			CommandId	firing_id = afterTriggers.firing_counter++;

			/*
			 * Make sure a snapshot has been established in case trigger
			 * functions need one.  Note that we avoid setting a snapshot if
			 * we don't find at least one trigger that has to be fired now.
			 * This is so that BEGIN; SET CONSTRAINTS ...; SET TRANSACTION
			 * ISOLATION LEVEL SERIALIZABLE; ... works properly.  (If we are
			 * at the start of a transaction it's not possible for any trigger
			 * events to be queued yet.)
			 */
			if (!snapshot_set)
			{
				PushActiveSnapshot(GetTransactionSnapshot());
				snapshot_set = true;
			}

			/*
			 * We can delete fired events if we are at top transaction level,
			 * but we'd better not if inside a subtransaction, since the
			 * subtransaction could later get rolled back.
			 */
			if (afterTriggerInvokeEvents(events, firing_id, NULL,
										 !IsSubTransaction()))
				break;			/* all fired */
		}

		if (snapshot_set)
			PopActiveSnapshot();
	}
}

/* ----------
 * AfterTriggerPendingOnRel()
 *	Test to see if there are any pending after-trigger events for rel.
 *
 * This is used by TRUNCATE, CLUSTER, ALTER TABLE, etc to detect whether
 * it is unsafe to perform major surgery on a relation.  Note that only
 * local pending events are examined.  We assume that having exclusive lock
 * on a rel guarantees there are no unserviced events in other backends ---
 * but having a lock does not prevent there being such events in our own.
 *
 * In some scenarios it'd be reasonable to remove pending events (more
 * specifically, mark them DONE by the current subxact) but without a lot
 * of knowledge of the trigger semantics we can't do this in general.
 * ----------
 */
bool
AfterTriggerPendingOnRel(Oid relid)
{
	AfterTriggerEvent event;
	AfterTriggerEventChunk *chunk;
	int			depth;

	/* Scan queued events */
	for_each_event_chunk(event, chunk, afterTriggers.events)
	{
		AfterTriggerShared evtshared = GetTriggerSharedData(event);

		/*
		 * We can ignore completed events.  (Even if a DONE flag is rolled
		 * back by subxact abort, it's OK because the effects of the TRUNCATE
		 * or whatever must get rolled back too.)
		 */
		if (event->ate_flags & AFTER_TRIGGER_DONE)
			continue;

		if (evtshared->ats_relid == relid)
			return true;
	}

	/*
	 * Also scan events queued by incomplete queries.  This could only matter
	 * if TRUNCATE/etc is executed by a function or trigger within an updating
	 * query on the same relation, which is pretty perverse, but let's check.
	 */
	for (depth = 0; depth <= afterTriggers.query_depth && depth < afterTriggers.maxquerydepth; depth++)
	{
		for_each_event_chunk(event, chunk, afterTriggers.query_stack[depth].events)
		{
			AfterTriggerShared evtshared = GetTriggerSharedData(event);

			if (event->ate_flags & AFTER_TRIGGER_DONE)
				continue;

			if (evtshared->ats_relid == relid)
				return true;
		}
	}

	return false;
}

/* ----------
 * AfterTriggerSaveEvent()
 *
 *	Called by ExecA[RS]...Triggers() to queue up the triggers that should
 *	be fired for an event.
 *
 *	NOTE: this is called whenever there are any triggers associated with
 *	the event (even if they are disabled).  This function decides which
 *	triggers actually need to be queued.  It is also called after each row,
 *	even if there are no triggers for that event, if there are any AFTER
 *	STATEMENT triggers for the statement which use transition tables, so that
 *	the transition tuplestores can be built.  Furthermore, if the transition
 *	capture is happening for UPDATEd rows being moved to another partition due
 *	to the partition-key being changed, then this function is called once when
 *	the row is deleted (to capture OLD row), and once when the row is inserted
 *	into another partition (to capture NEW row).  This is done separately because
 *	DELETE and INSERT happen on different tables.
 *
 *	Transition tuplestores are built now, rather than when events are pulled
 *	off of the queue because AFTER ROW triggers are allowed to select from the
 *	transition tables for the statement.
 *
 *	This contains special support to queue the update events for the case where
 *	a partitioned table undergoing a cross-partition update may have foreign
 *	keys pointing into it.
Normally, a partitioned table's row triggers are
+ * not fired because the leaf partition(s) which are modified as a result of
+ * the operation on the partitioned table contain the same triggers which are
+ * fired instead.  But that general scheme can cause problematic behavior with
+ * foreign key triggers during cross-partition updates, which are implemented
+ * as DELETE on the source partition followed by INSERT into the destination
+ * partition.  Specifically, firing DELETE triggers would lead to the wrong
+ * foreign key action to be enforced considering that the original command is
+ * UPDATE; in this case, this function is called with relinfo as the
+ * partitioned table, and src_partinfo and dst_partinfo referring to the
+ * source and target leaf partitions, respectively.
+ *
+ * is_crosspart_update is true either when a DELETE event is fired on the
+ * source partition (which is to be ignored) or an UPDATE event is fired on
+ * the root partitioned table.
+ * ----------
+ */
+static void
+AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+					  ResultRelInfo *src_partinfo,
+					  ResultRelInfo *dst_partinfo,
+					  int event, bool row_trigger,
+					  TupleTableSlot *oldslot, TupleTableSlot *newslot,
+					  List *recheckIndexes, Bitmapset *modifiedCols,
+					  TransitionCaptureState *transition_capture,
+					  bool is_crosspart_update)
+{
+	Relation	rel = relinfo->ri_RelationDesc;
+	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+	AfterTriggerEventData new_event;
+	AfterTriggerSharedData new_shared;
+	char		relkind = rel->rd_rel->relkind;
+	int			tgtype_event;
+	int			tgtype_level;
+	int			i;
+	Tuplestorestate *fdw_tuplestore = NULL;
+
+	/*
+	 * Check state.  We use a normal test not Assert because it is possible to
+	 * reach here in the wrong state given misconfigured RI triggers, in
+	 * particular deferring a cascade action trigger.
+	 */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "AfterTriggerSaveEvent() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * If the directly named relation has any triggers with transition tables,
+	 * then we need to capture transition tuples.
+	 */
+	if (row_trigger && transition_capture != NULL)
+	{
+		TupleTableSlot *original_insert_tuple = transition_capture->tcs_original_insert_tuple;
+
+		/*
+		 * Capture the old tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(oldslot))
+		{
+			Tuplestorestate *old_tuplestore;
+
+			old_tuplestore = GetAfterTriggersTransitionTable(event,
+															 oldslot,
+															 NULL,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									oldslot, NULL, old_tuplestore);
+		}
+
+		/*
+		 * Capture the new tuple in the appropriate transition table based on
+		 * the event.
+		 */
+		if (!TupIsNull(newslot))
+		{
+			Tuplestorestate *new_tuplestore;
+
+			new_tuplestore = GetAfterTriggersTransitionTable(event,
+															 NULL,
+															 newslot,
+															 transition_capture);
+			TransitionTableAddTuple(estate, transition_capture, relinfo,
+									newslot, original_insert_tuple, new_tuplestore);
+		}
+
+		/*
+		 * If transition tables are the only reason we're here, return. As
+		 * mentioned above, we can also be here during update tuple routing in
+		 * presence of transition tables, in which case this function is
+		 * called separately for OLD and NEW, so we expect exactly one of them
+		 * to be NULL.
+		 */
+		if (trigdesc == NULL ||
+			(event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
+			(event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
+			(event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) ||
+			/* XOR: true when exactly one of the two slots is NULL */
+			(event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot))))
+			return;
+	}
+
+	/*
+	 * We normally don't see partitioned tables here for row level triggers
+	 * except in the special case of a cross-partition update.  In that case,
+	 * nodeModifyTable.c:ExecCrossPartitionUpdateForeignKey() calls here to
+	 * queue an update event on the root target partitioned table, also
+	 * passing the source and destination partitions and their tuples.
+	 */
+	Assert(!row_trigger ||
+		   rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
+		   (is_crosspart_update &&
+			TRIGGER_FIRED_BY_UPDATE(event) &&
+			src_partinfo != NULL && dst_partinfo != NULL));
+
+	/*
+	 * Validate the event code and collect the associated tuple CTIDs.
+	 *
+	 * The event code will be used both as a bitmask and an array offset, so
+	 * validation is important to make sure we don't walk off the edge of our
+	 * arrays.
+	 *
+	 * Also, if we're considering statement-level triggers, check whether we
+	 * already queued a set of them for this event, and cancel the prior set
+	 * if so.  This preserves the behavior that statement-level triggers fire
+	 * just once per statement and fire after row-level triggers.
+	 */
+	switch (event)
+	{
+		case TRIGGER_EVENT_INSERT:
+			tgtype_event = TRIGGER_TYPE_INSERT;
+			if (row_trigger)
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_INSERT, event);
+			}
+			break;
+		case TRIGGER_EVENT_DELETE:
+			tgtype_event = TRIGGER_TYPE_DELETE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot == NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_DELETE, event);
+			}
+			break;
+		case TRIGGER_EVENT_UPDATE:
+			tgtype_event = TRIGGER_TYPE_UPDATE;
+			if (row_trigger)
+			{
+				Assert(oldslot != NULL);
+				Assert(newslot != NULL);
+				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+
+				/*
+				 * Also remember the OIDs of partitions to fetch these tuples
+				 * out of later in AfterTriggerExecute().
+				 */
+				if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				{
+					Assert(src_partinfo != NULL && dst_partinfo != NULL);
+					new_event.ate_src_part =
+						RelationGetRelid(src_partinfo->ri_RelationDesc);
+					new_event.ate_dst_part =
+						RelationGetRelid(dst_partinfo->ri_RelationDesc);
+				}
+			}
+			else
+			{
+				Assert(oldslot == NULL);
+				Assert(newslot == NULL);
+				ItemPointerSetInvalid(&(new_event.ate_ctid1));
+				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				cancel_prior_stmt_triggers(RelationGetRelid(rel),
+										   CMD_UPDATE, event);
+			}
+			break;
+		case TRIGGER_EVENT_TRUNCATE:
+			tgtype_event = TRIGGER_TYPE_TRUNCATE;
+			Assert(oldslot == NULL);
+			Assert(newslot == NULL);
+			ItemPointerSetInvalid(&(new_event.ate_ctid1));
+			ItemPointerSetInvalid(&(new_event.ate_ctid2));
+			break;
+		default:
+			elog(ERROR, "invalid after-trigger event code: %d", event);
+			tgtype_event = 0;	/* keep compiler quiet */
+			break;
+	}
+
+	/* Determine flags */
+	if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+	{
+		if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+		{
+			if (relkind == RELKIND_PARTITIONED_TABLE)
+				new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+			else
+				new_event.ate_flags = AFTER_TRIGGER_2CTID;
+		}
+		else
+			new_event.ate_flags = AFTER_TRIGGER_1CTID;
+	}
+
+	/* else, we'll initialize ate_flags for each trigger */
+
+	tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
+
+	/*
+	 * Must convert/copy the source and destination partition tuples into the
+	 * root partitioned table's format/slot, because the processing in the
+	 * loop below expects both oldslot and newslot tuples to be in that form.
+	 */
+	if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		TupleTableSlot *rootslot;
+		TupleConversionMap *map;
+
+		rootslot = ExecGetTriggerOldSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(src_partinfo);
+		if (map)
+			oldslot = execute_attr_map_slot(map->attrMap,
+											oldslot,
+											rootslot);
+		else
+			oldslot = ExecCopySlot(rootslot, oldslot);
+
+		rootslot = ExecGetTriggerNewSlot(estate, relinfo);
+		map = ExecGetChildToRootMap(dst_partinfo);
+		if (map)
+			newslot = execute_attr_map_slot(map->attrMap,
+											newslot,
+											rootslot);
+		else
+			newslot = ExecCopySlot(rootslot, newslot);
+	}
+
+	for (i = 0; i < trigdesc->numtriggers; i++)
+	{
+		Trigger    *trigger = &trigdesc->triggers[i];
+
+		if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
+								  tgtype_level,
+								  TRIGGER_TYPE_AFTER,
+								  tgtype_event))
+			continue;
+		if (!TriggerEnabled(estate, relinfo, trigger, event,
+							modifiedCols, oldslot, newslot))
+			continue;
+
+		if (relkind == RELKIND_FOREIGN_TABLE && row_trigger)
+		{
+			if (fdw_tuplestore == NULL)
+			{
+				fdw_tuplestore = GetCurrentFDWTuplestore();
+				new_event.ate_flags = AFTER_TRIGGER_FDW_FETCH;
+			}
+			else
+				/* subsequent event for the same tuple */
+				new_event.ate_flags = AFTER_TRIGGER_FDW_REUSE;
+		}
+
+		/*
+		 * If the trigger is a foreign key enforcement trigger, there are
+		 * certain cases where we can skip queueing the event because we can
+		 * tell by inspection that the FK constraint will still pass. There
+		 * are also some cases during cross-partition updates of a partitioned
+		 * table where queuing the event can be skipped.
+		 */
+		if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event))
+		{
+			switch (RI_FKey_trigger_type(trigger->tgfoid))
+			{
+				case RI_TRIGGER_PK:
+
+					/*
+					 * For cross-partitioned updates of partitioned PK table,
+					 * skip the event fired by the component delete on the
+					 * source leaf partition unless the constraint originates
+					 * in the partition itself (!tgisclone), because the
+					 * update event that will be fired on the root
+					 * (partitioned) target table will be used to perform the
+					 * necessary foreign key enforcement action.
+					 */
+					if (is_crosspart_update &&
+						TRIGGER_FIRED_BY_DELETE(event) &&
+						trigger->tgisclone)
+						continue;
+
+					/* Update or delete on trigger's PK table */
+					if (!RI_FKey_pk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_FK:
+
+					/*
+					 * Update on trigger's FK table.  We can skip the update
+					 * event fired on a partitioned table during a
+					 * cross-partition of that table, because the insert event
+					 * that is fired on the destination leaf partition would
+					 * suffice to perform the necessary foreign key check.
+					 * Moreover, RI_FKey_fk_upd_check_required() expects to be
+					 * passed a tuple that contains system attributes, most of
+					 * which are not present in the virtual slot belonging to
+					 * a partitioned table.
+					 */
+					if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+						!RI_FKey_fk_upd_check_required(trigger, rel,
+													   oldslot, newslot))
+					{
+						/* skip queuing this event */
+						continue;
+					}
+					break;
+
+				case RI_TRIGGER_NONE:
+
+					/*
+					 * Not an FK trigger.  No need to queue the update event
+					 * fired during a cross-partitioned update of a
+					 * partitioned table, because the same row trigger must be
+					 * present in the leaf partition(s) that are affected as
+					 * part of this update and the events fired on them are
+					 * queued instead.
+					 */
+					if (row_trigger &&
+						rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+						continue;
+					break;
+			}
+		}
+
+		/*
+		 * If the trigger is a deferred unique constraint check trigger, only
+		 * queue it if the unique constraint was potentially violated, which
+		 * we know from index insertion time.
+		 */
+		if (trigger->tgfoid == F_UNIQUE_KEY_RECHECK)
+		{
+			if (!list_member_oid(recheckIndexes, trigger->tgconstrindid))
+				continue;		/* Uniqueness definitely not violated */
+		}
+
+		/*
+		 * Fill in event structure and add it to the current query's queue.
+		 * Note we set ats_table to NULL whenever this trigger doesn't use
+		 * transition tables, to improve sharability of the shared event data.
+		 */
+		new_shared.ats_event =
+			(event & TRIGGER_EVENT_OPMASK) |
+			(row_trigger ? TRIGGER_EVENT_ROW : 0) |
+			(trigger->tgdeferrable ? AFTER_TRIGGER_DEFERRABLE : 0) |
+			(trigger->tginitdeferred ? AFTER_TRIGGER_INITDEFERRED : 0);
+		new_shared.ats_tgoid = trigger->tgoid;
+		new_shared.ats_relid = RelationGetRelid(rel);
+		new_shared.ats_firing_id = 0;
+		if ((trigger->tgoldtable || trigger->tgnewtable) &&
+			transition_capture != NULL)
+			new_shared.ats_table = transition_capture->tcs_private;
+		else
+			new_shared.ats_table = NULL;
+		new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
+
+		afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
+							 &new_event, &new_shared);
+	}
+
+	/*
+	 * Finally, spool any foreign tuple(s).  The tuplestore squashes them to
+	 * minimal tuples, so this loses any system columns.  The executor lost
+	 * those columns before us, for an unrelated reason, so this is fine.
+	 */
+	if (fdw_tuplestore)
+	{
+		if (oldslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, oldslot);
+		if (newslot != NULL)
+			tuplestore_puttupleslot(fdw_tuplestore, newslot);
+	}
+}
+
+/*
+ * Detect whether we already queued BEFORE STATEMENT triggers for the given
+ * relation + operation, and set the flag so the next call will report "true".
+ */
+static bool
+before_stmt_triggers_fired(Oid relid, CmdType cmdType)
+{
+	bool		result;
+	AfterTriggersTableData *table;
+
+	/* Check state, like AfterTriggerSaveEvent. */
+	if (afterTriggers.query_depth < 0)
+		elog(ERROR, "before_stmt_triggers_fired() called outside of query");
+
+	/* Be sure we have enough space to record events at this query depth. */
+	if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+		AfterTriggerEnlargeQueryState();
+
+	/*
+	 * We keep this state in the AfterTriggersTableData that also holds
+	 * transition tables for the relation + operation.  In this way, if we are
+	 * forced to make a new set of transition tables because more tuples get
+	 * entered after we've already fired triggers, we will allow a new set of
+	 * statement triggers to get queued.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+	/* test-and-set: report prior state, then mark as fired */
+	result = table->before_trig_done;
+	table->before_trig_done = true;
+	return result;
+}
+
+/*
+ * If we previously queued a set of AFTER STATEMENT triggers for the given
+ * relation + operation, and they've not been fired yet, cancel them.  The
+ * caller will queue a fresh set that's after any row-level triggers that may
+ * have been queued by the current sub-statement, preserving (as much as
+ * possible) the property that AFTER ROW triggers fire before AFTER STATEMENT
+ * triggers, and that the latter only fire once.  This deals with the
+ * situation where several FK enforcement triggers sequentially queue triggers
+ * for the same table into the same trigger query level.  We can't fully
+ * prevent odd behavior though: if there are AFTER ROW triggers taking
+ * transition tables, we don't want to change the transition tables once the
+ * first such trigger has seen them.  In such a case, any additional events
+ * will result in creating new transition tables and allowing new firings of
+ * statement triggers.
+ *
+ * This also saves the current event list location so that a later invocation
+ * of this function can cheaply find the triggers we're about to queue and
+ * cancel them.
+ */
+static void
+cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent)
+{
+	AfterTriggersTableData *table;
+	AfterTriggersQueryData *qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+	/*
+	 * We keep this state in the AfterTriggersTableData that also holds
+	 * transition tables for the relation + operation.  In this way, if we are
+	 * forced to make a new set of transition tables because more tuples get
+	 * entered after we've already fired triggers, we will allow a new set of
+	 * statement triggers to get queued without canceling the old ones.
+	 */
+	table = GetAfterTriggersTableData(relid, cmdType);
+
+	if (table->after_trig_done)
+	{
+		/*
+		 * We want to start scanning from the tail location that existed just
+		 * before we inserted any statement triggers.  But the events list
+		 * might've been entirely empty then, in which case scan from the
+		 * current head.
+		 */
+		AfterTriggerEvent event;
+		AfterTriggerEventChunk *chunk;
+
+		if (table->after_trig_events.tail)
+		{
+			chunk = table->after_trig_events.tail;
+			event = (AfterTriggerEvent) table->after_trig_events.tailfree;
+		}
+		else
+		{
+			chunk = qs->events.head;
+			event = NULL;
+		}
+
+		for_each_chunk_from(chunk)
+		{
+			if (event == NULL)
+				event = (AfterTriggerEvent) CHUNK_DATA_START(chunk);
+			for_each_event_from(event, chunk)
+			{
+				AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+				/*
+				 * Exit loop when we reach events that aren't AS triggers for
+				 * the target relation.
+				 */
+				if (evtshared->ats_relid != relid)
+					goto done;
+				if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) != tgevent)
+					goto done;
+				if (!TRIGGER_FIRED_FOR_STATEMENT(evtshared->ats_event))
+					goto done;
+				if (!TRIGGER_FIRED_AFTER(evtshared->ats_event))
+					goto done;
+				/* OK, mark it DONE */
+				event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+				event->ate_flags |= AFTER_TRIGGER_DONE;
+			}
+			/* signal we must reinitialize event ptr for next chunk */
+			event = NULL;
+		}
+	}
+done:
+
+	/* In any case, save current insertion point for next time */
+	table->after_trig_done = true;
+	table->after_trig_events = qs->events;
+}
+
+/*
+ * SQL function pg_trigger_depth()
+ */
+Datum
+pg_trigger_depth(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT32(MyTriggerDepth);
+}
diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c
new file mode 100644
index 0000000..4cc4e3c
--- /dev/null
+++ b/src/backend/commands/tsearchcmds.c
@@ -0,0 +1,1759 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsearchcmds.c
+ *
+ *	  Routines for tsearch manipulation commands
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/tsearchcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+/*
+ * NOTE(review): the header name on the next line was lost in extraction
+ * (angle-bracketed include stripped); upstream 15.5 reads
+ * "#include <ctype.h>" here -- restore before applying this patch.
+ */
+#include 
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_config_map.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_ts_parser.h"
+#include "catalog/pg_ts_template.h"
+#include "catalog/pg_type.h"
+#include "commands/alter.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "common/string.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_func.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
+									 HeapTuple tup, Relation relMap);
+static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
+									 HeapTuple tup, Relation relMap);
+static DefElem *buildDefItem(const char *name, const char *val,
+							 bool was_quoted);
+
+
+/* --------------------- TS Parser commands ------------------------ */
+
+/*
+ * lookup a parser support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_parser column the function will go into
+ */
+static Datum
+get_ts_parser_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[3];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;	/* correct for most */
+	typeId[0] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_parser_prsstart:
+			nargs = 2;
+			typeId[1] = INT4OID;
+			break;
+		case Anum_pg_ts_parser_prstoken:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = INTERNALOID;
+			break;
+		case Anum_pg_ts_parser_prsend:
+			nargs = 1;
+			retTypeId = VOIDOID;
+			break;
+		case Anum_pg_ts_parser_prsheadline:
+			nargs = 3;
+			typeId[1] = INTERNALOID;
+			typeId[2] = TSQUERYOID;
+			break;
+		case Anum_pg_ts_parser_prslextype:
+			nargs = 1;
+
+			/*
+			 * Note: because the lextype method returns type internal, it must
+			 * have an internal-type argument for security reasons.  The
+			 * argument is not actually used, but is just passed as a zero.
+			 */
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search parser: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_parser entry
+ *
+ * Return value is the address of said new entry.
+ */
+static ObjectAddress
+makeParserDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_parser prs = (Form_pg_ts_parser) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSParserRelationId, prs->oid);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, prs->prsnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependencies on functions */
+	ObjectAddressSet(referenced, ProcedureRelationId, prs->prsstart);
+	add_exact_object_address(&referenced, addrs);
+
+	/* referenced.classId stays ProcedureRelationId for the rest */
+	referenced.objectId = prs->prstoken;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prsend;
+	add_exact_object_address(&referenced, addrs);
+
+	referenced.objectId = prs->prslextype;
+	add_exact_object_address(&referenced, addrs);
+
+	if (OidIsValid(prs->prsheadline))
+	{
+		referenced.objectId = prs->prsheadline;
+		add_exact_object_address(&referenced, addrs);
+	}
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * CREATE TEXT SEARCH PARSER
+ */
+ObjectAddress
+DefineTSParser(List *names, List *parameters)
+{
+	char	   *prsname;
+	ListCell   *pl;
+	Relation	prsRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_parser];
+	bool		nulls[Natts_pg_ts_parser];
+	NameData	pname;
+	Oid			prsOid;
+	Oid			namespaceoid;
+	ObjectAddress address;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be superuser to create text search parsers")));
+
+	prsRel = table_open(TSParserRelationId, RowExclusiveLock);
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &prsname);
+
+	/* initialize tuple fields with name/namespace */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	prsOid = GetNewOidWithIndex(prsRel, TSParserOidIndexId,
+								Anum_pg_ts_parser_oid);
+	values[Anum_pg_ts_parser_oid - 1] = ObjectIdGetDatum(prsOid);
+	namestrcpy(&pname, prsname);
+	values[Anum_pg_ts_parser_prsname - 1] = NameGetDatum(&pname);
+	values[Anum_pg_ts_parser_prsnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "start") == 0)
+		{
+			values[Anum_pg_ts_parser_prsstart - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsstart);
+		}
+		else if (strcmp(defel->defname, "gettoken") == 0)
+		{
+			values[Anum_pg_ts_parser_prstoken - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prstoken);
+		}
+		else if (strcmp(defel->defname, "end") == 0)
+		{
+			values[Anum_pg_ts_parser_prsend - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsend);
+		}
+		else if (strcmp(defel->defname, "headline") == 0)
+		{
+			values[Anum_pg_ts_parser_prsheadline - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prsheadline);
+		}
+		else if (strcmp(defel->defname, "lextypes") == 0)
+		{
+			values[Anum_pg_ts_parser_prslextype - 1] =
+				get_ts_parser_func(defel, Anum_pg_ts_parser_prslextype);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search parser parameter \"%s\" not recognized",
+							defel->defname)));
+	}
+
+	/*
+	 * Validation
+	 *
+	 * values[] slots not assigned above are still zero from the memset,
+	 * i.e. InvalidOid, which is exactly what these checks reject.
+	 */
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsstart - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser start method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prstoken - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser gettoken method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsend - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser end method is required")));
+
+	if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prslextype - 1])))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search parser lextypes method is required")));
+
+	/*
+	 * Looks good, insert
+	 */
+	tup = heap_form_tuple(prsRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(prsRel, tup);
+
+	address = makeParserDependencies(tup);
+
+	/* Post creation hook for new text search parser */
+	InvokeObjectPostCreateHook(TSParserRelationId, prsOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(prsRel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Dictionary commands -----------------------*/
+
+/*
+ * make pg_depend entries for a new pg_ts_dict entry
+ *
+ * Return value is address of the new entry
+ */
+static ObjectAddress
+makeDictionaryDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSDictionaryRelationId, dict->oid);
+
+	/* dependency on owner */
+	recordDependencyOnOwner(myself.classId, myself.objectId, dict->dictowner);
+
+	/* dependency on extension */
+	recordDependencyOnCurrentExtension(&myself, false);
+
+	addrs = new_object_addresses();
+
+	/* dependency on namespace */
+	ObjectAddressSet(referenced, NamespaceRelationId, dict->dictnamespace);
+	add_exact_object_address(&referenced, addrs);
+
+	/* dependency on template */
+	ObjectAddressSet(referenced, TSTemplateRelationId, dict->dicttemplate);
+	add_exact_object_address(&referenced, addrs);
+
+	record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+	free_object_addresses(addrs);
+
+	return myself;
+}
+
+/*
+ * verify that a template's init method accepts a proposed option list
+ */
+static void
+verify_dictoptions(Oid tmplId, List *dictoptions)
+{
+	HeapTuple	tup;
+	Form_pg_ts_template tform;
+	Oid			initmethod;
+
+	/*
+	 * Suppress this test when running in a standalone backend.  This is a
+	 * hack to allow initdb to create prefab dictionaries that might not
+	 * actually be usable in template1's encoding (due to using external files
+	 * that can't be translated into template1's encoding).  We want to create
+	 * them anyway, since they might be usable later in other databases.
+ */
+	if (!IsUnderPostmaster)
+		return;
+
+	tup = SearchSysCache1(TSTEMPLATEOID, ObjectIdGetDatum(tmplId));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for text search template %u",
+			 tmplId);
+	tform = (Form_pg_ts_template) GETSTRUCT(tup);
+
+	initmethod = tform->tmplinit;
+
+	if (!OidIsValid(initmethod))
+	{
+		/* If there is no init method, disallow any options */
+		if (dictoptions)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("text search template \"%s\" does not accept options",
+							NameStr(tform->tmplname))));
+	}
+	else
+	{
+		/*
+		 * Copy the options just in case init method thinks it can scribble on
+		 * them ...
+		 */
+		dictoptions = copyObject(dictoptions);
+
+		/*
+		 * Call the init method and see if it complains.  We don't worry about
+		 * it leaking memory, since our command will soon be over anyway.
+		 */
+		(void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions));
+	}
+
+	ReleaseSysCache(tup);
+}
+
+/*
+ * CREATE TEXT SEARCH DICTIONARY
+ */
+ObjectAddress
+DefineTSDictionary(List *names, List *parameters)
+{
+	ListCell   *pl;
+	Relation	dictRel;
+	HeapTuple	tup;
+	Datum		values[Natts_pg_ts_dict];
+	bool		nulls[Natts_pg_ts_dict];
+	NameData	dname;
+	Oid			templId = InvalidOid;
+	List	   *dictoptions = NIL;
+	Oid			dictOid;
+	Oid			namespaceoid;
+	AclResult	aclresult;
+	char	   *dictname;
+	ObjectAddress address;
+
+	/* Convert list of names to a name and namespace */
+	namespaceoid = QualifiedNameGetCreationNamespace(names, &dictname);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(namespaceoid));
+
+	/*
+	 * loop over the definition list and extract the information we need.
+	 */
+	foreach(pl, parameters)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+
+		if (strcmp(defel->defname, "template") == 0)
+		{
+			templId = get_ts_template_oid(defGetQualifiedName(defel), false);
+		}
+		else
+		{
+			/* Assume it's an option for the dictionary itself */
+			dictoptions = lappend(dictoptions, defel);
+		}
+	}
+
+	/*
+	 * Validation
+	 */
+	if (!OidIsValid(templId))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("text search template is required")));
+
+	verify_dictoptions(templId, dictoptions);
+
+
+	dictRel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	/*
+	 * Looks good, insert
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	dictOid = GetNewOidWithIndex(dictRel, TSDictionaryOidIndexId,
+								 Anum_pg_ts_dict_oid);
+	values[Anum_pg_ts_dict_oid - 1] = ObjectIdGetDatum(dictOid);
+	namestrcpy(&dname, dictname);
+	values[Anum_pg_ts_dict_dictname - 1] = NameGetDatum(&dname);
+	values[Anum_pg_ts_dict_dictnamespace - 1] = ObjectIdGetDatum(namespaceoid);
+	values[Anum_pg_ts_dict_dictowner - 1] = ObjectIdGetDatum(GetUserId());
+	values[Anum_pg_ts_dict_dicttemplate - 1] = ObjectIdGetDatum(templId);
+	if (dictoptions)
+		values[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		nulls[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	tup = heap_form_tuple(dictRel->rd_att, values, nulls);
+
+	CatalogTupleInsert(dictRel, tup);
+
+	address = makeDictionaryDependencies(tup);
+
+	/* Post creation hook for new text search dictionary */
+	InvokeObjectPostCreateHook(TSDictionaryRelationId, dictOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(dictRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * ALTER TEXT SEARCH DICTIONARY
+ */
+ObjectAddress
+AlterTSDictionary(AlterTSDictionaryStmt *stmt)
+{
+	HeapTuple	tup,
+				newtup;
+	Relation	rel;
+	Oid			dictId;
+	ListCell   *pl;
+	List	   *dictoptions;
+	Datum		opt;
+	bool		isnull;
+	Datum		repl_val[Natts_pg_ts_dict];
+	bool		repl_null[Natts_pg_ts_dict];
+	bool		repl_repl[Natts_pg_ts_dict];
+	ObjectAddress address;
+
+	dictId = get_ts_dict_oid(stmt->dictname, false);
+
+	rel = table_open(TSDictionaryRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search dictionary %u",
+			 dictId);
+
+	/* must be owner */
+	if (!pg_ts_dict_ownercheck(dictId, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSDICTIONARY,
+					   NameListToString(stmt->dictname));
+
+	/* deserialize the existing set of options */
+	opt = SysCacheGetAttr(TSDICTOID, tup,
+						  Anum_pg_ts_dict_dictinitoption,
+						  &isnull);
+	if (isnull)
+		dictoptions = NIL;
+	else
+		dictoptions = deserialize_deflist(opt);
+
+	/*
+	 * Modify the options list as per specified changes
+	 */
+	foreach(pl, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(pl);
+		ListCell   *cell;
+
+		/*
+		 * Remove any matches ...
+		 */
+		foreach(cell, dictoptions)
+		{
+			DefElem    *oldel = (DefElem *) lfirst(cell);
+
+			if (strcmp(oldel->defname, defel->defname) == 0)
+				dictoptions = foreach_delete_current(dictoptions, cell);
+		}
+
+		/*
+		 * and add new value if it's got one
+		 *
+		 * (a DefElem with no arg therefore just deletes the old setting)
+		 */
+		if (defel->arg)
+			dictoptions = lappend(dictoptions, defel);
+	}
+
+	/*
+	 * Validate
+	 */
+	verify_dictoptions(((Form_pg_ts_dict) GETSTRUCT(tup))->dicttemplate,
+					   dictoptions);
+
+	/*
+	 * Looks good, update
+	 */
+	memset(repl_val, 0, sizeof(repl_val));
+	memset(repl_null, false, sizeof(repl_null));
+	memset(repl_repl, false, sizeof(repl_repl));
+
+	if (dictoptions)
+		repl_val[Anum_pg_ts_dict_dictinitoption - 1] =
+			PointerGetDatum(serialize_deflist(dictoptions));
+	else
+		repl_null[Anum_pg_ts_dict_dictinitoption - 1] = true;
+	repl_repl[Anum_pg_ts_dict_dictinitoption - 1] = true;
+
+	newtup = heap_modify_tuple(tup, RelationGetDescr(rel),
+							   repl_val, repl_null, repl_repl);
+
+	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
+
+	InvokeObjectPostAlterHook(TSDictionaryRelationId, dictId, 0);
+
+	ObjectAddressSet(address, TSDictionaryRelationId, dictId);
+
+	/*
+	 * NOTE: because we only support altering the options, not the template,
+	 * there is no need to update dependencies.  This might have to change if
+	 * the options ever reference inside-the-database objects.
+	 */
+
+	heap_freetuple(newtup);
+	ReleaseSysCache(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	return address;
+}
+
+/* ---------------------- TS Template commands -----------------------*/
+
+/*
+ * lookup a template support function and return its OID (as a Datum)
+ *
+ * attnum is the pg_ts_template column the function will go into
+ */
+static Datum
+get_ts_template_func(DefElem *defel, int attnum)
+{
+	List	   *funcName = defGetQualifiedName(defel);
+	Oid			typeId[4];
+	Oid			retTypeId;
+	int			nargs;
+	Oid			procOid;
+
+	retTypeId = INTERNALOID;
+	typeId[0] = INTERNALOID;
+	typeId[1] = INTERNALOID;
+	typeId[2] = INTERNALOID;
+	typeId[3] = INTERNALOID;
+	switch (attnum)
+	{
+		case Anum_pg_ts_template_tmplinit:
+			nargs = 1;
+			break;
+		case Anum_pg_ts_template_tmpllexize:
+			nargs = 4;
+			break;
+		default:
+			/* should not be here */
+			elog(ERROR, "unrecognized attribute for text search template: %d",
+				 attnum);
+			nargs = 0;			/* keep compiler quiet */
+	}
+
+	procOid = LookupFuncName(funcName, nargs, typeId, false);
+	if (get_func_rettype(procOid) != retTypeId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("function %s should return type %s",
+						func_signature_string(funcName, nargs, NIL, typeId),
+						format_type_be(retTypeId))));
+
+	return ObjectIdGetDatum(procOid);
+}
+
+/*
+ * make pg_depend entries for a new pg_ts_template entry
+ */
+static ObjectAddress
+makeTSTemplateDependencies(HeapTuple tuple)
+{
+	Form_pg_ts_template tmpl = (Form_pg_ts_template) GETSTRUCT(tuple);
+	ObjectAddress myself,
+				referenced;
+	ObjectAddresses *addrs;
+
+	ObjectAddressSet(myself, TSTemplateRelationId, tmpl->oid);
+
+	/*
dependency on extension */ + recordDependencyOnCurrentExtension(&myself, false); + + addrs = new_object_addresses(); + + /* dependency on namespace */ + ObjectAddressSet(referenced, NamespaceRelationId, tmpl->tmplnamespace); + add_exact_object_address(&referenced, addrs); + + /* dependencies on functions */ + ObjectAddressSet(referenced, ProcedureRelationId, tmpl->tmpllexize); + add_exact_object_address(&referenced, addrs); + + if (OidIsValid(tmpl->tmplinit)) + { + referenced.objectId = tmpl->tmplinit; + add_exact_object_address(&referenced, addrs); + } + + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + free_object_addresses(addrs); + + return myself; +} + +/* + * CREATE TEXT SEARCH TEMPLATE + */ +ObjectAddress +DefineTSTemplate(List *names, List *parameters) +{ + ListCell *pl; + Relation tmplRel; + HeapTuple tup; + Datum values[Natts_pg_ts_template]; + bool nulls[Natts_pg_ts_template]; + NameData dname; + int i; + Oid tmplOid; + Oid namespaceoid; + char *tmplname; + ObjectAddress address; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create text search templates"))); + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &tmplname); + + tmplRel = table_open(TSTemplateRelationId, RowExclusiveLock); + + for (i = 0; i < Natts_pg_ts_template; i++) + { + nulls[i] = false; + values[i] = ObjectIdGetDatum(InvalidOid); + } + + tmplOid = GetNewOidWithIndex(tmplRel, TSTemplateOidIndexId, + Anum_pg_ts_dict_oid); + values[Anum_pg_ts_template_oid - 1] = ObjectIdGetDatum(tmplOid); + namestrcpy(&dname, tmplname); + values[Anum_pg_ts_template_tmplname - 1] = NameGetDatum(&dname); + values[Anum_pg_ts_template_tmplnamespace - 1] = ObjectIdGetDatum(namespaceoid); + + /* + * loop over the definition list and extract the information we need. 
+ */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "init") == 0) + { + values[Anum_pg_ts_template_tmplinit - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmplinit); + nulls[Anum_pg_ts_template_tmplinit - 1] = false; + } + else if (strcmp(defel->defname, "lexize") == 0) + { + values[Anum_pg_ts_template_tmpllexize - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmpllexize); + nulls[Anum_pg_ts_template_tmpllexize - 1] = false; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search template parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Validation + */ + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_template_tmpllexize - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search template lexize method is required"))); + + /* + * Looks good, insert + */ + tup = heap_form_tuple(tmplRel->rd_att, values, nulls); + + CatalogTupleInsert(tmplRel, tup); + + address = makeTSTemplateDependencies(tup); + + /* Post creation hook for new text search template */ + InvokeObjectPostCreateHook(TSTemplateRelationId, tmplOid, 0); + + heap_freetuple(tup); + + table_close(tmplRel, RowExclusiveLock); + + return address; +} + +/* ---------------------- TS Configuration commands -----------------------*/ + +/* + * Finds syscache tuple of configuration. + * Returns NULL if no such cfg. 
+ */ +static HeapTuple +GetTSConfigTuple(List *names) +{ + HeapTuple tup; + Oid cfgId; + + cfgId = get_ts_config_oid(names, true); + if (!OidIsValid(cfgId)) + return NULL; + + tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + + return tup; +} + +/* + * make pg_depend entries for a new or updated pg_ts_config entry + * + * Pass opened pg_ts_config_map relation if there might be any config map + * entries for the config. + */ +static ObjectAddress +makeConfigurationDependencies(HeapTuple tuple, bool removeOld, + Relation mapRel) +{ + Form_pg_ts_config cfg = (Form_pg_ts_config) GETSTRUCT(tuple); + ObjectAddresses *addrs; + ObjectAddress myself, + referenced; + + myself.classId = TSConfigRelationId; + myself.objectId = cfg->oid; + myself.objectSubId = 0; + + /* for ALTER case, first flush old dependencies, except extension deps */ + if (removeOld) + { + deleteDependencyRecordsFor(myself.classId, myself.objectId, true); + deleteSharedDependencyRecordsFor(myself.classId, myself.objectId, 0); + } + + /* + * We use an ObjectAddresses list to remove possible duplicate + * dependencies from the config map info. The pg_ts_config items + * shouldn't be duplicates, but might as well fold them all into one call. 
+ */ + addrs = new_object_addresses(); + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = cfg->cfgnamespace; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependency on owner */ + recordDependencyOnOwner(myself.classId, myself.objectId, cfg->cfgowner); + + /* dependency on extension */ + recordDependencyOnCurrentExtension(&myself, removeOld); + + /* dependency on parser */ + referenced.classId = TSParserRelationId; + referenced.objectId = cfg->cfgparser; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependencies on dictionaries listed in config map */ + if (mapRel) + { + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + /* CCI to ensure we can see effects of caller's changes */ + CommandCounterIncrement(); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(myself.objectId)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + referenced.classId = TSDictionaryRelationId; + referenced.objectId = cfgmap->mapdict; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + } + + systable_endscan(scan); + } + + /* Record 'em (this includes duplicate elimination) */ + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + + free_object_addresses(addrs); + + return myself; +} + +/* + * CREATE TEXT SEARCH CONFIGURATION + */ +ObjectAddress +DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied) +{ + Relation cfgRel; + Relation mapRel = NULL; + HeapTuple tup; + Datum values[Natts_pg_ts_config]; + bool nulls[Natts_pg_ts_config]; + AclResult aclresult; + Oid namespaceoid; + char *cfgname; + NameData cname; + Oid sourceOid = InvalidOid; + Oid prsOid 
= InvalidOid; + Oid cfgOid; + ListCell *pl; + ObjectAddress address; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &cfgname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceoid)); + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "parser") == 0) + prsOid = get_ts_parser_oid(defGetQualifiedName(defel), false); + else if (strcmp(defel->defname, "copy") == 0) + sourceOid = get_ts_config_oid(defGetQualifiedName(defel), false); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search configuration parameter \"%s\" not recognized", + defel->defname))); + } + + if (OidIsValid(sourceOid) && OidIsValid(prsOid)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify both PARSER and COPY options"))); + + /* make copied tsconfig available to callers */ + if (copied && OidIsValid(sourceOid)) + { + ObjectAddressSet(*copied, + TSConfigRelationId, + sourceOid); + } + + /* + * Look up source config if given. 
+ */ + if (OidIsValid(sourceOid)) + { + Form_pg_ts_config cfg; + + tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(sourceOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search configuration %u", + sourceOid); + + cfg = (Form_pg_ts_config) GETSTRUCT(tup); + + /* use source's parser */ + prsOid = cfg->cfgparser; + + ReleaseSysCache(tup); + } + + /* + * Validation + */ + if (!OidIsValid(prsOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser is required"))); + + cfgRel = table_open(TSConfigRelationId, RowExclusiveLock); + + /* + * Looks good, build tuple and insert + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + cfgOid = GetNewOidWithIndex(cfgRel, TSConfigOidIndexId, + Anum_pg_ts_config_oid); + values[Anum_pg_ts_config_oid - 1] = ObjectIdGetDatum(cfgOid); + namestrcpy(&cname, cfgname); + values[Anum_pg_ts_config_cfgname - 1] = NameGetDatum(&cname); + values[Anum_pg_ts_config_cfgnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_ts_config_cfgowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(prsOid); + + tup = heap_form_tuple(cfgRel->rd_att, values, nulls); + + CatalogTupleInsert(cfgRel, tup); + + if (OidIsValid(sourceOid)) + { + /* + * Copy token-dicts map from source config + */ + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + mapRel = table_open(TSConfigMapRelationId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(sourceOid)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + HeapTuple newmaptup; + Datum mapvalues[Natts_pg_ts_config_map]; + bool mapnulls[Natts_pg_ts_config_map]; + + memset(mapvalues, 0, 
sizeof(mapvalues));
+			memset(mapnulls, false, sizeof(mapnulls));
+
+			mapvalues[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgOid);
+			mapvalues[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(cfgmap->maptokentype);
+			mapvalues[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(cfgmap->mapseqno);
+			mapvalues[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(cfgmap->mapdict);
+
+			newmaptup = heap_form_tuple(mapRel->rd_att, mapvalues, mapnulls);
+
+			CatalogTupleInsert(mapRel, newmaptup);
+
+			heap_freetuple(newmaptup);
+		}
+
+		systable_endscan(scan);
+	}
+
+	address = makeConfigurationDependencies(tup, false, mapRel);
+
+	/* Post creation hook for new text search configuration */
+	InvokeObjectPostCreateHook(TSConfigRelationId, cfgOid, 0);
+
+	heap_freetuple(tup);
+
+	if (mapRel)
+		table_close(mapRel, RowExclusiveLock);
+	table_close(cfgRel, RowExclusiveLock);
+
+	return address;
+}
+
+/*
+ * Guts of TS configuration deletion.
+ */
+void
+RemoveTSConfigurationById(Oid cfgId)
+{
+	Relation	relCfg,
+				relMap;
+	HeapTuple	tup;
+	ScanKeyData skey;
+	SysScanDesc scan;
+
+	/* Remove the pg_ts_config entry */
+	relCfg = table_open(TSConfigRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
+
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for text search configuration %u",
+			 cfgId);
+
+	CatalogTupleDelete(relCfg, &tup->t_self);
+
+	ReleaseSysCache(tup);
+
+	table_close(relCfg, RowExclusiveLock);
+
+	/* Remove any pg_ts_config_map entries */
+	relMap = table_open(TSConfigMapRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&skey,
+				Anum_pg_ts_config_map_mapcfg,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(cfgId));
+
+	scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
+							  NULL, 1, &skey);
+
+	while (HeapTupleIsValid((tup = systable_getnext(scan))))
+	{
+		CatalogTupleDelete(relMap, &tup->t_self);
+	}
+
+	systable_endscan(scan);
+
+	table_close(relMap, RowExclusiveLock);
+}
+
+/*
+ * ALTER TEXT SEARCH CONFIGURATION - main entry point
+ */
+ObjectAddress
+AlterTSConfiguration(AlterTSConfigurationStmt *stmt) +{ + HeapTuple tup; + Oid cfgId; + Relation relMap; + ObjectAddress address; + + /* Find the configuration */ + tup = GetTSConfigTuple(stmt->cfgname); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(stmt->cfgname)))); + + cfgId = ((Form_pg_ts_config) GETSTRUCT(tup))->oid; + + /* must be owner */ + if (!pg_ts_config_ownercheck(cfgId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TSCONFIGURATION, + NameListToString(stmt->cfgname)); + + relMap = table_open(TSConfigMapRelationId, RowExclusiveLock); + + /* Add or drop mappings */ + if (stmt->dicts) + MakeConfigurationMapping(stmt, tup, relMap); + else if (stmt->tokentype) + DropConfigurationMapping(stmt, tup, relMap); + + /* Update dependencies */ + makeConfigurationDependencies(tup, true, relMap); + + InvokeObjectPostAlterHook(TSConfigRelationId, cfgId, 0); + + ObjectAddressSet(address, TSConfigRelationId, cfgId); + + table_close(relMap, RowExclusiveLock); + + ReleaseSysCache(tup); + + return address; +} + +/* + * Translate a list of token type names to an array of token type numbers + */ +static int * +getTokenTypes(Oid prsId, List *tokennames) +{ + TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId); + LexDescr *list; + int *res, + i, + ntoken; + ListCell *tn; + + ntoken = list_length(tokennames); + if (ntoken == 0) + return NULL; + res = (int *) palloc(sizeof(int) * ntoken); + + if (!OidIsValid(prs->lextypeOid)) + elog(ERROR, "method lextype isn't defined for text search parser %u", + prsId); + + /* lextype takes one dummy argument */ + list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, + (Datum) 0)); + + i = 0; + foreach(tn, tokennames) + { + String *val = lfirst_node(String, tn); + bool found = false; + int j; + + j = 0; + while (list && list[j].lexid) + { + if (strcmp(strVal(val), list[j].alias) == 0) + { + 
res[i] = list[j].lexid; + found = true; + break; + } + j++; + } + if (!found) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("token type \"%s\" does not exist", + strVal(val)))); + i++; + } + + return res; +} + +/* + * ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING + */ +static void +MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup, Relation relMap) +{ + Form_pg_ts_config tsform; + Oid cfgId; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + int j; + Oid prsId; + int *tokens, + ntoken; + Oid *dictIds; + int ndict; + ListCell *c; + + tsform = (Form_pg_ts_config) GETSTRUCT(tup); + cfgId = tsform->oid; + prsId = tsform->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + ntoken = list_length(stmt->tokentype); + + if (stmt->override) + { + /* + * delete maps for tokens if they exist and command was ALTER + */ + for (i = 0; i < ntoken; i++) + { + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + CatalogTupleDelete(relMap, &maptup->t_self); + } + + systable_endscan(scan); + } + } + + /* + * Convert list of dictionary names to array of dict OIDs + */ + ndict = list_length(stmt->dicts); + dictIds = (Oid *) palloc(sizeof(Oid) * ndict); + i = 0; + foreach(c, stmt->dicts) + { + List *names = (List *) lfirst(c); + + dictIds[i] = get_ts_dict_oid(names, false); + i++; + } + + if (stmt->replace) + { + /* + * Replace a specific dictionary in existing entries + */ + Oid dictOld = dictIds[0], + dictNew = dictIds[1]; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + scan = 
systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + /* + * check if it's one of target token types + */ + if (tokens) + { + bool tokmatch = false; + + for (j = 0; j < ntoken; j++) + { + if (cfgmap->maptokentype == tokens[j]) + { + tokmatch = true; + break; + } + } + if (!tokmatch) + continue; + } + + /* + * replace dictionary if match + */ + if (cfgmap->mapdict == dictOld) + { + Datum repl_val[Natts_pg_ts_config_map]; + bool repl_null[Natts_pg_ts_config_map]; + bool repl_repl[Natts_pg_ts_config_map]; + HeapTuple newtup; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew); + repl_repl[Anum_pg_ts_config_map_mapdict - 1] = true; + + newtup = heap_modify_tuple(maptup, + RelationGetDescr(relMap), + repl_val, repl_null, repl_repl); + CatalogTupleUpdate(relMap, &newtup->t_self, newtup); + } + } + + systable_endscan(scan); + } + else + { + /* + * Insertion of new entries + */ + for (i = 0; i < ntoken; i++) + { + for (j = 0; j < ndict; j++) + { + Datum values[Natts_pg_ts_config_map]; + bool nulls[Natts_pg_ts_config_map]; + + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId); + values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]); + values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1); + values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]); + + tup = heap_form_tuple(relMap->rd_att, values, nulls); + CatalogTupleInsert(relMap, tup); + + heap_freetuple(tup); + } + } + } + + EventTriggerCollectAlterTSConfig(stmt, cfgId, dictIds, ndict); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION DROP MAPPING + */ +static void 
+DropConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup, Relation relMap) +{ + Form_pg_ts_config tsform; + Oid cfgId; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + Oid prsId; + int *tokens; + ListCell *c; + + tsform = (Form_pg_ts_config) GETSTRUCT(tup); + cfgId = tsform->oid; + prsId = tsform->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + + i = 0; + foreach(c, stmt->tokentype) + { + String *val = lfirst_node(String, c); + bool found = false; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + NULL, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + CatalogTupleDelete(relMap, &maptup->t_self); + found = true; + } + + systable_endscan(scan); + + if (!found) + { + if (!stmt->missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("mapping for token type \"%s\" does not exist", + strVal(val)))); + } + else + { + ereport(NOTICE, + (errmsg("mapping for token type \"%s\" does not exist, skipping", + strVal(val)))); + } + } + + i++; + } + + EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0); +} + + +/* + * Serialize dictionary options, producing a TEXT datum from a List of DefElem + * + * This is used to form the value stored in pg_ts_dict.dictinitoption. + * For the convenience of pg_dump, the output is formatted exactly as it + * would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the + * same options. 
+ */ +text * +serialize_deflist(List *deflist) +{ + text *result; + StringInfoData buf; + ListCell *l; + + initStringInfo(&buf); + + foreach(l, deflist) + { + DefElem *defel = (DefElem *) lfirst(l); + char *val = defGetString(defel); + + appendStringInfo(&buf, "%s = ", + quote_identifier(defel->defname)); + + /* + * If the value is a T_Integer or T_Float, emit it without quotes, + * otherwise with quotes. This is essential to allow correct + * reconstruction of the node type as well as the value. + */ + if (IsA(defel->arg, Integer) || IsA(defel->arg, Float)) + appendStringInfoString(&buf, val); + else + { + /* If backslashes appear, force E syntax to quote them safely */ + if (strchr(val, '\\')) + appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX); + appendStringInfoChar(&buf, '\''); + while (*val) + { + char ch = *val++; + + if (SQL_STR_DOUBLE(ch, true)) + appendStringInfoChar(&buf, ch); + appendStringInfoChar(&buf, ch); + } + appendStringInfoChar(&buf, '\''); + } + if (lnext(deflist, l) != NULL) + appendStringInfoString(&buf, ", "); + } + + result = cstring_to_text_with_len(buf.data, buf.len); + pfree(buf.data); + return result; +} + +/* + * Deserialize dictionary options, reconstructing a List of DefElem from TEXT + * + * This is also used for prsheadline options, so for backward compatibility + * we need to accept a few things serialize_deflist() will never emit: + * in particular, unquoted and double-quoted strings. 
+ */ +List * +deserialize_deflist(Datum txt) +{ + text *in = DatumGetTextPP(txt); /* in case it's toasted */ + List *result = NIL; + int len = VARSIZE_ANY_EXHDR(in); + char *ptr, + *endptr, + *workspace, + *wsptr = NULL, + *startvalue = NULL; + typedef enum + { + CS_WAITKEY, + CS_INKEY, + CS_INQKEY, + CS_WAITEQ, + CS_WAITVALUE, + CS_INSQVALUE, + CS_INDQVALUE, + CS_INWVALUE + } ds_state; + ds_state state = CS_WAITKEY; + + workspace = (char *) palloc(len + 1); /* certainly enough room */ + ptr = VARDATA_ANY(in); + endptr = ptr + len; + for (; ptr < endptr; ptr++) + { + switch (state) + { + case CS_WAITKEY: + if (isspace((unsigned char) *ptr) || *ptr == ',') + continue; + if (*ptr == '"') + { + wsptr = workspace; + state = CS_INQKEY; + } + else + { + wsptr = workspace; + *wsptr++ = *ptr; + state = CS_INKEY; + } + break; + case CS_INKEY: + if (isspace((unsigned char) *ptr)) + { + *wsptr++ = '\0'; + state = CS_WAITEQ; + } + else if (*ptr == '=') + { + *wsptr++ = '\0'; + state = CS_WAITVALUE; + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INQKEY: + if (*ptr == '"') + { + if (ptr + 1 < endptr && ptr[1] == '"') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + state = CS_WAITEQ; + } + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_WAITEQ: + if (*ptr == '=') + state = CS_WAITVALUE; + else if (!isspace((unsigned char) *ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + text_to_cstring(in)))); + break; + case CS_WAITVALUE: + if (*ptr == '\'') + { + startvalue = wsptr; + state = CS_INSQVALUE; + } + else if (*ptr == 'E' && ptr + 1 < endptr && ptr[1] == '\'') + { + ptr++; + startvalue = wsptr; + state = CS_INSQVALUE; + } + else if (*ptr == '"') + { + startvalue = wsptr; + state = CS_INDQVALUE; + } + else if (!isspace((unsigned char) *ptr)) + { + startvalue = wsptr; + *wsptr++ = *ptr; + state = CS_INWVALUE; + } + break; + case CS_INSQVALUE: + if (*ptr == 
'\'') + { + if (ptr + 1 < endptr && ptr[1] == '\'') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + true)); + state = CS_WAITKEY; + } + } + else if (*ptr == '\\') + { + if (ptr + 1 < endptr && ptr[1] == '\\') + { + /* copy only one of the two backslashes */ + *wsptr++ = *ptr++; + } + else + *wsptr++ = *ptr; + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INDQVALUE: + if (*ptr == '"') + { + if (ptr + 1 < endptr && ptr[1] == '"') + { + /* copy only one of the two quotes */ + *wsptr++ = *ptr++; + } + else + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + true)); + state = CS_WAITKEY; + } + } + else + { + *wsptr++ = *ptr; + } + break; + case CS_INWVALUE: + if (*ptr == ',' || isspace((unsigned char) *ptr)) + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + false)); + state = CS_WAITKEY; + } + else + { + *wsptr++ = *ptr; + } + break; + default: + elog(ERROR, "unrecognized deserialize_deflist state: %d", + state); + } + } + + if (state == CS_INWVALUE) + { + *wsptr++ = '\0'; + result = lappend(result, + buildDefItem(workspace, + startvalue, + false)); + } + else if (state != CS_WAITKEY) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + text_to_cstring(in)))); + + pfree(workspace); + + return result; +} + +/* + * Build one DefElem for deserialize_deflist + */ +static DefElem * +buildDefItem(const char *name, const char *val, bool was_quoted) +{ + /* If input was quoted, always emit as string */ + if (!was_quoted && val[0] != '\0') + { + int v; + char *endptr; + + /* Try to parse as an integer */ + errno = 0; + v = strtoint(val, &endptr, 10); + if (errno == 0 && *endptr == '\0') + return makeDefElem(pstrdup(name), + (Node *) makeInteger(v), + -1); + /* Nope, how about as a float? 
*/ + errno = 0; + (void) strtod(val, &endptr); + if (errno == 0 && *endptr == '\0') + return makeDefElem(pstrdup(name), + (Node *) makeFloat(pstrdup(val)), + -1); + + if (strcmp(val, "true") == 0) + return makeDefElem(pstrdup(name), + (Node *) makeBoolean(true), + -1); + if (strcmp(val, "false") == 0) + return makeDefElem(pstrdup(name), + (Node *) makeBoolean(false), + -1); + } + /* Just make it a string */ + return makeDefElem(pstrdup(name), + (Node *) makeString(pstrdup(val)), + -1); +} diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c new file mode 100644 index 0000000..9b92b04 --- /dev/null +++ b/src/backend/commands/typecmds.c @@ -0,0 +1,4495 @@ +/*------------------------------------------------------------------------- + * + * typecmds.c + * Routines for SQL commands that manipulate types (and domains). + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/typecmds.c + * + * DESCRIPTION + * The "DefineFoo" routines take the parse tree and pick out the + * appropriate arguments/flags, passing the results to the + * corresponding "FooDefine" routines (in src/catalog) that do + * the actual catalog-munging. These routines also verify permission + * of the user to execute the command. 
+ * + * NOTES + * These things must be defined and committed in the following order: + * "create function": + * input/output, recv/send functions + * "create type": + * type + * "create operator": + * operators + * + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/binary_upgrade.h" +#include "catalog/catalog.h" +#include "catalog/heap.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_range.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "commands/typecmds.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_coerce.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_func.h" +#include "parser/parse_type.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* result structure for get_rels_with_domain() */ +typedef struct +{ + Relation rel; /* opened and locked relation */ + int natts; /* number of attributes of interest */ + int *atts; /* attribute numbers */ + /* atts[] is of allocated length RelationGetNumberOfAttributes(rel) */ +} RelToCheck; + +/* parameter structure for AlterTypeRecurse() */ +typedef struct +{ + /* Flags 
indicating which type attributes to update */ + bool updateStorage; + bool updateReceive; + bool updateSend; + bool updateTypmodin; + bool updateTypmodout; + bool updateAnalyze; + bool updateSubscript; + /* New values for relevant attributes */ + char storage; + Oid receiveOid; + Oid sendOid; + Oid typmodinOid; + Oid typmodoutOid; + Oid analyzeOid; + Oid subscriptOid; +} AlterTypeRecurseParams; + +/* Potentially set by pg_upgrade_support functions */ +Oid binary_upgrade_next_array_pg_type_oid = InvalidOid; +Oid binary_upgrade_next_mrng_pg_type_oid = InvalidOid; +Oid binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid; + +static void makeRangeConstructors(const char *name, Oid namespace, + Oid rangeOid, Oid subtype); +static void makeMultirangeConstructors(const char *name, Oid namespace, + Oid multirangeOid, Oid rangeOid, + Oid rangeArrayOid, Oid *castFuncOid); +static Oid findTypeInputFunction(List *procname, Oid typeOid); +static Oid findTypeOutputFunction(List *procname, Oid typeOid); +static Oid findTypeReceiveFunction(List *procname, Oid typeOid); +static Oid findTypeSendFunction(List *procname, Oid typeOid); +static Oid findTypeTypmodinFunction(List *procname); +static Oid findTypeTypmodoutFunction(List *procname); +static Oid findTypeAnalyzeFunction(List *procname, Oid typeOid); +static Oid findTypeSubscriptingFunction(List *procname, Oid typeOid); +static Oid findRangeSubOpclass(List *opcname, Oid subtype); +static Oid findRangeCanonicalFunction(List *procname, Oid typeOid); +static Oid findRangeSubtypeDiffFunction(List *procname, Oid subtype); +static void validateDomainConstraint(Oid domainoid, char *ccbin); +static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode); +static void checkEnumOwner(HeapTuple tup); +static char *domainAddConstraint(Oid domainOid, Oid domainNamespace, + Oid baseTypeOid, + int typMod, Constraint *constr, + const char *domainName, ObjectAddress *constrAddr); +static Node *replace_domain_constraint_value(ParseState 
*pstate, + ColumnRef *cref); +static void AlterTypeRecurse(Oid typeOid, bool isImplicitArray, + HeapTuple tup, Relation catalog, + AlterTypeRecurseParams *atparams); + + +/* + * DefineType + * Registers a new base type. + */ +ObjectAddress +DefineType(ParseState *pstate, List *names, List *parameters) +{ + char *typeName; + Oid typeNamespace; + int16 internalLength = -1; /* default: variable-length */ + List *inputName = NIL; + List *outputName = NIL; + List *receiveName = NIL; + List *sendName = NIL; + List *typmodinName = NIL; + List *typmodoutName = NIL; + List *analyzeName = NIL; + List *subscriptName = NIL; + char category = TYPCATEGORY_USER; + bool preferred = false; + char delimiter = DEFAULT_TYPDELIM; + Oid elemType = InvalidOid; + char *defaultValue = NULL; + bool byValue = false; + char alignment = TYPALIGN_INT; /* default alignment */ + char storage = TYPSTORAGE_PLAIN; /* default TOAST storage method */ + Oid collation = InvalidOid; + DefElem *likeTypeEl = NULL; + DefElem *internalLengthEl = NULL; + DefElem *inputNameEl = NULL; + DefElem *outputNameEl = NULL; + DefElem *receiveNameEl = NULL; + DefElem *sendNameEl = NULL; + DefElem *typmodinNameEl = NULL; + DefElem *typmodoutNameEl = NULL; + DefElem *analyzeNameEl = NULL; + DefElem *subscriptNameEl = NULL; + DefElem *categoryEl = NULL; + DefElem *preferredEl = NULL; + DefElem *delimiterEl = NULL; + DefElem *elemTypeEl = NULL; + DefElem *defaultValueEl = NULL; + DefElem *byValueEl = NULL; + DefElem *alignmentEl = NULL; + DefElem *storageEl = NULL; + DefElem *collatableEl = NULL; + Oid inputOid; + Oid outputOid; + Oid receiveOid = InvalidOid; + Oid sendOid = InvalidOid; + Oid typmodinOid = InvalidOid; + Oid typmodoutOid = InvalidOid; + Oid analyzeOid = InvalidOid; + Oid subscriptOid = InvalidOid; + char *array_type; + Oid array_oid; + Oid typoid; + ListCell *pl; + ObjectAddress address; + + /* + * As of Postgres 8.4, we require superuser privilege to create a base + * type. 
This is simple paranoia: there are too many ways to mess up the + * system with an incorrect type definition (for instance, representation + * parameters that don't match what the C code expects). In practice it + * takes superuser privilege to create the I/O functions, and so the + * former requirement that you own the I/O functions pretty much forced + * superuserness anyway. We're just making doubly sure here. + * + * XXX re-enable NOT_USED code sections below if you remove this test. + */ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create a base type"))); + + /* Convert list of names to a name and namespace */ + typeNamespace = QualifiedNameGetCreationNamespace(names, &typeName); + +#ifdef NOT_USED + /* XXX this is unnecessary given the superuser check above */ + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(typeNamespace)); +#endif + + /* + * Look to see if type already exists. + */ + typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(typeName), + ObjectIdGetDatum(typeNamespace)); + + /* + * If it's not a shell, see if it's an autogenerated array type, and if so + * rename it out of the way. + */ + if (OidIsValid(typoid) && get_typisdefined(typoid)) + { + if (moveArrayTypeName(typoid, typeName, typeNamespace)) + typoid = InvalidOid; + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", typeName))); + } + + /* + * If this command is a parameterless CREATE TYPE, then we're just here to + * make a shell type, so do that (or fail if there already is a shell). 
+ */ + if (parameters == NIL) + { + if (OidIsValid(typoid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", typeName))); + + address = TypeShellMake(typeName, typeNamespace, GetUserId()); + return address; + } + + /* + * Otherwise, we must already have a shell type, since there is no other + * way that the I/O functions could have been created. + */ + if (!OidIsValid(typoid)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" does not exist", typeName), + errhint("Create the type as a shell type, then create its I/O functions, then do a full CREATE TYPE."))); + + /* Extract the parameters from the parameter list */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + DefElem **defelp; + + if (strcmp(defel->defname, "like") == 0) + defelp = &likeTypeEl; + else if (strcmp(defel->defname, "internallength") == 0) + defelp = &internalLengthEl; + else if (strcmp(defel->defname, "input") == 0) + defelp = &inputNameEl; + else if (strcmp(defel->defname, "output") == 0) + defelp = &outputNameEl; + else if (strcmp(defel->defname, "receive") == 0) + defelp = &receiveNameEl; + else if (strcmp(defel->defname, "send") == 0) + defelp = &sendNameEl; + else if (strcmp(defel->defname, "typmod_in") == 0) + defelp = &typmodinNameEl; + else if (strcmp(defel->defname, "typmod_out") == 0) + defelp = &typmodoutNameEl; + else if (strcmp(defel->defname, "analyze") == 0 || + strcmp(defel->defname, "analyse") == 0) + defelp = &analyzeNameEl; + else if (strcmp(defel->defname, "subscript") == 0) + defelp = &subscriptNameEl; + else if (strcmp(defel->defname, "category") == 0) + defelp = &categoryEl; + else if (strcmp(defel->defname, "preferred") == 0) + defelp = &preferredEl; + else if (strcmp(defel->defname, "delimiter") == 0) + defelp = &delimiterEl; + else if (strcmp(defel->defname, "element") == 0) + defelp = &elemTypeEl; + else if (strcmp(defel->defname, "default") == 0) + defelp = &defaultValueEl; 
+ else if (strcmp(defel->defname, "passedbyvalue") == 0) + defelp = &byValueEl; + else if (strcmp(defel->defname, "alignment") == 0) + defelp = &alignmentEl; + else if (strcmp(defel->defname, "storage") == 0) + defelp = &storageEl; + else if (strcmp(defel->defname, "collatable") == 0) + defelp = &collatableEl; + else + { + /* WARNING, not ERROR, for historical backwards-compatibility */ + ereport(WARNING, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + continue; + } + if (*defelp != NULL) + errorConflictingDefElem(defel, pstate); + *defelp = defel; + } + + /* + * Now interpret the options; we do this separately so that LIKE can be + * overridden by other options regardless of the ordering in the parameter + * list. + */ + if (likeTypeEl) + { + Type likeType; + Form_pg_type likeForm; + + likeType = typenameType(NULL, defGetTypeName(likeTypeEl), NULL); + likeForm = (Form_pg_type) GETSTRUCT(likeType); + internalLength = likeForm->typlen; + byValue = likeForm->typbyval; + alignment = likeForm->typalign; + storage = likeForm->typstorage; + ReleaseSysCache(likeType); + } + if (internalLengthEl) + internalLength = defGetTypeLength(internalLengthEl); + if (inputNameEl) + inputName = defGetQualifiedName(inputNameEl); + if (outputNameEl) + outputName = defGetQualifiedName(outputNameEl); + if (receiveNameEl) + receiveName = defGetQualifiedName(receiveNameEl); + if (sendNameEl) + sendName = defGetQualifiedName(sendNameEl); + if (typmodinNameEl) + typmodinName = defGetQualifiedName(typmodinNameEl); + if (typmodoutNameEl) + typmodoutName = defGetQualifiedName(typmodoutNameEl); + if (analyzeNameEl) + analyzeName = defGetQualifiedName(analyzeNameEl); + if (subscriptNameEl) + subscriptName = defGetQualifiedName(subscriptNameEl); + if (categoryEl) + { + char *p = defGetString(categoryEl); + + category = p[0]; + /* restrict to non-control ASCII */ + if (category < 32 || category > 
126) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type category \"%s\": must be simple ASCII", + p))); + } + if (preferredEl) + preferred = defGetBoolean(preferredEl); + if (delimiterEl) + { + char *p = defGetString(delimiterEl); + + delimiter = p[0]; + /* XXX shouldn't we restrict the delimiter? */ + } + if (elemTypeEl) + { + elemType = typenameTypeId(NULL, defGetTypeName(elemTypeEl)); + /* disallow arrays of pseudotypes */ + if (get_typtype(elemType) == TYPTYPE_PSEUDO) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array element type cannot be %s", + format_type_be(elemType)))); + } + if (defaultValueEl) + defaultValue = defGetString(defaultValueEl); + if (byValueEl) + byValue = defGetBoolean(byValueEl); + if (alignmentEl) + { + char *a = defGetString(alignmentEl); + + /* + * Note: if argument was an unquoted identifier, parser will have + * applied translations to it, so be prepared to recognize translated + * type names as well as the nominal form. 
+ */ + if (pg_strcasecmp(a, "double") == 0 || + pg_strcasecmp(a, "float8") == 0 || + pg_strcasecmp(a, "pg_catalog.float8") == 0) + alignment = TYPALIGN_DOUBLE; + else if (pg_strcasecmp(a, "int4") == 0 || + pg_strcasecmp(a, "pg_catalog.int4") == 0) + alignment = TYPALIGN_INT; + else if (pg_strcasecmp(a, "int2") == 0 || + pg_strcasecmp(a, "pg_catalog.int2") == 0) + alignment = TYPALIGN_SHORT; + else if (pg_strcasecmp(a, "char") == 0 || + pg_strcasecmp(a, "pg_catalog.bpchar") == 0) + alignment = TYPALIGN_CHAR; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("alignment \"%s\" not recognized", a))); + } + if (storageEl) + { + char *a = defGetString(storageEl); + + if (pg_strcasecmp(a, "plain") == 0) + storage = TYPSTORAGE_PLAIN; + else if (pg_strcasecmp(a, "external") == 0) + storage = TYPSTORAGE_EXTERNAL; + else if (pg_strcasecmp(a, "extended") == 0) + storage = TYPSTORAGE_EXTENDED; + else if (pg_strcasecmp(a, "main") == 0) + storage = TYPSTORAGE_MAIN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("storage \"%s\" not recognized", a))); + } + if (collatableEl) + collation = defGetBoolean(collatableEl) ? 
DEFAULT_COLLATION_OID : InvalidOid; + + /* + * make sure we have our required definitions + */ + if (inputName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type input function must be specified"))); + if (outputName == NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type output function must be specified"))); + + if (typmodinName == NIL && typmodoutName != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("type modifier output function is useless without a type modifier input function"))); + + /* + * Convert I/O proc names to OIDs + */ + inputOid = findTypeInputFunction(inputName, typoid); + outputOid = findTypeOutputFunction(outputName, typoid); + if (receiveName) + receiveOid = findTypeReceiveFunction(receiveName, typoid); + if (sendName) + sendOid = findTypeSendFunction(sendName, typoid); + + /* + * Convert typmodin/out function proc names to OIDs. + */ + if (typmodinName) + typmodinOid = findTypeTypmodinFunction(typmodinName); + if (typmodoutName) + typmodoutOid = findTypeTypmodoutFunction(typmodoutName); + + /* + * Convert analysis function proc name to an OID. If no analysis function + * is specified, we'll use zero to select the built-in default algorithm. + */ + if (analyzeName) + analyzeOid = findTypeAnalyzeFunction(analyzeName, typoid); + + /* + * Likewise look up the subscripting function if any. If it is not + * specified, but a typelem is specified, allow that if + * raw_array_subscript_handler can be used. (This is for backwards + * compatibility; maybe someday we should throw an error instead.) 
+ */ + if (subscriptName) + subscriptOid = findTypeSubscriptingFunction(subscriptName, typoid); + else if (OidIsValid(elemType)) + { + if (internalLength > 0 && !byValue && get_typlen(elemType) > 0) + subscriptOid = F_RAW_ARRAY_SUBSCRIPT_HANDLER; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("element type cannot be specified without a subscripting function"))); + } + + /* + * Check permissions on functions. We choose to require the creator/owner + * of a type to also own the underlying functions. Since creating a type + * is tantamount to granting public execute access on the functions, the + * minimum sane check would be for execute-with-grant-option. But we + * don't have a way to make the type go away if the grant option is + * revoked, so ownership seems better. + * + * XXX For now, this is all unnecessary given the superuser check above. + * If we ever relax that, these calls likely should be moved into + * findTypeInputFunction et al, where they could be shared by AlterType. 
+ */ +#ifdef NOT_USED + if (inputOid && !pg_proc_ownercheck(inputOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(inputName)); + if (outputOid && !pg_proc_ownercheck(outputOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(outputName)); + if (receiveOid && !pg_proc_ownercheck(receiveOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(receiveName)); + if (sendOid && !pg_proc_ownercheck(sendOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(sendName)); + if (typmodinOid && !pg_proc_ownercheck(typmodinOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(typmodinName)); + if (typmodoutOid && !pg_proc_ownercheck(typmodoutOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(typmodoutName)); + if (analyzeOid && !pg_proc_ownercheck(analyzeOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(analyzeName)); + if (subscriptOid && !pg_proc_ownercheck(subscriptOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_FUNCTION, + NameListToString(subscriptName)); +#endif + + /* + * OK, we're done checking, time to make the type. We must assign the + * array type OID ahead of calling TypeCreate, since the base type and + * array type each refer to the other. + */ + array_oid = AssignTypeArrayOid(); + + /* + * now have TypeCreate do all the real work. + * + * Note: the pg_type.oid is stored in user tables as array elements (base + * types) in ArrayType and in composite types in DatumTupleFields. This + * oid must be preserved by binary upgrades. 
+ */ + address = + TypeCreate(InvalidOid, /* no predetermined type OID */ + typeName, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + internalLength, /* internal size */ + TYPTYPE_BASE, /* type-type (base type) */ + category, /* type-category */ + preferred, /* is it a preferred type? */ + delimiter, /* array element delimiter */ + inputOid, /* input procedure */ + outputOid, /* output procedure */ + receiveOid, /* receive procedure */ + sendOid, /* send procedure */ + typmodinOid, /* typmodin procedure */ + typmodoutOid, /* typmodout procedure */ + analyzeOid, /* analyze procedure */ + subscriptOid, /* subscript procedure */ + elemType, /* element type ID */ + false, /* this is not an implicit array type */ + array_oid, /* array type we are about to create */ + InvalidOid, /* base type ID (only for domains) */ + defaultValue, /* default type value */ + NULL, /* no binary form available */ + byValue, /* passed by value */ + alignment, /* required alignment */ + storage, /* TOAST strategy */ + -1, /* typMod (Domains only) */ + 0, /* Array Dimensions of typbasetype */ + false, /* Type NOT NULL */ + collation); /* type's collation */ + Assert(typoid == address.objectId); + + /* + * Create the array type that goes with it. + */ + array_type = makeArrayTypeName(typeName, typeNamespace); + + /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */ + alignment = (alignment == TYPALIGN_DOUBLE) ? 
TYPALIGN_DOUBLE : TYPALIGN_INT; + + TypeCreate(array_oid, /* force assignment of this type OID */ + array_type, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + delimiter, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + typmodinOid, /* typmodin procedure */ + typmodoutOid, /* typmodout procedure */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + typoid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* see above */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + collation); /* type's collation */ + + pfree(array_type); + + return address; +} + +/* + * Guts of type deletion. + */ +void +RemoveTypeById(Oid typeOid) +{ + Relation relation; + HeapTuple tup; + + relation = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + + CatalogTupleDelete(relation, &tup->t_self); + + /* + * If it is an enum, delete the pg_enum entries too; we don't bother with + * making dependency entries for those, so it has to be done "by hand" + * here. 
+ */ + if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_ENUM) + EnumValuesDelete(typeOid); + + /* + * If it is a range type, delete the pg_range entry too; we don't bother + * with making a dependency entry for that, so it has to be done "by hand" + * here. + */ + if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_RANGE) + RangeDelete(typeOid); + + ReleaseSysCache(tup); + + table_close(relation, RowExclusiveLock); +} + + +/* + * DefineDomain + * Registers a new domain. + */ +ObjectAddress +DefineDomain(CreateDomainStmt *stmt) +{ + char *domainName; + char *domainArrayName; + Oid domainNamespace; + AclResult aclresult; + int16 internalLength; + Oid inputProcedure; + Oid outputProcedure; + Oid receiveProcedure; + Oid sendProcedure; + Oid analyzeProcedure; + bool byValue; + char category; + char delimiter; + char alignment; + char storage; + char typtype; + Datum datum; + bool isnull; + char *defaultValue = NULL; + char *defaultValueBin = NULL; + bool saw_default = false; + bool typNotNull = false; + bool nullDefined = false; + int32 typNDims = list_length(stmt->typeName->arrayBounds); + HeapTuple typeTup; + List *schema = stmt->constraints; + ListCell *listptr; + Oid basetypeoid; + Oid old_type_oid; + Oid domaincoll; + Oid domainArrayOid; + Form_pg_type baseType; + int32 basetypeMod; + Oid baseColl; + ObjectAddress address; + + /* Convert list of names to a name and namespace */ + domainNamespace = QualifiedNameGetCreationNamespace(stmt->domainname, + &domainName); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(domainNamespace, GetUserId(), + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(domainNamespace)); + + /* + * Check for collision with an existing type name. If there is one and + * it's an autogenerated array, we can rename it out of the way. 
+ */ + old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(domainName), + ObjectIdGetDatum(domainNamespace)); + if (OidIsValid(old_type_oid)) + { + if (!moveArrayTypeName(old_type_oid, domainName, domainNamespace)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", domainName))); + } + + /* + * Look up the base type. + */ + typeTup = typenameType(NULL, stmt->typeName, &basetypeMod); + baseType = (Form_pg_type) GETSTRUCT(typeTup); + basetypeoid = baseType->oid; + + /* + * Base type must be a plain base type, a composite type, another domain, + * an enum or a range type. Domains over pseudotypes would create a + * security hole. (It would be shorter to code this to just check for + * pseudotypes; but it seems safer to call out the specific typtypes that + * are supported, rather than assume that all future typtypes would be + * automatically supported.) + */ + typtype = baseType->typtype; + if (typtype != TYPTYPE_BASE && + typtype != TYPTYPE_COMPOSITE && + typtype != TYPTYPE_DOMAIN && + typtype != TYPTYPE_ENUM && + typtype != TYPTYPE_RANGE && + typtype != TYPTYPE_MULTIRANGE) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("\"%s\" is not a valid base type for a domain", + TypeNameToString(stmt->typeName)))); + + aclresult = pg_type_aclcheck(basetypeoid, GetUserId(), ACL_USAGE); + if (aclresult != ACLCHECK_OK) + aclcheck_error_type(aclresult, basetypeoid); + + /* + * Collect the properties of the new domain. Some are inherited from the + * base type, some are not. If you change any of this inheritance + * behavior, be sure to update AlterTypeRecurse() to match! 
+ */ + + /* + * Identify the collation if any + */ + baseColl = baseType->typcollation; + if (stmt->collClause) + domaincoll = get_collation_oid(stmt->collClause->collname, false); + else + domaincoll = baseColl; + + /* Complain if COLLATE is applied to an uncollatable type */ + if (OidIsValid(domaincoll) && !OidIsValid(baseColl)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("collations are not supported by type %s", + format_type_be(basetypeoid)))); + + /* passed by value */ + byValue = baseType->typbyval; + + /* Required Alignment */ + alignment = baseType->typalign; + + /* TOAST Strategy */ + storage = baseType->typstorage; + + /* Storage Length */ + internalLength = baseType->typlen; + + /* Type Category */ + category = baseType->typcategory; + + /* Array element Delimiter */ + delimiter = baseType->typdelim; + + /* I/O Functions */ + inputProcedure = F_DOMAIN_IN; + outputProcedure = baseType->typoutput; + receiveProcedure = F_DOMAIN_RECV; + sendProcedure = baseType->typsend; + + /* Domains never accept typmods, so no typmodin/typmodout needed */ + + /* Analysis function */ + analyzeProcedure = baseType->typanalyze; + + /* + * Domains don't need a subscript function, since they are not + * subscriptable on their own. If the base type is subscriptable, the + * parser will reduce the type to the base type before subscripting. + */ + + /* Inherited default value */ + datum = SysCacheGetAttr(TYPEOID, typeTup, + Anum_pg_type_typdefault, &isnull); + if (!isnull) + defaultValue = TextDatumGetCString(datum); + + /* Inherited default binary value */ + datum = SysCacheGetAttr(TYPEOID, typeTup, + Anum_pg_type_typdefaultbin, &isnull); + if (!isnull) + defaultValueBin = TextDatumGetCString(datum); + + /* + * Run through constraints manually to avoid the additional processing + * conducted by DefineRelation() and friends. 
+ */ + foreach(listptr, schema) + { + Constraint *constr = lfirst(listptr); + + if (!IsA(constr, Constraint)) + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(constr)); + switch (constr->contype) + { + case CONSTR_DEFAULT: + + /* + * The inherited default value may be overridden by the user + * with the DEFAULT clause ... but only once. + */ + if (saw_default) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("multiple default expressions"))); + saw_default = true; + + if (constr->raw_expr) + { + ParseState *pstate; + Node *defaultExpr; + + /* Create a dummy ParseState for transformExpr */ + pstate = make_parsestate(NULL); + + /* + * Cook the constr->raw_expr into an expression. Note: + * name is strictly for error message + */ + defaultExpr = cookDefault(pstate, constr->raw_expr, + basetypeoid, + basetypeMod, + domainName, + 0); + + /* + * If the expression is just a NULL constant, we treat it + * like not having a default. + * + * Note that if the basetype is another domain, we'll see + * a CoerceToDomain expr here and not discard the default. + * This is critical because the domain default needs to be + * retained to override any default that the base domain + * might have. + */ + if (defaultExpr == NULL || + (IsA(defaultExpr, Const) && + ((Const *) defaultExpr)->constisnull)) + { + defaultValue = NULL; + defaultValueBin = NULL; + } + else + { + /* + * Expression must be stored as a nodeToString result, + * but we also require a valid textual representation + * (mainly to make life easier for pg_dump). + */ + defaultValue = + deparse_expression(defaultExpr, + NIL, false, false); + defaultValueBin = nodeToString(defaultExpr); + } + } + else + { + /* No default (can this still happen?) 
*/ + defaultValue = NULL; + defaultValueBin = NULL; + } + break; + + case CONSTR_NOTNULL: + if (nullDefined && !typNotNull) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"))); + typNotNull = true; + nullDefined = true; + break; + + case CONSTR_NULL: + if (nullDefined && typNotNull) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"))); + typNotNull = false; + nullDefined = true; + break; + + case CONSTR_CHECK: + + /* + * Check constraints are handled after domain creation, as + * they require the Oid of the domain; at this point we can + * only check that they're not marked NO INHERIT, because that + * would be bogus. + */ + if (constr->is_no_inherit) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("check constraints for domains cannot be marked NO INHERIT"))); + break; + + /* + * All else are error cases + */ + case CONSTR_UNIQUE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unique constraints not possible for domains"))); + break; + + case CONSTR_PRIMARY: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("primary key constraints not possible for domains"))); + break; + + case CONSTR_EXCLUSION: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("exclusion constraints not possible for domains"))); + break; + + case CONSTR_FOREIGN: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("foreign key constraints not possible for domains"))); + break; + + case CONSTR_ATTR_DEFERRABLE: + case CONSTR_ATTR_NOT_DEFERRABLE: + case CONSTR_ATTR_DEFERRED: + case CONSTR_ATTR_IMMEDIATE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying constraint deferrability not supported for domains"))); + break; + + default: + elog(ERROR, "unrecognized constraint subtype: %d", + (int) constr->contype); + break; + } + } + + /* Allocate OID for array type */ + domainArrayOid = AssignTypeArrayOid(); + 
+ /* + * Have TypeCreate do all the real work. + */ + address = + TypeCreate(InvalidOid, /* no predetermined type OID */ + domainName, /* type name */ + domainNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + internalLength, /* internal size */ + TYPTYPE_DOMAIN, /* type-type (domain type) */ + category, /* type-category */ + false, /* domain types are never preferred */ + delimiter, /* array element delimiter */ + inputProcedure, /* input procedure */ + outputProcedure, /* output procedure */ + receiveProcedure, /* receive procedure */ + sendProcedure, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + analyzeProcedure, /* analyze procedure */ + InvalidOid, /* subscript procedure - none */ + InvalidOid, /* no array element type */ + false, /* this isn't an array */ + domainArrayOid, /* array type we are about to create */ + basetypeoid, /* base type ID */ + defaultValue, /* default type value (text) */ + defaultValueBin, /* default type value (binary) */ + byValue, /* passed by value */ + alignment, /* required alignment */ + storage, /* TOAST strategy */ + basetypeMod, /* typeMod value */ + typNDims, /* Array dimensions for base type */ + typNotNull, /* Type NOT NULL */ + domaincoll); /* type's collation */ + + /* + * Create the array type that goes with it. + */ + domainArrayName = makeArrayTypeName(domainName, domainNamespace); + + /* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for arrays */ + alignment = (alignment == TYPALIGN_DOUBLE) ? 
TYPALIGN_DOUBLE : TYPALIGN_INT; + + TypeCreate(domainArrayOid, /* force assignment of this type OID */ + domainArrayName, /* type name */ + domainNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + delimiter, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + address.objectId, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* see above */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + domaincoll); /* type's collation */ + + pfree(domainArrayName); + + /* + * Process constraints which refer to the domain ID returned by TypeCreate + */ + foreach(listptr, schema) + { + Constraint *constr = lfirst(listptr); + + /* it must be a Constraint, per check above */ + + switch (constr->contype) + { + case CONSTR_CHECK: + domainAddConstraint(address.objectId, domainNamespace, + basetypeoid, basetypeMod, + constr, domainName, NULL); + break; + + /* Other constraint types were fully processed above */ + + default: + break; + } + + /* CCI so we can detect duplicate constraint names */ + CommandCounterIncrement(); + } + + /* + * Now we can clean up. 
 */
	ReleaseSysCache(typeTup);

	return address;
}


/*
 * DefineEnum
 *		Registers a new enum.
 *
 * Returns the ObjectAddress of the new enum type.  Its array type is
 * created here as a side effect.
 */
ObjectAddress
DefineEnum(CreateEnumStmt *stmt)
{
	char	   *enumName;
	char	   *enumArrayName;
	Oid			enumNamespace;
	AclResult	aclresult;
	Oid			old_type_oid;
	Oid			enumArrayOid;
	ObjectAddress enumTypeAddr;

	/* Convert list of names to a name and namespace */
	enumNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
													  &enumName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(enumNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(enumNamespace));

	/*
	 * Check for collision with an existing type name.  If there is one and
	 * it's an autogenerated array, we can rename it out of the way.
	 */
	old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
								   CStringGetDatum(enumName),
								   ObjectIdGetDatum(enumNamespace));
	if (OidIsValid(old_type_oid))
	{
		if (!moveArrayTypeName(old_type_oid, enumName, enumNamespace))
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_OBJECT),
					 errmsg("type \"%s\" already exists", enumName)));
	}

	/* Allocate OID for array type */
	enumArrayOid = AssignTypeArrayOid();

	/* Create the pg_type entry */
	enumTypeAddr =
		TypeCreate(InvalidOid,	/* no predetermined type OID */
				   enumName,	/* type name */
				   enumNamespace,	/* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   sizeof(Oid), /* internal size */
				   TYPTYPE_ENUM,	/* type-type (enum type) */
				   TYPCATEGORY_ENUM,	/* type-category (enum type) */
				   false,		/* enum types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_ENUM_IN,	/* input procedure */
				   F_ENUM_OUT,	/* output procedure */
				   F_ENUM_RECV, /* receive procedure */
				   F_ENUM_SEND, /* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   InvalidOid,	/* analyze procedure - default */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID */
				   false,		/* this is not an array type */
				   enumArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* binary default isn't sent either */
				   true,		/* always passed by value */
				   TYPALIGN_INT,	/* int alignment */
				   TYPSTORAGE_PLAIN,	/* TOAST strategy always plain */
				   -1,			/* typMod (Domains only) */
				   0,			/* Array dimensions of typbasetype */
				   false,		/* Type NOT NULL */
				   InvalidOid); /* type's collation */

	/* Enter the enum's values into pg_enum */
	EnumValuesCreate(enumTypeAddr.objectId, stmt->vals);

	/*
	 * Create the array type that goes with it.
	 */
	enumArrayName = makeArrayTypeName(enumName, enumNamespace);

	TypeCreate(enumArrayOid,	/* force assignment of this type OID */
			   enumArrayName,	/* type name */
			   enumNamespace,	/* namespace */
			   InvalidOid,		/* relation oid (n/a here) */
			   0,				/* relation kind (ditto) */
			   GetUserId(),		/* owner's ID */
			   -1,				/* internal size (always varlena) */
			   TYPTYPE_BASE,	/* type-type (base type) */
			   TYPCATEGORY_ARRAY,	/* type-category (array) */
			   false,			/* array types are never preferred */
			   DEFAULT_TYPDELIM,	/* array element delimiter */
			   F_ARRAY_IN,		/* input procedure */
			   F_ARRAY_OUT,		/* output procedure */
			   F_ARRAY_RECV,	/* receive procedure */
			   F_ARRAY_SEND,	/* send procedure */
			   InvalidOid,		/* typmodin procedure - none */
			   InvalidOid,		/* typmodout procedure - none */
			   F_ARRAY_TYPANALYZE,	/* analyze procedure */
			   F_ARRAY_SUBSCRIPT_HANDLER,	/* array subscript procedure */
			   enumTypeAddr.objectId,	/* element type ID */
			   true,			/* yes this is an array type */
			   InvalidOid,		/* no further array type */
			   InvalidOid,		/* base type ID */
			   NULL,			/* never a default type value */
			   NULL,			/* binary default isn't sent either */
			   false,			/* never passed by value */
			   TYPALIGN_INT,	/* enums have int align, so do their arrays */
			   TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
			   -1,				/* typMod (Domains only) */
			   0,				/* Array dimensions of typbasetype */
			   false,			/* Type NOT NULL */
			   InvalidOid);		/* type's collation */

	pfree(enumArrayName);

	return enumTypeAddr;
}

/*
 * AlterEnum
 *		Adds a new label to an existing enum.
 *
 * Despite the name, this also implements ALTER TYPE ... RENAME VALUE
 * (signalled by stmt->oldVal being set).
 */
ObjectAddress
AlterEnum(AlterEnumStmt *stmt)
{
	Oid			enum_type_oid;
	TypeName   *typename;
	HeapTuple	tup;
	ObjectAddress address;

	/* Make a TypeName so we can use standard type lookup machinery */
	typename = makeTypeNameFromNameList(stmt->typeName);
	enum_type_oid = typenameTypeId(NULL, typename);

	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(enum_type_oid));
	if (!HeapTupleIsValid(tup))
		elog(ERROR, "cache lookup failed for type %u", enum_type_oid);

	/* Check it's an enum and check user has permission to ALTER the enum */
	checkEnumOwner(tup);

	ReleaseSysCache(tup);

	if (stmt->oldVal)
	{
		/* Rename an existing label */
		RenameEnumLabel(enum_type_oid, stmt->oldVal, stmt->newVal);
	}
	else
	{
		/* Add a new label */
		AddEnumLabel(enum_type_oid, stmt->newVal,
					 stmt->newValNeighbor, stmt->newValIsAfter,
					 stmt->skipIfNewValExists);
	}

	InvokeObjectPostAlterHook(TypeRelationId, enum_type_oid, 0);

	ObjectAddressSet(address, TypeRelationId, enum_type_oid);

	return address;
}


/*
 * checkEnumOwner
 *
 * Check that the type is actually an enum and that the current user
 * has permission to do ALTER TYPE on it.  Throw an error if not.
 */
static void
checkEnumOwner(HeapTuple tup)
{
	Form_pg_type typTup = (Form_pg_type) GETSTRUCT(tup);

	/* Check that this is actually an enum */
	if (typTup->typtype != TYPTYPE_ENUM)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("%s is not an enum",
						format_type_be(typTup->oid))));

	/* Permission check: must own type */
	if (!pg_type_ownercheck(typTup->oid, GetUserId()))
		aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid);
}


/*
 * DefineRange
 *		Registers a new range type.
 *
 * Also creates the range's multirange type, plus array types over both,
 * and the range/multirange constructor functions.
 *
 * Perhaps it might be worthwhile to set pg_type.typelem to the base type,
 * and likewise on multiranges to set it to the range type.  But having a
 * non-zero typelem is treated elsewhere as a synonym for being an array,
 * and users might have queries with that same assumption.
 */
ObjectAddress
DefineRange(ParseState *pstate, CreateRangeStmt *stmt)
{
	char	   *typeName;
	Oid			typeNamespace;
	Oid			typoid;
	char	   *rangeArrayName;
	char	   *multirangeTypeName = NULL;
	char	   *multirangeArrayName;
	Oid			multirangeNamespace = InvalidOid;
	Oid			rangeArrayOid;
	Oid			multirangeOid;
	Oid			multirangeArrayOid;
	Oid			rangeSubtype = InvalidOid;
	List	   *rangeSubOpclassName = NIL;
	List	   *rangeCollationName = NIL;
	List	   *rangeCanonicalName = NIL;
	List	   *rangeSubtypeDiffName = NIL;
	Oid			rangeSubOpclass;
	Oid			rangeCollation;
	regproc		rangeCanonical;
	regproc		rangeSubtypeDiff;
	int16		subtyplen;
	bool		subtypbyval;
	char		subtypalign;
	char		alignment;
	AclResult	aclresult;
	ListCell   *lc;
	ObjectAddress address;
	ObjectAddress mltrngaddress PG_USED_FOR_ASSERTS_ONLY;
	Oid			castFuncOid;

	/* Convert list of names to a name and namespace */
	typeNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
													  &typeName);

	/* Check we have creation rights in target namespace */
	aclresult = pg_namespace_aclcheck(typeNamespace, GetUserId(), ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, OBJECT_SCHEMA,
					   get_namespace_name(typeNamespace));

	/*
	 * Look to see if type already exists.
	 */
	typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
							 CStringGetDatum(typeName),
							 ObjectIdGetDatum(typeNamespace));

	/*
	 * If it's not a shell, see if it's an autogenerated array type, and if so
	 * rename it out of the way.
	 */
	if (OidIsValid(typoid) && get_typisdefined(typoid))
	{
		if (moveArrayTypeName(typoid, typeName, typeNamespace))
			typoid = InvalidOid;
		else
			ereport(ERROR,
					(errcode(ERRCODE_DUPLICATE_OBJECT),
					 errmsg("type \"%s\" already exists", typeName)));
	}

	/*
	 * Unlike DefineType(), we don't insist on a shell type existing first, as
	 * it's only needed if the user wants to specify a canonical function.
	 */

	/* Extract the parameters from the parameter list */
	foreach(lc, stmt->params)
	{
		DefElem    *defel = (DefElem *) lfirst(lc);

		if (strcmp(defel->defname, "subtype") == 0)
		{
			if (OidIsValid(rangeSubtype))
				errorConflictingDefElem(defel, pstate);
			/* we can look up the subtype name immediately */
			rangeSubtype = typenameTypeId(NULL, defGetTypeName(defel));
		}
		else if (strcmp(defel->defname, "subtype_opclass") == 0)
		{
			if (rangeSubOpclassName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeSubOpclassName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "collation") == 0)
		{
			if (rangeCollationName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeCollationName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "canonical") == 0)
		{
			if (rangeCanonicalName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeCanonicalName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "subtype_diff") == 0)
		{
			if (rangeSubtypeDiffName != NIL)
				errorConflictingDefElem(defel, pstate);
			rangeSubtypeDiffName = defGetQualifiedName(defel);
		}
		else if (strcmp(defel->defname, "multirange_type_name") == 0)
		{
			if (multirangeTypeName != NULL)
				errorConflictingDefElem(defel, pstate);
			/* we can look up the subtype name immediately */
			multirangeNamespace = QualifiedNameGetCreationNamespace(defGetQualifiedName(defel),
																	&multirangeTypeName);
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("type attribute \"%s\" not recognized",
							defel->defname)));
	}

	/* Must have a subtype */
	if (!OidIsValid(rangeSubtype))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("type attribute \"subtype\" is required")));
	/* disallow ranges of pseudotypes */
	if (get_typtype(rangeSubtype) == TYPTYPE_PSEUDO)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("range subtype cannot be %s",
						format_type_be(rangeSubtype))));

	/* Identify subopclass */
	rangeSubOpclass = findRangeSubOpclass(rangeSubOpclassName, rangeSubtype);

	/* Identify collation to use, if any */
	if (type_is_collatable(rangeSubtype))
	{
		if (rangeCollationName != NIL)
			rangeCollation = get_collation_oid(rangeCollationName, false);
		else
			rangeCollation = get_typcollation(rangeSubtype);
	}
	else
	{
		if (rangeCollationName != NIL)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("range collation specified but subtype does not support collation")));
		rangeCollation = InvalidOid;
	}

	/* Identify support functions, if provided */
	if (rangeCanonicalName != NIL)
	{
		if (!OidIsValid(typoid))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("cannot specify a canonical function without a pre-created shell type"),
					 errhint("Create the type as a shell type, then create its canonicalization function, then do a full CREATE TYPE.")));
		rangeCanonical = findRangeCanonicalFunction(rangeCanonicalName,
													typoid);
	}
	else
		rangeCanonical = InvalidOid;

	if (rangeSubtypeDiffName != NIL)
		rangeSubtypeDiff = findRangeSubtypeDiffFunction(rangeSubtypeDiffName,
														rangeSubtype);
	else
		rangeSubtypeDiff = InvalidOid;

	get_typlenbyvalalign(rangeSubtype,
						 &subtyplen, &subtypbyval, &subtypalign);

	/* alignment must be TYPALIGN_INT or TYPALIGN_DOUBLE for ranges */
	alignment = (subtypalign == TYPALIGN_DOUBLE) ? TYPALIGN_DOUBLE : TYPALIGN_INT;

	/* Allocate OID for array type, its multirange, and its multirange array */
	rangeArrayOid = AssignTypeArrayOid();
	multirangeOid = AssignTypeMultirangeOid();
	multirangeArrayOid = AssignTypeMultirangeArrayOid();

	/* Create the pg_type entry */
	address =
		TypeCreate(InvalidOid,	/* no predetermined type OID */
				   typeName,	/* type name */
				   typeNamespace,	/* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   -1,			/* internal size (always varlena) */
				   TYPTYPE_RANGE,	/* type-type (range type) */
				   TYPCATEGORY_RANGE,	/* type-category (range type) */
				   false,		/* range types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_RANGE_IN,	/* input procedure */
				   F_RANGE_OUT, /* output procedure */
				   F_RANGE_RECV,	/* receive procedure */
				   F_RANGE_SEND,	/* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   F_RANGE_TYPANALYZE,	/* analyze procedure */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID - none */
				   false,		/* this is not an array type */
				   rangeArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* no binary form available either */
				   false,		/* never passed by value */
				   alignment,	/* alignment */
				   TYPSTORAGE_EXTENDED, /* TOAST strategy (always extended) */
				   -1,			/* typMod (Domains only) */
				   0,			/* Array dimensions of typbasetype */
				   false,		/* Type NOT NULL */
				   InvalidOid); /* type's collation (ranges never have one) */
	Assert(typoid == InvalidOid || typoid == address.objectId);
	typoid = address.objectId;

	/* Create the multirange that goes with it */
	if (multirangeTypeName)
	{
		Oid			old_typoid;

		/*
		 * Look to see if multirange type already exists.
		 */
		old_typoid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
									 CStringGetDatum(multirangeTypeName),
									 ObjectIdGetDatum(multirangeNamespace));

		/*
		 * If it's not a shell, see if it's an autogenerated array type, and
		 * if so rename it out of the way.
		 */
		if (OidIsValid(old_typoid) && get_typisdefined(old_typoid))
		{
			if (!moveArrayTypeName(old_typoid, multirangeTypeName, multirangeNamespace))
				ereport(ERROR,
						(errcode(ERRCODE_DUPLICATE_OBJECT),
						 errmsg("type \"%s\" already exists", multirangeTypeName)));
		}
	}
	else
	{
		/* Generate multirange name automatically */
		multirangeNamespace = typeNamespace;
		multirangeTypeName = makeMultirangeTypeName(typeName, multirangeNamespace);
	}

	mltrngaddress =
		TypeCreate(multirangeOid,	/* force assignment of this type OID */
				   multirangeTypeName,	/* type name */
				   multirangeNamespace, /* namespace */
				   InvalidOid,	/* relation oid (n/a here) */
				   0,			/* relation kind (ditto) */
				   GetUserId(), /* owner's ID */
				   -1,			/* internal size (always varlena) */
				   TYPTYPE_MULTIRANGE,	/* type-type (multirange type) */
				   TYPCATEGORY_RANGE,	/* type-category (range type) */
				   false,		/* multirange types are never preferred */
				   DEFAULT_TYPDELIM,	/* array element delimiter */
				   F_MULTIRANGE_IN, /* input procedure */
				   F_MULTIRANGE_OUT,	/* output procedure */
				   F_MULTIRANGE_RECV,	/* receive procedure */
				   F_MULTIRANGE_SEND,	/* send procedure */
				   InvalidOid,	/* typmodin procedure - none */
				   InvalidOid,	/* typmodout procedure - none */
				   F_MULTIRANGE_TYPANALYZE, /* analyze procedure */
				   InvalidOid,	/* subscript procedure - none */
				   InvalidOid,	/* element type ID - none */
				   false,		/* this is not an array type */
				   multirangeArrayOid,	/* array type we are about to create */
				   InvalidOid,	/* base type ID (only for domains) */
				   NULL,		/* never a default type value */
				   NULL,		/* no binary form available either */
false, /* never passed by value */ + alignment, /* alignment */ + 'x', /* TOAST strategy (always extended) */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* type's collation (ranges never have one) */ + Assert(multirangeOid == mltrngaddress.objectId); + + /* Create the entry in pg_range */ + RangeCreate(typoid, rangeSubtype, rangeCollation, rangeSubOpclass, + rangeCanonical, rangeSubtypeDiff, multirangeOid); + + /* + * Create the array type that goes with it. + */ + rangeArrayName = makeArrayTypeName(typeName, typeNamespace); + + TypeCreate(rangeArrayOid, /* force assignment of this type OID */ + rangeArrayName, /* type name */ + typeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + DEFAULT_TYPDELIM, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + typoid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* alignment - same as range's */ + TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* typcollation */ + + pfree(rangeArrayName); + + /* Create the 
multirange's array type */ + + multirangeArrayName = makeArrayTypeName(multirangeTypeName, typeNamespace); + + TypeCreate(multirangeArrayOid, /* force assignment of this type OID */ + multirangeArrayName, /* type name */ + multirangeNamespace, /* namespace */ + InvalidOid, /* relation oid (n/a here) */ + 0, /* relation kind (ditto) */ + GetUserId(), /* owner's ID */ + -1, /* internal size (always varlena) */ + TYPTYPE_BASE, /* type-type (base type) */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + DEFAULT_TYPDELIM, /* array element delimiter */ + F_ARRAY_IN, /* input procedure */ + F_ARRAY_OUT, /* output procedure */ + F_ARRAY_RECV, /* receive procedure */ + F_ARRAY_SEND, /* send procedure */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* analyze procedure */ + F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */ + multirangeOid, /* element type ID */ + true, /* yes this is an array type */ + InvalidOid, /* no further array type */ + InvalidOid, /* base type ID */ + NULL, /* never a default type value */ + NULL, /* binary default isn't sent either */ + false, /* never passed by value */ + alignment, /* alignment - same as range's */ + 'x', /* ARRAY is always toastable */ + -1, /* typMod (Domains only) */ + 0, /* Array dimensions of typbasetype */ + false, /* Type NOT NULL */ + InvalidOid); /* typcollation */ + + /* And create the constructor functions for this range type */ + makeRangeConstructors(typeName, typeNamespace, typoid, rangeSubtype); + makeMultirangeConstructors(multirangeTypeName, typeNamespace, + multirangeOid, typoid, rangeArrayOid, + &castFuncOid); + + /* Create cast from the range type to its multirange type */ + CastCreate(typoid, multirangeOid, castFuncOid, 'e', 'f', DEPENDENCY_INTERNAL); + + pfree(multirangeArrayName); + + return address; +} + +/* + * Because there may exist several range types over the same subtype, the 
 * range type can't be uniquely determined from the subtype.  So it's
 * impossible to define a polymorphic constructor; we have to generate new
 * constructor functions explicitly for each range type.
 *
 * We actually define 2 functions, with 2 and 3 arguments (the 3rd being the
 * optional bounds-flags text argument).  This is just to offer more
 * convenience for the user.
 */
static void
makeRangeConstructors(const char *name, Oid namespace,
					  Oid rangeOid, Oid subtype)
{
	static const char *const prosrc[2] = {"range_constructor2",
		"range_constructor3"};
	static const int pronargs[2] = {2, 3};

	Oid			constructorArgTypes[3];
	ObjectAddress myself,
				referenced;
	int			i;

	constructorArgTypes[0] = subtype;
	constructorArgTypes[1] = subtype;
	constructorArgTypes[2] = TEXTOID;

	referenced.classId = TypeRelationId;
	referenced.objectId = rangeOid;
	referenced.objectSubId = 0;

	for (i = 0; i < lengthof(prosrc); i++)
	{
		oidvector  *constructorArgTypesVector;

		constructorArgTypesVector = buildoidvector(constructorArgTypes,
												   pronargs[i]);

		myself = ProcedureCreate(name,	/* name: same as range type */
								 namespace, /* namespace */
								 false, /* replace */
								 false, /* returns set */
								 rangeOid,	/* return type */
								 BOOTSTRAP_SUPERUSERID, /* proowner */
								 INTERNALlanguageId,	/* language */
								 F_FMGR_INTERNAL_VALIDATOR, /* language validator */
								 prosrc[i], /* prosrc */
								 NULL,	/* probin */
								 NULL,	/* prosqlbody */
								 PROKIND_FUNCTION,
								 false, /* security_definer */
								 false, /* leakproof */
								 false, /* isStrict (NULL arguments are
										 * accepted, unlike the multirange
										 * constructors) */
								 PROVOLATILE_IMMUTABLE, /* volatility */
								 PROPARALLEL_SAFE,	/* parallel safety */
								 constructorArgTypesVector, /* parameterTypes */
								 PointerGetDatum(NULL), /* allParameterTypes */
								 PointerGetDatum(NULL), /* parameterModes */
								 PointerGetDatum(NULL), /* parameterNames */
								 NIL,	/* parameterDefaults */
								 PointerGetDatum(NULL), /* trftypes */
								 PointerGetDatum(NULL), /* proconfig */
								 InvalidOid,	/* prosupport */
								 1.0,	/* procost */
								 0.0);	/* prorows */

		/*
		 * Make the constructors internally-dependent on the range type so
		 * that they go away silently when the type is dropped.  Note that
		 * pg_dump depends on this choice to avoid dumping the constructors.
		 */
		recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	}
}

/*
 * We make a separate multirange constructor for each range type
 * so its name can include the base type, like range constructors do.
 * If we had an anyrangearray polymorphic type we could use it here,
 * but since each type has its own constructor name there's no need.
 *
 * Sets castFuncOid to the oid of the new constructor that can be used
 * to cast from a range to a multirange.
 */
static void
makeMultirangeConstructors(const char *name, Oid namespace,
						   Oid multirangeOid, Oid rangeOid, Oid rangeArrayOid,
						   Oid *castFuncOid)
{
	ObjectAddress myself,
				referenced;
	oidvector  *argtypes;
	Datum		allParamTypes;
	ArrayType  *allParameterTypes;
	Datum		paramModes;
	ArrayType  *parameterModes;

	referenced.classId = TypeRelationId;
	referenced.objectId = multirangeOid;
	referenced.objectSubId = 0;

	/* 0-arg constructor - for empty multiranges */
	argtypes = buildoidvector(NULL, 0);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor0", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */

	/*
	 * Make the constructor internally-dependent on the multirange type so
	 * that they go away silently when the type is dropped.  Note that
	 * pg_dump depends on this choice to avoid dumping the constructors.
	 */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);

	/*
	 * 1-arg constructor - for casts
	 *
	 * In theory we shouldn't need both this and the vararg (n-arg)
	 * constructor, but having a separate 1-arg function lets us define casts
	 * against it.
	 */
	argtypes = buildoidvector(&rangeOid, 1);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor1", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(NULL), /* allParameterTypes */
							 PointerGetDatum(NULL), /* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	*castFuncOid = myself.objectId;

	/* n-arg constructor - vararg */
	argtypes = buildoidvector(&rangeArrayOid, 1);
	allParamTypes = ObjectIdGetDatum(rangeArrayOid);
	allParameterTypes = construct_array(&allParamTypes,
										1, OIDOID,
										sizeof(Oid), true, TYPALIGN_INT);
	paramModes = CharGetDatum(FUNC_PARAM_VARIADIC);
	parameterModes = construct_array(&paramModes, 1, CHAROID,
									 1, true, TYPALIGN_CHAR);
	myself = ProcedureCreate(name,	/* name: same as multirange type */
							 namespace,
							 false, /* replace */
							 false, /* returns set */
							 multirangeOid, /* return type */
							 BOOTSTRAP_SUPERUSERID, /* proowner */
							 INTERNALlanguageId,	/* language */
							 F_FMGR_INTERNAL_VALIDATOR,
							 "multirange_constructor2", /* prosrc */
							 NULL,	/* probin */
							 NULL,	/* prosqlbody */
							 PROKIND_FUNCTION,
							 false, /* security_definer */
							 false, /* leakproof */
							 true,	/* isStrict */
							 PROVOLATILE_IMMUTABLE, /* volatility */
							 PROPARALLEL_SAFE,	/* parallel safety */
							 argtypes,	/* parameterTypes */
							 PointerGetDatum(allParameterTypes),	/* allParameterTypes */
							 PointerGetDatum(parameterModes),	/* parameterModes */
							 PointerGetDatum(NULL), /* parameterNames */
							 NIL,	/* parameterDefaults */
							 PointerGetDatum(NULL), /* trftypes */
							 PointerGetDatum(NULL), /* proconfig */
							 InvalidOid,	/* prosupport */
							 1.0,	/* procost */
							 0.0);	/* prorows */
	/* ditto */
	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
	pfree(argtypes);
	pfree(allParameterTypes);
	pfree(parameterModes);
}

/*
 * Find suitable I/O and other support functions for a type.
 *
 * typeOid is the type's OID (which will already exist, if only as a shell
 * type).
 */

static Oid
findTypeInputFunction(List *procname, Oid typeOid)
{
	Oid			argList[3];
	Oid			procOid;
	Oid			procOid2;

	/*
	 * Input functions can take a single argument of type CSTRING, or three
	 * arguments (string, typioparam OID, typmod).  Whine about ambiguity if
	 * both forms exist.
 */
	argList[0] = CSTRINGOID;
	argList[1] = OIDOID;
	argList[2] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	procOid2 = LookupFuncName(procname, 3, argList, true);
	if (OidIsValid(procOid))
	{
		if (OidIsValid(procOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("type input function %s has multiple matches",
							NameListToString(procname))));
	}
	else
	{
		procOid = procOid2;
		/* If not found, reference the 1-argument signature in error msg */
		if (!OidIsValid(procOid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("function %s does not exist",
							func_signature_string(procname, 1, NIL, argList))));
	}

	/* Input functions must return the target type. */
	if (get_func_rettype(procOid) != typeOid)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type input function %s must return type %s",
						NameListToString(procname), format_type_be(typeOid))));

	/*
	 * Print warnings if any of the type's I/O functions are marked volatile.
	 * There is a general assumption that I/O functions are stable or
	 * immutable; this allows us for example to mark record_in/record_out
	 * stable rather than volatile.  Ideally we would throw errors not just
	 * warnings here; but since this check is new as of 9.5, and since the
	 * volatility marking might be just an error-of-omission and not a true
	 * indication of how the function behaves, we'll let it pass as a warning
	 * for now.
	 */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type input function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeOutputFunction
 *		Resolve and validate the output function for a type.
 */
static Oid
findTypeOutputFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Output functions always take a single argument of the type and return
	 * cstring.
	 */
	argList[0] = typeOid;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != CSTRINGOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type output function %s must return type %s",
						NameListToString(procname), "cstring")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type output function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeReceiveFunction
 *		Resolve and validate the binary-receive function for a type.
 */
static Oid
findTypeReceiveFunction(List *procname, Oid typeOid)
{
	Oid			argList[3];
	Oid			procOid;
	Oid			procOid2;

	/*
	 * Receive functions can take a single argument of type INTERNAL, or
	 * three arguments (internal, typioparam OID, typmod).  Whine about
	 * ambiguity if both forms exist.
	 */
	argList[0] = INTERNALOID;
	argList[1] = OIDOID;
	argList[2] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	procOid2 = LookupFuncName(procname, 3, argList, true);
	if (OidIsValid(procOid))
	{
		if (OidIsValid(procOid2))
			ereport(ERROR,
					(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
					 errmsg("type receive function %s has multiple matches",
							NameListToString(procname))));
	}
	else
	{
		procOid = procOid2;
		/* If not found, reference the 1-argument signature in error msg */
		if (!OidIsValid(procOid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("function %s does not exist",
							func_signature_string(procname, 1, NIL, argList))));
	}

	/* Receive functions must return the target type. */
	if (get_func_rettype(procOid) != typeOid)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type receive function %s must return type %s",
						NameListToString(procname), format_type_be(typeOid))));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type receive function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeSendFunction
 *		Resolve and validate the binary-send function for a type.
 */
static Oid
findTypeSendFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Send functions always take a single argument of the type and return
	 * bytea.
	 */
	argList[0] = typeOid;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != BYTEAOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type send function %s must return type %s",
						NameListToString(procname), "bytea")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type send function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeTypmodinFunction
 *		Resolve and validate a type-modifier input function.
 */
static Oid
findTypeTypmodinFunction(List *procname)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * typmodin functions always take one cstring[] argument and return int4.
	 */
	argList[0] = CSTRINGARRAYOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INT4OID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("typmod_in function %s must return type %s",
						NameListToString(procname), "integer")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type modifier input function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeTypmodoutFunction
 *		Resolve and validate a type-modifier output function.
 */
static Oid
findTypeTypmodoutFunction(List *procname)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * typmodout functions always take one int4 argument and return cstring.
	 */
	argList[0] = INT4OID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != CSTRINGOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("typmod_out function %s must return type %s",
						NameListToString(procname), "cstring")));

	/* Just a warning for now, per comments in findTypeInputFunction */
	if (func_volatile(procOid) == PROVOLATILE_VOLATILE)
		ereport(WARNING,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type modifier output function %s should not be volatile",
						NameListToString(procname))));

	return procOid;
}

/*
 * findTypeAnalyzeFunction
 *		Resolve and validate a custom ANALYZE function for a type.
 *
 * Note: no volatility warning here; analyze functions aren't I/O functions.
 */
static Oid
findTypeAnalyzeFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Analyze functions always take one INTERNAL argument and return bool.
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != BOOLOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type analyze function %s must return type %s",
						NameListToString(procname), "boolean")));

	return procOid;
}

/*
 * findTypeSubscriptingFunction
 *		Resolve and validate a subscripting-handler function for a type.
 */
static Oid
findTypeSubscriptingFunction(List *procname, Oid typeOid)
{
	Oid			argList[1];
	Oid			procOid;

	/*
	 * Subscripting support functions always take one INTERNAL argument and
	 * return INTERNAL.  (The argument is not used, but we must have it to
	 * maintain type safety.)
	 */
	argList[0] = INTERNALOID;

	procOid = LookupFuncName(procname, 1, argList, true);
	if (!OidIsValid(procOid))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_FUNCTION),
				 errmsg("function %s does not exist",
						func_signature_string(procname, 1, NIL, argList))));

	if (get_func_rettype(procOid) != INTERNALOID)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("type subscripting function %s must return type %s",
						NameListToString(procname), "internal")));

	/*
	 * We disallow array_subscript_handler() from being selected explicitly,
	 * since that must only be applied to autogenerated array types.
	 */
	if (procOid == F_ARRAY_SUBSCRIPT_HANDLER)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("user-defined types cannot use subscripting function %s",
						NameListToString(procname))));

	return procOid;
}

/*
 * Find suitable support functions and opclasses for a range type.
 */

/*
 * Find named btree opclass for subtype, or default btree opclass if
 * opcname is NIL.
+ */ +static Oid +findRangeSubOpclass(List *opcname, Oid subtype) +{ + Oid opcid; + Oid opInputType; + + if (opcname != NIL) + { + opcid = get_opclass_oid(BTREE_AM_OID, opcname, false); + + /* + * Verify that the operator class accepts this datatype. Note we will + * accept binary compatibility. + */ + opInputType = get_opclass_input_type(opcid); + if (!IsBinaryCoercible(subtype, opInputType)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("operator class \"%s\" does not accept data type %s", + NameListToString(opcname), + format_type_be(subtype)))); + } + else + { + opcid = GetDefaultOpClass(subtype, BTREE_AM_OID); + if (!OidIsValid(opcid)) + { + /* We spell the error message identically to ResolveOpClass */ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default operator class for access method \"%s\"", + format_type_be(subtype), "btree"), + errhint("You must specify an operator class for the range type or define a default operator class for the subtype."))); + } + } + + return opcid; +} + +static Oid +findRangeCanonicalFunction(List *procname, Oid typeOid) +{ + Oid argList[1]; + Oid procOid; + AclResult aclresult; + + /* + * Range canonical functions must take and return the range type, and must + * be immutable. 
+ */ + argList[0] = typeOid; + + procOid = LookupFuncName(procname, 1, argList, true); + + if (!OidIsValid(procOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(procname, 1, NIL, argList)))); + + if (get_func_rettype(procOid) != typeOid) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range canonical function %s must return range type", + func_signature_string(procname, 1, NIL, argList)))); + + if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range canonical function %s must be immutable", + func_signature_string(procname, 1, NIL, argList)))); + + /* Also, range type's creator must have permission to call function */ + aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid)); + + return procOid; +} + +static Oid +findRangeSubtypeDiffFunction(List *procname, Oid subtype) +{ + Oid argList[2]; + Oid procOid; + AclResult aclresult; + + /* + * Range subtype diff functions must take two arguments of the subtype, + * must return float8, and must be immutable. 
+ */ + argList[0] = subtype; + argList[1] = subtype; + + procOid = LookupFuncName(procname, 2, argList, true); + + if (!OidIsValid(procOid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(procname, 2, NIL, argList)))); + + if (get_func_rettype(procOid) != FLOAT8OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range subtype diff function %s must return type %s", + func_signature_string(procname, 2, NIL, argList), + "double precision"))); + + if (func_volatile(procOid) != PROVOLATILE_IMMUTABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("range subtype diff function %s must be immutable", + func_signature_string(procname, 2, NIL, argList)))); + + /* Also, range type's creator must have permission to call function */ + aclresult = pg_proc_aclcheck(procOid, GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, get_func_name(procOid)); + + return procOid; +} + +/* + * AssignTypeArrayOid + * + * Pre-assign the type's array OID for use in pg_type.typarray + */ +Oid +AssignTypeArrayOid(void) +{ + Oid type_array_oid; + + /* Use binary-upgrade override for pg_type.typarray? 
*/ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_array_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type array OID value not set when in binary upgrade mode"))); + + type_array_oid = binary_upgrade_next_array_pg_type_oid; + binary_upgrade_next_array_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_array_oid; +} + +/* + * AssignTypeMultirangeOid + * + * Pre-assign the range type's multirange OID for use in pg_type.oid + */ +Oid +AssignTypeMultirangeOid(void) +{ + Oid type_multirange_oid; + + /* Use binary-upgrade override for pg_type.oid? */ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_mrng_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type multirange OID value not set when in binary upgrade mode"))); + + type_multirange_oid = binary_upgrade_next_mrng_pg_type_oid; + binary_upgrade_next_mrng_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_multirange_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_multirange_oid; +} + +/* + * AssignTypeMultirangeArrayOid + * + * Pre-assign the range type's multirange array OID for use in pg_type.typarray + */ +Oid +AssignTypeMultirangeArrayOid(void) +{ + Oid type_multirange_array_oid; + + /* Use binary-upgrade override for pg_type.oid? 
*/ + if (IsBinaryUpgrade) + { + if (!OidIsValid(binary_upgrade_next_mrng_array_pg_type_oid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("pg_type multirange array OID value not set when in binary upgrade mode"))); + + type_multirange_array_oid = binary_upgrade_next_mrng_array_pg_type_oid; + binary_upgrade_next_mrng_array_pg_type_oid = InvalidOid; + } + else + { + Relation pg_type = table_open(TypeRelationId, AccessShareLock); + + type_multirange_array_oid = GetNewOidWithIndex(pg_type, TypeOidIndexId, + Anum_pg_type_oid); + table_close(pg_type, AccessShareLock); + } + + return type_multirange_array_oid; +} + + +/*------------------------------------------------------------------- + * DefineCompositeType + * + * Create a Composite Type relation. + * `DefineRelation' does all the work, we just provide the correct + * arguments! + * + * If the relation already exists, then 'DefineRelation' will abort + * the xact... + * + * Return type is the new type's object address. + *------------------------------------------------------------------- + */ +ObjectAddress +DefineCompositeType(RangeVar *typevar, List *coldeflist) +{ + CreateStmt *createStmt = makeNode(CreateStmt); + Oid old_type_oid; + Oid typeNamespace; + ObjectAddress address; + + /* + * now set the parameters for keys/inheritance etc. All of these are + * uninteresting for composite types... + */ + createStmt->relation = typevar; + createStmt->tableElts = coldeflist; + createStmt->inhRelations = NIL; + createStmt->constraints = NIL; + createStmt->options = NIL; + createStmt->oncommit = ONCOMMIT_NOOP; + createStmt->tablespacename = NULL; + createStmt->if_not_exists = false; + + /* + * Check for collision with an existing type name. If there is one and + * it's an autogenerated array, we can rename it out of the way. This + * check is here mainly to get a better error message about a "type" + * instead of below about a "relation". 
+ */ + typeNamespace = RangeVarGetAndCheckCreationNamespace(createStmt->relation, + NoLock, NULL); + RangeVarAdjustRelationPersistence(createStmt->relation, typeNamespace); + old_type_oid = + GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, + CStringGetDatum(createStmt->relation->relname), + ObjectIdGetDatum(typeNamespace)); + if (OidIsValid(old_type_oid)) + { + if (!moveArrayTypeName(old_type_oid, createStmt->relation->relname, typeNamespace)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("type \"%s\" already exists", createStmt->relation->relname))); + } + + /* + * Finally create the relation. This also creates the type. + */ + DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address, + NULL); + + return address; +} + +/* + * AlterDomainDefault + * + * Routine implementing ALTER DOMAIN SET/DROP DEFAULT statements. + * + * Returns ObjectAddress of the modified domain. + */ +ObjectAddress +AlterDomainDefault(List *names, Node *defaultRaw) +{ + TypeName *typename; + Oid domainoid; + HeapTuple tup; + ParseState *pstate; + Relation rel; + char *defaultValue; + Node *defaultExpr = NULL; /* NULL if no default specified */ + Datum new_record[Natts_pg_type]; + bool new_record_nulls[Natts_pg_type]; + bool new_record_repl[Natts_pg_type]; + HeapTuple newtuple; + Form_pg_type typTup; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Setup new tuple */ + MemSet(new_record, (Datum) 0, sizeof(new_record)); + 
MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + /* Store the new default into the tuple */ + if (defaultRaw) + { + /* Create a dummy ParseState for transformExpr */ + pstate = make_parsestate(NULL); + + /* + * Cook the colDef->raw_expr into an expression. Note: Name is + * strictly for error message + */ + defaultExpr = cookDefault(pstate, defaultRaw, + typTup->typbasetype, + typTup->typtypmod, + NameStr(typTup->typname), + 0); + + /* + * If the expression is just a NULL constant, we treat the command + * like ALTER ... DROP DEFAULT. (But see note for same test in + * DefineDomain.) + */ + if (defaultExpr == NULL || + (IsA(defaultExpr, Const) && ((Const *) defaultExpr)->constisnull)) + { + /* Default is NULL, drop it */ + defaultExpr = NULL; + new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true; + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record_nulls[Anum_pg_type_typdefault - 1] = true; + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + else + { + /* + * Expression must be stored as a nodeToString result, but we also + * require a valid textual representation (mainly to make life + * easier for pg_dump). + */ + defaultValue = deparse_expression(defaultExpr, + NIL, false, false); + + /* + * Form an updated tuple with the new default and write it back. + */ + new_record[Anum_pg_type_typdefaultbin - 1] = CStringGetTextDatum(nodeToString(defaultExpr)); + + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record[Anum_pg_type_typdefault - 1] = CStringGetTextDatum(defaultValue); + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + } + else + { + /* ALTER ... 
DROP DEFAULT */ + new_record_nulls[Anum_pg_type_typdefaultbin - 1] = true; + new_record_repl[Anum_pg_type_typdefaultbin - 1] = true; + new_record_nulls[Anum_pg_type_typdefault - 1] = true; + new_record_repl[Anum_pg_type_typdefault - 1] = true; + } + + newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), + new_record, new_record_nulls, + new_record_repl); + + CatalogTupleUpdate(rel, &tup->t_self, newtuple); + + /* Rebuild dependencies */ + GenerateTypeDependencies(newtuple, + rel, + defaultExpr, + NULL, /* don't have typacl handy */ + 0, /* relation kind is n/a */ + false, /* a domain isn't an implicit array */ + false, /* nor is it any kind of dependent type */ + false, /* don't touch extension membership */ + true); /* We do need to rebuild dependencies */ + + InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + heap_freetuple(newtuple); + + return address; +} + +/* + * AlterDomainNotNull + * + * Routine implementing ALTER DOMAIN SET/DROP NOT NULL statements. + * + * Returns ObjectAddress of the modified domain. 
+ */ +ObjectAddress +AlterDomainNotNull(List *names, bool notNull) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + HeapTuple tup; + Form_pg_type typTup; + ObjectAddress address = InvalidObjectAddress; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Is the domain already set to the desired constraint? */ + if (typTup->typnotnull == notNull) + { + table_close(typrel, RowExclusiveLock); + return address; + } + + /* Adding a NOT NULL constraint requires checking existing columns */ + if (notNull) + { + List *rels; + ListCell *rt; + + /* Fetch relation list with attributes based on this domain */ + /* ShareLock is sufficient to prevent concurrent data changes */ + + rels = get_rels_with_domain(domainoid, ShareLock); + + foreach(rt, rels) + { + RelToCheck *rtc = (RelToCheck *) lfirst(rt); + Relation testrel = rtc->rel; + TupleDesc tupdesc = RelationGetDescr(testrel); + TupleTableSlot *slot; + TableScanDesc scan; + Snapshot snapshot; + + /* Scan all tuples in this relation */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_slot_create(testrel, NULL); + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) + { + int i; + + /* Test attributes that are of the domain */ + for (i = 0; i < rtc->natts; i++) + { + int attnum = rtc->atts[i]; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + + if (slot_attisnull(slot, attnum)) + { + /* + * In 
principle the auxiliary information for this + * error should be errdatatype(), but errtablecol() + * seems considerably more useful in practice. Since + * this code only executes in an ALTER DOMAIN command, + * the client should already know which domain is in + * question. + */ + ereport(ERROR, + (errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("column \"%s\" of table \"%s\" contains null values", + NameStr(attr->attname), + RelationGetRelationName(testrel)), + errtablecol(testrel, attnum))); + } + } + } + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + /* Close each rel after processing, but keep lock */ + table_close(testrel, NoLock); + } + } + + /* + * Okay to update pg_type row. We can scribble on typTup because it's a + * copy. + */ + typTup->typnotnull = notNull; + + CatalogTupleUpdate(typrel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(TypeRelationId, domainoid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + heap_freetuple(tup); + table_close(typrel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainDropConstraint + * + * Implements the ALTER DOMAIN DROP CONSTRAINT statement + * + * Returns ObjectAddress of the modified domain. 
+ */ +ObjectAddress +AlterDomainDropConstraint(List *names, const char *constrName, + DropBehavior behavior, bool missing_ok) +{ + TypeName *typename; + Oid domainoid; + HeapTuple tup; + Relation rel; + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey[3]; + HeapTuple contup; + bool found = false; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* Grab an appropriate lock on the pg_constraint relation */ + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + /* Find and remove the target constraint */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainoid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + + conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 3, skey); + + /* There can be at most one matching row */ + if ((contup = systable_getnext(conscan)) != NULL) + { + ObjectAddress conobj; + + conobj.classId = ConstraintRelationId; + conobj.objectId = ((Form_pg_constraint) GETSTRUCT(contup))->oid; + conobj.objectSubId = 0; + + performDeletion(&conobj, behavior, 0); + found = true; + } + + /* Clean up after the scan */ + systable_endscan(conscan); + table_close(conrel, RowExclusiveLock); + + if (!found) + { + if (!missing_ok) + ereport(ERROR, + 
(errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of domain \"%s\" does not exist", + constrName, TypeNameToString(typename)))); + else + ereport(NOTICE, + (errmsg("constraint \"%s\" of domain \"%s\" does not exist, skipping", + constrName, TypeNameToString(typename)))); + } + + /* + * We must send out an sinval message for the domain, to ensure that any + * dependent plans get rebuilt. Since this command doesn't change the + * domain's pg_type row, that won't happen automatically; do it manually. + */ + CacheInvalidateHeapTuple(rel, tup, NULL); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainAddConstraint + * + * Implements the ALTER DOMAIN .. ADD CONSTRAINT statement. + */ +ObjectAddress +AlterDomainAddConstraint(List *names, Node *newConstraint, + ObjectAddress *constrAddr) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + HeapTuple tup; + Form_pg_type typTup; + Constraint *constr; + char *ccbin; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + if (!IsA(newConstraint, Constraint)) + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(newConstraint)); + + constr = (Constraint *) newConstraint; + + switch (constr->contype) + { + case CONSTR_CHECK: + /* processed below */ + break; + + case CONSTR_UNIQUE: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unique constraints 
not possible for domains"))); + break; + + case CONSTR_PRIMARY: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("primary key constraints not possible for domains"))); + break; + + case CONSTR_EXCLUSION: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("exclusion constraints not possible for domains"))); + break; + + case CONSTR_FOREIGN: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("foreign key constraints not possible for domains"))); + break; + + case CONSTR_ATTR_DEFERRABLE: + case CONSTR_ATTR_NOT_DEFERRABLE: + case CONSTR_ATTR_DEFERRED: + case CONSTR_ATTR_IMMEDIATE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("specifying constraint deferrability not supported for domains"))); + break; + + default: + elog(ERROR, "unrecognized constraint subtype: %d", + (int) constr->contype); + break; + } + + /* + * Since all other constraint types throw errors, this must be a check + * constraint. First, process the constraint expression and add an entry + * to pg_constraint. + */ + + ccbin = domainAddConstraint(domainoid, typTup->typnamespace, + typTup->typbasetype, typTup->typtypmod, + constr, NameStr(typTup->typname), constrAddr); + + /* + * If requested to validate the constraint, test all values stored in the + * attributes based on the domain the constraint is being added to. + */ + if (!constr->skip_validation) + validateDomainConstraint(domainoid, ccbin); + + /* + * We must send out an sinval message for the domain, to ensure that any + * dependent plans get rebuilt. Since this command doesn't change the + * domain's pg_type row, that won't happen automatically; do it manually. + */ + CacheInvalidateHeapTuple(typrel, tup, NULL); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + /* Clean up */ + table_close(typrel, RowExclusiveLock); + + return address; +} + +/* + * AlterDomainValidateConstraint + * + * Implements the ALTER DOMAIN .. VALIDATE CONSTRAINT statement. 
+ */ +ObjectAddress +AlterDomainValidateConstraint(List *names, const char *constrName) +{ + TypeName *typename; + Oid domainoid; + Relation typrel; + Relation conrel; + HeapTuple tup; + Form_pg_constraint con; + Form_pg_constraint copy_con; + char *conbin; + SysScanDesc scan; + Datum val; + bool isnull; + HeapTuple tuple; + HeapTuple copyTuple; + ScanKeyData skey[3]; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + domainoid = typenameTypeId(NULL, typename); + + /* Look up the domain in the type table */ + typrel = table_open(TypeRelationId, AccessShareLock); + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(domainoid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", domainoid); + + /* Check it's a domain and check user has permission for ALTER DOMAIN */ + checkDomainOwner(tup); + + /* + * Find and check the target constraint + */ + conrel = table_open(ConstraintRelationId, RowExclusiveLock); + + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + ScanKeyInit(&skey[1], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainoid)); + ScanKeyInit(&skey[2], + Anum_pg_constraint_conname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(constrName)); + + scan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 3, skey); + + /* There can be at most one matching row */ + if (!HeapTupleIsValid(tuple = systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("constraint \"%s\" of domain \"%s\" does not exist", + constrName, TypeNameToString(typename)))); + + con = (Form_pg_constraint) GETSTRUCT(tuple); + if (con->contype != CONSTRAINT_CHECK) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("constraint \"%s\" of domain \"%s\" is not a check constraint", + constrName, 
TypeNameToString(typename)))); + + val = SysCacheGetAttr(CONSTROID, tuple, + Anum_pg_constraint_conbin, + &isnull); + if (isnull) + elog(ERROR, "null conbin for constraint %u", + con->oid); + conbin = TextDatumGetCString(val); + + validateDomainConstraint(domainoid, conbin); + + /* + * Now update the catalog, while we have the door open. + */ + copyTuple = heap_copytuple(tuple); + copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); + copy_con->convalidated = true; + CatalogTupleUpdate(conrel, ©Tuple->t_self, copyTuple); + + InvokeObjectPostAlterHook(ConstraintRelationId, con->oid, 0); + + ObjectAddressSet(address, TypeRelationId, domainoid); + + heap_freetuple(copyTuple); + + systable_endscan(scan); + + table_close(typrel, AccessShareLock); + table_close(conrel, RowExclusiveLock); + + ReleaseSysCache(tup); + + return address; +} + +static void +validateDomainConstraint(Oid domainoid, char *ccbin) +{ + Expr *expr = (Expr *) stringToNode(ccbin); + List *rels; + ListCell *rt; + EState *estate; + ExprContext *econtext; + ExprState *exprstate; + + /* Need an EState to run ExecEvalExpr */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + + /* build execution state for expr */ + exprstate = ExecPrepareExpr(expr, estate); + + /* Fetch relation list with attributes based on this domain */ + /* ShareLock is sufficient to prevent concurrent data changes */ + + rels = get_rels_with_domain(domainoid, ShareLock); + + foreach(rt, rels) + { + RelToCheck *rtc = (RelToCheck *) lfirst(rt); + Relation testrel = rtc->rel; + TupleDesc tupdesc = RelationGetDescr(testrel); + TupleTableSlot *slot; + TableScanDesc scan; + Snapshot snapshot; + + /* Scan all tuples in this relation */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_slot_create(testrel, NULL); + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) + { + int i; + + /* Test attributes that are of the domain */ + 
for (i = 0; i < rtc->natts; i++) + { + int attnum = rtc->atts[i]; + Datum d; + bool isNull; + Datum conResult; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); + + d = slot_getattr(slot, attnum, &isNull); + + econtext->domainValue_datum = d; + econtext->domainValue_isNull = isNull; + + conResult = ExecEvalExprSwitchContext(exprstate, + econtext, + &isNull); + + if (!isNull && !DatumGetBool(conResult)) + { + /* + * In principle the auxiliary information for this error + * should be errdomainconstraint(), but errtablecol() + * seems considerably more useful in practice. Since this + * code only executes in an ALTER DOMAIN command, the + * client should already know which domain is in question, + * and which constraint too. + */ + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("column \"%s\" of table \"%s\" contains values that violate the new constraint", + NameStr(attr->attname), + RelationGetRelationName(testrel)), + errtablecol(testrel, attnum))); + } + } + + ResetExprContext(econtext); + } + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + UnregisterSnapshot(snapshot); + + /* Hold relation lock till commit (XXX bad for concurrency) */ + table_close(testrel, NoLock); + } + + FreeExecutorState(estate); +} + +/* + * get_rels_with_domain + * + * Fetch all relations / attributes which are using the domain + * + * The result is a list of RelToCheck structs, one for each distinct + * relation, each containing one or more attribute numbers that are of + * the domain type. We have opened each rel and acquired the specified lock + * type on it. + * + * We support nested domains by including attributes that are of derived + * domain types. Current callers do not need to distinguish between attributes + * that are of exactly the given domain and those that are of derived domains. + * + * XXX this is completely broken because there is no way to lock the domain + * to prevent columns from being added or dropped while our command runs. 
+ * We can partially protect against column drops by locking relations as we + * come across them, but there is still a race condition (the window between + * seeing a pg_depend entry and acquiring lock on the relation it references). + * Also, holding locks on all these relations simultaneously creates a non- + * trivial risk of deadlock. We can minimize but not eliminate the deadlock + * risk by using the weakest suitable lock (ShareLock for most callers). + * + * XXX the API for this is not sufficient to support checking domain values + * that are inside container types, such as composite types, arrays, or + * ranges. Currently we just error out if a container type containing the + * target domain is stored anywhere. + * + * Generally used for retrieving a list of tests when adding + * new constraints to a domain. + */ +static List * +get_rels_with_domain(Oid domainOid, LOCKMODE lockmode) +{ + List *result = NIL; + char *domainTypeName = format_type_be(domainOid); + Relation depRel; + ScanKeyData key[2]; + SysScanDesc depScan; + HeapTuple depTup; + + Assert(lockmode != NoLock); + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* + * We scan pg_depend to find those things that depend on the domain. (We + * assume we can ignore refobjsubid for a domain.) 
+ */ + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(TypeRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(domainOid)); + + depScan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid(depTup = systable_getnext(depScan))) + { + Form_pg_depend pg_depend = (Form_pg_depend) GETSTRUCT(depTup); + RelToCheck *rtc = NULL; + ListCell *rellist; + Form_pg_attribute pg_att; + int ptr; + + /* Check for directly dependent types */ + if (pg_depend->classid == TypeRelationId) + { + if (get_typtype(pg_depend->objid) == TYPTYPE_DOMAIN) + { + /* + * This is a sub-domain, so recursively add dependent columns + * to the output list. This is a bit inefficient since we may + * fail to combine RelToCheck entries when attributes of the + * same rel have different derived domain types, but it's + * probably not worth improving. + */ + result = list_concat(result, + get_rels_with_domain(pg_depend->objid, + lockmode)); + } + else + { + /* + * Otherwise, it is some container type using the domain, so + * fail if there are any columns of this type. 
+ */ + find_composite_type_dependencies(pg_depend->objid, + NULL, + domainTypeName); + } + continue; + } + + /* Else, ignore dependees that aren't user columns of relations */ + /* (we assume system columns are never of domain types) */ + if (pg_depend->classid != RelationRelationId || + pg_depend->objsubid <= 0) + continue; + + /* See if we already have an entry for this relation */ + foreach(rellist, result) + { + RelToCheck *rt = (RelToCheck *) lfirst(rellist); + + if (RelationGetRelid(rt->rel) == pg_depend->objid) + { + rtc = rt; + break; + } + } + + if (rtc == NULL) + { + /* First attribute found for this relation */ + Relation rel; + + /* Acquire requested lock on relation */ + rel = relation_open(pg_depend->objid, lockmode); + + /* + * Check to see if rowtype is stored anyplace as a composite-type + * column; if so we have to fail, for now anyway. + */ + if (OidIsValid(rel->rd_rel->reltype)) + find_composite_type_dependencies(rel->rd_rel->reltype, + NULL, + domainTypeName); + + /* + * Otherwise, we can ignore relations except those with both + * storage and user-chosen column types. + * + * XXX If an index-only scan could satisfy "col::some_domain" from + * a suitable expression index, this should also check expression + * index columns. + */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW) + { + relation_close(rel, lockmode); + continue; + } + + /* Build the RelToCheck entry with enough space for all atts */ + rtc = (RelToCheck *) palloc(sizeof(RelToCheck)); + rtc->rel = rel; + rtc->natts = 0; + rtc->atts = (int *) palloc(sizeof(int) * RelationGetNumberOfAttributes(rel)); + result = lappend(result, rtc); + } + + /* + * Confirm column has not been dropped, and is of the expected type. + * This defends against an ALTER DROP COLUMN occurring just before we + * acquired lock ... but if the whole table were dropped, we'd still + * have a problem. 
+ */ + if (pg_depend->objsubid > RelationGetNumberOfAttributes(rtc->rel)) + continue; + pg_att = TupleDescAttr(rtc->rel->rd_att, pg_depend->objsubid - 1); + if (pg_att->attisdropped || pg_att->atttypid != domainOid) + continue; + + /* + * Okay, add column to result. We store the columns in column-number + * order; this is just a hack to improve predictability of regression + * test output ... + */ + Assert(rtc->natts < RelationGetNumberOfAttributes(rtc->rel)); + + ptr = rtc->natts++; + while (ptr > 0 && rtc->atts[ptr - 1] > pg_depend->objsubid) + { + rtc->atts[ptr] = rtc->atts[ptr - 1]; + ptr--; + } + rtc->atts[ptr] = pg_depend->objsubid; + } + + systable_endscan(depScan); + + relation_close(depRel, AccessShareLock); + + return result; +} + +/* + * checkDomainOwner + * + * Check that the type is actually a domain and that the current user + * has permission to do ALTER DOMAIN on it. Throw an error if not. + */ +void +checkDomainOwner(HeapTuple tup) +{ + Form_pg_type typTup = (Form_pg_type) GETSTRUCT(tup); + + /* Check that this is actually a domain */ + if (typTup->typtype != TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typTup->oid)))); + + /* Permission check: must own type */ + if (!pg_type_ownercheck(typTup->oid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid); +} + +/* + * domainAddConstraint - code shared between CREATE and ALTER DOMAIN + */ +static char * +domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, + int typMod, Constraint *constr, + const char *domainName, ObjectAddress *constrAddr) +{ + Node *expr; + char *ccbin; + ParseState *pstate; + CoerceToDomainValue *domVal; + Oid ccoid; + + /* + * Assign or validate constraint name + */ + if (constr->conname) + { + if (ConstraintNameIsUsed(CONSTRAINT_DOMAIN, + domainOid, + constr->conname)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("constraint \"%s\" for domain \"%s\" 
already exists", + constr->conname, domainName))); + } + else + constr->conname = ChooseConstraintName(domainName, + NULL, + "check", + domainNamespace, + NIL); + + /* + * Convert the A_EXPR in raw_expr into an EXPR + */ + pstate = make_parsestate(NULL); + + /* + * Set up a CoerceToDomainValue to represent the occurrence of VALUE in + * the expression. Note that it will appear to have the type of the base + * type, not the domain. This seems correct since within the check + * expression, we should not assume the input value can be considered a + * member of the domain. + */ + domVal = makeNode(CoerceToDomainValue); + domVal->typeId = baseTypeOid; + domVal->typeMod = typMod; + domVal->collation = get_typcollation(baseTypeOid); + domVal->location = -1; /* will be set when/if used */ + + pstate->p_pre_columnref_hook = replace_domain_constraint_value; + pstate->p_ref_hook_state = (void *) domVal; + + expr = transformExpr(pstate, constr->raw_expr, EXPR_KIND_DOMAIN_CHECK); + + /* + * Make sure it yields a boolean result. + */ + expr = coerce_to_boolean(pstate, expr, "CHECK"); + + /* + * Fix up collation information. + */ + assign_expr_collations(pstate, expr); + + /* + * Domains don't allow variables (this is probably dead code now that + * add_missing_from is history, but let's be sure). + */ + if (list_length(pstate->p_rtable) != 0 || + contain_var_clause(expr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot use table references in domain check constraint"))); + + /* + * Convert to string form for storage. 
+ */ + ccbin = nodeToString(expr); + + /* + * Store the constraint in pg_constraint + */ + ccoid = + CreateConstraintEntry(constr->conname, /* Constraint Name */ + domainNamespace, /* namespace */ + CONSTRAINT_CHECK, /* Constraint Type */ + false, /* Is Deferrable */ + false, /* Is Deferred */ + !constr->skip_validation, /* Is Validated */ + InvalidOid, /* no parent constraint */ + InvalidOid, /* not a relation constraint */ + NULL, + 0, + 0, + domainOid, /* domain constraint */ + InvalidOid, /* no associated index */ + InvalidOid, /* Foreign key fields */ + NULL, + NULL, + NULL, + NULL, + 0, + ' ', + ' ', + NULL, + 0, + ' ', + NULL, /* not an exclusion constraint */ + expr, /* Tree form of check constraint */ + ccbin, /* Binary form of check constraint */ + true, /* is local */ + 0, /* inhcount */ + false, /* connoinherit */ + false); /* is_internal */ + if (constrAddr) + ObjectAddressSet(*constrAddr, ConstraintRelationId, ccoid); + + /* + * Return the compiled constraint expression so the calling routine can + * perform any additional required tests. + */ + return ccbin; +} + +/* Parser pre_columnref_hook for domain CHECK constraint parsing */ +static Node * +replace_domain_constraint_value(ParseState *pstate, ColumnRef *cref) +{ + /* + * Check for a reference to "value", and if that's what it is, replace + * with a CoerceToDomainValue as prepared for us by domainAddConstraint. + * (We handle VALUE as a name, not a keyword, to avoid breaking a lot of + * applications that have used VALUE as a column name in the past.) 
+ */ + if (list_length(cref->fields) == 1) + { + Node *field1 = (Node *) linitial(cref->fields); + char *colname; + + Assert(IsA(field1, String)); + colname = strVal(field1); + if (strcmp(colname, "value") == 0) + { + CoerceToDomainValue *domVal = copyObject(pstate->p_ref_hook_state); + + /* Propagate location knowledge, if any */ + domVal->location = cref->location; + return (Node *) domVal; + } + } + return NULL; +} + + +/* + * Execute ALTER TYPE RENAME + */ +ObjectAddress +RenameType(RenameStmt *stmt) +{ + List *names = castNode(List, stmt->object); + const char *newTypeName = stmt->newname; + TypeName *typename; + Oid typeOid; + Relation rel; + HeapTuple tup; + Form_pg_type typTup; + ObjectAddress address; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + typeOid = typenameTypeId(NULL, typename); + + /* Look up the type in the type table */ + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + /* check permissions on type */ + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + + /* ALTER DOMAIN used on a non-domain? */ + if (stmt->renameType == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typeOid)))); + + /* + * If it's a composite type, we need to check that it really is a + * free-standing composite type, and not a table's rowtype. We want people + * to use ALTER TABLE not ALTER TYPE for that case. 
+ */ + if (typTup->typtype == TYPTYPE_COMPOSITE && + get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is a table's row type", + format_type_be(typeOid)), + errhint("Use ALTER TABLE instead."))); + + /* don't allow direct alteration of array types, either */ + if (IsTrueArrayType(typTup)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(typTup->typelem)))); + + /* + * If type is composite we need to rename associated pg_class entry too. + * RenameRelationInternal will call RenameTypeInternal automatically. + */ + if (typTup->typtype == TYPTYPE_COMPOSITE) + RenameRelationInternal(typTup->typrelid, newTypeName, false, false); + else + RenameTypeInternal(typeOid, newTypeName, + typTup->typnamespace); + + ObjectAddressSet(address, TypeRelationId, typeOid); + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change the owner of a type. 
+ */
+ObjectAddress
+AlterTypeOwner(List *names, Oid newOwnerId, ObjectType objecttype)
+{
+	TypeName   *typename;
+	Oid			typeOid;
+	Relation	rel;
+	HeapTuple	tup;
+	HeapTuple	newtup;
+	Form_pg_type typTup;
+	AclResult	aclresult;
+	ObjectAddress address;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	/* Make a TypeName so we can use standard type lookup machinery */
+	typename = makeTypeNameFromNameList(names);
+
+	/* Use LookupTypeName here so that shell types can be processed */
+	tup = LookupTypeName(NULL, typename, NULL, false);
+	if (tup == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("type \"%s\" does not exist",
+						TypeNameToString(typename))));
+	typeOid = typeTypeId(tup);
+
+	/* Copy the syscache entry so we can scribble on it below */
+	newtup = heap_copytuple(tup);
+	ReleaseSysCache(tup);
+	tup = newtup;
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/* Don't allow ALTER DOMAIN to be applied to a non-domain type */
+	if (objecttype == OBJECT_DOMAIN && typTup->typtype != TYPTYPE_DOMAIN)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is not a domain",
+						format_type_be(typeOid))));
+
+	/*
+	 * If it's a composite type, we need to check that it really is a
+	 * free-standing composite type, and not a table's rowtype. We want people
+	 * to use ALTER TABLE not ALTER TYPE for that case.
+ */ + if (typTup->typtype == TYPTYPE_COMPOSITE && + get_rel_relkind(typTup->typrelid) != RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is a table's row type", + format_type_be(typeOid)), + errhint("Use ALTER TABLE instead."))); + + /* don't allow direct alteration of array types, either */ + if (IsTrueArrayType(typTup)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(typTup->typelem)))); + + /* + * If the new owner is the same as the existing owner, consider the + * command to have succeeded. This is for dump restoration purposes. + */ + if (typTup->typowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + /* Otherwise, must be owner of the existing object */ + if (!pg_type_ownercheck(typTup->oid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typTup->oid); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(typTup->typnamespace, + newOwnerId, + ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(typTup->typnamespace)); + } + + AlterTypeOwner_oid(typeOid, newOwnerId, true); + } + + ObjectAddressSet(address, TypeRelationId, typeOid); + + /* Clean up */ + table_close(rel, RowExclusiveLock); + + return address; +} + +/* + * AlterTypeOwner_oid - change type owner unconditionally + * + * This function recurses to handle a pg_class entry, if necessary. It + * invokes any necessary access object hooks. If hasDependEntry is true, this + * function modifies the pg_shdepend entry appropriately (this should be + * passed as false only for table rowtypes and array types). 
+ *
+ * This is used by ALTER TABLE/TYPE OWNER commands, as well as by REASSIGN
+ * OWNED BY. It assumes the caller has done all needed checks.
+ */
+void
+AlterTypeOwner_oid(Oid typeOid, Oid newOwnerId, bool hasDependEntry)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typTup;
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typTup = (Form_pg_type) GETSTRUCT(tup);
+
+	/*
+	 * If it's a composite type, invoke ATExecChangeOwner so that we fix up
+	 * the pg_class entry properly. That will call back to
+	 * AlterTypeOwnerInternal to take care of the pg_type entry(s).
+	 */
+	if (typTup->typtype == TYPTYPE_COMPOSITE)
+		ATExecChangeOwner(typTup->typrelid, newOwnerId, true, AccessExclusiveLock);
+	else
+		AlterTypeOwnerInternal(typeOid, newOwnerId);
+
+	/* Update owner dependency reference */
+	if (hasDependEntry)
+		changeDependencyOnOwner(TypeRelationId, typeOid, newOwnerId);
+
+	/*
+	 * Notify any interested post-alter hooks here, once per command;
+	 * AlterTypeOwnerInternal deliberately does not invoke the hook itself.
+	 */
+	InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+	ReleaseSysCache(tup);
+	table_close(rel, RowExclusiveLock);
+}
+
+/*
+ * AlterTypeOwnerInternal - bare-bones type owner change.
+ *
+ * This routine simply modifies the owner of a pg_type entry, and recurses
+ * to handle a possible array type.
+ */ +void +AlterTypeOwnerInternal(Oid typeOid, Oid newOwnerId) +{ + Relation rel; + HeapTuple tup; + Form_pg_type typTup; + Datum repl_val[Natts_pg_type]; + bool repl_null[Natts_pg_type]; + bool repl_repl[Natts_pg_type]; + Acl *newAcl; + Datum aclDatum; + bool isNull; + + rel = table_open(TypeRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_repl[Anum_pg_type_typowner - 1] = true; + repl_val[Anum_pg_type_typowner - 1] = ObjectIdGetDatum(newOwnerId); + + aclDatum = heap_getattr(tup, + Anum_pg_type_typacl, + RelationGetDescr(rel), + &isNull); + /* Null ACLs do not require changes */ + if (!isNull) + { + newAcl = aclnewowner(DatumGetAclP(aclDatum), + typTup->typowner, newOwnerId); + repl_repl[Anum_pg_type_typacl - 1] = true; + repl_val[Anum_pg_type_typacl - 1] = PointerGetDatum(newAcl); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val, repl_null, + repl_repl); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + /* If it has an array type, update that too */ + if (OidIsValid(typTup->typarray)) + AlterTypeOwnerInternal(typTup->typarray, newOwnerId); + + /* Clean up */ + table_close(rel, RowExclusiveLock); +} + +/* + * Execute ALTER TYPE SET SCHEMA + */ +ObjectAddress +AlterTypeNamespace(List *names, const char *newschema, ObjectType objecttype, + Oid *oldschema) +{ + TypeName *typename; + Oid typeOid; + Oid nspOid; + Oid oldNspOid; + ObjectAddresses *objsMoved; + ObjectAddress myself; + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(names); + typeOid = typenameTypeId(NULL, typename); + + /* Don't allow ALTER DOMAIN on a type */ + if (objecttype == OBJECT_DOMAIN && get_typtype(typeOid) != TYPTYPE_DOMAIN) + 
ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a domain", + format_type_be(typeOid)))); + + /* get schema OID and check its permissions */ + nspOid = LookupCreationNamespace(newschema); + + objsMoved = new_object_addresses(); + oldNspOid = AlterTypeNamespace_oid(typeOid, nspOid, objsMoved); + free_object_addresses(objsMoved); + + if (oldschema) + *oldschema = oldNspOid; + + ObjectAddressSet(myself, TypeRelationId, typeOid); + + return myself; +} + +Oid +AlterTypeNamespace_oid(Oid typeOid, Oid nspOid, ObjectAddresses *objsMoved) +{ + Oid elemOid; + + /* check permissions on type */ + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + + /* don't allow direct alteration of array types */ + elemOid = get_element_type(typeOid); + if (OidIsValid(elemOid) && get_array_type(elemOid) == typeOid) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot alter array type %s", + format_type_be(typeOid)), + errhint("You can alter type %s, which will alter the array type as well.", + format_type_be(elemOid)))); + + /* and do the work */ + return AlterTypeNamespaceInternal(typeOid, nspOid, false, true, objsMoved); +} + +/* + * Move specified type to new namespace. + * + * Caller must have already checked privileges. + * + * The function automatically recurses to process the type's array type, + * if any. isImplicitArray should be true only when doing this internal + * recursion (outside callers must never try to move an array type directly). + * + * If errorOnTableType is true, the function errors out if the type is + * a table type. ALTER TABLE has to be used to move a table to a new + * namespace. + * + * Returns the type's old namespace OID. 
+ */
+Oid
+AlterTypeNamespaceInternal(Oid typeOid, Oid nspOid,
+						   bool isImplicitArray,
+						   bool errorOnTableType,
+						   ObjectAddresses *objsMoved)
+{
+	Relation	rel;
+	HeapTuple	tup;
+	Form_pg_type typform;
+	Oid			oldNspOid;
+	Oid			arrayOid;
+	bool		isCompositeType;
+	ObjectAddress thisobj;
+
+	/*
+	 * Make sure we haven't moved this object previously.
+	 */
+	thisobj.classId = TypeRelationId;
+	thisobj.objectId = typeOid;
+	thisobj.objectSubId = 0;
+
+	if (object_address_present(&thisobj, objsMoved))
+		return InvalidOid;	/* already moved in this command; nothing to do */
+
+	rel = table_open(TypeRelationId, RowExclusiveLock);
+
+	tup = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typeOid));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for type %u", typeOid);
+	typform = (Form_pg_type) GETSTRUCT(tup);
+
+	oldNspOid = typform->typnamespace;
+	arrayOid = typform->typarray;
+
+	/* If the type is already there, we can skip these next few checks. */
+	if (oldNspOid != nspOid)
+	{
+		/* common checks on switching namespaces */
+		CheckSetNamespace(oldNspOid, nspOid);
+
+		/* check for duplicate name (more friendly than unique-index failure) */
+		if (SearchSysCacheExists2(TYPENAMENSP,
+								  NameGetDatum(&typform->typname),
+								  ObjectIdGetDatum(nspOid)))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("type \"%s\" already exists in schema \"%s\"",
+							NameStr(typform->typname),
+							get_namespace_name(nspOid))));
+	}
+
+	/* Detect whether type is a composite type (but not a table rowtype) */
+	isCompositeType =
+		(typform->typtype == TYPTYPE_COMPOSITE &&
+		 get_rel_relkind(typform->typrelid) == RELKIND_COMPOSITE_TYPE);
+
+	/* Enforce not-table-type if requested */
+	if (typform->typtype == TYPTYPE_COMPOSITE && !isCompositeType &&
+		errorOnTableType)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("%s is a table's row type",
+						format_type_be(typeOid)),
+				 errhint("Use ALTER TABLE instead.")));
+
+	if (oldNspOid != nspOid)
+	{
+		/* OK, modify the pg_type row */
+
+		/* tup is a copy, so we can scribble directly on it */
+		typform->typnamespace = nspOid;
+
+		CatalogTupleUpdate(rel, &tup->t_self, tup);
+	}
+
+	/*
+	 * Composite types have pg_class entries.
+	 *
+	 * We need to modify the pg_class tuple as well to reflect the change of
+	 * schema.
+	 */
+	if (isCompositeType)
+	{
+		Relation	classRel;
+
+		classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+		AlterRelationNamespaceInternal(classRel, typform->typrelid,
+									   oldNspOid, nspOid,
+									   false, objsMoved);
+
+		table_close(classRel, RowExclusiveLock);
+
+		/*
+		 * Check for constraints associated with the composite type (we don't
+		 * currently support this, but probably will someday).
+		 */
+		AlterConstraintNamespaces(typform->typrelid, oldNspOid,
+								  nspOid, false, objsMoved);
+	}
+	else
+	{
+		/* If it's a domain, it might have constraints */
+		if (typform->typtype == TYPTYPE_DOMAIN)
+			AlterConstraintNamespaces(typeOid, oldNspOid, nspOid, true,
+									  objsMoved);
+	}
+
+	/*
+	 * Update dependency on schema, if any --- a table rowtype has not got
+	 * one, and neither does an implicit array.
+	 */
+	if (oldNspOid != nspOid &&
+		(isCompositeType || typform->typtype != TYPTYPE_COMPOSITE) &&
+		!isImplicitArray)
+		if (changeDependencyFor(TypeRelationId, typeOid,
+								NamespaceRelationId, oldNspOid, nspOid) != 1)
+			elog(ERROR, "failed to change schema dependency for type %s",
+				 format_type_be(typeOid));
+
+	InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0);
+
+	heap_freetuple(tup);
+
+	table_close(rel, RowExclusiveLock);
+
+	add_exact_object_address(&thisobj, objsMoved);
+
+	/* Recursively alter the associated array type, if any */
+	if (OidIsValid(arrayOid))
+		AlterTypeNamespaceInternal(arrayOid, nspOid, true, true, objsMoved);
+
+	return oldNspOid;
+}
+
+/*
+ * AlterType
+ *		ALTER TYPE SET (option = ...)
+ *
+ * NOTE: the set of changes that can be allowed here is constrained by many
+ * non-obvious implementation restrictions.  Tread carefully when considering
+ * adding new flexibility.
+ */ +ObjectAddress +AlterType(AlterTypeStmt *stmt) +{ + ObjectAddress address; + Relation catalog; + TypeName *typename; + HeapTuple tup; + Oid typeOid; + Form_pg_type typForm; + bool requireSuper = false; + AlterTypeRecurseParams atparams; + ListCell *pl; + + catalog = table_open(TypeRelationId, RowExclusiveLock); + + /* Make a TypeName so we can use standard type lookup machinery */ + typename = makeTypeNameFromNameList(stmt->typeName); + tup = typenameType(NULL, typename, NULL); + + typeOid = typeTypeId(tup); + typForm = (Form_pg_type) GETSTRUCT(tup); + + /* Process options */ + memset(&atparams, 0, sizeof(atparams)); + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (strcmp(defel->defname, "storage") == 0) + { + char *a = defGetString(defel); + + if (pg_strcasecmp(a, "plain") == 0) + atparams.storage = TYPSTORAGE_PLAIN; + else if (pg_strcasecmp(a, "external") == 0) + atparams.storage = TYPSTORAGE_EXTERNAL; + else if (pg_strcasecmp(a, "extended") == 0) + atparams.storage = TYPSTORAGE_EXTENDED; + else if (pg_strcasecmp(a, "main") == 0) + atparams.storage = TYPSTORAGE_MAIN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("storage \"%s\" not recognized", a))); + + /* + * Validate the storage request. If the type isn't varlena, it + * certainly doesn't support non-PLAIN storage. + */ + if (atparams.storage != TYPSTORAGE_PLAIN && typForm->typlen != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("fixed-size types must have storage PLAIN"))); + + /* + * Switching from PLAIN to non-PLAIN is allowed, but it requires + * superuser, since we can't validate that the type's C functions + * will support it. Switching from non-PLAIN to PLAIN is + * disallowed outright, because it's not practical to ensure that + * no tables have toasted values of the type. 
Switching among + * different non-PLAIN settings is OK, since it just constitutes a + * change in the strategy requested for columns created in the + * future. + */ + if (atparams.storage != TYPSTORAGE_PLAIN && + typForm->typstorage == TYPSTORAGE_PLAIN) + requireSuper = true; + else if (atparams.storage == TYPSTORAGE_PLAIN && + typForm->typstorage != TYPSTORAGE_PLAIN) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot change type's storage to PLAIN"))); + + atparams.updateStorage = true; + } + else if (strcmp(defel->defname, "receive") == 0) + { + if (defel->arg != NULL) + atparams.receiveOid = + findTypeReceiveFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.receiveOid = InvalidOid; /* NONE, remove function */ + atparams.updateReceive = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "send") == 0) + { + if (defel->arg != NULL) + atparams.sendOid = + findTypeSendFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.sendOid = InvalidOid; /* NONE, remove function */ + atparams.updateSend = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "typmod_in") == 0) + { + if (defel->arg != NULL) + atparams.typmodinOid = + findTypeTypmodinFunction(defGetQualifiedName(defel)); + else + atparams.typmodinOid = InvalidOid; /* NONE, remove function */ + atparams.updateTypmodin = true; + /* Replacing an I/O function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "typmod_out") == 0) + { + if (defel->arg != NULL) + atparams.typmodoutOid = + findTypeTypmodoutFunction(defGetQualifiedName(defel)); + else + atparams.typmodoutOid = InvalidOid; /* NONE, remove function */ + atparams.updateTypmodout = true; + /* Replacing an I/O function requires superuser. 
*/ + requireSuper = true; + } + else if (strcmp(defel->defname, "analyze") == 0) + { + if (defel->arg != NULL) + atparams.analyzeOid = + findTypeAnalyzeFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.analyzeOid = InvalidOid; /* NONE, remove function */ + atparams.updateAnalyze = true; + /* Replacing an analyze function requires superuser. */ + requireSuper = true; + } + else if (strcmp(defel->defname, "subscript") == 0) + { + if (defel->arg != NULL) + atparams.subscriptOid = + findTypeSubscriptingFunction(defGetQualifiedName(defel), + typeOid); + else + atparams.subscriptOid = InvalidOid; /* NONE, remove function */ + atparams.updateSubscript = true; + /* Replacing a subscript function requires superuser. */ + requireSuper = true; + } + + /* + * The rest of the options that CREATE accepts cannot be changed. + * Check for them so that we can give a meaningful error message. + */ + else if (strcmp(defel->defname, "input") == 0 || + strcmp(defel->defname, "output") == 0 || + strcmp(defel->defname, "internallength") == 0 || + strcmp(defel->defname, "passedbyvalue") == 0 || + strcmp(defel->defname, "alignment") == 0 || + strcmp(defel->defname, "like") == 0 || + strcmp(defel->defname, "category") == 0 || + strcmp(defel->defname, "preferred") == 0 || + strcmp(defel->defname, "default") == 0 || + strcmp(defel->defname, "element") == 0 || + strcmp(defel->defname, "delimiter") == 0 || + strcmp(defel->defname, "collatable") == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" cannot be changed", + defel->defname))); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type attribute \"%s\" not recognized", + defel->defname))); + } + + /* + * Permissions check. Require superuser if we decided the command + * requires that, else must own the type. 
+ */ + if (requireSuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter a type"))); + } + else + { + if (!pg_type_ownercheck(typeOid, GetUserId())) + aclcheck_error_type(ACLCHECK_NOT_OWNER, typeOid); + } + + /* + * We disallow all forms of ALTER TYPE SET on types that aren't plain base + * types. It would for example be highly unsafe, not to mention + * pointless, to change the send/receive functions for a composite type. + * Moreover, pg_dump has no support for changing these properties on + * non-base types. We might weaken this someday, but not now. + * + * Note: if you weaken this enough to allow composite types, be sure to + * adjust the GenerateTypeDependencies call in AlterTypeRecurse. + */ + if (typForm->typtype != TYPTYPE_BASE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a base type", + format_type_be(typeOid)))); + + /* + * For the same reasons, don't allow direct alteration of array types. + */ + if (IsTrueArrayType(typForm)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not a base type", + format_type_be(typeOid)))); + + /* OK, recursively update this type and any arrays/domains over it */ + AlterTypeRecurse(typeOid, false, tup, catalog, &atparams); + + /* Clean up */ + ReleaseSysCache(tup); + + table_close(catalog, RowExclusiveLock); + + ObjectAddressSet(address, TypeRelationId, typeOid); + + return address; +} + +/* + * AlterTypeRecurse: one recursion step for AlterType() + * + * Apply the changes specified by "atparams" to the type identified by + * "typeOid", whose existing pg_type tuple is "tup". If necessary, + * recursively update its array type as well. Then search for any domains + * over this type, and recursively apply (most of) the same changes to those + * domains. + * + * We need this because the system generally assumes that a domain inherits + * many properties from its base type. 
See DefineDomain() above for details + * of what is inherited. Arrays inherit a smaller number of properties, + * but not none. + * + * There's a race condition here, in that some other transaction could + * concurrently add another domain atop this base type; we'd miss updating + * that one. Hence, be wary of allowing ALTER TYPE to change properties for + * which it'd be really fatal for a domain to be out of sync with its base + * type (typlen, for example). In practice, races seem unlikely to be an + * issue for plausible use-cases for ALTER TYPE. If one does happen, it could + * be fixed by re-doing the same ALTER TYPE once all prior transactions have + * committed. + */ +static void +AlterTypeRecurse(Oid typeOid, bool isImplicitArray, + HeapTuple tup, Relation catalog, + AlterTypeRecurseParams *atparams) +{ + Datum values[Natts_pg_type]; + bool nulls[Natts_pg_type]; + bool replaces[Natts_pg_type]; + HeapTuple newtup; + SysScanDesc scan; + ScanKeyData key[1]; + HeapTuple domainTup; + + /* Since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + /* Update the current type's tuple */ + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + memset(replaces, 0, sizeof(replaces)); + + if (atparams->updateStorage) + { + replaces[Anum_pg_type_typstorage - 1] = true; + values[Anum_pg_type_typstorage - 1] = CharGetDatum(atparams->storage); + } + if (atparams->updateReceive) + { + replaces[Anum_pg_type_typreceive - 1] = true; + values[Anum_pg_type_typreceive - 1] = ObjectIdGetDatum(atparams->receiveOid); + } + if (atparams->updateSend) + { + replaces[Anum_pg_type_typsend - 1] = true; + values[Anum_pg_type_typsend - 1] = ObjectIdGetDatum(atparams->sendOid); + } + if (atparams->updateTypmodin) + { + replaces[Anum_pg_type_typmodin - 1] = true; + values[Anum_pg_type_typmodin - 1] = ObjectIdGetDatum(atparams->typmodinOid); + } + if (atparams->updateTypmodout) + { + replaces[Anum_pg_type_typmodout - 1] = true; + 
values[Anum_pg_type_typmodout - 1] = ObjectIdGetDatum(atparams->typmodoutOid); + } + if (atparams->updateAnalyze) + { + replaces[Anum_pg_type_typanalyze - 1] = true; + values[Anum_pg_type_typanalyze - 1] = ObjectIdGetDatum(atparams->analyzeOid); + } + if (atparams->updateSubscript) + { + replaces[Anum_pg_type_typsubscript - 1] = true; + values[Anum_pg_type_typsubscript - 1] = ObjectIdGetDatum(atparams->subscriptOid); + } + + newtup = heap_modify_tuple(tup, RelationGetDescr(catalog), + values, nulls, replaces); + + CatalogTupleUpdate(catalog, &newtup->t_self, newtup); + + /* Rebuild dependencies for this type */ + GenerateTypeDependencies(newtup, + catalog, + NULL, /* don't have defaultExpr handy */ + NULL, /* don't have typacl handy */ + 0, /* we rejected composite types above */ + isImplicitArray, /* it might be an array */ + isImplicitArray, /* dependent iff it's array */ + false, /* don't touch extension membership */ + true); + + InvokeObjectPostAlterHook(TypeRelationId, typeOid, 0); + + /* + * Arrays inherit their base type's typmodin and typmodout, but none of + * the other properties we're concerned with here. Recurse to the array + * type if needed. + */ + if (!isImplicitArray && + (atparams->updateTypmodin || atparams->updateTypmodout)) + { + Oid arrtypoid = ((Form_pg_type) GETSTRUCT(newtup))->typarray; + + if (OidIsValid(arrtypoid)) + { + HeapTuple arrtup; + AlterTypeRecurseParams arrparams; + + arrtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrtypoid)); + if (!HeapTupleIsValid(arrtup)) + elog(ERROR, "cache lookup failed for type %u", arrtypoid); + + memset(&arrparams, 0, sizeof(arrparams)); + arrparams.updateTypmodin = atparams->updateTypmodin; + arrparams.updateTypmodout = atparams->updateTypmodout; + arrparams.typmodinOid = atparams->typmodinOid; + arrparams.typmodoutOid = atparams->typmodoutOid; + + AlterTypeRecurse(arrtypoid, true, arrtup, catalog, &arrparams); + + ReleaseSysCache(arrtup); + } + } + + /* + * Now we need to recurse to domains. 
However, some properties are not + * inherited by domains, so clear the update flags for those. + */ + atparams->updateReceive = false; /* domains use F_DOMAIN_RECV */ + atparams->updateTypmodin = false; /* domains don't have typmods */ + atparams->updateTypmodout = false; + atparams->updateSubscript = false; /* domains don't have subscriptors */ + + /* Skip the scan if nothing remains to be done */ + if (!(atparams->updateStorage || + atparams->updateSend || + atparams->updateAnalyze)) + return; + + /* Search pg_type for possible domains over this type */ + ScanKeyInit(&key[0], + Anum_pg_type_typbasetype, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(typeOid)); + + scan = systable_beginscan(catalog, InvalidOid, false, + NULL, 1, key); + + while ((domainTup = systable_getnext(scan)) != NULL) + { + Form_pg_type domainForm = (Form_pg_type) GETSTRUCT(domainTup); + + /* + * Shouldn't have a nonzero typbasetype in a non-domain, but let's + * check + */ + if (domainForm->typtype != TYPTYPE_DOMAIN) + continue; + + AlterTypeRecurse(domainForm->oid, false, domainTup, catalog, atparams); + } + + systable_endscan(scan); +} diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c new file mode 100644 index 0000000..cba8e19 --- /dev/null +++ b/src/backend/commands/user.c @@ -0,0 +1,1645 @@ +/*------------------------------------------------------------------------- + * + * user.c + * Commands for manipulating roles (formerly called users). 
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/commands/user.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "access/xact.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_auth_members.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_db_role_setting.h"
+#include "commands/comment.h"
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/seclabel.h"
+#include "commands/user.h"
+#include "libpq/crypt.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+/* Potentially set by pg_upgrade_support functions */
+Oid			binary_upgrade_next_pg_authid_oid = InvalidOid;
+
+
+/*
+ * GUC parameter: hashing method applied when a role's password is set
+ * (passed to encrypt_password() in CreateRole()/AlterRole()).
+ */
+int			Password_encryption = PASSWORD_TYPE_SCRAM_SHA_256;
+
+/* Hook to check passwords in CreateRole() and AlterRole() */
+check_password_hook_type check_password_hook = NULL;
+
+/* Membership add/remove workers; definitions appear later in this file */
+static void AddRoleMems(const char *rolename, Oid roleid,
+						List *memberSpecs, List *memberIds,
+						Oid grantorId, bool admin_opt);
+static void DelRoleMems(const char *rolename, Oid roleid,
+						List *memberSpecs, List *memberIds,
+						bool admin_opt);
+
+
+/* Check if current user has createrole privileges */
+static bool
+have_createrole_privilege(void)
+{
+	return has_createrole_privilege(GetUserId());
+}
+
+
+/*
+ * CREATE ROLE
+ *
+ * Insert a new role into pg_authid and establish any requested memberships;
+ * returns the OID of the newly created role.
+ */
+Oid
+CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
+{
+	Relation	pg_authid_rel;
+	TupleDesc	pg_authid_dsc;
+	HeapTuple	tuple;
+	Datum
new_record[Natts_pg_authid]; + bool new_record_nulls[Natts_pg_authid]; + Oid roleid; + ListCell *item; + ListCell *option; + char *password = NULL; /* user password */ + bool issuper = false; /* Make the user a superuser? */ + bool inherit = true; /* Auto inherit privileges? */ + bool createrole = false; /* Can this user create roles? */ + bool createdb = false; /* Can the user create databases? */ + bool canlogin = false; /* Can this user login? */ + bool isreplication = false; /* Is this a replication role? */ + bool bypassrls = false; /* Is this a row security enabled role? */ + int connlimit = -1; /* maximum connections allowed */ + List *addroleto = NIL; /* roles to make this a member of */ + List *rolemembers = NIL; /* roles to be members of this role */ + List *adminmembers = NIL; /* roles to be admins of this role */ + char *validUntil = NULL; /* time the login is valid until */ + Datum validUntil_datum; /* same, as timestamptz Datum */ + bool validUntil_null; + DefElem *dpassword = NULL; + DefElem *dissuper = NULL; + DefElem *dinherit = NULL; + DefElem *dcreaterole = NULL; + DefElem *dcreatedb = NULL; + DefElem *dcanlogin = NULL; + DefElem *disreplication = NULL; + DefElem *dconnlimit = NULL; + DefElem *daddroleto = NULL; + DefElem *drolemembers = NULL; + DefElem *dadminmembers = NULL; + DefElem *dvalidUntil = NULL; + DefElem *dbypassRLS = NULL; + + /* The defaults can vary depending on the original statement type */ + switch (stmt->stmt_type) + { + case ROLESTMT_ROLE: + break; + case ROLESTMT_USER: + canlogin = true; + /* may eventually want inherit to default to false here */ + break; + case ROLESTMT_GROUP: + break; + } + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "password") == 0) + { + if (dpassword) + errorConflictingDefElem(defel, pstate); + dpassword = defel; + } + else if (strcmp(defel->defname, "sysid") == 0) + { + 
ereport(NOTICE, + (errmsg("SYSID can no longer be specified"))); + } + else if (strcmp(defel->defname, "superuser") == 0) + { + if (dissuper) + errorConflictingDefElem(defel, pstate); + dissuper = defel; + } + else if (strcmp(defel->defname, "inherit") == 0) + { + if (dinherit) + errorConflictingDefElem(defel, pstate); + dinherit = defel; + } + else if (strcmp(defel->defname, "createrole") == 0) + { + if (dcreaterole) + errorConflictingDefElem(defel, pstate); + dcreaterole = defel; + } + else if (strcmp(defel->defname, "createdb") == 0) + { + if (dcreatedb) + errorConflictingDefElem(defel, pstate); + dcreatedb = defel; + } + else if (strcmp(defel->defname, "canlogin") == 0) + { + if (dcanlogin) + errorConflictingDefElem(defel, pstate); + dcanlogin = defel; + } + else if (strcmp(defel->defname, "isreplication") == 0) + { + if (disreplication) + errorConflictingDefElem(defel, pstate); + disreplication = defel; + } + else if (strcmp(defel->defname, "connectionlimit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "addroleto") == 0) + { + if (daddroleto) + errorConflictingDefElem(defel, pstate); + daddroleto = defel; + } + else if (strcmp(defel->defname, "rolemembers") == 0) + { + if (drolemembers) + errorConflictingDefElem(defel, pstate); + drolemembers = defel; + } + else if (strcmp(defel->defname, "adminmembers") == 0) + { + if (dadminmembers) + errorConflictingDefElem(defel, pstate); + dadminmembers = defel; + } + else if (strcmp(defel->defname, "validUntil") == 0) + { + if (dvalidUntil) + errorConflictingDefElem(defel, pstate); + dvalidUntil = defel; + } + else if (strcmp(defel->defname, "bypassrls") == 0) + { + if (dbypassRLS) + errorConflictingDefElem(defel, pstate); + dbypassRLS = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (dpassword && dpassword->arg) + password = strVal(dpassword->arg); + if (dissuper) + issuper = 
boolVal(dissuper->arg); + if (dinherit) + inherit = boolVal(dinherit->arg); + if (dcreaterole) + createrole = boolVal(dcreaterole->arg); + if (dcreatedb) + createdb = boolVal(dcreatedb->arg); + if (dcanlogin) + canlogin = boolVal(dcanlogin->arg); + if (disreplication) + isreplication = boolVal(disreplication->arg); + if (dconnlimit) + { + connlimit = intVal(dconnlimit->arg); + if (connlimit < -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", connlimit))); + } + if (daddroleto) + addroleto = (List *) daddroleto->arg; + if (drolemembers) + rolemembers = (List *) drolemembers->arg; + if (dadminmembers) + adminmembers = (List *) dadminmembers->arg; + if (dvalidUntil) + validUntil = strVal(dvalidUntil->arg); + if (dbypassRLS) + bypassrls = boolVal(dbypassRLS->arg); + + /* Check some permissions first */ + if (issuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create superusers"))); + } + else if (isreplication) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create replication users"))); + } + else if (bypassrls) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create bypassrls users"))); + } + else + { + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to create role"))); + } + + /* + * Check that the user is not trying to create a role in the reserved + * "pg_" namespace. + */ + if (IsReservedName(stmt->role)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + stmt->role), + errdetail("Role names starting with \"pg_\" are reserved."))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for role names are violated. 
+ */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(stmt->role, "regress_", 8) != 0) + elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\""); +#endif + + /* + * Check the pg_authid relation to be certain the role doesn't already + * exist. + */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_authid_dsc = RelationGetDescr(pg_authid_rel); + + if (OidIsValid(get_role_oid(stmt->role, true))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("role \"%s\" already exists", + stmt->role))); + + /* Convert validuntil to internal form */ + if (validUntil) + { + validUntil_datum = DirectFunctionCall3(timestamptz_in, + CStringGetDatum(validUntil), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + validUntil_null = false; + } + else + { + validUntil_datum = (Datum) 0; + validUntil_null = true; + } + + /* + * Call the password checking hook if there is one defined + */ + if (check_password_hook && password) + (*check_password_hook) (stmt->role, + password, + get_password_type(password), + validUntil_datum, + validUntil_null); + + /* + * Build a tuple to insert + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + + new_record[Anum_pg_authid_rolname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->role)); + + new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(issuper); + new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(inherit); + new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(createrole); + new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(createdb); + new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(canlogin); + new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(isreplication); + new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit); + + if (password) + { + char *shadow_pass; + const char *logdetail = NULL; + + /* + * Don't allow an 
empty password. Libpq treats an empty password the + * same as no password at all, and won't even try to authenticate. But + * other clients might, so allowing it would be confusing. By clearing + * the password when an empty string is specified, the account is + * consistently locked for all clients. + * + * Note that this only covers passwords stored in the database itself. + * There are also checks in the authentication code, to forbid an + * empty password from being used with authentication methods that + * fetch the password from an external system, like LDAP or PAM. + */ + if (password[0] == '\0' || + plain_crypt_verify(stmt->role, password, "", &logdetail) == STATUS_OK) + { + ereport(NOTICE, + (errmsg("empty string is not a valid password, clearing password"))); + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + else + { + /* Encrypt the password to the requested format. */ + shadow_pass = encrypt_password(Password_encryption, stmt->role, + password); + new_record[Anum_pg_authid_rolpassword - 1] = + CStringGetTextDatum(shadow_pass); + } + } + else + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + + new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum; + new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null; + + new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(bypassrls); + + /* + * pg_largeobject_metadata contains pg_authid.oid's, so we use the + * binary-upgrade override. 
+ */
+	if (IsBinaryUpgrade)
+	{
+		/* pg_upgrade must have pre-assigned the OID via its support function */
+		if (!OidIsValid(binary_upgrade_next_pg_authid_oid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("pg_authid OID value not set when in binary upgrade mode")));
+
+		roleid = binary_upgrade_next_pg_authid_oid;
+		/* consume the override so it cannot be reused for another role */
+		binary_upgrade_next_pg_authid_oid = InvalidOid;
+	}
+	else
+	{
+		roleid = GetNewOidWithIndex(pg_authid_rel, AuthIdOidIndexId,
+									Anum_pg_authid_oid);
+	}
+
+	new_record[Anum_pg_authid_oid - 1] = ObjectIdGetDatum(roleid);
+
+	tuple = heap_form_tuple(pg_authid_dsc, new_record, new_record_nulls);
+
+	/*
+	 * Insert new record in the pg_authid table
+	 */
+	CatalogTupleInsert(pg_authid_rel, tuple);
+
+	/*
+	 * Advance command counter so we can see new record; else tests in
+	 * AddRoleMems may fail.
+	 */
+	if (addroleto || adminmembers || rolemembers)
+		CommandCounterIncrement();
+
+	/*
+	 * Add the new role to the specified existing roles.
+	 */
+	if (addroleto)
+	{
+		/* Build a one-element member list naming the role just created */
+		RoleSpec   *thisrole = makeNode(RoleSpec);
+		List	   *thisrole_list = list_make1(thisrole);
+		List	   *thisrole_oidlist = list_make1_oid(roleid);
+
+		thisrole->roletype = ROLESPEC_CSTRING;
+		thisrole->rolename = stmt->role;
+		thisrole->location = -1;
+
+		foreach(item, addroleto)
+		{
+			RoleSpec   *oldrole = lfirst(item);
+			HeapTuple	oldroletup = get_rolespec_tuple(oldrole);
+			Form_pg_authid oldroleform = (Form_pg_authid) GETSTRUCT(oldroletup);
+			Oid			oldroleid = oldroleform->oid;
+			char	   *oldrolename = NameStr(oldroleform->rolname);
+
+			/* the new role becomes a member of each listed role */
+			AddRoleMems(oldrolename, oldroleid,
+						thisrole_list,
+						thisrole_oidlist,
+						GetUserId(), false);
+
+			ReleaseSysCache(oldroletup);
+		}
+	}
+
+	/*
+	 * Add the specified members to this new role.  adminmembers get the admin
+	 * option, rolemembers don't.
+ */ + AddRoleMems(stmt->role, roleid, + adminmembers, roleSpecsToIds(adminmembers), + GetUserId(), true); + AddRoleMems(stmt->role, roleid, + rolemembers, roleSpecsToIds(rolemembers), + GetUserId(), false); + + /* Post creation hook for new role */ + InvokeObjectPostCreateHook(AuthIdRelationId, roleid, 0); + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(pg_authid_rel, NoLock); + + return roleid; +} + + +/* + * ALTER ROLE + * + * Note: the rolemembers option accepted here is intended to support the + * backwards-compatible ALTER GROUP syntax. Although it will work to say + * "ALTER ROLE role ROLE rolenames", we don't document it. + */ +Oid +AlterRole(ParseState *pstate, AlterRoleStmt *stmt) +{ + Datum new_record[Natts_pg_authid]; + bool new_record_nulls[Natts_pg_authid]; + bool new_record_repl[Natts_pg_authid]; + Relation pg_authid_rel; + TupleDesc pg_authid_dsc; + HeapTuple tuple, + new_tuple; + Form_pg_authid authform; + ListCell *option; + char *rolename; + char *password = NULL; /* user password */ + int connlimit = -1; /* maximum connections allowed */ + char *validUntil = NULL; /* time the login is valid until */ + Datum validUntil_datum; /* same, as timestamptz Datum */ + bool validUntil_null; + DefElem *dpassword = NULL; + DefElem *dissuper = NULL; + DefElem *dinherit = NULL; + DefElem *dcreaterole = NULL; + DefElem *dcreatedb = NULL; + DefElem *dcanlogin = NULL; + DefElem *disreplication = NULL; + DefElem *dconnlimit = NULL; + DefElem *drolemembers = NULL; + DefElem *dvalidUntil = NULL; + DefElem *dbypassRLS = NULL; + Oid roleid; + + check_rolespec_name(stmt->role, + _("Cannot alter reserved roles.")); + + /* Extract options from the statement node tree */ + foreach(option, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(option); + + if (strcmp(defel->defname, "password") == 0) + { + if (dpassword) + errorConflictingDefElem(defel, pstate); + dpassword = defel; + } + else if (strcmp(defel->defname, "superuser") == 0) + { + 
if (dissuper) + errorConflictingDefElem(defel, pstate); + dissuper = defel; + } + else if (strcmp(defel->defname, "inherit") == 0) + { + if (dinherit) + errorConflictingDefElem(defel, pstate); + dinherit = defel; + } + else if (strcmp(defel->defname, "createrole") == 0) + { + if (dcreaterole) + errorConflictingDefElem(defel, pstate); + dcreaterole = defel; + } + else if (strcmp(defel->defname, "createdb") == 0) + { + if (dcreatedb) + errorConflictingDefElem(defel, pstate); + dcreatedb = defel; + } + else if (strcmp(defel->defname, "canlogin") == 0) + { + if (dcanlogin) + errorConflictingDefElem(defel, pstate); + dcanlogin = defel; + } + else if (strcmp(defel->defname, "isreplication") == 0) + { + if (disreplication) + errorConflictingDefElem(defel, pstate); + disreplication = defel; + } + else if (strcmp(defel->defname, "connectionlimit") == 0) + { + if (dconnlimit) + errorConflictingDefElem(defel, pstate); + dconnlimit = defel; + } + else if (strcmp(defel->defname, "rolemembers") == 0 && + stmt->action != 0) + { + if (drolemembers) + errorConflictingDefElem(defel, pstate); + drolemembers = defel; + } + else if (strcmp(defel->defname, "validUntil") == 0) + { + if (dvalidUntil) + errorConflictingDefElem(defel, pstate); + dvalidUntil = defel; + } + else if (strcmp(defel->defname, "bypassrls") == 0) + { + if (dbypassRLS) + errorConflictingDefElem(defel, pstate); + dbypassRLS = defel; + } + else + elog(ERROR, "option \"%s\" not recognized", + defel->defname); + } + + if (dpassword && dpassword->arg) + password = strVal(dpassword->arg); + if (dconnlimit) + { + connlimit = intVal(dconnlimit->arg); + if (connlimit < -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid connection limit: %d", connlimit))); + } + if (dvalidUntil) + validUntil = strVal(dvalidUntil->arg); + + /* + * Scan the pg_authid relation to be certain the user exists. 
+ */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_authid_dsc = RelationGetDescr(pg_authid_rel); + + tuple = get_rolespec_tuple(stmt->role); + authform = (Form_pg_authid) GETSTRUCT(tuple); + rolename = pstrdup(NameStr(authform->rolname)); + roleid = authform->oid; + + /* + * To mess with a superuser or replication role in any way you gotta be + * superuser. We also insist on superuser to change the BYPASSRLS + * property. Otherwise, if you don't have createrole, you're only allowed + * to change your own password. + */ + if (authform->rolsuper || dissuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superuser roles or change superuser attribute"))); + } + else if (authform->rolreplication || disreplication) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter replication roles or change replication attribute"))); + } + else if (dbypassRLS) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to change bypassrls attribute"))); + } + else if (!have_createrole_privilege()) + { + /* check the rest */ + if (dinherit || dcreaterole || dcreatedb || dcanlogin || dconnlimit || + drolemembers || dvalidUntil || !dpassword || roleid != GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied"))); + } + + /* Convert validuntil to internal form */ + if (dvalidUntil) + { + validUntil_datum = DirectFunctionCall3(timestamptz_in, + CStringGetDatum(validUntil), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1)); + validUntil_null = false; + } + else + { + /* fetch existing setting in case hook needs it */ + validUntil_datum = SysCacheGetAttr(AUTHNAME, tuple, + Anum_pg_authid_rolvaliduntil, + &validUntil_null); + } + + /* + * Call the password checking hook if there is one defined + */ + if (check_password_hook 
&& password) + (*check_password_hook) (rolename, + password, + get_password_type(password), + validUntil_datum, + validUntil_null); + + /* + * Build an updated tuple, perusing the information just obtained + */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + /* + * issuper/createrole/etc + */ + if (dissuper) + { + new_record[Anum_pg_authid_rolsuper - 1] = BoolGetDatum(boolVal(dissuper->arg)); + new_record_repl[Anum_pg_authid_rolsuper - 1] = true; + } + + if (dinherit) + { + new_record[Anum_pg_authid_rolinherit - 1] = BoolGetDatum(boolVal(dinherit->arg)); + new_record_repl[Anum_pg_authid_rolinherit - 1] = true; + } + + if (dcreaterole) + { + new_record[Anum_pg_authid_rolcreaterole - 1] = BoolGetDatum(boolVal(dcreaterole->arg)); + new_record_repl[Anum_pg_authid_rolcreaterole - 1] = true; + } + + if (dcreatedb) + { + new_record[Anum_pg_authid_rolcreatedb - 1] = BoolGetDatum(boolVal(dcreatedb->arg)); + new_record_repl[Anum_pg_authid_rolcreatedb - 1] = true; + } + + if (dcanlogin) + { + new_record[Anum_pg_authid_rolcanlogin - 1] = BoolGetDatum(boolVal(dcanlogin->arg)); + new_record_repl[Anum_pg_authid_rolcanlogin - 1] = true; + } + + if (disreplication) + { + new_record[Anum_pg_authid_rolreplication - 1] = BoolGetDatum(boolVal(disreplication->arg)); + new_record_repl[Anum_pg_authid_rolreplication - 1] = true; + } + + if (dconnlimit) + { + new_record[Anum_pg_authid_rolconnlimit - 1] = Int32GetDatum(connlimit); + new_record_repl[Anum_pg_authid_rolconnlimit - 1] = true; + } + + /* password */ + if (password) + { + char *shadow_pass; + const char *logdetail = NULL; + + /* Like in CREATE USER, don't allow an empty password. 
*/ + if (password[0] == '\0' || + plain_crypt_verify(rolename, password, "", &logdetail) == STATUS_OK) + { + ereport(NOTICE, + (errmsg("empty string is not a valid password, clearing password"))); + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + else + { + /* Encrypt the password to the requested format. */ + shadow_pass = encrypt_password(Password_encryption, rolename, + password); + new_record[Anum_pg_authid_rolpassword - 1] = + CStringGetTextDatum(shadow_pass); + } + new_record_repl[Anum_pg_authid_rolpassword - 1] = true; + } + + /* unset password */ + if (dpassword && dpassword->arg == NULL) + { + new_record_repl[Anum_pg_authid_rolpassword - 1] = true; + new_record_nulls[Anum_pg_authid_rolpassword - 1] = true; + } + + /* valid until */ + new_record[Anum_pg_authid_rolvaliduntil - 1] = validUntil_datum; + new_record_nulls[Anum_pg_authid_rolvaliduntil - 1] = validUntil_null; + new_record_repl[Anum_pg_authid_rolvaliduntil - 1] = true; + + if (dbypassRLS) + { + new_record[Anum_pg_authid_rolbypassrls - 1] = BoolGetDatum(boolVal(dbypassRLS->arg)); + new_record_repl[Anum_pg_authid_rolbypassrls - 1] = true; + } + + new_tuple = heap_modify_tuple(tuple, pg_authid_dsc, new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authid_rel, &tuple->t_self, new_tuple); + + InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0); + + ReleaseSysCache(tuple); + heap_freetuple(new_tuple); + + /* + * Advance command counter so we can see new record; else tests in + * AddRoleMems may fail. + */ + if (drolemembers) + { + List *rolemembers = (List *) drolemembers->arg; + + CommandCounterIncrement(); + + if (stmt->action == +1) /* add members to role */ + AddRoleMems(rolename, roleid, + rolemembers, roleSpecsToIds(rolemembers), + GetUserId(), false); + else if (stmt->action == -1) /* drop members from role */ + DelRoleMems(rolename, roleid, + rolemembers, roleSpecsToIds(rolemembers), + false); + } + + /* + * Close pg_authid, but keep lock till commit. 
+ */ + table_close(pg_authid_rel, NoLock); + + return roleid; +} + + +/* + * ALTER ROLE ... SET + */ +Oid +AlterRoleSet(AlterRoleSetStmt *stmt) +{ + HeapTuple roletuple; + Form_pg_authid roleform; + Oid databaseid = InvalidOid; + Oid roleid = InvalidOid; + + if (stmt->role) + { + check_rolespec_name(stmt->role, + _("Cannot alter reserved roles.")); + + roletuple = get_rolespec_tuple(stmt->role); + roleform = (Form_pg_authid) GETSTRUCT(roletuple); + roleid = roleform->oid; + + /* + * Obtain a lock on the role and make sure it didn't go away in the + * meantime. + */ + shdepLockAndCheckObject(AuthIdRelationId, roleid); + + /* + * To mess with a superuser you gotta be superuser; else you need + * createrole, or just want to change your own settings + */ + if (roleform->rolsuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && roleid != GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied"))); + } + + ReleaseSysCache(roletuple); + } + + /* look up and lock the database, if specified */ + if (stmt->database != NULL) + { + databaseid = get_database_oid(stmt->database, false); + shdepLockAndCheckObject(DatabaseRelationId, databaseid); + + if (!stmt->role) + { + /* + * If no role is specified, then this is effectively the same as + * ALTER DATABASE ... SET, so use the same permission check. + */ + if (!pg_database_ownercheck(databaseid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE, + stmt->database); + } + } + + if (!stmt->role && !stmt->database) + { + /* Must be superuser to alter settings globally. 
*/ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter settings globally"))); + } + + AlterSetting(databaseid, roleid, stmt->setstmt); + + return roleid; +} + + +/* + * DROP ROLE + */ +void +DropRole(DropRoleStmt *stmt) +{ + Relation pg_authid_rel, + pg_auth_members_rel; + ListCell *item; + + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to drop role"))); + + /* + * Scan the pg_authid relation to find the Oid of the role(s) to be + * deleted. + */ + pg_authid_rel = table_open(AuthIdRelationId, RowExclusiveLock); + pg_auth_members_rel = table_open(AuthMemRelationId, RowExclusiveLock); + + foreach(item, stmt->roles) + { + RoleSpec *rolspec = lfirst(item); + char *role; + HeapTuple tuple, + tmp_tuple; + Form_pg_authid roleform; + ScanKeyData scankey; + char *detail; + char *detail_log; + SysScanDesc sscan; + Oid roleid; + + if (rolspec->roletype != ROLESPEC_CSTRING) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot use special role specifier in DROP ROLE"))); + role = rolspec->rolename; + + tuple = SearchSysCache1(AUTHNAME, PointerGetDatum(role)); + if (!HeapTupleIsValid(tuple)) + { + if (!stmt->missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", role))); + } + else + { + ereport(NOTICE, + (errmsg("role \"%s\" does not exist, skipping", + role))); + } + + continue; + } + + roleform = (Form_pg_authid) GETSTRUCT(tuple); + roleid = roleform->oid; + + if (roleid == GetUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("current user cannot be dropped"))); + if (roleid == GetOuterUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("current user cannot be dropped"))); + if (roleid == GetSessionUserId()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("session user cannot be dropped"))); + + /* + * 
For safety's sake, we allow createrole holders to drop ordinary
+		 * roles but not superuser roles.  This is mainly to avoid the
+		 * scenario where you accidentally drop the last superuser.
+		 */
+		if (roleform->rolsuper && !superuser())
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be superuser to drop superusers")));
+
+		/* DROP hook for the role being removed */
+		InvokeObjectDropHook(AuthIdRelationId, roleid, 0);
+
+		/*
+		 * Lock the role, so nobody can add dependencies to her while we drop
+		 * her.  We keep the lock until the end of transaction.
+		 */
+		LockSharedObject(AuthIdRelationId, roleid, 0, AccessExclusiveLock);
+
+		/*
+		 * Check for pg_shdepend entries depending on this role; if any
+		 * objects still depend on it, refuse the drop and report the
+		 * dependent objects in the error detail.
+		 */
+		if (checkSharedDependencies(AuthIdRelationId, roleid,
+									&detail, &detail_log))
+			ereport(ERROR,
+					(errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+					 errmsg("role \"%s\" cannot be dropped because some objects depend on it",
+							role),
+					 errdetail_internal("%s", detail),
+					 errdetail_log("%s", detail_log)));
+
+		/*
+		 * Remove the role from the pg_authid table
+		 */
+		CatalogTupleDelete(pg_authid_rel, &tuple->t_self);
+
+		ReleaseSysCache(tuple);
+
+		/*
+		 * Remove role from the pg_auth_members table.  We have to remove all
+		 * tuples that show it as either a role or a member.
+		 *
+		 * XXX what about grantor entries?  Maybe we should do one heap scan.
+ */ + ScanKeyInit(&scankey, + Anum_pg_auth_members_roleid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(roleid)); + + sscan = systable_beginscan(pg_auth_members_rel, AuthMemRoleMemIndexId, + true, NULL, 1, &scankey); + + while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) + { + CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self); + } + + systable_endscan(sscan); + + ScanKeyInit(&scankey, + Anum_pg_auth_members_member, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(roleid)); + + sscan = systable_beginscan(pg_auth_members_rel, AuthMemMemRoleIndexId, + true, NULL, 1, &scankey); + + while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) + { + CatalogTupleDelete(pg_auth_members_rel, &tmp_tuple->t_self); + } + + systable_endscan(sscan); + + /* + * Remove any comments or security labels on this role. + */ + DeleteSharedComments(roleid, AuthIdRelationId); + DeleteSharedSecurityLabel(roleid, AuthIdRelationId); + + /* + * Remove settings for this role. + */ + DropSetting(InvalidOid, roleid); + + /* + * Advance command counter so that later iterations of this loop will + * see the changes already made. This is essential if, for example, + * we are trying to drop both a role and one of its direct members --- + * we'll get an error if we try to delete the linking pg_auth_members + * tuple twice. (We do not need a CCI between the two delete loops + * above, because it's not allowed for a role to directly contain + * itself.) + */ + CommandCounterIncrement(); + } + + /* + * Now we can clean up; but keep locks until commit. 
+ */ + table_close(pg_auth_members_rel, NoLock); + table_close(pg_authid_rel, NoLock); +} + +/* + * Rename role + */ +ObjectAddress +RenameRole(const char *oldname, const char *newname) +{ + HeapTuple oldtuple, + newtuple; + TupleDesc dsc; + Relation rel; + Datum datum; + bool isnull; + Datum repl_val[Natts_pg_authid]; + bool repl_null[Natts_pg_authid]; + bool repl_repl[Natts_pg_authid]; + int i; + Oid roleid; + ObjectAddress address; + Form_pg_authid authform; + + rel = table_open(AuthIdRelationId, RowExclusiveLock); + dsc = RelationGetDescr(rel); + + oldtuple = SearchSysCache1(AUTHNAME, CStringGetDatum(oldname)); + if (!HeapTupleIsValid(oldtuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", oldname))); + + /* + * XXX Client applications probably store the session user somewhere, so + * renaming it could cause confusion. On the other hand, there may not be + * an actual problem besides a little confusion, so think about this and + * decide. Same for SET ROLE ... we don't restrict renaming the current + * effective userid, though. + */ + + authform = (Form_pg_authid) GETSTRUCT(oldtuple); + roleid = authform->oid; + + if (roleid == GetSessionUserId()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("session user cannot be renamed"))); + if (roleid == GetOuterUserId()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current user cannot be renamed"))); + + /* + * Check that the user is not trying to rename a system role and not + * trying to rename a role into the reserved "pg_" namespace. 
+ */ + if (IsReservedName(NameStr(authform->rolname))) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + NameStr(authform->rolname)), + errdetail("Role names starting with \"pg_\" are reserved."))); + + if (IsReservedName(newname)) + ereport(ERROR, + (errcode(ERRCODE_RESERVED_NAME), + errmsg("role name \"%s\" is reserved", + newname), + errdetail("Role names starting with \"pg_\" are reserved."))); + + /* + * If built with appropriate switch, whine when regression-testing + * conventions for role names are violated. + */ +#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS + if (strncmp(newname, "regress_", 8) != 0) + elog(WARNING, "roles created by regression test cases should have names starting with \"regress_\""); +#endif + + /* make sure the new name doesn't exist */ + if (SearchSysCacheExists1(AUTHNAME, CStringGetDatum(newname))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("role \"%s\" already exists", newname))); + + /* + * createrole is enough privilege unless you want to mess with a superuser + */ + if (((Form_pg_authid) GETSTRUCT(oldtuple))->rolsuper) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename superusers"))); + } + else + { + if (!have_createrole_privilege()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to rename role"))); + } + + /* OK, construct the modified tuple */ + for (i = 0; i < Natts_pg_authid; i++) + repl_repl[i] = false; + + repl_repl[Anum_pg_authid_rolname - 1] = true; + repl_val[Anum_pg_authid_rolname - 1] = DirectFunctionCall1(namein, + CStringGetDatum(newname)); + repl_null[Anum_pg_authid_rolname - 1] = false; + + datum = heap_getattr(oldtuple, Anum_pg_authid_rolpassword, dsc, &isnull); + + if (!isnull && get_password_type(TextDatumGetCString(datum)) == PASSWORD_TYPE_MD5) + { + /* MD5 uses the username as salt, so just clear it on a rename */ + 
repl_repl[Anum_pg_authid_rolpassword - 1] = true; + repl_null[Anum_pg_authid_rolpassword - 1] = true; + + ereport(NOTICE, + (errmsg("MD5 password cleared because of role rename"))); + } + + newtuple = heap_modify_tuple(oldtuple, dsc, repl_val, repl_null, repl_repl); + CatalogTupleUpdate(rel, &oldtuple->t_self, newtuple); + + InvokeObjectPostAlterHook(AuthIdRelationId, roleid, 0); + + ObjectAddressSet(address, AuthIdRelationId, roleid); + + ReleaseSysCache(oldtuple); + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(rel, NoLock); + + return address; +} + +/* + * GrantRoleStmt + * + * Grant/Revoke roles to/from roles + */ +void +GrantRole(GrantRoleStmt *stmt) +{ + Relation pg_authid_rel; + Oid grantor; + List *grantee_ids; + ListCell *item; + + if (stmt->grantor) + grantor = get_rolespec_oid(stmt->grantor, false); + else + grantor = GetUserId(); + + grantee_ids = roleSpecsToIds(stmt->grantee_roles); + + /* AccessShareLock is enough since we aren't modifying pg_authid */ + pg_authid_rel = table_open(AuthIdRelationId, AccessShareLock); + + /* + * Step through all of the granted roles and add/remove entries for the + * grantees, or, if admin_opt is set, then just add/remove the admin + * option. 
+ * + * Note: Permissions checking is done by AddRoleMems/DelRoleMems + */ + foreach(item, stmt->granted_roles) + { + AccessPriv *priv = (AccessPriv *) lfirst(item); + char *rolename = priv->priv_name; + Oid roleid; + + /* Must reject priv(columns) and ALL PRIVILEGES(columns) */ + if (rolename == NULL || priv->cols != NIL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_GRANT_OPERATION), + errmsg("column names cannot be included in GRANT/REVOKE ROLE"))); + + roleid = get_role_oid(rolename, false); + if (stmt->is_grant) + AddRoleMems(rolename, roleid, + stmt->grantee_roles, grantee_ids, + grantor, stmt->admin_opt); + else + DelRoleMems(rolename, roleid, + stmt->grantee_roles, grantee_ids, + stmt->admin_opt); + } + + /* + * Close pg_authid, but keep lock till commit. + */ + table_close(pg_authid_rel, NoLock); +} + +/* + * DropOwnedObjects + * + * Drop the objects owned by a given list of roles. + */ +void +DropOwnedObjects(DropOwnedStmt *stmt) +{ + List *role_ids = roleSpecsToIds(stmt->roles); + ListCell *cell; + + /* Check privileges */ + foreach(cell, role_ids) + { + Oid roleid = lfirst_oid(cell); + + if (!has_privs_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to drop objects"))); + } + + /* Ok, do it */ + shdepDropOwned(role_ids, stmt->behavior); +} + +/* + * ReassignOwnedObjects + * + * Give the objects owned by a given list of roles away to another user. 
+ */ +void +ReassignOwnedObjects(ReassignOwnedStmt *stmt) +{ + List *role_ids = roleSpecsToIds(stmt->roles); + ListCell *cell; + Oid newrole; + + /* Check privileges */ + foreach(cell, role_ids) + { + Oid roleid = lfirst_oid(cell); + + if (!has_privs_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to reassign objects"))); + } + + /* Must have privileges on the receiving side too */ + newrole = get_rolespec_oid(stmt->newrole, false); + + if (!has_privs_of_role(GetUserId(), newrole)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to reassign objects"))); + + /* Ok, do it */ + shdepReassignOwned(role_ids, newrole); +} + +/* + * roleSpecsToIds + * + * Given a list of RoleSpecs, generate a list of role OIDs in the same order. + * + * ROLESPEC_PUBLIC is not allowed. + */ +List * +roleSpecsToIds(List *memberNames) +{ + List *result = NIL; + ListCell *l; + + foreach(l, memberNames) + { + RoleSpec *rolespec = lfirst_node(RoleSpec, l); + Oid roleid; + + roleid = get_rolespec_oid(rolespec, false); + result = lappend_oid(result, roleid); + } + return result; +} + +/* + * AddRoleMems -- Add given members to the specified role + * + * rolename: name of role to add to (used only for error messages) + * roleid: OID of role to add to + * memberSpecs: list of RoleSpec of roles to add (used only for error messages) + * memberIds: OIDs of roles to add + * grantorId: who is granting the membership + * admin_opt: granting admin option? 
+ */ +static void +AddRoleMems(const char *rolename, Oid roleid, + List *memberSpecs, List *memberIds, + Oid grantorId, bool admin_opt) +{ + Relation pg_authmem_rel; + TupleDesc pg_authmem_dsc; + ListCell *specitem; + ListCell *iditem; + + Assert(list_length(memberSpecs) == list_length(memberIds)); + + /* Skip permission check if nothing to do */ + if (!memberIds) + return; + + /* + * Check permissions: must have createrole or admin option on the role to + * be changed. To mess with a superuser role, you gotta be superuser. + */ + if (superuser_arg(roleid)) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && + !is_admin_of_role(grantorId, roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must have admin option on role \"%s\"", + rolename))); + } + + /* + * The charter of pg_database_owner is to have exactly one, implicit, + * situation-dependent member. There's no technical need for this + * restriction. (One could lift it and take the further step of making + * pg_database_ownercheck() equivalent to has_privs_of_role(roleid, + * ROLE_PG_DATABASE_OWNER), in which case explicit, situation-independent + * members could act as the owner of any database.) + */ + if (roleid == ROLE_PG_DATABASE_OWNER) + ereport(ERROR, + errmsg("role \"%s\" cannot have explicit members", rolename)); + + /* + * The role membership grantor of record has little significance at + * present. Nonetheless, inasmuch as users might look to it for a crude + * audit trail, let only superusers impute the grant to a third party. 
+ */ + if (grantorId != GetUserId() && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to set grantor"))); + + pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock); + pg_authmem_dsc = RelationGetDescr(pg_authmem_rel); + + forboth(specitem, memberSpecs, iditem, memberIds) + { + RoleSpec *memberRole = lfirst_node(RoleSpec, specitem); + Oid memberid = lfirst_oid(iditem); + HeapTuple authmem_tuple; + HeapTuple tuple; + Datum new_record[Natts_pg_auth_members]; + bool new_record_nulls[Natts_pg_auth_members]; + bool new_record_repl[Natts_pg_auth_members]; + + /* + * pg_database_owner is never a role member. Lifting this restriction + * would require a policy decision about membership loops. One could + * prevent loops, which would include making "ALTER DATABASE x OWNER + * TO proposed_datdba" fail if is_member_of_role(pg_database_owner, + * proposed_datdba). Hence, gaining a membership could reduce what a + * role could do. Alternately, one could allow these memberships to + * complete loops. A role could then have actual WITH ADMIN OPTION on + * itself, prompting a decision about is_admin_of_role() treatment of + * the case. + * + * Lifting this restriction also has policy implications for ownership + * of shared objects (databases and tablespaces). We allow such + * ownership, but we might find cause to ban it in the future. + * Designing such a ban would be more troublesome if the design had to + * address pg_database_owner being a member of role FOO that owns a + * shared object. (The effect of such ownership is that any owner of + * another database can act as the owner of affected shared objects.) + */ + if (memberid == ROLE_PG_DATABASE_OWNER) + ereport(ERROR, + errmsg("role \"%s\" cannot be a member of any role", + get_rolespec_name(memberRole))); + + /* + * Refuse creation of membership loops, including the trivial case + * where a role is made a member of itself.
We do this by checking to + * see if the target role is already a member of the proposed member + * role. We have to ignore possible superuserness, however, else we + * could never grant membership in a superuser-privileged role. + */ + if (is_member_of_role_nosuper(roleid, memberid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_GRANT_OPERATION), + errmsg("role \"%s\" is a member of role \"%s\"", + rolename, get_rolespec_name(memberRole)))); + + /* + * Check if entry for this role/member already exists; if so, give + * warning unless we are adding admin option. + */ + authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM, + ObjectIdGetDatum(roleid), + ObjectIdGetDatum(memberid)); + if (HeapTupleIsValid(authmem_tuple) && + (!admin_opt || + ((Form_pg_auth_members) GETSTRUCT(authmem_tuple))->admin_option)) + { + ereport(NOTICE, + (errmsg("role \"%s\" is already a member of role \"%s\"", + get_rolespec_name(memberRole), rolename))); + ReleaseSysCache(authmem_tuple); + continue; + } + + /* Build a tuple to insert or update */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_auth_members_roleid - 1] = ObjectIdGetDatum(roleid); + new_record[Anum_pg_auth_members_member - 1] = ObjectIdGetDatum(memberid); + new_record[Anum_pg_auth_members_grantor - 1] = ObjectIdGetDatum(grantorId); + new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(admin_opt); + + if (HeapTupleIsValid(authmem_tuple)) + { + new_record_repl[Anum_pg_auth_members_grantor - 1] = true; + new_record_repl[Anum_pg_auth_members_admin_option - 1] = true; + tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc, + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple); + ReleaseSysCache(authmem_tuple); + } + else + { + tuple = heap_form_tuple(pg_authmem_dsc, + new_record, new_record_nulls); + 
CatalogTupleInsert(pg_authmem_rel, tuple); + } + + /* CCI after each change, in case there are duplicates in list */ + CommandCounterIncrement(); + } + + /* + * Close pg_authmem, but keep lock till commit. + */ + table_close(pg_authmem_rel, NoLock); +} + +/* + * DelRoleMems -- Remove given members from the specified role + * + * rolename: name of role to del from (used only for error messages) + * roleid: OID of role to del from + * memberSpecs: list of RoleSpec of roles to del (used only for error messages) + * memberIds: OIDs of roles to del + * admin_opt: remove admin option only? + */ +static void +DelRoleMems(const char *rolename, Oid roleid, + List *memberSpecs, List *memberIds, + bool admin_opt) +{ + Relation pg_authmem_rel; + TupleDesc pg_authmem_dsc; + ListCell *specitem; + ListCell *iditem; + + Assert(list_length(memberSpecs) == list_length(memberIds)); + + /* Skip permission check if nothing to do */ + if (!memberIds) + return; + + /* + * Check permissions: must have createrole or admin option on the role to + * be changed. To mess with a superuser role, you gotta be superuser. 
+ */ + if (superuser_arg(roleid)) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to alter superusers"))); + } + else + { + if (!have_createrole_privilege() && + !is_admin_of_role(GetUserId(), roleid)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must have admin option on role \"%s\"", + rolename))); + } + + pg_authmem_rel = table_open(AuthMemRelationId, RowExclusiveLock); + pg_authmem_dsc = RelationGetDescr(pg_authmem_rel); + + forboth(specitem, memberSpecs, iditem, memberIds) + { + RoleSpec *memberRole = lfirst(specitem); + Oid memberid = lfirst_oid(iditem); + HeapTuple authmem_tuple; + + /* + * Find entry for this role/member + */ + authmem_tuple = SearchSysCache2(AUTHMEMROLEMEM, + ObjectIdGetDatum(roleid), + ObjectIdGetDatum(memberid)); + if (!HeapTupleIsValid(authmem_tuple)) + { + ereport(WARNING, + (errmsg("role \"%s\" is not a member of role \"%s\"", + get_rolespec_name(memberRole), rolename))); + continue; + } + + if (!admin_opt) + { + /* Remove the entry altogether */ + CatalogTupleDelete(pg_authmem_rel, &authmem_tuple->t_self); + } + else + { + /* Just turn off the admin option */ + HeapTuple tuple; + Datum new_record[Natts_pg_auth_members]; + bool new_record_nulls[Natts_pg_auth_members]; + bool new_record_repl[Natts_pg_auth_members]; + + /* Build a tuple to update with */ + MemSet(new_record, 0, sizeof(new_record)); + MemSet(new_record_nulls, false, sizeof(new_record_nulls)); + MemSet(new_record_repl, false, sizeof(new_record_repl)); + + new_record[Anum_pg_auth_members_admin_option - 1] = BoolGetDatum(false); + new_record_repl[Anum_pg_auth_members_admin_option - 1] = true; + + tuple = heap_modify_tuple(authmem_tuple, pg_authmem_dsc, + new_record, + new_record_nulls, new_record_repl); + CatalogTupleUpdate(pg_authmem_rel, &tuple->t_self, tuple); + } + + ReleaseSysCache(authmem_tuple); + + /* CCI after each change, in case there are duplicates in list */ + 
CommandCounterIncrement(); + } + + /* + * Close pg_authmem, but keep lock till commit. + */ + table_close(pg_authmem_rel, NoLock); +} diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c new file mode 100644 index 0000000..75b0ca9 --- /dev/null +++ b/src/backend/commands/vacuum.c @@ -0,0 +1,2465 @@ +/*------------------------------------------------------------------------- + * + * vacuum.c + * The postgres vacuum cleaner. + * + * This file includes (a) control and dispatch code for VACUUM and ANALYZE + * commands, (b) code to compute various vacuum thresholds, and (c) index + * vacuum code. + * + * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in + * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of + * CLUSTER, handled in cluster.c. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/vacuum.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/clog.h" +#include "access/commit_ts.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/index.h" +#include "catalog/pg_database.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_namespace.h" +#include "commands/cluster.h" +#include "commands/defrem.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "pgstat.h" +#include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include 
"utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* + * GUC parameters + */ +int vacuum_freeze_min_age; +int vacuum_freeze_table_age; +int vacuum_multixact_freeze_min_age; +int vacuum_multixact_freeze_table_age; +int vacuum_failsafe_age; +int vacuum_multixact_failsafe_age; + + +/* A few variables that don't seem worth passing around as parameters */ +static MemoryContext vac_context = NULL; +static BufferAccessStrategy vac_strategy; + + +/* + * Variables for cost-based parallel vacuum. See comments atop + * compute_parallel_delay to understand how it works. + */ +pg_atomic_uint32 *VacuumSharedCostBalance = NULL; +pg_atomic_uint32 *VacuumActiveNWorkers = NULL; +int VacuumCostBalanceLocal = 0; + +/* non-export function prototypes */ +static List *expand_vacuum_rel(VacuumRelation *vrel, int options); +static List *get_all_vacuum_rels(int options); +static void vac_truncate_clog(TransactionId frozenXID, + MultiXactId minMulti, + TransactionId lastSaneFrozenXid, + MultiXactId lastSaneMinMulti); +static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params); +static double compute_parallel_delay(void); +static VacOptValue get_vacoptval_from_boolean(DefElem *def); +static bool vac_tid_reaped(ItemPointer itemptr, void *state); +static int vac_cmp_itemptr(const void *left, const void *right); + +/* + * Primary entry point for manual VACUUM and ANALYZE commands + * + * This is mainly a preparation wrapper for the real operations that will + * happen in vacuum(). 
+ */ +void +ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) +{ + VacuumParams params; + bool verbose = false; + bool skip_locked = false; + bool analyze = false; + bool freeze = false; + bool full = false; + bool disable_page_skipping = false; + bool process_toast = true; + ListCell *lc; + + /* index_cleanup and truncate values unspecified for now */ + params.index_cleanup = VACOPTVALUE_UNSPECIFIED; + params.truncate = VACOPTVALUE_UNSPECIFIED; + + /* By default parallel vacuum is enabled */ + params.nworkers = 0; + + /* Parse options list */ + foreach(lc, vacstmt->options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + /* Parse common options for VACUUM and ANALYZE */ + if (strcmp(opt->defname, "verbose") == 0) + verbose = defGetBoolean(opt); + else if (strcmp(opt->defname, "skip_locked") == 0) + skip_locked = defGetBoolean(opt); + else if (!vacstmt->is_vacuumcmd) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized ANALYZE option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + + /* Parse options available on VACUUM */ + else if (strcmp(opt->defname, "analyze") == 0) + analyze = defGetBoolean(opt); + else if (strcmp(opt->defname, "freeze") == 0) + freeze = defGetBoolean(opt); + else if (strcmp(opt->defname, "full") == 0) + full = defGetBoolean(opt); + else if (strcmp(opt->defname, "disable_page_skipping") == 0) + disable_page_skipping = defGetBoolean(opt); + else if (strcmp(opt->defname, "index_cleanup") == 0) + { + /* Interpret no string as the default, which is 'auto' */ + if (!opt->arg) + params.index_cleanup = VACOPTVALUE_AUTO; + else + { + char *sval = defGetString(opt); + + /* Try matching on 'auto' string, or fall back on boolean */ + if (pg_strcasecmp(sval, "auto") == 0) + params.index_cleanup = VACOPTVALUE_AUTO; + else + params.index_cleanup = get_vacoptval_from_boolean(opt); + } + } + else if (strcmp(opt->defname, "process_toast") == 0) + process_toast = defGetBoolean(opt); + else if 
(strcmp(opt->defname, "truncate") == 0) + params.truncate = get_vacoptval_from_boolean(opt); + else if (strcmp(opt->defname, "parallel") == 0) + { + if (opt->arg == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parallel option requires a value between 0 and %d", + MAX_PARALLEL_WORKER_LIMIT), + parser_errposition(pstate, opt->location))); + } + else + { + int nworkers; + + nworkers = defGetInt32(opt); + if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parallel workers for vacuum must be between 0 and %d", + MAX_PARALLEL_WORKER_LIMIT), + parser_errposition(pstate, opt->location))); + + /* + * Disable parallel vacuum, if user has specified parallel + * degree as zero. + */ + if (nworkers == 0) + params.nworkers = -1; + else + params.nworkers = nworkers; + } + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized VACUUM option \"%s\"", opt->defname), + parser_errposition(pstate, opt->location))); + } + + /* Set vacuum options */ + params.options = + (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) | + (verbose ? VACOPT_VERBOSE : 0) | + (skip_locked ? VACOPT_SKIP_LOCKED : 0) | + (analyze ? VACOPT_ANALYZE : 0) | + (freeze ? VACOPT_FREEZE : 0) | + (full ? VACOPT_FULL : 0) | + (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) | + (process_toast ? VACOPT_PROCESS_TOAST : 0); + + /* sanity checks on options */ + Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE)); + Assert((params.options & VACOPT_VACUUM) || + !(params.options & (VACOPT_FULL | VACOPT_FREEZE))); + + if ((params.options & VACOPT_FULL) && params.nworkers > 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("VACUUM FULL cannot be performed in parallel"))); + + /* + * Make sure VACOPT_ANALYZE is specified if any column lists are present. 
+ */ + if (!(params.options & VACOPT_ANALYZE)) + { + ListCell *lc; + + foreach(lc, vacstmt->rels) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); + + if (vrel->va_cols != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ANALYZE option must be specified when a column list is provided"))); + } + } + + /* + * All freeze ages are zero if the FREEZE option is given; otherwise pass + * them as -1 which means to use the default values. + */ + if (params.options & VACOPT_FREEZE) + { + params.freeze_min_age = 0; + params.freeze_table_age = 0; + params.multixact_freeze_min_age = 0; + params.multixact_freeze_table_age = 0; + } + else + { + params.freeze_min_age = -1; + params.freeze_table_age = -1; + params.multixact_freeze_min_age = -1; + params.multixact_freeze_table_age = -1; + } + + /* user-invoked vacuum is never "for wraparound" */ + params.is_wraparound = false; + + /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */ + params.log_min_duration = -1; + + /* Now go through the common routine */ + vacuum(vacstmt->rels, ¶ms, NULL, isTopLevel); +} + +/* + * Internal entry point for VACUUM and ANALYZE commands. + * + * relations, if not NIL, is a list of VacuumRelation to process; otherwise, + * we process all relevant tables in the database. For each VacuumRelation, + * if a valid OID is supplied, the table with that OID is what to process; + * otherwise, the VacuumRelation's RangeVar indicates what to process. + * + * params contains a set of parameters that can be used to customize the + * behavior. + * + * bstrategy is normally given as NULL, but in autovacuum it can be passed + * in to use the same buffer strategy object across multiple vacuum() calls. + * + * isTopLevel should be passed down from ProcessUtility. + * + * It is the caller's responsibility that all parameters are allocated in a + * memory context that will not disappear at transaction commit. 
+ */ +void +vacuum(List *relations, VacuumParams *params, + BufferAccessStrategy bstrategy, bool isTopLevel) +{ + static bool in_vacuum = false; + + const char *stmttype; + volatile bool in_outer_xact, + use_own_xacts; + + Assert(params != NULL); + + stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; + + /* + * We cannot run VACUUM inside a user transaction block; if we were inside + * a transaction, then our commit- and start-transaction-command calls + * would not have the intended effect! There are numerous other subtle + * dependencies on this, too. + * + * ANALYZE (without VACUUM) can run either way. + */ + if (params->options & VACOPT_VACUUM) + { + PreventInTransactionBlock(isTopLevel, stmttype); + in_outer_xact = false; + } + else + in_outer_xact = IsInTransactionBlock(isTopLevel); + + /* + * Due to static variables vac_context, anl_context and vac_strategy, + * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE + * calls a hostile index expression that itself calls ANALYZE. + */ + if (in_vacuum) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("%s cannot be executed from VACUUM or ANALYZE", + stmttype))); + + /* + * Sanity check DISABLE_PAGE_SKIPPING option. + */ + if ((params->options & VACOPT_FULL) != 0 && + (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); + + /* sanity check for PROCESS_TOAST */ + if ((params->options & VACOPT_FULL) != 0 && + (params->options & VACOPT_PROCESS_TOAST) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("PROCESS_TOAST required with VACUUM FULL"))); + + /* + * Create special memory context for cross-transaction storage. + * + * Since it is a child of PortalContext, it will go away eventually even + * if we suffer an error; there's no need for special abort cleanup logic. 
+ */ + vac_context = AllocSetContextCreate(PortalContext, + "Vacuum", + ALLOCSET_DEFAULT_SIZES); + + /* + * If caller didn't give us a buffer strategy object, make one in the + * cross-transaction memory context. + */ + if (bstrategy == NULL) + { + MemoryContext old_context = MemoryContextSwitchTo(vac_context); + + bstrategy = GetAccessStrategy(BAS_VACUUM); + MemoryContextSwitchTo(old_context); + } + vac_strategy = bstrategy; + + /* + * Build list of relation(s) to process, putting any new data in + * vac_context for safekeeping. + */ + if (relations != NIL) + { + List *newrels = NIL; + ListCell *lc; + + foreach(lc, relations) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, lc); + List *sublist; + MemoryContext old_context; + + sublist = expand_vacuum_rel(vrel, params->options); + old_context = MemoryContextSwitchTo(vac_context); + newrels = list_concat(newrels, sublist); + MemoryContextSwitchTo(old_context); + } + relations = newrels; + } + else + relations = get_all_vacuum_rels(params->options); + + /* + * Decide whether we need to start/commit our own transactions. + * + * For VACUUM (with or without ANALYZE): always do so, so that we can + * release locks as soon as possible. (We could possibly use the outer + * transaction for a one-table VACUUM, but handling TOAST tables would be + * problematic.) + * + * For ANALYZE (no VACUUM): if inside a transaction block, we cannot + * start/commit our own transactions. Also, there's no need to do so if + * only processing one relation. For multiple relations when not within a + * transaction block, and also in an autovacuum worker, use own + * transactions so we can release locks sooner. 
+ */ + if (params->options & VACOPT_VACUUM) + use_own_xacts = true; + else + { + Assert(params->options & VACOPT_ANALYZE); + if (IsAutoVacuumWorkerProcess()) + use_own_xacts = true; + else if (in_outer_xact) + use_own_xacts = false; + else if (list_length(relations) > 1) + use_own_xacts = true; + else + use_own_xacts = false; + } + + /* + * vacuum_rel expects to be entered with no transaction active; it will + * start and commit its own transaction. But we are called by an SQL + * command, and so we are executing inside a transaction already. We + * commit the transaction started in PostgresMain() here, and start + * another one before exiting to match the commit waiting for us back in + * PostgresMain(). + */ + if (use_own_xacts) + { + Assert(!in_outer_xact); + + /* ActiveSnapshot is not set by autovacuum */ + if (ActiveSnapshotSet()) + PopActiveSnapshot(); + + /* matches the StartTransaction in PostgresMain() */ + CommitTransactionCommand(); + } + + /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */ + PG_TRY(); + { + ListCell *cur; + + in_vacuum = true; + VacuumCostActive = (VacuumCostDelay > 0); + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + VacuumCostBalanceLocal = 0; + VacuumSharedCostBalance = NULL; + VacuumActiveNWorkers = NULL; + + /* + * Loop to process each selected relation. + */ + foreach(cur, relations) + { + VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); + + if (params->options & VACOPT_VACUUM) + { + if (!vacuum_rel(vrel->oid, vrel->relation, params)) + continue; + } + + if (params->options & VACOPT_ANALYZE) + { + /* + * If using separate xacts, start one for analyze. Otherwise, + * we can use the outer transaction. 
+ */ + if (use_own_xacts) + { + StartTransactionCommand(); + /* functions in indexes may want a snapshot set */ + PushActiveSnapshot(GetTransactionSnapshot()); + } + + analyze_rel(vrel->oid, vrel->relation, params, + vrel->va_cols, in_outer_xact, vac_strategy); + + if (use_own_xacts) + { + PopActiveSnapshot(); + CommitTransactionCommand(); + } + else + { + /* + * If we're not using separate xacts, better separate the + * ANALYZE actions with CCIs. This avoids trouble if user + * says "ANALYZE t, t". + */ + CommandCounterIncrement(); + } + } + } + } + PG_FINALLY(); + { + in_vacuum = false; + VacuumCostActive = false; + } + PG_END_TRY(); + + /* + * Finish up processing. + */ + if (use_own_xacts) + { + /* here, we are not in a transaction */ + + /* + * This matches the CommitTransaction waiting for us in + * PostgresMain(). + */ + StartTransactionCommand(); + } + + if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) + { + /* + * Update pg_database.datfrozenxid, and truncate pg_xact if possible. + * (autovacuum.c does this for itself.) + */ + vac_update_datfrozenxid(); + } + + /* + * Clean up working storage --- note we must do this after + * StartTransactionCommand, else we might be trying to delete the active + * context! + */ + MemoryContextDelete(vac_context); + vac_context = NULL; +} + +/* + * Check if a given relation can be safely vacuumed or analyzed. If the + * user is not the relation owner, issue a WARNING log message and return + * false to let the caller decide what to do with this relation. This + * routine is used to decide if a relation can be processed for VACUUM or + * ANALYZE. + */ +bool +vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, bits32 options) +{ + char *relname; + + Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0); + + /* + * Check permissions. 
+ * + * We allow the user to vacuum or analyze a table if he is superuser, the + * table owner, or the database owner (but in the latter case, only if + * it's not a shared relation). pg_class_ownercheck includes the + * superuser case. + * + * Note we choose to treat permissions failure as a WARNING and keep + * trying to vacuum or analyze the rest of the DB --- is this appropriate? + */ + if (pg_class_ownercheck(relid, GetUserId()) || + (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared)) + return true; + + relname = NameStr(reltuple->relname); + + if ((options & VACOPT_VACUUM) != 0) + { + if (reltuple->relisshared) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser can vacuum it", + relname))); + else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it", + relname))); + else + ereport(WARNING, + (errmsg("skipping \"%s\" --- only table or database owner can vacuum it", + relname))); + + /* + * For VACUUM ANALYZE, both logs could show up, but just generate + * information for VACUUM as that would be the first one to be + * processed. + */ + return false; + } + + if ((options & VACOPT_ANALYZE) != 0) + { + if (reltuple->relisshared) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser can analyze it", + relname))); + else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE) + ereport(WARNING, + (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it", + relname))); + else + ereport(WARNING, + (errmsg("skipping \"%s\" --- only table or database owner can analyze it", + relname))); + } + + return false; +} + + +/* + * vacuum_open_relation + * + * This routine is used for attempting to open and lock a relation which + * is going to be vacuumed or analyzed. If the relation cannot be opened + * or locked, a log is emitted if possible. 
+ */
+Relation
+vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
+                     bool verbose, LOCKMODE lmode)
+{
+    Relation    rel;
+    bool        rel_lock = true;
+    int         elevel;
+
+    Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
+
+    /*
+     * Open the relation and get the appropriate lock on it.
+     *
+     * There's a race condition here: the relation may have gone away since
+     * the last time we saw it.  If so, we don't need to vacuum or analyze it.
+     *
+     * If we've been asked not to wait for the relation lock, acquire it first
+     * in non-blocking mode, before calling try_relation_open().
+     */
+    if (!(options & VACOPT_SKIP_LOCKED))
+        rel = try_relation_open(relid, lmode);
+    else if (ConditionalLockRelationOid(relid, lmode))
+        /* lock was just acquired above, so open without locking again */
+        rel = try_relation_open(relid, NoLock);
+    else
+    {
+        /* could not get the lock without blocking */
+        rel = NULL;
+        rel_lock = false;
+    }
+
+    /* if relation is opened, leave */
+    if (rel)
+        return rel;
+
+    /*
+     * Relation could not be opened, hence generate if possible a log
+     * informing on the situation.
+     *
+     * If the RangeVar is not defined, we do not have enough information to
+     * provide a meaningful log statement.  Chances are that the caller has
+     * intentionally not provided this information so that this logging is
+     * skipped, anyway.
+     */
+    if (relation == NULL)
+        return NULL;
+
+    /*
+     * Determine the log level.
+     *
+     * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
+     * statements in the permission checks; otherwise, only log if the caller
+     * so requested.
+     */
+    if (!IsAutoVacuumWorkerProcess())
+        elevel = WARNING;
+    else if (verbose)
+        elevel = LOG;
+    else
+        return NULL;
+
+    if ((options & VACOPT_VACUUM) != 0)
+    {
+        if (!rel_lock)
+            ereport(elevel,
+                    (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                     errmsg("skipping vacuum of \"%s\" --- lock not available",
+                            relation->relname)));
+        else
+            ereport(elevel,
+                    (errcode(ERRCODE_UNDEFINED_TABLE),
+                     errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
+                            relation->relname)));
+
+        /*
+         * For VACUUM ANALYZE, both logs could show up, but just generate
+         * information for VACUUM as that would be the first one to be
+         * processed.
+         */
+        return NULL;
+    }
+
+    if ((options & VACOPT_ANALYZE) != 0)
+    {
+        if (!rel_lock)
+            ereport(elevel,
+                    (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                     errmsg("skipping analyze of \"%s\" --- lock not available",
+                            relation->relname)));
+        else
+            ereport(elevel,
+                    (errcode(ERRCODE_UNDEFINED_TABLE),
+                     errmsg("skipping analyze of \"%s\" --- relation no longer exists",
+                            relation->relname)));
+    }
+
+    return NULL;
+}
+
+
+/*
+ * Given a VacuumRelation, fill in the table OID if it wasn't specified,
+ * and optionally add VacuumRelations for partitions of the table.
+ *
+ * If a VacuumRelation does not have an OID supplied and is a partitioned
+ * table, an extra entry will be added to the output for each partition.
+ * Presently, only autovacuum supplies OIDs when calling vacuum(), and
+ * it does not want us to expand partitioned tables.
+ *
+ * We take care not to modify the input data structure, but instead build
+ * new VacuumRelation(s) to return.  (But note that they will reference
+ * unmodified parts of the input, eg column lists.)  New data structures
+ * are made in vac_context.
+ */
+static List *
+expand_vacuum_rel(VacuumRelation *vrel, int options)
+{
+    List       *vacrels = NIL;
+    MemoryContext oldcontext;
+
+    /* If caller supplied OID, there's nothing we need do here.
 */
+    if (OidIsValid(vrel->oid))
+    {
+        /* List cells live in vac_context so they survive xact boundaries. */
+        oldcontext = MemoryContextSwitchTo(vac_context);
+        vacrels = lappend(vacrels, vrel);
+        MemoryContextSwitchTo(oldcontext);
+    }
+    else
+    {
+        /* Process a specific relation, and possibly partitions thereof */
+        Oid         relid;
+        HeapTuple   tuple;
+        Form_pg_class classForm;
+        bool        include_parts;
+        int         rvr_opts;
+
+        /*
+         * Since autovacuum workers supply OIDs when calling vacuum(), no
+         * autovacuum worker should reach this code.
+         */
+        Assert(!IsAutoVacuumWorkerProcess());
+
+        /*
+         * We transiently take AccessShareLock to protect the syscache lookup
+         * below, as well as find_all_inheritors's expectation that the caller
+         * holds some lock on the starting relation.
+         */
+        rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
+        relid = RangeVarGetRelidExtended(vrel->relation,
+                                         AccessShareLock,
+                                         rvr_opts,
+                                         NULL, NULL);
+
+        /*
+         * If the lock is unavailable, emit the same log statement that
+         * vacuum_rel() and analyze_rel() would.
+         */
+        if (!OidIsValid(relid))
+        {
+            if (options & VACOPT_VACUUM)
+                ereport(WARNING,
+                        (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                         errmsg("skipping vacuum of \"%s\" --- lock not available",
+                                vrel->relation->relname)));
+            else
+                ereport(WARNING,
+                        (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                         errmsg("skipping analyze of \"%s\" --- lock not available",
+                                vrel->relation->relname)));
+            return vacrels;
+        }
+
+        /*
+         * To check whether the relation is a partitioned table and its
+         * ownership, fetch its syscache entry.
+         */
+        tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+        if (!HeapTupleIsValid(tuple))
+            elog(ERROR, "cache lookup failed for relation %u", relid);
+        classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+        /*
+         * Make a returnable VacuumRelation for this rel if user is a proper
+         * owner.
+         */
+        if (vacuum_is_relation_owner(relid, classForm, options))
+        {
+            oldcontext = MemoryContextSwitchTo(vac_context);
+            vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
+                                                          relid,
+                                                          vrel->va_cols));
+            MemoryContextSwitchTo(oldcontext);
+        }
+
+
+        include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
+        ReleaseSysCache(tuple);
+
+        /*
+         * If it is, make relation list entries for its partitions.  Note that
+         * the list returned by find_all_inheritors() includes the passed-in
+         * OID, so we have to skip that.  There's no point in taking locks on
+         * the individual partitions yet, and doing so would just add
+         * unnecessary deadlock risk.  For this last reason we do not check
+         * yet the ownership of the partitions, which get added to the list to
+         * process.  Ownership will be checked later on anyway.
+         */
+        if (include_parts)
+        {
+            List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
+            ListCell   *part_lc;
+
+            foreach(part_lc, part_oids)
+            {
+                Oid         part_oid = lfirst_oid(part_lc);
+
+                if (part_oid == relid)
+                    continue;   /* ignore original table */
+
+                /*
+                 * We omit a RangeVar since it wouldn't be appropriate to
+                 * complain about failure to open one of these relations
+                 * later.
+                 */
+                oldcontext = MemoryContextSwitchTo(vac_context);
+                vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+                                                              part_oid,
+                                                              vrel->va_cols));
+                MemoryContextSwitchTo(oldcontext);
+            }
+        }
+
+        /*
+         * Release lock again.  This means that by the time we actually try to
+         * process the table, it might be gone or renamed.  In the former case
+         * we'll silently ignore it; in the latter case we'll process it
+         * anyway, but we must beware that the RangeVar doesn't necessarily
+         * identify it anymore.  This isn't ideal, perhaps, but there's little
+         * practical alternative, since we're typically going to commit this
+         * transaction and begin a new one between now and then.  Moreover,
+         * holding locks on multiple relations would create significant risk
+         * of deadlock.
+         */
+        UnlockRelationOid(relid, AccessShareLock);
+    }
+
+    return vacrels;
+}
+
+/*
+ * Construct a list of VacuumRelations for all vacuumable rels in
+ * the current database.  The list is built in vac_context.
+ */
+static List *
+get_all_vacuum_rels(int options)
+{
+    List       *vacrels = NIL;
+    Relation    pgclass;
+    TableScanDesc scan;
+    HeapTuple   tuple;
+
+    pgclass = table_open(RelationRelationId, AccessShareLock);
+
+    scan = table_beginscan_catalog(pgclass, 0, NULL);
+
+    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+    {
+        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+        MemoryContext oldcontext;
+        Oid         relid = classForm->oid;
+
+        /* check permissions of relation */
+        if (!vacuum_is_relation_owner(relid, classForm, options))
+            continue;
+
+        /*
+         * We include partitioned tables here; depending on which operation is
+         * to be performed, caller will decide whether to process or ignore
+         * them.
+         */
+        if (classForm->relkind != RELKIND_RELATION &&
+            classForm->relkind != RELKIND_MATVIEW &&
+            classForm->relkind != RELKIND_PARTITIONED_TABLE)
+            continue;
+
+        /*
+         * Build VacuumRelation(s) specifying the table OIDs to be processed.
+         * We omit a RangeVar since it wouldn't be appropriate to complain
+         * about failure to open one of these relations later.
+         */
+        oldcontext = MemoryContextSwitchTo(vac_context);
+        vacrels = lappend(vacrels, makeVacuumRelation(NULL,
+                                                      relid,
+                                                      NIL));
+        MemoryContextSwitchTo(oldcontext);
+    }
+
+    table_endscan(scan);
+    table_close(pgclass, AccessShareLock);
+
+    return vacrels;
+}
+
+/*
+ * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
+ *
+ * Input parameters are the target relation, applicable freeze age settings.
+ *
+ * The output parameters are:
+ * - oldestXmin is the Xid below which tuples deleted by any xact (that
+ *   committed) should be considered DEAD, not just RECENTLY_DEAD.
+ * - oldestMxact is the Mxid below which MultiXacts are definitely not
+ *   seen as visible by any running transaction.
+ * - freezeLimit is the Xid below which all Xids are definitely replaced by
+ *   FrozenTransactionId during aggressive vacuums.
+ * - multiXactCutoff is the value below which all MultiXactIds are definitely
+ *   removed from Xmax during aggressive vacuums.
+ *
+ * Return value indicates if vacuumlazy.c caller should make its VACUUM
+ * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
+ * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a
+ * minimum).
+ *
+ * oldestXmin and oldestMxact are the most recent values that can ever be
+ * passed to vac_update_relstats() as frozenxid and minmulti arguments by our
+ * vacuumlazy.c caller later on.  These values should be passed when it turns
+ * out that VACUUM will leave no unfrozen XIDs/XMIDs behind in the table.
+ */
+bool
+vacuum_set_xid_limits(Relation rel,
+                      int freeze_min_age,
+                      int freeze_table_age,
+                      int multixact_freeze_min_age,
+                      int multixact_freeze_table_age,
+                      TransactionId *oldestXmin,
+                      MultiXactId *oldestMxact,
+                      TransactionId *freezeLimit,
+                      MultiXactId *multiXactCutoff)
+{
+    int         freezemin;
+    int         mxid_freezemin;
+    int         effective_multixact_freeze_max_age;
+    TransactionId limit;
+    TransactionId safeLimit;
+    MultiXactId mxactLimit;
+    MultiXactId safeMxactLimit;
+    int         freezetable;
+
+    /*
+     * We can always ignore processes running lazy vacuum.  This is because we
+     * use these values only for deciding which tuples we must keep in the
+     * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
+     * XID assigned), it's safe to ignore it.  In theory it could be
+     * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
+     * that only one vacuum process can be working on a particular table at
+     * any time, and that each vacuum is always an independent transaction.
+     */
+    *oldestXmin = GetOldestNonRemovableTransactionId(rel);
+
+    if (OldSnapshotThresholdActive())
+    {
+        TransactionId limit_xmin;
+        TimestampTz limit_ts;
+
+        if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
+                                                &limit_xmin, &limit_ts))
+        {
+            /*
+             * TODO: We should only set the threshold if we are pruning on the
+             * basis of the increased limits.  Not as crucial here as it is
+             * for opportunistic pruning (which often happens at a much higher
+             * frequency), but would still be a significant improvement.
+             */
+            SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
+            *oldestXmin = limit_xmin;
+        }
+    }
+
+    Assert(TransactionIdIsNormal(*oldestXmin));
+
+    /*
+     * Determine the minimum freeze age to use: as specified by the caller, or
+     * vacuum_freeze_min_age, but in any case not more than half
+     * autovacuum_freeze_max_age, so that autovacuums to prevent XID
+     * wraparound won't occur too frequently.
+     */
+    freezemin = freeze_min_age;
+    if (freezemin < 0)
+        freezemin = vacuum_freeze_min_age;
+    freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
+    Assert(freezemin >= 0);
+
+    /*
+     * Compute the cutoff XID, being careful not to generate a "permanent" XID
+     */
+    limit = *oldestXmin - freezemin;
+    if (!TransactionIdIsNormal(limit))
+        limit = FirstNormalTransactionId;
+
+    /*
+     * If oldestXmin is very far back (in practice, more than
+     * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
+     * freeze age of zero.
+     */
+    safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age;
+    if (!TransactionIdIsNormal(safeLimit))
+        safeLimit = FirstNormalTransactionId;
+
+    if (TransactionIdPrecedes(limit, safeLimit))
+    {
+        ereport(WARNING,
+                (errmsg("oldest xmin is far in the past"),
+                 errhint("Close open transactions soon to avoid wraparound problems.\n"
+                         "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+        limit = *oldestXmin;
+    }
+
+    /* Final XID freeze cutoff (guaranteed to be a normal XID) */
+    *freezeLimit = limit;
+
+    /*
+     * Compute the multixact age for which freezing is urgent.  This is
+     * normally autovacuum_multixact_freeze_max_age, but may be less if we are
+     * short of multixact member space.
+     */
+    effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+
+    /*
+     * Determine the minimum multixact freeze age to use: as specified by
+     * caller, or vacuum_multixact_freeze_min_age, but in any case not more
+     * than half effective_multixact_freeze_max_age, so that autovacuums to
+     * prevent MultiXact wraparound won't occur too frequently.
+     */
+    mxid_freezemin = multixact_freeze_min_age;
+    if (mxid_freezemin < 0)
+        mxid_freezemin = vacuum_multixact_freeze_min_age;
+    mxid_freezemin = Min(mxid_freezemin,
+                         effective_multixact_freeze_max_age / 2);
+    Assert(mxid_freezemin >= 0);
+
+    /* Remember for caller */
+    *oldestMxact = GetOldestMultiXactId();
+
+    /* compute the cutoff multi, being careful to generate a valid value */
+    mxactLimit = *oldestMxact - mxid_freezemin;
+    if (mxactLimit < FirstMultiXactId)
+        mxactLimit = FirstMultiXactId;
+
+    safeMxactLimit =
+        ReadNextMultiXactId() - effective_multixact_freeze_max_age;
+    if (safeMxactLimit < FirstMultiXactId)
+        safeMxactLimit = FirstMultiXactId;
+
+    if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
+    {
+        ereport(WARNING,
+                (errmsg("oldest multixact is far in the past"),
+                 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
+        /* Use the safe limit, unless an older mxact is still running */
+        if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit))
+            mxactLimit = *oldestMxact;
+        else
+            mxactLimit = safeMxactLimit;
+    }
+
+    /* Final MultiXactId freeze cutoff */
+    *multiXactCutoff = mxactLimit;
+
+    /*
+     * Done setting output parameters; just need to figure out if caller needs
+     * to do an aggressive VACUUM or not.
+     *
+     * Determine the table freeze age to use: as specified by the caller, or
+     * vacuum_freeze_table_age, but in any case not more than
+     * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
+     * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
+     * before anti-wraparound autovacuum is launched.
+     */
+    freezetable = freeze_table_age;
+    if (freezetable < 0)
+        freezetable = vacuum_freeze_table_age;
+    freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
+    Assert(freezetable >= 0);
+
+    /*
+     * Compute XID limit causing an aggressive vacuum, being careful not to
+     * generate a "permanent" XID
+     */
+    limit = ReadNextTransactionId() - freezetable;
+    if (!TransactionIdIsNormal(limit))
+        limit = FirstNormalTransactionId;
+    if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
+                                      limit))
+        return true;
+
+    /*
+     * Similar to the above, determine the table freeze age to use for
+     * multixacts: as specified by the caller, or
+     * vacuum_multixact_freeze_table_age, but in any case not more than
+     * autovacuum_multixact_freeze_table_age * 0.95, so that if you have e.g.
+     * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
+     * multixacts before anti-wraparound autovacuum is launched.
+     */
+    freezetable = multixact_freeze_table_age;
+    if (freezetable < 0)
+        freezetable = vacuum_multixact_freeze_table_age;
+    freezetable = Min(freezetable,
+                      effective_multixact_freeze_max_age * 0.95);
+    Assert(freezetable >= 0);
+
+    /*
+     * Compute MultiXact limit causing an aggressive vacuum, being careful to
+     * generate a valid MultiXact value
+     */
+    mxactLimit = ReadNextMultiXactId() - freezetable;
+    if (mxactLimit < FirstMultiXactId)
+        mxactLimit = FirstMultiXactId;
+    if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
+                                    mxactLimit))
+        return true;
+
+    return false;
+}
+
+/*
+ * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
+ * mechanism to determine if its table's relfrozenxid and relminmxid are now
+ * dangerously far in the past.
+ *
+ * Input parameters are the target relation's relfrozenxid and relminmxid.
+ *
+ * When we return true, VACUUM caller triggers the failsafe.
+ */
+bool
+vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid)
+{
+    TransactionId xid_skip_limit;
+    MultiXactId multi_skip_limit;
+    int         skip_index_vacuum;
+
+    Assert(TransactionIdIsNormal(relfrozenxid));
+    Assert(MultiXactIdIsValid(relminmxid));
+
+    /*
+     * Determine the index skipping age to use.  In any case no less than
+     * autovacuum_freeze_max_age * 1.05.
+     */
+    skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
+
+    /* Convert the age into a cutoff XID, avoiding the "permanent" XIDs */
+    xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
+    if (!TransactionIdIsNormal(xid_skip_limit))
+        xid_skip_limit = FirstNormalTransactionId;
+
+    if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
+    {
+        /* The table's relfrozenxid is too old */
+        return true;
+    }
+
+    /*
+     * Similar to above, determine the index skipping age to use for
+     * multixact.  In any case no less than autovacuum_multixact_freeze_max_age *
+     * 1.05.
+     */
+    skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
+                            autovacuum_multixact_freeze_max_age * 1.05);
+
+    multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
+    if (multi_skip_limit < FirstMultiXactId)
+        multi_skip_limit = FirstMultiXactId;
+
+    if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
+    {
+        /* The table's relminmxid is too old */
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ * If we scanned the whole relation then we should just use the count of
+ * live tuples seen; but if we did not, we should not blindly extrapolate
+ * from that number, since VACUUM may have scanned a quite nonrandom
+ * subset of the table.  When we have only partial information, we take
+ * the old value of pg_class.reltuples/pg_class.relpages as a measurement
+ * of the tuple density in the unscanned pages.
+ *
+ * Note: scanned_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ */
+double
+vac_estimate_reltuples(Relation relation,
+                       BlockNumber total_pages,
+                       BlockNumber scanned_pages,
+                       double scanned_tuples)
+{
+    /* previous values recorded in this rel's pg_class entry */
+    BlockNumber old_rel_pages = relation->rd_rel->relpages;
+    double      old_rel_tuples = relation->rd_rel->reltuples;
+    double      old_density;
+    double      unscanned_pages;
+    double      total_tuples;
+
+    /* If we did scan the whole table, just use the count as-is */
+    if (scanned_pages >= total_pages)
+        return scanned_tuples;
+
+    /*
+     * When successive VACUUM commands scan the same few pages again and
+     * again, without anything from the table really changing, there is a risk
+     * that our beliefs about tuple density will gradually become distorted.
+     * This might be caused by vacuumlazy.c implementation details, such as
+     * its tendency to always scan the last heap page.  Handle that here.
+     *
+     * If the relation is _exactly_ the same size according to the existing
+     * pg_class entry, and only a few of its pages (less than 2%) were
+     * scanned, keep the existing value of reltuples.  Also keep the existing
+     * value when only a subset of rel's pages <= a single page were scanned.
+     *
+     * (Note: we might be returning -1 here.)
+     */
+    if (old_rel_pages == total_pages &&
+        scanned_pages < (double) total_pages * 0.02)
+        return old_rel_tuples;
+    if (scanned_pages <= 1)
+        return old_rel_tuples;
+
+    /*
+     * If old density is unknown, we can't do much except scale up
+     * scanned_tuples to match total_pages.
+     */
+    if (old_rel_tuples < 0 || old_rel_pages == 0)
+        return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+    /*
+     * Okay, we've covered the corner cases.  The normal calculation is to
+     * convert the old measurement to a density (tuples per page), then
+     * estimate the number of tuples in the unscanned pages using that figure,
+     * and finally add on the number of tuples in the scanned pages.
+     */
+    old_density = old_rel_tuples / old_rel_pages;
+    unscanned_pages = (double) total_pages - (double) scanned_pages;
+    total_tuples = old_density * unscanned_pages + scanned_tuples;
+    return floor(total_tuples + 0.5);
+}
+
+
+/*
+ * vac_update_relstats() -- update statistics for one relation
+ *
+ * Update the whole-relation statistics that are kept in its pg_class
+ * row.  There are additional stats that will be updated if we are
+ * doing ANALYZE, but we always update these stats.  This routine works
+ * for both index and heap relation entries in pg_class.
+ *
+ * We violate transaction semantics here by overwriting the rel's
+ * existing pg_class tuple with the new values.  This is reasonably
+ * safe as long as we're sure that the new values are correct whether or
+ * not this transaction commits.  The reason for doing this is that if
+ * we updated these tuples in the usual way, vacuuming pg_class itself
+ * wouldn't work very well --- by the time we got done with a vacuum
+ * cycle, most of the tuples in pg_class would've been obsoleted.  Of
+ * course, this only works for fixed-size not-null columns, but these are.
+ *
+ * Another reason for doing it this way is that when we are in a lazy
+ * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
+ * Somebody vacuuming pg_class might think they could delete a tuple
+ * marked with xmin = our xid.
+ *
+ * In addition to fundamentally nontransactional statistics such as
+ * relpages and relallvisible, we try to maintain certain lazily-updated
+ * DDL flags such as relhasindex, by clearing them if no longer correct.
+ * It's safe to do this in VACUUM, which can't run in parallel with
+ * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
+ * However, it's *not* safe to do it in an ANALYZE that's within an
+ * outer transaction, because for example the current transaction might
+ * have dropped the last index; then we'd think relhasindex should be
+ * cleared, but if the transaction later rolls back this would be wrong.
+ * So we refrain from updating the DDL flags if we're inside an outer
+ * transaction.  This is OK since postponing the flag maintenance is
+ * always allowable.
+ *
+ * Note: num_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ *
+ * This routine is shared by VACUUM and ANALYZE.
+ */
+void
+vac_update_relstats(Relation relation,
+                    BlockNumber num_pages, double num_tuples,
+                    BlockNumber num_all_visible_pages,
+                    bool hasindex, TransactionId frozenxid,
+                    MultiXactId minmulti,
+                    bool *frozenxid_updated, bool *minmulti_updated,
+                    bool in_outer_xact)
+{
+    Oid         relid = RelationGetRelid(relation);
+    Relation    rd;
+    HeapTuple   ctup;
+    Form_pg_class pgcform;
+    bool        dirty,
+                futurexid,
+                futuremxid;
+    TransactionId oldfrozenxid;
+    MultiXactId oldminmulti;
+
+    rd = table_open(RelationRelationId, RowExclusiveLock);
+
+    /* Fetch a copy of the tuple to scribble on */
+    ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+    if (!HeapTupleIsValid(ctup))
+        elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
+             relid);
+    pgcform = (Form_pg_class) GETSTRUCT(ctup);
+
+    /* Apply statistical updates, if any, to copied tuple */
+
+    dirty = false;
+    if (pgcform->relpages != (int32) num_pages)
+    {
+        pgcform->relpages = (int32) num_pages;
+        dirty = true;
+    }
+    if (pgcform->reltuples != (float4) num_tuples)
+    {
+        pgcform->reltuples = (float4) num_tuples;
+        dirty = true;
+    }
+    if (pgcform->relallvisible != (int32) num_all_visible_pages)
+    {
+        pgcform->relallvisible = (int32) num_all_visible_pages;
+        dirty = true;
+    }
+
+    /* Apply DDL updates, but not inside an outer transaction (see above) */
+
+    if (!in_outer_xact)
+    {
+        /*
+         * If we didn't find any indexes, reset relhasindex.
+         */
+        if (pgcform->relhasindex && !hasindex)
+        {
+            pgcform->relhasindex = false;
+            dirty = true;
+        }
+
+        /* We also clear relhasrules and relhastriggers if needed */
+        if (pgcform->relhasrules && relation->rd_rules == NULL)
+        {
+            pgcform->relhasrules = false;
+            dirty = true;
+        }
+        if (pgcform->relhastriggers && relation->trigdesc == NULL)
+        {
+            pgcform->relhastriggers = false;
+            dirty = true;
+        }
+    }
+
+    /*
+     * Update relfrozenxid, unless caller passed InvalidTransactionId
+     * indicating it has no new data.
+     *
+     * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
+     * stored relfrozenxid is "in the future" then it seems best to assume
+     * it's corrupt, and overwrite with the oldest remaining XID in the table.
+     * This should match vac_update_datfrozenxid() concerning what we consider
+     * to be "in the future".
+     */
+    oldfrozenxid = pgcform->relfrozenxid;
+    futurexid = false;
+    if (frozenxid_updated)
+        *frozenxid_updated = false;
+    if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
+    {
+        bool        update = false;
+
+        if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
+            update = true;
+        else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
+            futurexid = update = true;
+
+        if (update)
+        {
+            pgcform->relfrozenxid = frozenxid;
+            dirty = true;
+            if (frozenxid_updated)
+                *frozenxid_updated = true;
+        }
+    }
+
+    /* Similarly for relminmxid */
+    oldminmulti = pgcform->relminmxid;
+    futuremxid = false;
+    if (minmulti_updated)
+        *minmulti_updated = false;
+    if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
+    {
+        bool        update = false;
+
+        if (MultiXactIdPrecedes(oldminmulti, minmulti))
+            update = true;
+        else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
+            futuremxid = update = true;
+
+        if (update)
+        {
+            pgcform->relminmxid = minmulti;
+            dirty = true;
+            if (minmulti_updated)
+                *minmulti_updated = true;
+        }
+    }
+
+    /* If anything changed, write out the tuple. */
+    if (dirty)
+        /* non-transactional in-place overwrite; see header comment for why */
+        heap_inplace_update(rd, ctup);
+
+    table_close(rd, RowExclusiveLock);
+
+    if (futurexid)
+        ereport(WARNING,
+                (errcode(ERRCODE_DATA_CORRUPTED),
+                 errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
+                                 oldfrozenxid, frozenxid,
+                                 RelationGetRelationName(relation))));
+    if (futuremxid)
+        ereport(WARNING,
+                (errcode(ERRCODE_DATA_CORRUPTED),
+                 errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
+                                 oldminmulti, minmulti,
+                                 RelationGetRelationName(relation))));
+}
+
+
+/*
+ * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
+ *
+ * Update pg_database's datfrozenxid entry for our database to be the
+ * minimum of the pg_class.relfrozenxid values.
+ *
+ * Similarly, update our datminmxid to be the minimum of the
+ * pg_class.relminmxid values.
+ *
+ * If we are able to advance either pg_database value, also try to
+ * truncate pg_xact and pg_multixact.
+ *
+ * We violate transaction semantics here by overwriting the database's
+ * existing pg_database tuple with the new values.  This is reasonably
+ * safe since the new values are correct whether or not this transaction
+ * commits.  As with vac_update_relstats, this avoids leaving dead tuples
+ * behind after a VACUUM.
+ */
+void
+vac_update_datfrozenxid(void)
+{
+    HeapTuple   tuple;
+    Form_pg_database dbform;
+    Relation    relation;
+    SysScanDesc scan;
+    HeapTuple   classTup;
+    TransactionId newFrozenXid;
+    MultiXactId newMinMulti;
+    TransactionId lastSaneFrozenXid;
+    MultiXactId lastSaneMinMulti;
+    bool        bogus = false;
+    bool        dirty = false;
+    ScanKeyData key[1];
+
+    /*
+     * Restrict this task to one backend per database.  This avoids race
+     * conditions that would move datfrozenxid or datminmxid backward.  It
+     * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+     * datfrozenxid passed to an earlier vac_truncate_clog() call.
+ */
+    LockDatabaseFrozenIds(ExclusiveLock);
+
+    /*
+     * Initialize the "min" calculation with
+     * GetOldestNonRemovableTransactionId(), which is a reasonable
+     * approximation to the minimum relfrozenxid for not-yet-committed
+     * pg_class entries for new tables; see AddNewRelationTuple().  So we
+     * cannot produce a wrong minimum by starting with this.
+     */
+    newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
+
+    /*
+     * Similarly, initialize the MultiXact "min" with the value that would be
+     * used on pg_class for new tables.  See AddNewRelationTuple().
+     */
+    newMinMulti = GetOldestMultiXactId();
+
+    /*
+     * Identify the latest relfrozenxid and relminmxid values that we could
+     * validly see during the scan.  These are conservative values, but it's
+     * not really worth trying to be more exact.
+     */
+    lastSaneFrozenXid = ReadNextTransactionId();
+    lastSaneMinMulti = ReadNextMultiXactId();
+
+    /*
+     * We must seqscan pg_class to find the minimum Xid, because there is no
+     * index that can help us here.
+     */
+    relation = table_open(RelationRelationId, AccessShareLock);
+
+    scan = systable_beginscan(relation, InvalidOid, false,
+                              NULL, 0, NULL);
+
+    while ((classTup = systable_getnext(scan)) != NULL)
+    {
+        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+        /*
+         * Only consider relations able to hold unfrozen XIDs (anything else
+         * should have InvalidTransactionId in relfrozenxid anyway).
+         */
+        if (classForm->relkind != RELKIND_RELATION &&
+            classForm->relkind != RELKIND_MATVIEW &&
+            classForm->relkind != RELKIND_TOASTVALUE)
+        {
+            Assert(!TransactionIdIsValid(classForm->relfrozenxid));
+            Assert(!MultiXactIdIsValid(classForm->relminmxid));
+            continue;
+        }
+
+        /*
+         * Some table AMs might not need per-relation xid / multixid horizons.
+         * It therefore seems reasonable to allow relfrozenxid and relminmxid
+         * to not be set (i.e. set to their respective Invalid*Id)
+         * independently.  Thus validate and compute horizon for each only if
+         * set.
+         *
+         * If things are working properly, no relation should have a
+         * relfrozenxid or relminmxid that is "in the future".  However, such
+         * cases have been known to arise due to bugs in pg_upgrade.  If we
+         * see any entries that are "in the future", chicken out and don't do
+         * anything.  This ensures we won't truncate clog & multixact SLRUs
+         * before those relations have been scanned and cleaned up.
+         */
+
+        if (TransactionIdIsValid(classForm->relfrozenxid))
+        {
+            Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+
+            /* check for values in the future */
+            if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
+            {
+                bogus = true;
+                break;
+            }
+
+            /* determine new horizon */
+            if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
+                newFrozenXid = classForm->relfrozenxid;
+        }
+
+        if (MultiXactIdIsValid(classForm->relminmxid))
+        {
+            /* check for values in the future */
+            if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
+            {
+                bogus = true;
+                break;
+            }
+
+            /* determine new horizon */
+            if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
+                newMinMulti = classForm->relminmxid;
+        }
+    }
+
+    /* we're done with pg_class */
+    systable_endscan(scan);
+    table_close(relation, AccessShareLock);
+
+    /* chicken out if bogus data found */
+    if (bogus)
+        return;
+
+    Assert(TransactionIdIsNormal(newFrozenXid));
+    Assert(MultiXactIdIsValid(newMinMulti));
+
+    /* Now fetch the pg_database tuple we need to update. */
+    relation = table_open(DatabaseRelationId, RowExclusiveLock);
+
+    /*
+     * Get the pg_database tuple to scribble on.  Note that this does not
+     * directly rely on the syscache to avoid issues with flattened toast
+     * values for the in-place update.
+     */
+    ScanKeyInit(&key[0],
+                Anum_pg_database_oid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(MyDatabaseId));
+
+    scan = systable_beginscan(relation, DatabaseOidIndexId, true,
+                              NULL, 1, key);
+    tuple = systable_getnext(scan);
+    /* copy so the tuple remains valid after systable_endscan */
+    tuple = heap_copytuple(tuple);
+    systable_endscan(scan);
+
+    if (!HeapTupleIsValid(tuple))
+        elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
+
+    dbform = (Form_pg_database) GETSTRUCT(tuple);
+
+    /*
+     * As in vac_update_relstats(), we ordinarily don't want to let
+     * datfrozenxid go backward; but if it's "in the future" then it must be
+     * corrupt and it seems best to overwrite it.
+     */
+    if (dbform->datfrozenxid != newFrozenXid &&
+        (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
+         TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
+    {
+        dbform->datfrozenxid = newFrozenXid;
+        dirty = true;
+    }
+    else
+        newFrozenXid = dbform->datfrozenxid;
+
+    /* Ditto for datminmxid */
+    if (dbform->datminmxid != newMinMulti &&
+        (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
+         MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
+    {
+        dbform->datminmxid = newMinMulti;
+        dirty = true;
+    }
+    else
+        newMinMulti = dbform->datminmxid;
+
+    if (dirty)
+        heap_inplace_update(relation, tuple);
+
+    heap_freetuple(tuple);
+    table_close(relation, RowExclusiveLock);
+
+    /*
+     * If we were able to advance datfrozenxid or datminmxid, see if we can
+     * truncate pg_xact and/or pg_multixact.  Also do it if the shared
+     * XID-wrap-limit info is stale, since this action will update that too.
+     */
+    if (dirty || ForceTransactionIdLimitUpdate())
+        vac_truncate_clog(newFrozenXid, newMinMulti,
+                          lastSaneFrozenXid, lastSaneMinMulti);
+}
+
+
+/*
+ * vac_truncate_clog() -- attempt to truncate the commit log
+ *
+ * Scan pg_database to determine the system-wide oldest datfrozenxid,
+ * and use it to truncate the transaction commit log (pg_xact).
+ * Also update the XID wrap limit info maintained by varsup.c.
+ * Likewise for datminmxid. + * + * The passed frozenXID and minMulti are the updated values for my own + * pg_database entry. They're used to initialize the "min" calculations. + * The caller also passes the "last sane" XID and MXID, since it has + * those at hand already. + * + * This routine is only invoked when we've managed to change our + * DB's datfrozenxid/datminmxid values, or we found that the shared + * XID-wrap-limit info is stale. + */ +static void +vac_truncate_clog(TransactionId frozenXID, + MultiXactId minMulti, + TransactionId lastSaneFrozenXid, + MultiXactId lastSaneMinMulti) +{ + TransactionId nextXID = ReadNextTransactionId(); + Relation relation; + TableScanDesc scan; + HeapTuple tuple; + Oid oldestxid_datoid; + Oid minmulti_datoid; + bool bogus = false; + bool frozenAlreadyWrapped = false; + + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE); + + /* init oldest datoids to sync with my frozenXID/minMulti values */ + oldestxid_datoid = MyDatabaseId; + minmulti_datoid = MyDatabaseId; + + /* + * Scan pg_database to compute the minimum datfrozenxid/datminmxid + * + * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place, + * the values could change while we look at them. Fetch each one just + * once to ensure sane behavior of the comparison logic. (Here, as in + * many other places, we assume that fetching or updating an XID in shared + * storage is atomic.) + * + * Note: we need not worry about a race condition with new entries being + * inserted by CREATE DATABASE. Any such entry will have a copy of some + * existing DB's datfrozenxid, and that source DB cannot be ours because + * of the interlock against copying a DB containing an active backend. + * Hence the new entry will not reduce the minimum. 
Also, if two VACUUMs + * concurrently modify the datfrozenxid's of different databases, the + * worst possible outcome is that pg_xact is not truncated as aggressively + * as it could be. + */ + relation = table_open(DatabaseRelationId, AccessShareLock); + + scan = table_beginscan_catalog(relation, 0, NULL); + + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple); + TransactionId datfrozenxid = dbform->datfrozenxid; + TransactionId datminmxid = dbform->datminmxid; + + Assert(TransactionIdIsNormal(datfrozenxid)); + Assert(MultiXactIdIsValid(datminmxid)); + + /* + * If database is in the process of getting dropped, or has been + * interrupted while doing so, no connections to it are possible + * anymore. Therefore we don't need to take it into account here. + * Which is good, because it can't be processed by autovacuum either. + */ + if (database_is_invalid_form((Form_pg_database) dbform)) + { + elog(DEBUG2, + "skipping invalid database \"%s\" while computing relfrozenxid", + NameStr(dbform->datname)); + continue; + } + + /* + * If things are working properly, no database should have a + * datfrozenxid or datminmxid that is "in the future". However, such + * cases have been known to arise due to bugs in pg_upgrade. If we + * see any entries that are "in the future", chicken out and don't do + * anything. This ensures we won't truncate clog before those + * databases have been scanned and cleaned up. (We will issue the + * "already wrapped" warning if appropriate, though.) 
+ */ + if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) || + MultiXactIdPrecedes(lastSaneMinMulti, datminmxid)) + bogus = true; + + if (TransactionIdPrecedes(nextXID, datfrozenxid)) + frozenAlreadyWrapped = true; + else if (TransactionIdPrecedes(datfrozenxid, frozenXID)) + { + frozenXID = datfrozenxid; + oldestxid_datoid = dbform->oid; + } + + if (MultiXactIdPrecedes(datminmxid, minMulti)) + { + minMulti = datminmxid; + minmulti_datoid = dbform->oid; + } + } + + table_endscan(scan); + + table_close(relation, AccessShareLock); + + /* + * Do not truncate CLOG if we seem to have suffered wraparound already; + * the computed minimum XID might be bogus. This case should now be + * impossible due to the defenses in GetNewTransactionId, but we keep the + * test anyway. + */ + if (frozenAlreadyWrapped) + { + ereport(WARNING, + (errmsg("some databases have not been vacuumed in over 2 billion transactions"), + errdetail("You might have already suffered transaction-wraparound data loss."))); + LWLockRelease(WrapLimitsVacuumLock); + return; + } + + /* chicken out if data is bogus in any other way */ + if (bogus) + { + LWLockRelease(WrapLimitsVacuumLock); + return; + } + + /* + * Advance the oldest value for commit timestamps before truncating, so + * that if a user requests a timestamp for a transaction we're truncating + * away right after this point, they get NULL instead of an ugly "file not + * found" error from slru.c. This doesn't matter for xact/multixact + * because they are not subject to arbitrary lookups from users. + */ + AdvanceOldestCommitTsXid(frozenXID); + + /* + * Truncate CLOG, multixact and CommitTs to the oldest computed value. + */ + TruncateCLOG(frozenXID, oldestxid_datoid); + TruncateCommitTs(frozenXID); + TruncateMultiXact(minMulti, minmulti_datoid); + + /* + * Update the wrap limit for GetNewTransactionId and creation of new + * MultiXactIds. Note: these functions will also signal the postmaster + * for an(other) autovac cycle if needed. 
XXX should we avoid possibly + * signaling twice? + */ + SetTransactionIdLimit(frozenXID, oldestxid_datoid); + SetMultiXactIdLimit(minMulti, minmulti_datoid, false); + + LWLockRelease(WrapLimitsVacuumLock); +} + + +/* + * vacuum_rel() -- vacuum one heap relation + * + * relid identifies the relation to vacuum. If relation is supplied, + * use the name therein for reporting any failure to open/lock the rel; + * do not use it once we've successfully opened the rel, since it might + * be stale. + * + * Returns true if it's okay to proceed with a requested ANALYZE + * operation on this table. + * + * Doing one heap at a time incurs extra overhead, since we need to + * check that the heap exists again just before we vacuum it. The + * reason that we do this is so that vacuuming can be spread across + * many small transactions. Otherwise, two-phase locking would require + * us to lock the entire database during one pass of the vacuum cleaner. + * + * At entry and exit, we are not inside a transaction. + */ +static bool +vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) +{ + LOCKMODE lmode; + Relation rel; + LockRelId lockrelid; + Oid toast_relid; + Oid save_userid; + int save_sec_context; + int save_nestlevel; + + Assert(params != NULL); + + /* Begin a transaction for vacuuming this relation */ + StartTransactionCommand(); + + if (!(params->options & VACOPT_FULL)) + { + /* + * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets + * other concurrent VACUUMs know that they can ignore this one while + * determining their OldestXmin. (The reason we don't set it during a + * full VACUUM is exactly that we may have to run user-defined + * functions for functional indexes, and we want to make sure that if + * they use the snapshot set above, any tuples it requires can't get + * removed from other tables. 
An index function that depends on the + * contents of other tables is arguably broken, but we won't break it + * here by violating transaction semantics.) + * + * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by + * autovacuum; it's used to avoid canceling a vacuum that was invoked + * in an emergency. + * + * Note: these flags remain set until CommitTransaction or + * AbortTransaction. We don't want to clear them until we reset + * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId() + * might appear to go backwards, which is probably Not Good. (We also + * set PROC_IN_VACUUM *before* taking our own snapshot, so that our + * xmin doesn't become visible ahead of setting the flag.) + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_VACUUM; + if (params->is_wraparound) + MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + } + + /* + * Need to acquire a snapshot to prevent pg_subtrans from being truncated, + * cutoff xids in local memory wrapping around, and to have updated xmin + * horizons. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Check for user-requested abort. Note we want this to be inside a + * transaction, so xact.c doesn't issue useless WARNING. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Determine the type of lock we want --- hard exclusive lock for a FULL + * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either + * way, we can be sure that no other backend is vacuuming the same table. + */ + lmode = (params->options & VACOPT_FULL) ? 
+ AccessExclusiveLock : ShareUpdateExclusiveLock; + + /* open the relation and get the appropriate lock on it */ + rel = vacuum_open_relation(relid, relation, params->options, + params->log_min_duration >= 0, lmode); + + /* leave if relation could not be opened or locked */ + if (!rel) + { + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Check if relation needs to be skipped based on ownership. This check + * happens also when building the relation list to vacuum for a manual + * operation, and needs to be done additionally here as VACUUM could + * happen across multiple transactions where relation ownership could have + * changed in-between. Make sure to only generate logs for VACUUM in this + * case. + */ + if (!vacuum_is_relation_owner(RelationGetRelid(rel), + rel->rd_rel, + params->options & VACOPT_VACUUM)) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Check that it's of a vacuumable relkind. + */ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_TOASTVALUE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + ereport(WARNING, + (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables", + RelationGetRelationName(rel)))); + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Silently ignore tables that are temp tables of other backends --- + * trying to vacuum these will lead to great unhappiness, since their + * contents are probably not up-to-date on disk. (We don't throw a + * warning here; it would just lead to chatter during a database-wide + * VACUUM.) + */ + if (RELATION_IS_OTHER_TEMP(rel)) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + return false; + } + + /* + * Silently ignore partitioned tables as there is no work to be done. 
The + * useful work is on their child partitions, which have been queued up for + * us separately. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + relation_close(rel, lmode); + PopActiveSnapshot(); + CommitTransactionCommand(); + /* It's OK to proceed with ANALYZE on this table */ + return true; + } + + /* + * Get a session-level lock too. This will protect our access to the + * relation across multiple transactions, so that we can vacuum the + * relation's TOAST table (if any) secure in the knowledge that no one is + * deleting the parent relation. + * + * NOTE: this cannot block, even if someone else is waiting for access, + * because the lock manager knows that both lock requests are from the + * same process. + */ + lockrelid = rel->rd_lockInfo.lockRelId; + LockRelationIdForSession(&lockrelid, lmode); + + /* + * Set index_cleanup option based on index_cleanup reloption if it wasn't + * specified in VACUUM command, or when running in an autovacuum worker + */ + if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED) + { + StdRdOptIndexCleanup vacuum_index_cleanup; + + if (rel->rd_options == NULL) + vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO; + else + vacuum_index_cleanup = + ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup; + + if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO) + params->index_cleanup = VACOPTVALUE_AUTO; + else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON) + params->index_cleanup = VACOPTVALUE_ENABLED; + else + { + Assert(vacuum_index_cleanup == + STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF); + params->index_cleanup = VACOPTVALUE_DISABLED; + } + } + + /* + * Set truncate option based on truncate reloption if it wasn't specified + * in VACUUM command, or when running in an autovacuum worker + */ + if (params->truncate == VACOPTVALUE_UNSPECIFIED) + { + if (rel->rd_options == NULL || + ((StdRdOptions *) rel->rd_options)->vacuum_truncate) + params->truncate = 
VACOPTVALUE_ENABLED; + else + params->truncate = VACOPTVALUE_DISABLED; + } + + /* + * Remember the relation's TOAST relation for later, if the caller asked + * us to process it. In VACUUM FULL, though, the toast table is + * automatically rebuilt by cluster_rel so we shouldn't recurse to it. + */ + if ((params->options & VACOPT_PROCESS_TOAST) != 0 && + (params->options & VACOPT_FULL) == 0) + toast_relid = rel->rd_rel->reltoastrelid; + else + toast_relid = InvalidOid; + + /* + * Switch to the table owner's userid, so that any index functions are run + * as that user. Also lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. (This is + * unnecessary, but harmless, for lazy VACUUM.) + */ + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(rel->rd_rel->relowner, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + + /* + * Do the actual work --- either FULL or "lazy" vacuum + */ + if (params->options & VACOPT_FULL) + { + ClusterParams cluster_params = {0}; + + /* close relation before vacuuming, but hold lock until commit */ + relation_close(rel, NoLock); + rel = NULL; + + if ((params->options & VACOPT_VERBOSE) != 0) + cluster_params.options |= CLUOPT_VERBOSE; + + /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ + cluster_rel(relid, InvalidOid, &cluster_params); + } + else + table_relation_vacuum(rel, params, vac_strategy); + + /* Roll back any GUC changes executed by index functions */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + + /* all done with this class, but hold lock until commit */ + if (rel) + relation_close(rel, NoLock); + + /* + * Complete the transaction and free all temporary memory used. 
+ */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * If the relation has a secondary toast rel, vacuum that too while we + * still hold the session lock on the main table. Note however that + * "analyze" will not get done on the toast table. This is good, because + * the toaster always uses hardcoded index access and statistics are + * totally unimportant for toast relations. + */ + if (toast_relid != InvalidOid) + vacuum_rel(toast_relid, NULL, params); + + /* + * Now release the session-level lock on the main table. + */ + UnlockRelationIdForSession(&lockrelid, lmode); + + /* Report that we really did it. */ + return true; +} + + +/* + * Open all the vacuumable indexes of the given relation, obtaining the + * specified kind of lock on each. Return an array of Relation pointers for + * the indexes into *Irel, and the number of indexes into *nindexes. + * + * We consider an index vacuumable if it is marked insertable (indisready). + * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in + * execution, and what we have is too corrupt to be processable. We will + * vacuum even if the index isn't indisvalid; this is important because in a + * unique index, uniqueness checks will be performed anyway and had better not + * hit dangling index pointers. 
+ */ +void +vac_open_indexes(Relation relation, LOCKMODE lockmode, + int *nindexes, Relation **Irel) +{ + List *indexoidlist; + ListCell *indexoidscan; + int i; + + Assert(lockmode != NoLock); + + indexoidlist = RelationGetIndexList(relation); + + /* allocate enough memory for all indexes */ + i = list_length(indexoidlist); + + if (i > 0) + *Irel = (Relation *) palloc(i * sizeof(Relation)); + else + *Irel = NULL; + + /* collect just the ready indexes */ + i = 0; + foreach(indexoidscan, indexoidlist) + { + Oid indexoid = lfirst_oid(indexoidscan); + Relation indrel; + + indrel = index_open(indexoid, lockmode); + if (indrel->rd_index->indisready) + (*Irel)[i++] = indrel; + else + index_close(indrel, lockmode); + } + + *nindexes = i; + + list_free(indexoidlist); +} + +/* + * Release the resources acquired by vac_open_indexes. Optionally release + * the locks (say NoLock to keep 'em). + */ +void +vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) +{ + if (Irel == NULL) + return; + + while (nindexes--) + { + Relation ind = Irel[nindexes]; + + index_close(ind, lockmode); + } + pfree(Irel); +} + +/* + * vacuum_delay_point --- check for interrupts and cost-based delay. + * + * This should be called in each major loop of VACUUM processing, + * typically once per page processed. + */ +void +vacuum_delay_point(void) +{ + double msec = 0; + + /* Always check for interrupts */ + CHECK_FOR_INTERRUPTS(); + + if (!VacuumCostActive || InterruptPending) + return; + + /* + * For parallel vacuum, the delay is computed based on the shared cost + * balance. See compute_parallel_delay. 
+ */ + if (VacuumSharedCostBalance != NULL) + msec = compute_parallel_delay(); + else if (VacuumCostBalance >= VacuumCostLimit) + msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit; + + /* Nap if appropriate */ + if (msec > 0) + { + if (msec > VacuumCostDelay * 4) + msec = VacuumCostDelay * 4; + + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); + pg_usleep(msec * 1000); + pgstat_report_wait_end(); + + /* + * We don't want to ignore postmaster death during very long vacuums + * with vacuum_cost_delay configured. We can't use the usual + * WaitLatch() approach here because we want microsecond-based sleep + * durations above. + */ + if (IsUnderPostmaster && !PostmasterIsAlive()) + exit(1); + + VacuumCostBalance = 0; + + /* update balance values for workers */ + AutoVacuumUpdateDelay(); + + /* Might have gotten an interrupt while sleeping */ + CHECK_FOR_INTERRUPTS(); + } +} + +/* + * Computes the vacuum delay for parallel workers. + * + * The basic idea of a cost-based delay for parallel vacuum is to allow each + * worker to sleep in proportion to the share of work it's done. We achieve this + * by allowing all parallel vacuum workers including the leader process to + * have a shared view of cost related parameters (mainly VacuumCostBalance). + * We allow each worker to update it as and when it has incurred any cost and + * then based on that decide whether it needs to sleep. We compute the time + * to sleep for a worker based on the cost it has incurred + * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by + * that amount. This avoids putting to sleep those workers which have done less + * I/O than other workers and therefore ensure that workers + * which are doing more I/O got throttled more. 
+ * + * We allow a worker to sleep only if it has performed I/O above a certain + * threshold, which is calculated based on the number of active workers + * (VacuumActiveNWorkers), and the overall cost balance is more than + * VacuumCostLimit set by the system. Testing reveals that we achieve + * the required throttling if we force a worker that has done more than 50% + * of its share of work to sleep. + */ +static double +compute_parallel_delay(void) +{ + double msec = 0; + uint32 shared_balance; + int nworkers; + + /* Parallel vacuum must be active */ + Assert(VacuumSharedCostBalance); + + nworkers = pg_atomic_read_u32(VacuumActiveNWorkers); + + /* At least count itself */ + Assert(nworkers >= 1); + + /* Update the shared cost balance value atomically */ + shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance); + + /* Compute the total local balance for the current worker */ + VacuumCostBalanceLocal += VacuumCostBalance; + + if ((shared_balance >= VacuumCostLimit) && + (VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers))) + { + /* Compute sleep time based on the local cost balance */ + msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit; + pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal); + VacuumCostBalanceLocal = 0; + } + + /* + * Reset the local balance as we accumulated it into the shared value. + */ + VacuumCostBalance = 0; + + return msec; +} + +/* + * A wrapper function of defGetBoolean(). + * + * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead + * of true and false. + */ +static VacOptValue +get_vacoptval_from_boolean(DefElem *def) +{ + return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED; +} + +/* + * vac_bulkdel_one_index() -- bulk-deletion for index relation. 
+ * + * Returns bulk delete stats derived from input stats + */ +IndexBulkDeleteResult * +vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, + VacDeadItems *dead_items) +{ + /* Do bulk deletion */ + istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped, + (void *) dead_items); + + ereport(ivinfo->message_level, + (errmsg("scanned index \"%s\" to remove %d row versions", + RelationGetRelationName(ivinfo->index), + dead_items->num_items))); + + return istat; +} + +/* + * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation. + * + * Returns bulk delete stats derived from input stats + */ +IndexBulkDeleteResult * +vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat) +{ + istat = index_vacuum_cleanup(ivinfo, istat); + + if (istat) + ereport(ivinfo->message_level, + (errmsg("index \"%s\" now contains %.0f row versions in %u pages", + RelationGetRelationName(ivinfo->index), + istat->num_index_tuples, + istat->num_pages), + errdetail("%.0f index row versions were removed.\n" + "%u index pages were newly deleted.\n" + "%u index pages are currently deleted, of which %u are currently reusable.", + istat->tuples_removed, + istat->pages_newly_deleted, + istat->pages_deleted, istat->pages_free))); + + return istat; +} + +/* + * Returns the total required space for VACUUM's dead_items array given a + * max_items value. + */ +Size +vac_max_items_to_alloc_size(int max_items) +{ + Assert(max_items <= MAXDEADITEMS(MaxAllocSize)); + + return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items; +} + +/* + * vac_tid_reaped() -- is a particular tid deletable? + * + * This has the right signature to be an IndexBulkDeleteCallback. + * + * Assumes dead_items array is sorted (in ascending TID order). 
+ */ +static bool +vac_tid_reaped(ItemPointer itemptr, void *state) +{ + VacDeadItems *dead_items = (VacDeadItems *) state; + int64 litem, + ritem, + item; + ItemPointer res; + + litem = itemptr_encode(&dead_items->items[0]); + ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]); + item = itemptr_encode(itemptr); + + /* + * Doing a simple bound check before bsearch() is useful to avoid the + * extra cost of bsearch(), especially if dead items on the heap are + * concentrated in a certain range. Since this function is called for + * every index tuple, it pays to be really fast. + */ + if (item < litem || item > ritem) + return false; + + res = (ItemPointer) bsearch((void *) itemptr, + (void *) dead_items->items, + dead_items->num_items, + sizeof(ItemPointerData), + vac_cmp_itemptr); + + return (res != NULL); +} + +/* + * Comparator routines for use with qsort() and bsearch(). + */ +static int +vac_cmp_itemptr(const void *left, const void *right) +{ + BlockNumber lblk, + rblk; + OffsetNumber loff, + roff; + + lblk = ItemPointerGetBlockNumber((ItemPointer) left); + rblk = ItemPointerGetBlockNumber((ItemPointer) right); + + if (lblk < rblk) + return -1; + if (lblk > rblk) + return 1; + + loff = ItemPointerGetOffsetNumber((ItemPointer) left); + roff = ItemPointerGetOffsetNumber((ItemPointer) right); + + if (loff < roff) + return -1; + if (loff > roff) + return 1; + + return 0; +} diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c new file mode 100644 index 0000000..f26d796 --- /dev/null +++ b/src/backend/commands/vacuumparallel.c @@ -0,0 +1,1074 @@ +/*------------------------------------------------------------------------- + * + * vacuumparallel.c + * Support routines for parallel vacuum execution. + * + * This file contains routines that are intended to support setting up, using, + * and tearing down a ParallelVacuumState. 
+ * + * In a parallel vacuum, we perform both index bulk deletion and index cleanup + * with parallel worker processes. Individual indexes are processed by one + * vacuum process. ParallelVacuumState contains shared information as well as + * the memory space for storing dead items allocated in the DSM segment. We + * launch parallel worker processes at the start of parallel index + * bulk-deletion and index cleanup and once all indexes are processed, the + * parallel worker processes exit. Each time we process indexes in parallel, + * the parallel context is re-initialized so that the same DSM can be used for + * multiple passes of index bulk-deletion and index cleanup. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/vacuumparallel.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/index.h" +#include "commands/vacuum.h" +#include "optimizer/paths.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +/* + * DSM keys for parallel vacuum. Unlike other parallel execution code, since + * we don't need to worry about DSM keys conflicting with plan_node_id we can + * use small integers. + */ +#define PARALLEL_VACUUM_KEY_SHARED 1 +#define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2 +#define PARALLEL_VACUUM_KEY_QUERY_TEXT 3 +#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4 +#define PARALLEL_VACUUM_KEY_WAL_USAGE 5 +#define PARALLEL_VACUUM_KEY_INDEX_STATS 6 + +/* + * Shared information among parallel workers. So this is allocated in the DSM + * segment. + */ +typedef struct PVShared +{ + /* + * Target table relid and log level (for messages about parallel workers + * launched during VACUUM VERBOSE). 
These fields are not modified during + * the parallel vacuum. + */ + Oid relid; + int elevel; + + /* + * Fields for both index vacuum and cleanup. + * + * reltuples is the total number of input heap tuples. We set either old + * live tuples in the index vacuum case or the new live tuples in the + * index cleanup case. + * + * estimated_count is true if reltuples is an estimated value. (Note that + * reltuples could be -1 in this case, indicating we have no idea.) + */ + double reltuples; + bool estimated_count; + + /* + * In single process vacuum we could consume more memory during index + * vacuuming or cleanup apart from the memory for heap scanning. In + * parallel vacuum, since individual vacuum workers can consume memory + * equal to maintenance_work_mem, the new maintenance_work_mem for each + * worker is set such that the parallel operation doesn't consume more + * memory than single process vacuum. + */ + int maintenance_work_mem_worker; + + /* + * Shared vacuum cost balance. During parallel vacuum, + * VacuumSharedCostBalance points to this value and it accumulates the + * balance of each parallel vacuum worker. + */ + pg_atomic_uint32 cost_balance; + + /* + * Number of active parallel workers. This is used for computing the + * minimum threshold of the vacuum cost balance before a worker sleeps for + * cost-based delay. + */ + pg_atomic_uint32 active_nworkers; + + /* Counter for vacuuming and cleanup */ + pg_atomic_uint32 idx; +} PVShared; + +/* Status used during parallel index vacuum or cleanup */ +typedef enum PVIndVacStatus +{ + PARALLEL_INDVAC_STATUS_INITIAL = 0, + PARALLEL_INDVAC_STATUS_NEED_BULKDELETE, + PARALLEL_INDVAC_STATUS_NEED_CLEANUP, + PARALLEL_INDVAC_STATUS_COMPLETED +} PVIndVacStatus; + +/* + * Struct for index vacuum statistics of an index that is used for parallel vacuum. + * This includes the status of parallel index vacuum as well as index statistics. 
+ */ +typedef struct PVIndStats +{ + /* + * The following two fields are set by leader process before executing + * parallel index vacuum or parallel index cleanup. These fields are not + * fixed for the entire VACUUM operation. They are only fixed for an + * individual parallel index vacuum and cleanup. + * + * parallel_workers_can_process is true if both leader and worker can + * process the index, otherwise only leader can process it. + */ + PVIndVacStatus status; + bool parallel_workers_can_process; + + /* + * Individual worker or leader stores the result of index vacuum or + * cleanup. + */ + bool istat_updated; /* are the stats updated? */ + IndexBulkDeleteResult istat; +} PVIndStats; + +/* + * Struct for maintaining a parallel vacuum state. typedef appears in vacuum.h. + */ +struct ParallelVacuumState +{ + /* NULL for worker processes */ + ParallelContext *pcxt; + + /* Target indexes */ + Relation *indrels; + int nindexes; + + /* Shared information among parallel vacuum workers */ + PVShared *shared; + + /* + * Shared index statistics among parallel vacuum workers. The array + * element is allocated for every index, even those indexes where parallel + * index vacuuming is unsafe or not worthwhile (e.g., + * will_parallel_vacuum[] is false). During parallel vacuum, + * IndexBulkDeleteResult of each index is kept in DSM and is copied into + * local memory at the end of parallel vacuum. + */ + PVIndStats *indstats; + + /* Shared dead items space among parallel vacuum workers */ + VacDeadItems *dead_items; + + /* Points to buffer usage area in DSM */ + BufferUsage *buffer_usage; + + /* Points to WAL usage area in DSM */ + WalUsage *wal_usage; + + /* + * False if the index is totally unsuitable target for all parallel + * processing. For example, the index could be < + * min_parallel_index_scan_size cutoff. + */ + bool *will_parallel_vacuum; + + /* + * The number of indexes that support parallel index bulk-deletion and + * parallel index cleanup respectively. 
+ */ + int nindexes_parallel_bulkdel; + int nindexes_parallel_cleanup; + int nindexes_parallel_condcleanup; + + /* Buffer access strategy used by leader process */ + BufferAccessStrategy bstrategy; + + /* + * Error reporting state. The error callback is set only for workers + * processes during parallel index vacuum. + */ + char *relnamespace; + char *relname; + char *indname; + PVIndVacStatus status; +}; + +static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested, + bool *will_parallel_vacuum); +static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans, + bool vacuum); +static void parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs); +static void parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs); +static void parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel, + PVIndStats *indstats); +static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans, + bool vacuum); +static void parallel_vacuum_error_callback(void *arg); + +/* + * Try to enter parallel mode and create a parallel context. Then initialize + * shared memory state. + * + * On success, return parallel vacuum state. Otherwise return NULL. 
 */
ParallelVacuumState *
parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
					 int nrequested_workers, int max_items,
					 int elevel, BufferAccessStrategy bstrategy)
{
	ParallelVacuumState *pvs;
	ParallelContext *pcxt;
	PVShared   *shared;
	VacDeadItems *dead_items;
	PVIndStats *indstats;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	bool	   *will_parallel_vacuum;
	Size		est_indstats_len;
	Size		est_shared_len;
	Size		est_dead_items_len;
	int			nindexes_mwm = 0;
	int			parallel_workers = 0;
	int			querylen;

	/*
	 * A parallel vacuum must be requested and there must be indexes on the
	 * relation
	 */
	Assert(nrequested_workers >= 0);
	Assert(nindexes > 0);

	/*
	 * Compute the number of parallel vacuum workers to launch
	 */
	will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
	parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes,
													   nrequested_workers,
													   will_parallel_vacuum);
	if (parallel_workers <= 0)
	{
		/* Can't perform vacuum in parallel -- return NULL */
		pfree(will_parallel_vacuum);
		return NULL;
	}

	pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState));
	pvs->indrels = indrels;
	pvs->nindexes = nindexes;
	pvs->will_parallel_vacuum = will_parallel_vacuum;
	pvs->bstrategy = bstrategy;

	EnterParallelMode();
	pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
								 parallel_workers);
	Assert(pcxt->nworkers > 0);
	pvs->pcxt = pcxt;

	/* Estimate size for index vacuum stats -- PARALLEL_VACUUM_KEY_INDEX_STATS */
	est_indstats_len = mul_size(sizeof(PVIndStats), nindexes);
	shm_toc_estimate_chunk(&pcxt->estimator, est_indstats_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
	est_shared_len = sizeof(PVShared);
	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
	est_dead_items_len = vac_max_items_to_alloc_size(max_items);
	shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/*
	 * Estimate space for BufferUsage and WalUsage --
	 * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
	 *
	 * If there are no extensions loaded that care, we could skip this.  We
	 * have no way of knowing whether anyone's looking at pgBufferUsage or
	 * pgWalUsage, so do it unconditionally.
	 */
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);
	shm_toc_estimate_chunk(&pcxt->estimator,
						   mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_estimate_keys(&pcxt->estimator, 1);

	/* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
	if (debug_query_string)
	{
		querylen = strlen(debug_query_string);
		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
		shm_toc_estimate_keys(&pcxt->estimator, 1);
	}
	else
		querylen = 0;			/* keep compiler quiet */

	InitializeParallelDSM(pcxt);

	/* Prepare index vacuum stats */
	indstats = (PVIndStats *) shm_toc_allocate(pcxt->toc, est_indstats_len);
	MemSet(indstats, 0, est_indstats_len);
	for (int i = 0; i < nindexes; i++)
	{
		Relation	indrel = indrels[i];
		uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;

		/*
		 * Cleanup option should be either disabled, always performing in
		 * parallel or conditionally performing in parallel.
		 */
		Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
			   ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
		Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);

		if (!will_parallel_vacuum[i])
			continue;

		if (indrel->rd_indam->amusemaintenanceworkmem)
			nindexes_mwm++;

		/*
		 * Remember the number of indexes that support parallel operation for
		 * each phase.
		 */
		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
			pvs->nindexes_parallel_bulkdel++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
			pvs->nindexes_parallel_cleanup++;
		if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
			pvs->nindexes_parallel_condcleanup++;
	}
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INDEX_STATS, indstats);
	pvs->indstats = indstats;

	/* Prepare shared information */
	shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
	MemSet(shared, 0, est_shared_len);
	shared->relid = RelationGetRelid(rel);
	shared->elevel = elevel;
	/*
	 * Divide maintenance_work_mem among the workers that will actually use
	 * it, so the parallel operation doesn't exceed single-process vacuum's
	 * memory budget.
	 */
	shared->maintenance_work_mem_worker =
		(nindexes_mwm > 0) ?
		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
		maintenance_work_mem;

	pg_atomic_init_u32(&(shared->cost_balance), 0);
	pg_atomic_init_u32(&(shared->active_nworkers), 0);
	pg_atomic_init_u32(&(shared->idx), 0);

	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
	pvs->shared = shared;

	/* Prepare the dead_items space */
	dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
												   est_dead_items_len);
	dead_items->max_items = max_items;
	dead_items->num_items = 0;
	MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
	pvs->dead_items = dead_items;

	/*
	 * Allocate space for each worker's BufferUsage and WalUsage; no need to
	 * initialize
	 */
	buffer_usage = shm_toc_allocate(pcxt->toc,
									mul_size(sizeof(BufferUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
	pvs->buffer_usage = buffer_usage;
	wal_usage = shm_toc_allocate(pcxt->toc,
								 mul_size(sizeof(WalUsage), pcxt->nworkers));
	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
	pvs->wal_usage = wal_usage;

	/* Store query string for workers */
	if (debug_query_string)
	{
		char	   *sharedquery;

		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
		memcpy(sharedquery, debug_query_string, querylen + 1);
		sharedquery[querylen] = '\0';
		shm_toc_insert(pcxt->toc,
					   PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
	}

	/* Success -- return parallel vacuum state */
	return pvs;
}

/*
 * Destroy the parallel context, and end parallel mode.
 *
 * Since writes are not allowed during parallel mode, copy the
 * updated index statistics from DSM into local memory and then later use that
 * to update the index statistics.  One might think that we can exit from
 * parallel mode, update the index statistics and then destroy parallel
 * context, but that won't be safe (see ExitParallelMode).
 */
void
parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
{
	Assert(!IsParallelWorker());

	/* Copy the updated statistics */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		if (indstats->istat_updated)
		{
			istats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
			memcpy(istats[i], &indstats->istat, sizeof(IndexBulkDeleteResult));
		}
		else
			istats[i] = NULL;
	}

	DestroyParallelContext(pvs->pcxt);
	ExitParallelMode();

	pfree(pvs->will_parallel_vacuum);
	pfree(pvs);
}

/* Returns the dead items space */
VacDeadItems *
parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
{
	return pvs->dead_items;
}

/*
 * Do parallel index bulk-deletion with parallel workers.
 */
void
parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
									int num_index_scans)
{
	Assert(!IsParallelWorker());

	/*
	 * We can only provide an approximate value of num_heap_tuples, at least
	 * for now.
	 */
	pvs->shared->reltuples = num_table_tuples;
	pvs->shared->estimated_count = true;

	parallel_vacuum_process_all_indexes(pvs, num_index_scans, true);
}

/*
 * Do parallel index cleanup with parallel workers.
 */
void
parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples,
									int num_index_scans, bool estimated_count)
{
	Assert(!IsParallelWorker());

	/*
	 * We can provide a better estimate of total number of surviving tuples
	 * (we assume indexes are more interested in that than in the number of
	 * nominally live tuples).
	 */
	pvs->shared->reltuples = num_table_tuples;
	pvs->shared->estimated_count = estimated_count;

	parallel_vacuum_process_all_indexes(pvs, num_index_scans, false);
}

/*
 * Compute the number of parallel worker processes to request.  Both index
 * vacuum and index cleanup can be executed with parallel workers.
 * The index is eligible for parallel vacuum iff its size is greater than
 * min_parallel_index_scan_size as invoking workers for very small indexes
 * can hurt performance.
 *
 * nrequested is the number of parallel workers that user requested.  If
 * nrequested is 0, we compute the parallel degree based on nindexes, that is
 * the number of indexes that support parallel vacuum.  This function also
 * sets will_parallel_vacuum to remember indexes that participate in parallel
 * vacuum.
 */
static int
parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
								bool *will_parallel_vacuum)
{
	int			nindexes_parallel = 0;
	int			nindexes_parallel_bulkdel = 0;
	int			nindexes_parallel_cleanup = 0;
	int			parallel_workers;

	/*
	 * We don't allow performing parallel operation in standalone backend or
	 * when parallelism is disabled.
	 */
	if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
		return 0;

	/*
	 * Compute the number of indexes that can participate in parallel vacuum.
	 */
	for (int i = 0; i < nindexes; i++)
	{
		Relation	indrel = indrels[i];
		uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;

		/* Skip index that is not a suitable target for parallel index vacuum */
		if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
			RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
			continue;

		will_parallel_vacuum[i] = true;

		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
			nindexes_parallel_bulkdel++;
		if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
			((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
			nindexes_parallel_cleanup++;
	}

	/* Worker demand is set by the more demanding of the two phases */
	nindexes_parallel = Max(nindexes_parallel_bulkdel,
							nindexes_parallel_cleanup);

	/* The leader process takes one index */
	nindexes_parallel--;

	/* No index supports parallel vacuum */
	if (nindexes_parallel <= 0)
		return 0;

	/* Compute the parallel degree */
	parallel_workers = (nrequested > 0) ?
		Min(nrequested, nindexes_parallel) : nindexes_parallel;

	/* Cap by max_parallel_maintenance_workers */
	parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);

	return parallel_workers;
}

/*
 * Perform index vacuum or index cleanup with parallel workers.  This function
 * must be used by the parallel vacuum leader process.
 */
static void
parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
									bool vacuum)
{
	int			nworkers;
	PVIndVacStatus new_status;

	Assert(!IsParallelWorker());

	if (vacuum)
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_BULKDELETE;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_bulkdel;
	}
	else
	{
		new_status = PARALLEL_INDVAC_STATUS_NEED_CLEANUP;

		/* Determine the number of parallel workers to launch */
		nworkers = pvs->nindexes_parallel_cleanup;

		/* Add conditionally parallel-aware indexes if in the first time call */
		if (num_index_scans == 0)
			nworkers += pvs->nindexes_parallel_condcleanup;
	}

	/* The leader process will participate */
	nworkers--;

	/*
	 * It is possible that parallel context is initialized with fewer workers
	 * than the number of indexes that need a separate worker in the current
	 * phase, so we need to consider it.  See
	 * parallel_vacuum_compute_workers().
	 */
	nworkers = Min(nworkers, pvs->pcxt->nworkers);

	/*
	 * Set index vacuum status and mark whether parallel vacuum worker can
	 * process it.
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		Assert(indstats->status == PARALLEL_INDVAC_STATUS_INITIAL);
		indstats->status = new_status;
		indstats->parallel_workers_can_process =
			(pvs->will_parallel_vacuum[i] &&
			 parallel_vacuum_index_is_parallel_safe(pvs->indrels[i],
													num_index_scans,
													vacuum));
	}

	/* Reset the parallel index processing counter */
	pg_atomic_write_u32(&(pvs->shared->idx), 0);

	/* Setup the shared cost-based vacuum delay and launch workers */
	if (nworkers > 0)
	{
		/* Reinitialize parallel context to relaunch parallel workers */
		if (num_index_scans > 0)
			ReinitializeParallelDSM(pvs->pcxt);

		/*
		 * Set up shared cost balance and the number of active workers for
		 * vacuum delay.  We need to do this before launching workers as
		 * otherwise, they might not see the updated values for these
		 * parameters.
		 */
		pg_atomic_write_u32(&(pvs->shared->cost_balance), VacuumCostBalance);
		pg_atomic_write_u32(&(pvs->shared->active_nworkers), 0);

		/*
		 * The number of workers can vary between bulkdelete and cleanup
		 * phase.
		 */
		ReinitializeParallelWorkers(pvs->pcxt, nworkers);

		LaunchParallelWorkers(pvs->pcxt);

		if (pvs->pcxt->nworkers_launched > 0)
		{
			/*
			 * Reset the local cost values for leader backend as we have
			 * already accumulated the remaining balance of heap.
			 */
			VacuumCostBalance = 0;
			VacuumCostBalanceLocal = 0;

			/* Enable shared cost balance for leader backend */
			VacuumSharedCostBalance = &(pvs->shared->cost_balance);
			VacuumActiveNWorkers = &(pvs->shared->active_nworkers);
		}

		if (vacuum)
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
									 "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
		else
			ereport(pvs->shared->elevel,
					(errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
									 "launched %d parallel vacuum workers for index cleanup (planned: %d)",
									 pvs->pcxt->nworkers_launched),
							pvs->pcxt->nworkers_launched, nworkers)));
	}

	/* Vacuum the indexes that can be processed by only leader process */
	parallel_vacuum_process_unsafe_indexes(pvs);

	/*
	 * Join as a parallel worker.  The leader vacuums alone processes all
	 * parallel-safe indexes in the case where no workers are launched.
	 */
	parallel_vacuum_process_safe_indexes(pvs);

	/*
	 * Next, accumulate buffer and WAL usage.  (This must wait for the workers
	 * to finish, or we might get incomplete data.)
	 */
	if (nworkers > 0)
	{
		/* Wait for all vacuum workers to finish */
		WaitForParallelWorkersToFinish(pvs->pcxt);

		for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
			InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]);
	}

	/*
	 * Reset all index status back to initial (while checking that we have
	 * vacuumed all indexes).
	 */
	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		if (indstats->status != PARALLEL_INDVAC_STATUS_COMPLETED)
			elog(ERROR, "parallel index vacuum on index \"%s\" is not completed",
				 RelationGetRelationName(pvs->indrels[i]));

		indstats->status = PARALLEL_INDVAC_STATUS_INITIAL;
	}

	/*
	 * Carry the shared balance value to heap scan and disable shared costing
	 */
	if (VacuumSharedCostBalance)
	{
		VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
		VacuumSharedCostBalance = NULL;
		VacuumActiveNWorkers = NULL;
	}
}

/*
 * Index vacuum/cleanup routine used by the leader process and parallel
 * vacuum worker processes to vacuum the indexes in parallel.
 */
static void
parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs)
{
	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	/* Loop until all indexes are vacuumed */
	for (;;)
	{
		int			idx;
		PVIndStats *indstats;

		/* Get an index number to process (shared counter hands out work) */
		idx = pg_atomic_fetch_add_u32(&(pvs->shared->idx), 1);

		/* Done for all indexes? */
		if (idx >= pvs->nindexes)
			break;

		indstats = &(pvs->indstats[idx]);

		/*
		 * Skip vacuuming index that is unsafe for workers or has an
		 * unsuitable target for parallel index vacuum (this is vacuumed in
		 * parallel_vacuum_process_unsafe_indexes() by the leader).
		 */
		if (!indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[idx], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}

/*
 * Perform parallel vacuuming of indexes in leader process.
 *
 * Handles index vacuuming (or index cleanup) for indexes that are not
 * parallel safe.  It's possible that this will vary for a given index, based
 * on details like whether we're performing index cleanup right now.
 *
 * Also performs vacuuming of smaller indexes that fell under the size cutoff
 * enforced by parallel_vacuum_compute_workers().
 */
static void
parallel_vacuum_process_unsafe_indexes(ParallelVacuumState *pvs)
{
	Assert(!IsParallelWorker());

	/*
	 * Increment the active worker count if we are able to launch any worker.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);

	for (int i = 0; i < pvs->nindexes; i++)
	{
		PVIndStats *indstats = &(pvs->indstats[i]);

		/* Skip, indexes that are safe for workers */
		if (indstats->parallel_workers_can_process)
			continue;

		/* Do vacuum or cleanup of the index */
		parallel_vacuum_process_one_index(pvs, pvs->indrels[i], indstats);
	}

	/*
	 * We have completed the index vacuum so decrement the active worker
	 * count.
	 */
	if (VacuumActiveNWorkers)
		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
}

/*
 * Vacuum or cleanup index either by leader process or by one of the worker
 * process.  After vacuuming the index this function copies the index
 * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
 * segment.
 */
static void
parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
								  PVIndStats *indstats)
{
	IndexBulkDeleteResult *istat = NULL;
	IndexBulkDeleteResult *istat_res;
	IndexVacuumInfo ivinfo;

	/*
	 * Update the pointer to the corresponding bulk-deletion result if someone
	 * has already updated it
	 */
	if (indstats->istat_updated)
		istat = &(indstats->istat);

	ivinfo.index = indrel;
	ivinfo.analyze_only = false;
	ivinfo.report_progress = false;
	ivinfo.message_level = DEBUG2;
	ivinfo.estimated_count = pvs->shared->estimated_count;
	ivinfo.num_heap_tuples = pvs->shared->reltuples;
	ivinfo.strategy = pvs->bstrategy;

	/* Update error traceback information */
	pvs->indname = pstrdup(RelationGetRelationName(indrel));
	pvs->status = indstats->status;

	switch (indstats->status)
	{
		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
			break;
		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
			istat_res = vac_cleanup_one_index(&ivinfo, istat);
			break;
		default:
			elog(ERROR, "unexpected parallel vacuum index status %d for index \"%s\"",
				 indstats->status,
				 RelationGetRelationName(indrel));
	}

	/*
	 * Copy the index bulk-deletion result returned from ambulkdelete and
	 * amvacuumcleanup to the DSM segment if it's the first cycle because they
	 * allocate locally and it's possible that an index will be vacuumed by a
	 * different vacuum process the next cycle.  Copying the result normally
	 * happens only the first time an index is vacuumed.  For any additional
	 * vacuum pass, we directly point to the result on the DSM segment and
	 * pass it to vacuum index APIs so that workers can update it directly.
	 *
	 * Since all vacuum workers write the bulk-deletion result at different
	 * slots we can write them without locking.
	 */
	if (!indstats->istat_updated && istat_res != NULL)
	{
		memcpy(&(indstats->istat), istat_res, sizeof(IndexBulkDeleteResult));
		indstats->istat_updated = true;

		/* Free the locally-allocated bulk-deletion result */
		pfree(istat_res);
	}

	/*
	 * Update the status to completed.  No need to lock here since each worker
	 * touches different indexes.
	 */
	indstats->status = PARALLEL_INDVAC_STATUS_COMPLETED;

	/* Reset error traceback information */
	pvs->status = PARALLEL_INDVAC_STATUS_COMPLETED;
	pfree(pvs->indname);
	pvs->indname = NULL;
}

/*
 * Returns false, if the given index can't participate in the next execution of
 * parallel index vacuum or parallel index cleanup.
 */
static bool
parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
									   bool vacuum)
{
	uint8		vacoptions;

	vacoptions = indrel->rd_indam->amparallelvacuumoptions;

	/* In parallel vacuum case, check if it supports parallel bulk-deletion */
	if (vacuum)
		return ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0);

	/* Not safe, if the index does not support parallel cleanup */
	if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
		((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
		return false;

	/*
	 * Not safe, if the index supports parallel cleanup conditionally, but we
	 * have already processed the index (for bulkdelete).  We do this to avoid
	 * the need to invoke workers when parallel index cleanup doesn't need to
	 * scan the index.  See the comments for option
	 * VACUUM_OPTION_PARALLEL_COND_CLEANUP to know when indexes support
	 * parallel cleanup conditionally.
	 */
	if (num_index_scans > 0 &&
		((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
		return false;

	return true;
}

/*
 * Perform work within a launched parallel process.
 *
 * Since parallel vacuum workers perform only index vacuum or index cleanup,
 * we don't need to report progress information.
 */
void
parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
{
	ParallelVacuumState pvs;
	Relation	rel;
	Relation   *indrels;
	PVIndStats *indstats;
	PVShared   *shared;
	VacDeadItems *dead_items;
	BufferUsage *buffer_usage;
	WalUsage   *wal_usage;
	int			nindexes;
	char	   *sharedquery;
	ErrorContextCallback errcallback;

	/*
	 * A parallel vacuum worker must have only PROC_IN_VACUUM flag since we
	 * don't support parallel vacuum for autovacuum as of now.
	 */
	Assert(MyProc->statusFlags == PROC_IN_VACUUM);

	elog(DEBUG1, "starting parallel vacuum worker");

	shared = (PVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, false);

	/* Set debug_query_string for individual workers */
	sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
	debug_query_string = sharedquery;
	pgstat_report_activity(STATE_RUNNING, debug_query_string);

	/*
	 * Open table.  The lock mode is the same as the leader process.  It's
	 * okay because the lock mode does not conflict among the parallel
	 * workers.
	 */
	rel = table_open(shared->relid, ShareUpdateExclusiveLock);

	/*
	 * Open all indexes.  indrels are sorted in order by OID, which should be
	 * matched to the leader's one.
	 */
	vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
	Assert(nindexes > 0);

	if (shared->maintenance_work_mem_worker > 0)
		maintenance_work_mem = shared->maintenance_work_mem_worker;

	/* Set index statistics */
	indstats = (PVIndStats *) shm_toc_lookup(toc,
											 PARALLEL_VACUUM_KEY_INDEX_STATS,
											 false);

	/* Set dead_items space */
	dead_items = (VacDeadItems *) shm_toc_lookup(toc,
												 PARALLEL_VACUUM_KEY_DEAD_ITEMS,
												 false);

	/* Set cost-based vacuum delay */
	VacuumCostActive = (VacuumCostDelay > 0);
	VacuumCostBalance = 0;
	VacuumPageHit = 0;
	VacuumPageMiss = 0;
	VacuumPageDirty = 0;
	VacuumCostBalanceLocal = 0;
	VacuumSharedCostBalance = &(shared->cost_balance);
	VacuumActiveNWorkers = &(shared->active_nworkers);

	/* Set parallel vacuum state */
	pvs.indrels = indrels;
	pvs.nindexes = nindexes;
	pvs.indstats = indstats;
	pvs.shared = shared;
	pvs.dead_items = dead_items;
	pvs.relnamespace = get_namespace_name(RelationGetNamespace(rel));
	pvs.relname = pstrdup(RelationGetRelationName(rel));

	/* These fields will be filled during index vacuum or cleanup */
	pvs.indname = NULL;
	pvs.status = PARALLEL_INDVAC_STATUS_INITIAL;

	/* Each parallel VACUUM worker gets its own access strategy */
	pvs.bstrategy = GetAccessStrategy(BAS_VACUUM);

	/* Setup error traceback support for ereport() */
	errcallback.callback = parallel_vacuum_error_callback;
	errcallback.arg = &pvs;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* Prepare to track buffer usage during parallel execution */
	InstrStartParallelQuery();

	/* Process indexes to perform vacuum/cleanup */
	parallel_vacuum_process_safe_indexes(&pvs);

	/* Report buffer/WAL usage during parallel execution */
	buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
	wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
	InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
						  &wal_usage[ParallelWorkerNumber]);

	/* Pop the error context stack */
	error_context_stack = errcallback.previous;

	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
	table_close(rel, ShareUpdateExclusiveLock);
	FreeAccessStrategy(pvs.bstrategy);
}

/*
 * Error context callback for errors occurring during parallel index vacuum.
 * The error context messages should match the messages set in the lazy vacuum
 * error context.  If you change this function, change vacuum_error_callback()
 * as well.
 */
static void
parallel_vacuum_error_callback(void *arg)
{
	ParallelVacuumState *errinfo = arg;

	switch (errinfo->status)
	{
		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
			errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
					   errinfo->indname,
					   errinfo->relnamespace,
					   errinfo->relname);
			break;
		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
			errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
					   errinfo->indname,
					   errinfo->relnamespace,
					   errinfo->relname);
			break;
		case PARALLEL_INDVAC_STATUS_INITIAL:
		case PARALLEL_INDVAC_STATUS_COMPLETED:
		default:
			return;
	}
}
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
new file mode 100644
index 0000000..e5ddcda
--- /dev/null
+++ b/src/backend/commands/variable.c
@@ -0,0 +1,935 @@
/*-------------------------------------------------------------------------
 *
 * variable.c
 *		Routines for handling specialized SET variables.
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/variable.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

/* NOTE(review): header name lost in extraction -- presumably <ctype.h>; confirm against upstream */
#include

#include "access/htup_details.h"
#include "access/parallel.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/pg_authid.h"
#include "commands/variable.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"

/*
 * DATESTYLE
 */

/*
 * check_datestyle: GUC check_hook for datestyle
 *
 * Parses a comma-separated list of style/order keywords, canonicalizes it
 * (e.g. "iso, ymd" -> "ISO, YMD"), and builds the two-int "extra" struct
 * consumed by assign_datestyle.  Returns false on syntax errors or
 * conflicting specifications.  The canonical string and extra struct are
 * malloc'd, as GUC requires.
 */
bool
check_datestyle(char **newval, void **extra, GucSource source)
{
	int			newDateStyle = DateStyle;
	int			newDateOrder = DateOrder;
	bool		have_style = false;
	bool		have_order = false;
	bool		ok = true;
	char	   *rawstring;
	int		   *myextra;
	char	   *result;
	List	   *elemlist;
	ListCell   *l;

	/* Need a modifiable copy of string */
	rawstring = pstrdup(*newval);

	/* Parse string into list of identifiers */
	if (!SplitIdentifierString(rawstring, ',', &elemlist))
	{
		/* syntax error in list */
		GUC_check_errdetail("List syntax is invalid.");
		pfree(rawstring);
		list_free(elemlist);
		return false;
	}

	foreach(l, elemlist)
	{
		char	   *tok = (char *) lfirst(l);

		/* Ugh. Somebody ought to write a table driven version -- mjl */

		if (pg_strcasecmp(tok, "ISO") == 0)
		{
			if (have_style && newDateStyle != USE_ISO_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_ISO_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "SQL") == 0)
		{
			if (have_style && newDateStyle != USE_SQL_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_SQL_DATES;
			have_style = true;
		}
		else if (pg_strncasecmp(tok, "POSTGRES", 8) == 0)
		{
			if (have_style && newDateStyle != USE_POSTGRES_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_POSTGRES_DATES;
			have_style = true;
		}
		else if (pg_strcasecmp(tok, "GERMAN") == 0)
		{
			if (have_style && newDateStyle != USE_GERMAN_DATES)
				ok = false;		/* conflicting styles */
			newDateStyle = USE_GERMAN_DATES;
			have_style = true;
			/* GERMAN also sets DMY, unless explicitly overridden */
			if (!have_order)
				newDateOrder = DATEORDER_DMY;
		}
		else if (pg_strcasecmp(tok, "YMD") == 0)
		{
			if (have_order && newDateOrder != DATEORDER_YMD)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_YMD;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DMY") == 0 ||
				 pg_strncasecmp(tok, "EURO", 4) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_DMY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_DMY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "MDY") == 0 ||
				 pg_strcasecmp(tok, "US") == 0 ||
				 pg_strncasecmp(tok, "NONEURO", 7) == 0)
		{
			if (have_order && newDateOrder != DATEORDER_MDY)
				ok = false;		/* conflicting orders */
			newDateOrder = DATEORDER_MDY;
			have_order = true;
		}
		else if (pg_strcasecmp(tok, "DEFAULT") == 0)
		{
			/*
			 * Easiest way to get the current DEFAULT state is to fetch the
			 * DEFAULT string from guc.c and recursively parse it.
			 *
			 * We can't simply "return check_datestyle(...)" because we need
			 * to handle constructs like "DEFAULT, ISO".
			 */
			char	   *subval;
			void	   *subextra = NULL;

			subval = strdup(GetConfigOptionResetString("datestyle"));
			if (!subval)
			{
				ok = false;
				break;
			}
			if (!check_datestyle(&subval, &subextra, source))
			{
				free(subval);
				ok = false;
				break;
			}
			myextra = (int *) subextra;
			/* only adopt the sub-parse's settings not already given explicitly */
			if (!have_style)
				newDateStyle = myextra[0];
			if (!have_order)
				newDateOrder = myextra[1];
			free(subval);
			free(subextra);
		}
		else
		{
			GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
			pfree(rawstring);
			list_free(elemlist);
			return false;
		}
	}

	pfree(rawstring);
	list_free(elemlist);

	if (!ok)
	{
		GUC_check_errdetail("Conflicting \"datestyle\" specifications.");
		return false;
	}

	/*
	 * Prepare the canonical string to return.  GUC wants it malloc'd.
	 */
	result = (char *) malloc(32);
	if (!result)
		return false;

	switch (newDateStyle)
	{
		case USE_ISO_DATES:
			strcpy(result, "ISO");
			break;
		case USE_SQL_DATES:
			strcpy(result, "SQL");
			break;
		case USE_GERMAN_DATES:
			strcpy(result, "German");
			break;
		default:
			strcpy(result, "Postgres");
			break;
	}
	switch (newDateOrder)
	{
		case DATEORDER_YMD:
			strcat(result, ", YMD");
			break;
		case DATEORDER_DMY:
			strcat(result, ", DMY");
			break;
		default:
			strcat(result, ", MDY");
			break;
	}

	free(*newval);
	*newval = result;

	/*
	 * Set up the "extra" struct actually used by assign_datestyle.
	 */
	myextra = (int *) malloc(2 * sizeof(int));
	if (!myextra)
		return false;
	myextra[0] = newDateStyle;
	myextra[1] = newDateOrder;
	*extra = (void *) myextra;

	return true;
}

/*
 * assign_datestyle: GUC assign_hook for datestyle
 *
 * Installs the style/order pair computed by check_datestyle into the
 * process-global DateStyle/DateOrder variables.
 */
void
assign_datestyle(const char *newval, void *extra)
{
	int		   *myextra = (int *) extra;

	DateStyle = myextra[0];
	DateOrder = myextra[1];
}


/*
 * TIMEZONE
 */

/*
 * check_timezone: GUC check_hook for timezone
 */
bool
check_timezone(char **newval, void **extra, GucSource source)
{
	pg_tz	   *new_tz;
	long		gmtoffset;
	char	   *endptr;
	double		hours;

	if (pg_strncasecmp(*newval, "interval", 8) == 0)
	{
		/*
		 * Support INTERVAL 'foo'.  This is for SQL spec compliance, not
		 * because it has any actual real-world usefulness.
		 */
		const char *valueptr = *newval;
		char	   *val;
		Interval   *interval;

		valueptr += 8;
		while (isspace((unsigned char) *valueptr))
			valueptr++;
		if (*valueptr++ != '\'')
			return false;
		val = pstrdup(valueptr);
		/* Check and remove trailing quote */
		endptr = strchr(val, '\'');
		if (!endptr || endptr[1] != '\0')
		{
			pfree(val);
			return false;
		}
		*endptr = '\0';

		/*
		 * Try to parse it.  XXX an invalid interval format will result in
		 * ereport(ERROR), which is not desirable for GUC.  We did what we
		 * could to guard against this in flatten_set_variable_args, but a
		 * string coming in from postgresql.conf might contain anything.
+ */ + interval = DatumGetIntervalP(DirectFunctionCall3(interval_in, + CStringGetDatum(val), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + + pfree(val); + if (interval->month != 0) + { + GUC_check_errdetail("Cannot specify months in time zone interval."); + pfree(interval); + return false; + } + if (interval->day != 0) + { + GUC_check_errdetail("Cannot specify days in time zone interval."); + pfree(interval); + return false; + } + + /* Here we change from SQL to Unix sign convention */ + gmtoffset = -(interval->time / USECS_PER_SEC); + new_tz = pg_tzset_offset(gmtoffset); + + pfree(interval); + } + else + { + /* + * Try it as a numeric number of hours (possibly fractional). + */ + hours = strtod(*newval, &endptr); + if (endptr != *newval && *endptr == '\0') + { + /* Here we change from SQL to Unix sign convention */ + gmtoffset = -hours * SECS_PER_HOUR; + new_tz = pg_tzset_offset(gmtoffset); + } + else + { + /* + * Otherwise assume it is a timezone name, and try to load it. + */ + new_tz = pg_tzset(*newval); + + if (!new_tz) + { + /* Doesn't seem to be any great value in errdetail here */ + return false; + } + + if (!pg_tz_acceptable(new_tz)) + { + GUC_check_errmsg("time zone \"%s\" appears to use leap seconds", + *newval); + GUC_check_errdetail("PostgreSQL does not support leap seconds."); + return false; + } + } + } + + /* Test for failure in pg_tzset_offset, which we assume is out-of-range */ + if (!new_tz) + { + GUC_check_errdetail("UTC timezone offset is out of range."); + return false; + } + + /* + * Pass back data for assign_timezone to use + */ + *extra = malloc(sizeof(pg_tz *)); + if (!*extra) + return false; + *((pg_tz **) *extra) = new_tz; + + return true; +} + +/* + * assign_timezone: GUC assign_hook for timezone + */ +void +assign_timezone(const char *newval, void *extra) +{ + session_timezone = *((pg_tz **) extra); +} + +/* + * show_timezone: GUC show_hook for timezone + */ +const char * +show_timezone(void) +{ + const char *tzn; + + /* 
Always show the zone's canonical name */ + tzn = pg_get_timezone_name(session_timezone); + + if (tzn != NULL) + return tzn; + + return "unknown"; +} + + +/* + * LOG_TIMEZONE + * + * For log_timezone, we don't support the interval-based methods of setting a + * zone, which are only there for SQL spec compliance not because they're + * actually useful. + */ + +/* + * check_log_timezone: GUC check_hook for log_timezone + */ +bool +check_log_timezone(char **newval, void **extra, GucSource source) +{ + pg_tz *new_tz; + + /* + * Assume it is a timezone name, and try to load it. + */ + new_tz = pg_tzset(*newval); + + if (!new_tz) + { + /* Doesn't seem to be any great value in errdetail here */ + return false; + } + + if (!pg_tz_acceptable(new_tz)) + { + GUC_check_errmsg("time zone \"%s\" appears to use leap seconds", + *newval); + GUC_check_errdetail("PostgreSQL does not support leap seconds."); + return false; + } + + /* + * Pass back data for assign_log_timezone to use + */ + *extra = malloc(sizeof(pg_tz *)); + if (!*extra) + return false; + *((pg_tz **) *extra) = new_tz; + + return true; +} + +/* + * assign_log_timezone: GUC assign_hook for log_timezone + */ +void +assign_log_timezone(const char *newval, void *extra) +{ + log_timezone = *((pg_tz **) extra); +} + +/* + * show_log_timezone: GUC show_hook for log_timezone + */ +const char * +show_log_timezone(void) +{ + const char *tzn; + + /* Always show the zone's canonical name */ + tzn = pg_get_timezone_name(log_timezone); + + if (tzn != NULL) + return tzn; + + return "unknown"; +} + + +/* + * SET TRANSACTION READ ONLY and SET TRANSACTION READ WRITE + * + * We allow idempotent changes (r/w -> r/w and r/o -> r/o) at any time, and + * we also always allow changes from read-write to read-only. However, + * read-only may be changed to read-write only when in a top-level transaction + * that has not yet taken an initial snapshot. Can't do it in a hot standby, + * either. 
+ * + * If we are not in a transaction at all, just allow the change; it means + * nothing since XactReadOnly will be reset by the next StartTransaction(). + * The IsTransactionState() test protects us against trying to check + * RecoveryInProgress() in contexts where shared memory is not accessible. + * (Similarly, if we're restoring state in a parallel worker, just allow + * the change.) + */ +bool +check_transaction_read_only(bool *newval, void **extra, GucSource source) +{ + if (*newval == false && XactReadOnly && IsTransactionState() && !InitializingParallelWorker) + { + /* Can't go to r/w mode inside a r/o transaction */ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("cannot set transaction read-write mode inside a read-only transaction"); + return false; + } + /* Top level transaction can't change to r/w after first snapshot. */ + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("transaction read-write mode must be set before any query"); + return false; + } + /* Can't go to r/w mode while recovery is still active */ + if (RecoveryInProgress()) + { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("cannot set transaction read-write mode during recovery"); + return false; + } + } + + return true; +} + +/* + * SET TRANSACTION ISOLATION LEVEL + * + * We allow idempotent changes at any time, but otherwise this can only be + * changed in a toplevel transaction that has not yet taken a snapshot. + * + * As in check_transaction_read_only, allow it if not inside a transaction. 
+ */ +bool +check_XactIsoLevel(int *newval, void **extra, GucSource source) +{ + int newXactIsoLevel = *newval; + + if (newXactIsoLevel != XactIsoLevel && IsTransactionState()) + { + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must be called before any query"); + return false; + } + /* We ignore a subtransaction setting it to the existing value. */ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION ISOLATION LEVEL must not be called in a subtransaction"); + return false; + } + /* Can't go to serializable mode while recovery is still active */ + if (newXactIsoLevel == XACT_SERIALIZABLE && RecoveryInProgress()) + { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("cannot use serializable mode in a hot standby"); + GUC_check_errhint("You can use REPEATABLE READ instead."); + return false; + } + } + + return true; +} + +/* + * SET TRANSACTION [NOT] DEFERRABLE + */ + +bool +check_transaction_deferrable(bool *newval, void **extra, GucSource source) +{ + if (IsSubTransaction()) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE cannot be called within a subtransaction"); + return false; + } + if (FirstSnapshotSet) + { + GUC_check_errcode(ERRCODE_ACTIVE_SQL_TRANSACTION); + GUC_check_errmsg("SET TRANSACTION [NOT] DEFERRABLE must be called before any query"); + return false; + } + + return true; +} + +/* + * Random number seed + * + * We can't roll back the random sequence on error, and we don't want + * config file reloads to affect it, so we only want interactive SET SEED + * commands to set it. We use the "extra" storage to ensure that rollbacks + * don't try to do the operation again. 
+ */ + +bool +check_random_seed(double *newval, void **extra, GucSource source) +{ + *extra = malloc(sizeof(int)); + if (!*extra) + return false; + /* Arm the assign only if source of value is an interactive SET */ + *((int *) *extra) = (source >= PGC_S_INTERACTIVE); + + return true; +} + +void +assign_random_seed(double newval, void *extra) +{ + /* We'll do this at most once for any setting of the GUC variable */ + if (*((int *) extra)) + DirectFunctionCall1(setseed, Float8GetDatum(newval)); + *((int *) extra) = 0; +} + +const char * +show_random_seed(void) +{ + return "unavailable"; +} + + +/* + * SET CLIENT_ENCODING + */ + +bool +check_client_encoding(char **newval, void **extra, GucSource source) +{ + int encoding; + const char *canonical_name; + + /* Look up the encoding by name */ + encoding = pg_valid_client_encoding(*newval); + if (encoding < 0) + return false; + + /* Get the canonical name (no aliases, uniform case) */ + canonical_name = pg_encoding_to_char(encoding); + + /* + * If we are not within a transaction then PrepareClientEncoding will not + * be able to look up the necessary conversion procs. If we are still + * starting up, it will return "OK" anyway, and InitializeClientEncoding + * will fix things once initialization is far enough along. After + * startup, we'll fail. This would only happen if someone tries to change + * client_encoding in postgresql.conf and then SIGHUP existing sessions. + * It seems like a bad idea for client_encoding to change that way anyhow, + * so we don't go out of our way to support it. + * + * Note: in the postmaster, or any other process that never calls + * InitializeClientEncoding, PrepareClientEncoding will always succeed, + * and so will SetClientEncoding; but they won't do anything, which is OK. 
+ */ + if (PrepareClientEncoding(encoding) < 0) + { + if (IsTransactionState()) + { + /* Must be a genuine no-such-conversion problem */ + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errdetail("Conversion between %s and %s is not supported.", + canonical_name, + GetDatabaseEncodingName()); + } + else + { + /* Provide a useful complaint */ + GUC_check_errdetail("Cannot change \"client_encoding\" now."); + } + return false; + } + + /* + * Replace the user-supplied string with the encoding's canonical name. + * This gets rid of aliases and case-folding variations. + * + * XXX Although canonicalizing seems like a good idea in the abstract, it + * breaks pre-9.1 JDBC drivers, which expect that if they send "UNICODE" + * as the client_encoding setting then it will read back the same way. As + * a workaround, don't replace the string if it's "UNICODE". Remove that + * hack when pre-9.1 JDBC drivers are no longer in use. + */ + if (strcmp(*newval, canonical_name) != 0 && + strcmp(*newval, "UNICODE") != 0) + { + free(*newval); + *newval = strdup(canonical_name); + if (!*newval) + return false; + } + + /* + * Save the encoding's ID in *extra, for use by assign_client_encoding. + */ + *extra = malloc(sizeof(int)); + if (!*extra) + return false; + *((int *) *extra) = encoding; + + return true; +} + +void +assign_client_encoding(const char *newval, void *extra) +{ + int encoding = *((int *) extra); + + /* + * Parallel workers send data to the leader, not the client. They always + * send data using the database encoding. + */ + if (IsParallelWorker()) + { + /* + * During parallel worker startup, we want to accept the leader's + * client_encoding setting so that anyone who looks at the value in + * the worker sees the same value that they would see in the leader. 
+ */ + if (InitializingParallelWorker) + return; + + /* + * A change other than during startup, for example due to a SET clause + * attached to a function definition, should be rejected, as there is + * nothing we can do inside the worker to make it take effect. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot change client_encoding during a parallel operation"))); + } + + /* We do not expect an error if PrepareClientEncoding succeeded */ + if (SetClientEncoding(encoding) < 0) + elog(LOG, "SetClientEncoding(%d) failed", encoding); +} + + +/* + * SET SESSION AUTHORIZATION + */ + +typedef struct +{ + /* This is the "extra" state for both SESSION AUTHORIZATION and ROLE */ + Oid roleid; + bool is_superuser; +} role_auth_extra; + +bool +check_session_authorization(char **newval, void **extra, GucSource source) +{ + HeapTuple roleTup; + Form_pg_authid roleform; + Oid roleid; + bool is_superuser; + role_auth_extra *myextra; + + /* Do nothing for the boot_val default of NULL */ + if (*newval == NULL) + return true; + + if (!IsTransactionState()) + { + /* + * Can't do catalog lookups, so fail. The result of this is that + * session_authorization cannot be set in postgresql.conf, which seems + * like a good thing anyway, so we don't work hard to avoid it. + */ + return false; + } + + /* Look up the username */ + roleTup = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval)); + if (!HeapTupleIsValid(roleTup)) + { + /* + * When source == PGC_S_TEST, we don't throw a hard error for a + * nonexistent user name, only a NOTICE. See comments in guc.h. 
+ */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", *newval))); + return true; + } + GUC_check_errmsg("role \"%s\" does not exist", *newval); + return false; + } + + roleform = (Form_pg_authid) GETSTRUCT(roleTup); + roleid = roleform->oid; + is_superuser = roleform->rolsuper; + + ReleaseSysCache(roleTup); + + /* Set up "extra" struct for assign_session_authorization to use */ + myextra = (role_auth_extra *) malloc(sizeof(role_auth_extra)); + if (!myextra) + return false; + myextra->roleid = roleid; + myextra->is_superuser = is_superuser; + *extra = (void *) myextra; + + return true; +} + +void +assign_session_authorization(const char *newval, void *extra) +{ + role_auth_extra *myextra = (role_auth_extra *) extra; + + /* Do nothing for the boot_val default of NULL */ + if (!myextra) + return; + + SetSessionAuthorization(myextra->roleid, myextra->is_superuser); +} + + +/* + * SET ROLE + * + * The SQL spec requires "SET ROLE NONE" to unset the role, so we hardwire + * a translation of "none" to InvalidOid. Otherwise this is much like + * SET SESSION AUTHORIZATION. + */ +extern char *role_string; /* in guc.c */ + +bool +check_role(char **newval, void **extra, GucSource source) +{ + HeapTuple roleTup; + Oid roleid; + bool is_superuser; + role_auth_extra *myextra; + Form_pg_authid roleform; + + if (strcmp(*newval, "none") == 0) + { + /* hardwired translation */ + roleid = InvalidOid; + is_superuser = false; + } + else + { + if (!IsTransactionState()) + { + /* + * Can't do catalog lookups, so fail. The result of this is that + * role cannot be set in postgresql.conf, which seems like a good + * thing anyway, so we don't work hard to avoid it. + */ + return false; + } + + /* + * When source == PGC_S_TEST, we don't throw a hard error for a + * nonexistent user name or insufficient privileges, only a NOTICE. + * See comments in guc.h. 
+ */ + + /* Look up the username */ + roleTup = SearchSysCache1(AUTHNAME, PointerGetDatum(*newval)); + if (!HeapTupleIsValid(roleTup)) + { + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("role \"%s\" does not exist", *newval))); + return true; + } + GUC_check_errmsg("role \"%s\" does not exist", *newval); + return false; + } + + roleform = (Form_pg_authid) GETSTRUCT(roleTup); + roleid = roleform->oid; + is_superuser = roleform->rolsuper; + + ReleaseSysCache(roleTup); + + /* + * Verify that session user is allowed to become this role, but skip + * this in parallel mode, where we must blindly recreate the parallel + * leader's state. + */ + if (!InitializingParallelWorker && + !is_member_of_role(GetSessionUserId(), roleid)) + { + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission will be denied to set role \"%s\"", + *newval))); + return true; + } + GUC_check_errcode(ERRCODE_INSUFFICIENT_PRIVILEGE); + GUC_check_errmsg("permission denied to set role \"%s\"", + *newval); + return false; + } + } + + /* Set up "extra" struct for assign_role to use */ + myextra = (role_auth_extra *) malloc(sizeof(role_auth_extra)); + if (!myextra) + return false; + myextra->roleid = roleid; + myextra->is_superuser = is_superuser; + *extra = (void *) myextra; + + return true; +} + +void +assign_role(const char *newval, void *extra) +{ + role_auth_extra *myextra = (role_auth_extra *) extra; + + SetCurrentRoleId(myextra->roleid, myextra->is_superuser); +} + +const char * +show_role(void) +{ + /* + * Check whether SET ROLE is active; if not return "none". This is a + * kluge to deal with the fact that SET SESSION AUTHORIZATION logically + * resets SET ROLE to NONE, but we cannot set the GUC role variable from + * assign_session_authorization (because we haven't got enough info to + * call set_config_option). 
+ */ + if (!OidIsValid(GetCurrentRoleId())) + return "none"; + + /* Otherwise we can just use the GUC string */ + return role_string ? role_string : "none"; +} diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c new file mode 100644 index 0000000..b5a0fc0 --- /dev/null +++ b/src/backend/commands/view.c @@ -0,0 +1,604 @@ +/*------------------------------------------------------------------------- + * + * view.c + * use rewrite rules to construct views + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/view.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relation.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "commands/tablecmds.h" +#include "commands/view.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/analyze.h" +#include "parser/parse_relation.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rewriteSupport.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +static void checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc); + +/*--------------------------------------------------------------------- + * DefineVirtualRelation + * + * Create a view relation and use the rules system to store the query + * for the view. + * + * EventTriggerAlterTableStart must have been called already. 
+ *--------------------------------------------------------------------- + */ +static ObjectAddress +DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace, + List *options, Query *viewParse) +{ + Oid viewOid; + LOCKMODE lockmode; + CreateStmt *createStmt = makeNode(CreateStmt); + List *attrList; + ListCell *t; + + /* + * create a list of ColumnDef nodes based on the names and types of the + * (non-junk) targetlist items from the view's SELECT list. + */ + attrList = NIL; + foreach(t, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(t); + + if (!tle->resjunk) + { + ColumnDef *def = makeColumnDef(tle->resname, + exprType((Node *) tle->expr), + exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr)); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. + */ + if (type_is_collatable(exprType((Node *) tle->expr))) + { + if (!OidIsValid(def->collOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for view column \"%s\"", + def->colname), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + else + Assert(!OidIsValid(def->collOid)); + + attrList = lappend(attrList, def); + } + } + + /* + * Look up, check permissions on, and lock the creation namespace; also + * check for a preexisting view with the same name. This will also set + * relation->relpersistence to RELPERSISTENCE_TEMP if the selected + * namespace is temporary. + */ + lockmode = replace ? AccessExclusiveLock : NoLock; + (void) RangeVarGetAndCheckCreationNamespace(relation, lockmode, &viewOid); + + if (OidIsValid(viewOid) && replace) + { + Relation rel; + TupleDesc descriptor; + List *atcmds = NIL; + AlterTableCmd *atcmd; + ObjectAddress address; + + /* Relation is already locked, but we must build a relcache entry. */ + rel = relation_open(viewOid, NoLock); + + /* Make sure it *is* a view. 
*/ + if (rel->rd_rel->relkind != RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a view", + RelationGetRelationName(rel)))); + + /* Also check it's not in use already */ + CheckTableNotInUse(rel, "CREATE OR REPLACE VIEW"); + + /* + * Due to the namespace visibility rules for temporary objects, we + * should only end up replacing a temporary view with another + * temporary view, and similarly for permanent views. + */ + Assert(relation->relpersistence == rel->rd_rel->relpersistence); + + /* + * Create a tuple descriptor to compare against the existing view, and + * verify that the old column list is an initial prefix of the new + * column list. + */ + descriptor = BuildDescForRelation(attrList); + checkViewTupleDesc(descriptor, rel->rd_att); + + /* + * If new attributes have been added, we must add pg_attribute entries + * for them. It is convenient (although overkill) to use the ALTER + * TABLE ADD COLUMN infrastructure for this. + * + * Note that we must do this before updating the query for the view, + * since the rules system requires that the correct view columns be in + * place when defining the new rules. + * + * Also note that ALTER TABLE doesn't run parse transformation on + * AT_AddColumnToView commands. The ColumnDef we supply must be ready + * to execute as-is. + */ + if (list_length(attrList) > rel->rd_att->natts) + { + ListCell *c; + int skip = rel->rd_att->natts; + + foreach(c, attrList) + { + if (skip > 0) + { + skip--; + continue; + } + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_AddColumnToView; + atcmd->def = (Node *) lfirst(c); + atcmds = lappend(atcmds, atcmd); + } + + /* EventTriggerAlterTableStart called by ProcessUtilitySlow */ + AlterTableInternal(viewOid, atcmds, true); + + /* Make the new view columns visible */ + CommandCounterIncrement(); + } + + /* + * Update the query for the view. 
+ * + * Note that we must do this before updating the view options, because + * the new options may not be compatible with the old view query (for + * example if we attempt to add the WITH CHECK OPTION, we require that + * the new view be automatically updatable, but the old view may not + * have been). + */ + StoreViewQuery(viewOid, viewParse, replace); + + /* Make the new view query visible */ + CommandCounterIncrement(); + + /* + * Update the view's options. + * + * The new options list replaces the existing options list, even if + * it's empty. + */ + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_ReplaceRelOptions; + atcmd->def = (Node *) options; + atcmds = list_make1(atcmd); + + /* EventTriggerAlterTableStart called by ProcessUtilitySlow */ + AlterTableInternal(viewOid, atcmds, true); + + /* + * There is very little to do here to update the view's dependencies. + * Most view-level dependency relationships, such as those on the + * owner, schema, and associated composite type, aren't changing. + * Because we don't allow changing type or collation of an existing + * view column, those dependencies of the existing columns don't + * change either, while the AT_AddColumnToView machinery took care of + * adding such dependencies for new view columns. The dependencies of + * the view's query could have changed arbitrarily, but that was dealt + * with inside StoreViewQuery. What remains is only to check that + * view replacement is allowed when we're creating an extension. + */ + ObjectAddressSet(address, RelationRelationId, viewOid); + + recordDependencyOnCurrentExtension(&address, true); + + /* + * Seems okay, so return the OID of the pre-existing view. + */ + relation_close(rel, NoLock); /* keep the lock! */ + + return address; + } + else + { + ObjectAddress address; + + /* + * Set the parameters for keys/inheritance etc. All of these are + * uninteresting for views... 
+ */ + createStmt->relation = relation; + createStmt->tableElts = attrList; + createStmt->inhRelations = NIL; + createStmt->constraints = NIL; + createStmt->options = options; + createStmt->oncommit = ONCOMMIT_NOOP; + createStmt->tablespacename = NULL; + createStmt->if_not_exists = false; + + /* + * Create the relation (this will error out if there's an existing + * view, so we don't need more code to complain if "replace" is + * false). + */ + address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL, + NULL); + Assert(address.objectId != InvalidOid); + + /* Make the new view relation visible */ + CommandCounterIncrement(); + + /* Store the query for the view */ + StoreViewQuery(address.objectId, viewParse, replace); + + return address; + } +} + +/* + * Verify that tupledesc associated with proposed new view definition + * matches tupledesc of old view. This is basically a cut-down version + * of equalTupleDescs(), with code added to generate specific complaints. + * Also, we allow the new tupledesc to have more columns than the old. + */ +static void +checkViewTupleDesc(TupleDesc newdesc, TupleDesc olddesc) +{ + int i; + + if (newdesc->natts < olddesc->natts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop columns from view"))); + + for (i = 0; i < olddesc->natts; i++) + { + Form_pg_attribute newattr = TupleDescAttr(newdesc, i); + Form_pg_attribute oldattr = TupleDescAttr(olddesc, i); + + /* XXX msg not right, but we don't support DROP COL on view anyway */ + if (newattr->attisdropped != oldattr->attisdropped) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop columns from view"))); + + if (strcmp(NameStr(newattr->attname), NameStr(oldattr->attname)) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change name of view column \"%s\" to \"%s\"", + NameStr(oldattr->attname), + NameStr(newattr->attname)), + errhint("Use ALTER VIEW ... 
RENAME COLUMN ... to change name of view column instead."))); + + /* + * We cannot allow type, typmod, or collation to change, since these + * properties may be embedded in Vars of other views/rules referencing + * this one. Other column attributes can be ignored. + */ + if (newattr->atttypid != oldattr->atttypid || + newattr->atttypmod != oldattr->atttypmod) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change data type of view column \"%s\" from %s to %s", + NameStr(oldattr->attname), + format_type_with_typemod(oldattr->atttypid, + oldattr->atttypmod), + format_type_with_typemod(newattr->atttypid, + newattr->atttypmod)))); + + /* + * At this point, attcollations should be both valid or both invalid, + * so applying get_collation_name unconditionally should be fine. + */ + if (newattr->attcollation != oldattr->attcollation) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot change collation of view column \"%s\" from \"%s\" to \"%s\"", + NameStr(oldattr->attname), + get_collation_name(oldattr->attcollation), + get_collation_name(newattr->attcollation)))); + } + + /* + * We ignore the constraint fields. The new view desc can't have any + * constraints, and the only ones that could be on the old view are + * defaults, which we are happy to leave in place. + */ +} + +static void +DefineViewRules(Oid viewOid, Query *viewParse, bool replace) +{ + /* + * Set up the ON SELECT rule. Since the query has already been through + * parse analysis, we use DefineQueryRewrite() directly. + */ + DefineQueryRewrite(pstrdup(ViewSelectRuleName), + viewOid, + NULL, + CMD_SELECT, + true, + replace, + list_make1(viewParse)); + + /* + * Someday: automatic ON INSERT, etc + */ +} + +/*--------------------------------------------------------------- + * UpdateRangeTableOfViewParse + * + * Update the range table of the given parsetree. 
+ * This update consists of adding two new entries IN THE BEGINNING
+ * of the range table (otherwise the rule system will die a slow,
+ * horrible and painful death, and we do not want that now, do we?)
+ * one for the OLD relation and one for the NEW one (both of
+ * them refer in fact to the "view" relation).
+ *
+ * Of course we must also increase the 'varnos' of all the Var nodes
+ * by 2...
+ *
+ * These extra RT entries are not actually used in the query,
+ * except for run-time locking and permission checking.
+ *---------------------------------------------------------------
+ */
+static Query *
+UpdateRangeTableOfViewParse(Oid viewOid, Query *viewParse)
+{
+	Relation	viewRel;
+	List	   *new_rt;
+	ParseNamespaceItem *nsitem;
+	RangeTblEntry *rt_entry1,
+			   *rt_entry2;
+	ParseState *pstate;
+
+	/*
+	 * Make a copy of the given parsetree. It's not so much that we don't
+	 * want to scribble on our input, it's that the parser has a bad habit of
+	 * outputting multiple links to the same subtree for constructs like
+	 * BETWEEN, and we mustn't have OffsetVarNodes increment the varno of a
+	 * Var node twice. copyObject will expand any multiply-referenced subtree
+	 * into multiple copies.
+	 */
+	viewParse = copyObject(viewParse);
+
+	/* Create a dummy ParseState for addRangeTableEntryForRelation */
+	pstate = make_parsestate(NULL);
+
+	/* need to open the rel for addRangeTableEntryForRelation */
+	viewRel = relation_open(viewOid, AccessShareLock);
+
+	/*
+	 * Create the 2 new range table entries and form the new range table...
+	 * OLD first, then NEW....
+	 */
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("old", NIL),
+										   false, false);
+	rt_entry1 = nsitem->p_rte;
+	nsitem = addRangeTableEntryForRelation(pstate, viewRel,
+										   AccessShareLock,
+										   makeAlias("new", NIL),
+										   false, false);
+	rt_entry2 = nsitem->p_rte;
+
+	/* Must override addRangeTableEntry's default access-check flags */
+	rt_entry1->requiredPerms = 0;
+	rt_entry2->requiredPerms = 0;
+
+	/*
+	 * Prepend OLD then NEW, so OLD ends up at RT index 1 and NEW at index 2,
+	 * ahead of all pre-existing entries (hence the offset-by-2 below).
+	 */
+	new_rt = lcons(rt_entry1, lcons(rt_entry2, viewParse->rtable));
+
+	viewParse->rtable = new_rt;
+
+	/*
+	 * Now offset all var nodes by 2, and jointree RT indexes too.
+	 */
+	OffsetVarNodes((Node *) viewParse, 2, 0);
+
+	relation_close(viewRel, AccessShareLock);
+
+	return viewParse;
+}
+
+/*
+ * DefineView
+ *		Execute a CREATE VIEW command.
+ */
+ObjectAddress
+DefineView(ViewStmt *stmt, const char *queryString,
+		   int stmt_location, int stmt_len)
+{
+	RawStmt    *rawstmt;
+	Query	   *viewParse;
+	RangeVar   *view;
+	ListCell   *cell;
+	bool		check_option;
+	ObjectAddress address;
+
+	/*
+	 * Run parse analysis to convert the raw parse tree to a Query. Note this
+	 * also acquires sufficient locks on the source table(s).
+	 */
+	rawstmt = makeNode(RawStmt);
+	rawstmt->stmt = stmt->query;
+	rawstmt->stmt_location = stmt_location;
+	rawstmt->stmt_len = stmt_len;
+
+	viewParse = parse_analyze_fixedparams(rawstmt, queryString, NULL, 0, NULL);
+
+	/*
+	 * The grammar should ensure that the result is a single SELECT Query.
+	 * However, it doesn't forbid SELECT INTO, so we have to check for that.
+	 */
+	if (!IsA(viewParse, Query))
+		elog(ERROR, "unexpected parse analysis result");
+	if (viewParse->utilityStmt != NULL &&
+		IsA(viewParse->utilityStmt, CreateTableAsStmt))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain SELECT INTO")));
+	if (viewParse->commandType != CMD_SELECT)
+		elog(ERROR, "unexpected parse analysis result");
+
+	/*
+	 * Check for unsupported cases.
These tests are redundant with ones in
+	 * DefineQueryRewrite(), but that function will complain about a bogus ON
+	 * SELECT rule, and we'd rather the message complain about a view.
+	 */
+	if (viewParse->hasModifyingCTE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("views must not contain data-modifying statements in WITH")));
+
+	/*
+	 * If the user specified the WITH CHECK OPTION, add it to the list of
+	 * reloptions.
+	 */
+	if (stmt->withCheckOption == LOCAL_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("local"), -1));
+	else if (stmt->withCheckOption == CASCADED_CHECK_OPTION)
+		stmt->options = lappend(stmt->options,
+								makeDefElem("check_option",
+											(Node *) makeString("cascaded"), -1));
+
+	/*
+	 * Check that the view is auto-updatable if WITH CHECK OPTION was
+	 * specified.
+	 */
+	check_option = false;
+
+	foreach(cell, stmt->options)
+	{
+		DefElem    *defel = (DefElem *) lfirst(cell);
+
+		if (strcmp(defel->defname, "check_option") == 0)
+			check_option = true;
+	}
+
+	/*
+	 * If the check option is specified, look to see if the view is actually
+	 * auto-updatable or not.
+	 */
+	if (check_option)
+	{
+		const char *view_updatable_error =
+		view_query_is_auto_updatable(viewParse, true);
+
+		if (view_updatable_error)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("WITH CHECK OPTION is supported only on automatically updatable views"),
+					 errhint("%s", _(view_updatable_error))));
+	}
+
+	/*
+	 * If a list of column names was given, run through and insert these into
+	 * the actual query tree. - thomas 2000-03-08
+	 */
+	if (stmt->aliases != NIL)
+	{
+		ListCell   *alist_item = list_head(stmt->aliases);
+		ListCell   *targetList;
+
+		foreach(targetList, viewParse->targetList)
+		{
+			TargetEntry *te = lfirst_node(TargetEntry, targetList);
+
+			/* junk columns don't get aliases */
+			if (te->resjunk)
+				continue;
+			te->resname = pstrdup(strVal(lfirst(alist_item)));
+			alist_item = lnext(stmt->aliases, alist_item);
+			if (alist_item == NULL)
+				break;			/* done assigning aliases */
+		}
+
+		/*
+		 * Fewer aliases than columns is fine (remaining columns keep their
+		 * query-derived names), but more aliases than columns is an error.
+		 */
+		if (alist_item != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("CREATE VIEW specifies more column "
+							"names than columns")));
+	}
+
+	/* Unlogged views are not sensible. */
+	if (stmt->view->relpersistence == RELPERSISTENCE_UNLOGGED)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("views cannot be unlogged because they do not have storage")));
+
+	/*
+	 * If the user didn't explicitly ask for a temporary view, check whether
+	 * we need one implicitly. We allow TEMP to be inserted automatically as
+	 * long as the CREATE command is consistent with that --- no explicit
+	 * schema name.
+	 */
+	view = copyObject(stmt->view);	/* don't corrupt original command */
+	if (view->relpersistence == RELPERSISTENCE_PERMANENT
+		&& isQueryUsingTempRelation(viewParse))
+	{
+		view->relpersistence = RELPERSISTENCE_TEMP;
+		ereport(NOTICE,
+				(errmsg("view \"%s\" will be a temporary view",
+						view->relname)));
+	}
+
+	/*
+	 * Create the view relation
+	 *
+	 * NOTE: if it already exists and replace is false, the xact will be
+	 * aborted.
+	 */
+	address = DefineVirtualRelation(view, viewParse->targetList,
+									stmt->replace, stmt->options, viewParse);
+
+	return address;
+}
+
+/*
+ * Use the rules system to store the query for the view.
+ */
+void
+StoreViewQuery(Oid viewOid, Query *viewParse, bool replace)
+{
+	/*
+	 * The range table of 'viewParse' does not contain entries for the "OLD"
+	 * and "NEW" relations. So... add them!
+	 */
+	viewParse = UpdateRangeTableOfViewParse(viewOid, viewParse);
+
+	/*
+	 * Now create the rules associated with the view.
+	 */
+	DefineViewRules(viewOid, viewParse, replace);
+}
-- 
cgit v1.2.3