diff options
Diffstat (limited to 'src/backend/commands/createas.c')
-rw-r--r-- | src/backend/commands/createas.c | 637 |
1 files changed, 637 insertions, 0 deletions
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c new file mode 100644 index 0000000..e91920c --- /dev/null +++ b/src/backend/commands/createas.c @@ -0,0 +1,637 @@ +/*------------------------------------------------------------------------- + * + * createas.c + * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. + * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, + * we implement that here, too. + * + * We implement this by diverting the query's normal output to a + * specialized DestReceiver type. + * + * Formerly, CTAS was implemented as a variant of SELECT, which led + * to assorted legacy behaviors that we still try to preserve, notably that + * we must return a tuples-processed count in the QueryCompletion. (We no + * longer do that for CTAS ... WITH NO DATA, however.) + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/createas.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "catalog/toasting.h" +#include "commands/createas.h" +#include "commands/matview.h" +#include "commands/prepare.h" +#include "commands/tablecmds.h" +#include "commands/view.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "rewrite/rewriteHandler.h" +#include "storage/smgr.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/rls.h" +#include "utils/snapmgr.h" + +typedef struct +{ + DestReceiver pub; /* publicly-known function pointers */ + IntoClause *into; /* target relation specification */ + /* These fields are filled by intorel_startup: */ + Relation rel; /* relation to write to */ + ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ + CommandId output_cid; /* cmin to insert in output tuples */ + int ti_options; /* table_tuple_insert performance options */ + BulkInsertState bistate; /* bulk insert state */ +} DR_intorel; + +/* utility functions for CTAS definition creation */ +static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); +static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); + +/* DestReceiver routines for collecting data */ +static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); +static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); +static void intorel_shutdown(DestReceiver *self); +static void intorel_destroy(DestReceiver *self); + + +/* + * create_ctas_internal + * + * Internal utility used for the creation of the definition of a relation + * created via CREATE TABLE AS or a materialized view. Caller needs to + * provide a list of attributes (ColumnDef nodes). + */ +static ObjectAddress +create_ctas_internal(List *attrList, IntoClause *into) +{ + CreateStmt *create = makeNode(CreateStmt); + bool is_matview; + char relkind; + Datum toast_options; + static char *validnsps[] = HEAP_RELOPT_NAMESPACES; + ObjectAddress intoRelationAddr; + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; + + /* + * Create the target relation by faking up a CREATE TABLE parsetree and + * passing it to DefineRelation. + */ + create->relation = into->rel; + create->tableElts = attrList; + create->inhRelations = NIL; + create->ofTypename = NULL; + create->constraints = NIL; + create->options = into->options; + create->oncommit = into->onCommit; + create->tablespacename = into->tableSpaceName; + create->if_not_exists = false; + create->accessMethod = into->accessMethod; + + /* + * Create the relation. (This will error out if there's an existing view, + * so we don't need more code to complain if "replace" is false.) + */ + intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); + + /* + * If necessary, create a TOAST table for the target table. Note that + * NewRelationCreateToastTable ends with CommandCounterIncrement(), so + * that the TOAST table will be visible for insertion. + */ + CommandCounterIncrement(); + + /* parse and validate reloptions for the toast table */ + toast_options = transformRelOptions((Datum) 0, + create->options, + "toast", + validnsps, + true, false); + + (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); + + NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); + + /* Create the "view" part of a materialized view. */ + if (is_matview) + { + /* StoreViewQuery scribbles on tree, so make a copy */ + Query *query = (Query *) copyObject(into->viewQuery); + + StoreViewQuery(intoRelationAddr.objectId, query, false); + CommandCounterIncrement(); + } + + return intoRelationAddr; +} + + +/* + * create_ctas_nodata + * + * Create CTAS or materialized view when WITH NO DATA is used, starting from + * the targetlist of the SELECT or view definition. + */ +static ObjectAddress +create_ctas_nodata(List *tlist, IntoClause *into) +{ + List *attrList; + ListCell *t, + *lc; + + /* + * Build list of ColumnDefs from non-junk elements of the tlist. If a + * column name list was specified in CREATE TABLE AS, override the column + * names in the query. (Too few column names are OK, too many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + foreach(t, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(t); + + if (!tle->resjunk) + { + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = tle->resname; + + col = makeColumnDef(colname, + exprType((Node *) tle->expr), + exprTypmod((Node *) tle->expr), + exprCollation((Node *) tle->expr)); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must + * check this here because DefineRelation would adopt the type's + * default collation rather than complaining.) + */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* Create the relation definition using the ColumnDef list */ + return create_ctas_internal(attrList, into); +} + + +/* + * ExecCreateTableAs -- execute a CREATE TABLE AS command + */ +ObjectAddress +ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, + ParamListInfo params, QueryEnvironment *queryEnv, + QueryCompletion *qc) +{ + Query *query = castNode(Query, stmt->query); + IntoClause *into = stmt->into; + bool is_matview = (into->viewQuery != NULL); + DestReceiver *dest; + Oid save_userid = InvalidOid; + int save_sec_context = 0; + int save_nestlevel = 0; + ObjectAddress address; + List *rewritten; + PlannedStmt *plan; + QueryDesc *queryDesc; + + /* Check if the relation exists or not */ + if (CreateTableAsRelExists(stmt)) + return InvalidObjectAddress; + + /* + * Create the tuple receiver object and insert info it will need + */ + dest = CreateIntoRelDestReceiver(into); + + /* + * The contained Query could be a SELECT, or an EXECUTE utility command. + * If the latter, we just pass it off to ExecuteQuery. + */ + if (query->commandType == CMD_UTILITY && + IsA(query->utilityStmt, ExecuteStmt)) + { + ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); + + Assert(!is_matview); /* excluded by syntax */ + ExecuteQuery(pstate, estmt, into, params, dest, qc); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + return address; + } + Assert(query->commandType == CMD_SELECT); + + /* + * For materialized views, lock down security-restricted operations and + * arrange to make GUC variable changes local to this command. This is + * not necessary for security, but this keeps the behavior similar to + * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized + * view not possible to refresh. + */ + if (is_matview) + { + GetUserIdAndSecContext(&save_userid, &save_sec_context); + SetUserIdAndSecContext(save_userid, + save_sec_context | SECURITY_RESTRICTED_OPERATION); + save_nestlevel = NewGUCNestLevel(); + } + + if (into->skipData) + { + /* + * If WITH NO DATA was specified, do not go through the rewriter, + * planner and executor. Just define the relation using a code path + * similar to CREATE VIEW. This avoids dump/restore problems stemming + * from running the planner before all dependencies are set up. + */ + address = create_ctas_nodata(query->targetList, into); + } + else + { + /* + * Parse analysis was done already, but we still have to run the rule + * rewriter. We do not do AcquireRewriteLocks: we assume the query + * either came straight from the parser, or suitable locks were + * acquired by plancache.c. + */ + rewritten = QueryRewrite(query); + + /* SELECT should never rewrite to more or less than one SELECT query */ + if (list_length(rewritten) != 1) + elog(ERROR, "unexpected rewrite result for %s", + is_matview ? "CREATE MATERIALIZED VIEW" : + "CREATE TABLE AS SELECT"); + query = linitial_node(Query, rewritten); + Assert(query->commandType == CMD_SELECT); + + /* plan the query */ + plan = pg_plan_query(query, pstate->p_sourcetext, + CURSOR_OPT_PARALLEL_OK, params); + + /* + * Use a snapshot with an updated command ID to ensure this query sees + * results of any previously executed queries. (This could only + * matter if the planner executed an allegedly-stable function that + * changed the database contents, but let's do it anyway to be + * parallel to the EXPLAIN code path.) + */ + PushCopiedSnapshot(GetActiveSnapshot()); + UpdateActiveSnapshotCommandId(); + + /* Create a QueryDesc, redirecting output to our tuple receiver */ + queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + GetActiveSnapshot(), InvalidSnapshot, + dest, params, queryEnv, 0); + + /* call ExecutorStart to prepare the plan for execution */ + ExecutorStart(queryDesc, GetIntoRelEFlags(into)); + + /* run the plan to completion */ + ExecutorRun(queryDesc, ForwardScanDirection, 0, true); + + /* save the rowcount if we're given a qc to fill */ + if (qc) + SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed); + + /* get object address that intorel_startup saved for us */ + address = ((DR_intorel *) dest)->reladdr; + + /* and clean up */ + ExecutorFinish(queryDesc); + ExecutorEnd(queryDesc); + + FreeQueryDesc(queryDesc); + + PopActiveSnapshot(); + } + + if (is_matview) + { + /* Roll back any GUC changes */ + AtEOXact_GUC(false, save_nestlevel); + + /* Restore userid and security context */ + SetUserIdAndSecContext(save_userid, save_sec_context); + } + + return address; +} + +/* + * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS + * + * This is exported because EXPLAIN and PREPARE need it too. (Note: those + * callers still need to deal explicitly with the skipData flag; since they + * use different methods for suppressing execution, it doesn't seem worth + * trying to encapsulate that part.) + */ +int +GetIntoRelEFlags(IntoClause *intoClause) +{ + int flags = 0; + + if (intoClause->skipData) + flags |= EXEC_FLAG_WITH_NO_DATA; + + return flags; +} + +/* + * CreateTableAsRelExists --- check existence of relation for CreateTableAsStmt + * + * Utility wrapper checking if the relation pending for creation in this + * CreateTableAsStmt query already exists or not. Returns true if the + * relation exists, otherwise false. + */ +bool +CreateTableAsRelExists(CreateTableAsStmt *ctas) +{ + Oid nspid; + Oid oldrelid; + ObjectAddress address; + IntoClause *into = ctas->into; + + nspid = RangeVarGetCreationNamespace(into->rel); + + oldrelid = get_relname_relid(into->rel->relname, nspid); + if (OidIsValid(oldrelid)) + { + if (!ctas->if_not_exists) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + into->rel->relname))); + + /* + * The relation exists and IF NOT EXISTS has been specified. + * + * If we are in an extension script, insist that the pre-existing + * object be a member of the extension, to avoid security risks. + */ + ObjectAddressSet(address, RelationRelationId, oldrelid); + checkMembershipInCurrentExtension(&address); + + /* OK to skip */ + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists, skipping", + into->rel->relname))); + return true; + } + + /* Relation does not exist, it can be created */ + return false; +} + +/* + * CreateIntoRelDestReceiver -- create a suitable DestReceiver object + * + * intoClause will be NULL if called from CreateDestReceiver(), in which + * case it has to be provided later. However, it is convenient to allow + * self->into to be filled in immediately for other callers. + */ +DestReceiver * +CreateIntoRelDestReceiver(IntoClause *intoClause) +{ + DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); + + self->pub.receiveSlot = intorel_receive; + self->pub.rStartup = intorel_startup; + self->pub.rShutdown = intorel_shutdown; + self->pub.rDestroy = intorel_destroy; + self->pub.mydest = DestIntoRel; + self->into = intoClause; + /* other private fields will be set during intorel_startup */ + + return (DestReceiver *) self; +} + +/* + * intorel_startup --- executor startup + */ +static void +intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + bool is_matview; + List *attrList; + ObjectAddress intoRelationAddr; + Relation intoRelationDesc; + ListCell *lc; + int attnum; + + Assert(into != NULL); /* else somebody forgot to set it */ + + /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ + is_matview = (into->viewQuery != NULL); + + /* + * Build column definitions using "pre-cooked" type and collation info. If + * a column name list was specified in CREATE TABLE AS, override the + * column names derived from the query. (Too few column names are OK, too + * many are not.) + */ + attrList = NIL; + lc = list_head(into->colNames); + for (attnum = 0; attnum < typeinfo->natts; attnum++) + { + Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); + ColumnDef *col; + char *colname; + + if (lc) + { + colname = strVal(lfirst(lc)); + lc = lnext(into->colNames, lc); + } + else + colname = NameStr(attribute->attname); + + col = makeColumnDef(colname, + attribute->atttypid, + attribute->atttypmod, + attribute->attcollation); + + /* + * It's possible that the column is of a collatable type but the + * collation could not be resolved, so double-check. (We must check + * this here because DefineRelation would adopt the type's default + * collation rather than complaining.) + */ + if (!OidIsValid(col->collOid) && + type_is_collatable(col->typeName->typeOid)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("no collation was derived for column \"%s\" with collatable type %s", + col->colname, + format_type_be(col->typeName->typeOid)), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + attrList = lappend(attrList, col); + } + + if (lc != NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("too many column names were specified"))); + + /* + * Actually create the target table + */ + intoRelationAddr = create_ctas_internal(attrList, into); + + /* + * Finally we can open the target table + */ + intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock); + + /* + * Make sure the constructed table does not have RLS enabled. + * + * check_enable_rls() will ereport(ERROR) itself if the user has requested + * something invalid, and otherwise will return RLS_ENABLED if RLS should + * be enabled here. We don't actually support that currently, so throw + * our own ereport(ERROR) if that happens. + */ + if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("policies not yet implemented for this command"))); + + /* + * Tentatively mark the target as populated, if it's a matview and we're + * going to fill it; otherwise, no change needed. + */ + if (is_matview && !into->skipData) + SetMatViewPopulatedState(intoRelationDesc, true); + + /* + * Fill private fields of myState for use by later routines + */ + myState->rel = intoRelationDesc; + myState->reladdr = intoRelationAddr; + myState->output_cid = GetCurrentCommandId(true); + myState->ti_options = TABLE_INSERT_SKIP_FSM; + + /* + * If WITH NO DATA is specified, there is no need to set up the state for + * bulk inserts as there are no tuples to insert. + */ + if (!into->skipData) + myState->bistate = GetBulkInsertState(); + else + myState->bistate = NULL; + + /* + * Valid smgr_targblock implies something already wrote to the relation. + * This may be harmless, but this function hasn't planned for it. + */ + Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); +} + +/* + * intorel_receive --- receive one tuple + */ +static bool +intorel_receive(TupleTableSlot *slot, DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + + /* Nothing to insert if WITH NO DATA is specified. */ + if (!myState->into->skipData) + { + /* + * Note that the input slot might not be of the type of the target + * relation. That's supported by table_tuple_insert(), but slightly + * less efficient than inserting with the right slot - but the + * alternative would be to copy into a slot of the right type, which + * would not be cheap either. This also doesn't allow accessing per-AM + * data (say a tuple's xmin), but since we don't do that here... + */ + table_tuple_insert(myState->rel, + slot, + myState->output_cid, + myState->ti_options, + myState->bistate); + } + + /* We know this is a newly created relation, so there are no indexes */ + + return true; +} + +/* + * intorel_shutdown --- executor end + */ +static void +intorel_shutdown(DestReceiver *self) +{ + DR_intorel *myState = (DR_intorel *) self; + IntoClause *into = myState->into; + + if (!into->skipData) + { + FreeBulkInsertState(myState->bistate); + table_finish_bulk_insert(myState->rel, myState->ti_options); + } + + /* close rel, but keep lock until commit */ + table_close(myState->rel, NoLock); + myState->rel = NULL; +} + +/* + * intorel_destroy --- release DestReceiver object + */ +static void +intorel_destroy(DestReceiver *self) +{ + pfree(self); +} |