/*-------------------------------------------------------------------------
 *
 * cluster.c
 *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
 *
 * There is hardly anything left of Paul Brown's original implementation...
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994-5, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/commands/cluster.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/amapi.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/toast_internals.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_database.h"
#include "catalog/pg_inherits.h"
#include "catalog/toasting.h"
#include "commands/cluster.h"
#include "commands/defrem.h"
#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "optimizer/optimizer.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/acl.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h"

/*
 * This struct is used to pass around the information on tables to be
 * clustered.  We need this so we can make a list of them when invoked
 * without a specific table/index pair.
 */
typedef struct
{
    Oid         tableOid;
    Oid         indexOid;
} RelToCluster;

static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
                            bool verbose, bool *pSwapToastByContent,
                            TransactionId *pFreezeXid,
                            MultiXactId *pCutoffMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
                                               Oid indexOid);


/*---------------------------------------------------------------------------
 * This cluster code allows for clustering multiple tables at once.  Because
 * of this, we cannot just run everything on a single transaction, or we
 * would be forced to acquire exclusive locks on all the tables being
 * clustered, simultaneously --- very likely leading to deadlock.
 *
 * To solve this we follow a similar strategy to the VACUUM code,
 * clustering each relation in a separate transaction.  For this to work,
 * we need to:
 *  - provide a separate memory context so that we can pass information in
 *    a way that survives across transactions
 *  - start a new transaction every time a new relation is clustered
 *  - check for validity of the information on to-be-clustered relations,
 *    as someone might have deleted a relation behind our back, or
 *    clustered one on a different index
 *  - end the transaction
 *
 * The single-relation case does not have any such overhead.
 *
 * We also allow a relation to be specified without index.  In that case,
 * the indisclustered bit will be looked up, and an ERROR will be thrown
 * if there is no index with the bit set.
 *---------------------------------------------------------------------------
 */
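
/*
 * For illustration (a sketch of the SQL-level usage, not part of this
 * file's logic), the statement forms that arrive here are roughly:
 *
 *      CLUSTER;                        -- all previously-clustered tables
 *      CLUSTER VERBOSE tab;            -- reuse the indisclustered index
 *      CLUSTER tab USING idx;          -- cluster tab on idx
 */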
void
cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
{
    ListCell   *lc;
    ClusterParams params = {0};
    bool        verbose = false;
    Relation    rel = NULL;
    Oid         indexOid = InvalidOid;
    MemoryContext cluster_context;
    List       *rtcs;

    /* Parse option list */
    foreach(lc, stmt->params)
    {
        DefElem    *opt = (DefElem *) lfirst(lc);

        if (strcmp(opt->defname, "verbose") == 0)
            verbose = defGetBoolean(opt);
        else
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("unrecognized CLUSTER option \"%s\"",
                            opt->defname),
                     parser_errposition(pstate, opt->location)));
    }

    params.options = (verbose ? CLUOPT_VERBOSE : 0);

    if (stmt->relation != NULL)
    {
        /* This is the single-relation case. */
        Oid         tableOid;

        /*
         * Find, lock, and check permissions on the table.  We obtain
         * AccessExclusiveLock right away to avoid lock-upgrade hazard in
         * the single-transaction case.
         */
        tableOid = RangeVarGetRelidExtended(stmt->relation,
                                            AccessExclusiveLock,
                                            0,
                                            RangeVarCallbackOwnsTable, NULL);
        rel = table_open(tableOid, NoLock);

        /*
         * Reject clustering a remote temp table ... their local buffer
         * manager is not going to cope.
         */
        if (RELATION_IS_OTHER_TEMP(rel))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("cannot cluster temporary tables of other sessions")));

        if (stmt->indexname == NULL)
        {
            ListCell   *index;

            /* We need to find the index that has indisclustered set. */
            foreach(index, RelationGetIndexList(rel))
            {
                indexOid = lfirst_oid(index);
                if (get_index_isclustered(indexOid))
                    break;
                indexOid = InvalidOid;
            }

            if (!OidIsValid(indexOid))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_OBJECT),
                         errmsg("there is no previously clustered index for table \"%s\"",
                                stmt->relation->relname)));
        }
        else
        {
            /*
             * The index is expected to be in the same namespace as the
             * relation.
             */
            indexOid = get_relname_relid(stmt->indexname,
                                         rel->rd_rel->relnamespace);
            if (!OidIsValid(indexOid))
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_OBJECT),
                         errmsg("index \"%s\" for table \"%s\" does not exist",
                                stmt->indexname, stmt->relation->relname)));
        }

        if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        {
            /* close relation, keep lock till commit */
            table_close(rel, NoLock);

            /* Do the job. */
            cluster_rel(tableOid, indexOid, &params);

            return;
        }
    }

    /*
     * By here, we know we are in a multi-table situation.  In order to
     * avoid holding locks for too long, we want to process each table in
     * its own transaction.  This forces us to disallow running inside a
     * user transaction block.
     */
    PreventInTransactionBlock(isTopLevel, "CLUSTER");

    /* Also, we need a memory context to hold our list of relations */
    cluster_context = AllocSetContextCreate(PortalContext,
                                            "Cluster",
                                            ALLOCSET_DEFAULT_SIZES);

    /*
     * Either we're processing a partitioned table, or we were not given any
     * table name at all.  In either case, obtain a list of relations to
     * process.
     *
     * In the former case, an index name must have been given, so we don't
     * need to recheck its "indisclustered" bit, but we have to check that
     * it is an index that we can cluster on.  In the latter case, we set
     * the option bit to have indisclustered verified.
     *
     * Rechecking the relation itself is necessary here in all cases.
     */
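
    /*
     * Illustrative examples of how we can get here: a database-wide
     * "CLUSTER" arrives with rel == NULL, while "CLUSTER parted USING idx"
     * on a partitioned table arrives with rel and indexOid set.  Either
     * way, the relations to process end up in rtcs below.
     */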
    params.options |= CLUOPT_RECHECK;
    if (rel != NULL)
    {
        Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
        check_index_is_clusterable(rel, indexOid, AccessShareLock);
        rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);

        /* close relation, releasing lock on parent table */
        table_close(rel, AccessExclusiveLock);
    }
    else
    {
        rtcs = get_tables_to_cluster(cluster_context);
        params.options |= CLUOPT_RECHECK_ISCLUSTERED;
    }

    /* Do the job. */
    cluster_multiple_rels(rtcs, &params);

    /* Start a new transaction for the cleanup work. */
    StartTransactionCommand();

    /* Clean up working storage */
    MemoryContextDelete(cluster_context);
}

/*
 * Given a list of relations to cluster, process each of them in a separate
 * transaction.
 *
 * We expect to be in a transaction at start, but there isn't one when we
 * return.
 */
static void
cluster_multiple_rels(List *rtcs, ClusterParams *params)
{
    ListCell   *lc;

    /* Commit to get out of starting transaction */
    PopActiveSnapshot();
    CommitTransactionCommand();

    /* Cluster the tables, each in a separate transaction */
    foreach(lc, rtcs)
    {
        RelToCluster *rtc = (RelToCluster *) lfirst(lc);

        /* Start a new transaction for each relation. */
        StartTransactionCommand();

        /* functions in indexes may want a snapshot set */
        PushActiveSnapshot(GetTransactionSnapshot());

        /* Do the job. */
        cluster_rel(rtc->tableOid, rtc->indexOid, params);

        PopActiveSnapshot();
        CommitTransactionCommand();
    }
}

/*
 * cluster_rel
 *
 * This clusters the table by creating a new, clustered table and
 * swapping the relfilenumbers of the new table and the old table, so
 * the OID of the original table is preserved.  Thus we do not lose
 * GRANT, inheritance nor references to this table (this was a bug
 * in releases through 7.3).
 *
 * Indexes are rebuilt too, via REINDEX.  Since we are effectively
 * bulk-loading the new table, it's better to create the indexes afterwards
 * than to fill them incrementally while we load the table.
 *
 * If indexOid is InvalidOid, the table will be rewritten in physical order
 * instead of index order.  This is the new implementation of VACUUM FULL,
 * and error messages should refer to the operation as VACUUM not CLUSTER.
 */
void
cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
{
    Relation    OldHeap;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;
    bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
    bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);

    /* Check for user-requested abort. */
    CHECK_FOR_INTERRUPTS();

    pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
    if (OidIsValid(indexOid))
        pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
                                     PROGRESS_CLUSTER_COMMAND_CLUSTER);
    else
        pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
                                     PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);

    /*
     * We grab exclusive access to the target rel and index for the duration
     * of the transaction.  (This is redundant for the single-transaction
     * case, since cluster() already did it.)  The index lock is taken
     * inside check_index_is_clusterable.
     */
    OldHeap = try_relation_open(tableOid, AccessExclusiveLock);

    /* If the table has gone away, we can skip processing it */
    if (!OldHeap)
    {
        pgstat_progress_end_command();
        return;
    }

    /*
     * Switch to the table owner's userid, so that any index functions are
     * run as that user.  Also lock down security-restricted operations and
     * arrange to make GUC variable changes local to this command.
     */
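
    /*
     * (This is the same switch-to-table-owner pattern used by VACUUM's
     * vacuum_rel(); the matching AtEOXact_GUC() and SetUserIdAndSecContext()
     * calls are at the "out:" label below.)
     */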
    GetUserIdAndSecContext(&save_userid, &save_sec_context);
    SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
                           save_sec_context | SECURITY_RESTRICTED_OPERATION);
    save_nestlevel = NewGUCNestLevel();

    /*
     * Since we may open a new transaction for each relation, we have to
     * check that the relation still is what we think it is.
     *
     * If this is a single-transaction CLUSTER, we can skip these tests.  We
     * *must* skip the one on indisclustered since it would reject an
     * attempt to cluster a not-previously-clustered index.
     */
    if (recheck)
    {
        /* Check that the user still owns the relation */
        if (!object_ownercheck(RelationRelationId, tableOid, save_userid))
        {
            relation_close(OldHeap, AccessExclusiveLock);
            goto out;
        }

        /*
         * Silently skip a temp table for a remote session.  Only doing this
         * check in the "recheck" case is appropriate (which currently means
         * somebody is executing a database-wide CLUSTER or on a partitioned
         * table), because there is another check in cluster() which will
         * stop any attempt to cluster remote temp tables by name.  There is
         * another check in cluster_rel which is redundant, but we leave it
         * for extra safety.
         */
        if (RELATION_IS_OTHER_TEMP(OldHeap))
        {
            relation_close(OldHeap, AccessExclusiveLock);
            goto out;
        }

        if (OidIsValid(indexOid))
        {
            /*
             * Check that the index still exists
             */
            if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
            {
                relation_close(OldHeap, AccessExclusiveLock);
                goto out;
            }

            /*
             * Check that the index is still the one with indisclustered
             * set, if needed.
             */
            if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
                !get_index_isclustered(indexOid))
            {
                relation_close(OldHeap, AccessExclusiveLock);
                goto out;
            }
        }
    }

    /*
     * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     * would work in most respects, but the index would only get marked as
     * indisclustered in the current database, leading to unexpected
     * behavior if CLUSTER were later invoked in another database.
     */
    if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot cluster a shared catalog")));

    /*
     * Don't process temp tables of other backends ... their local buffer
     * manager is not going to cope.
     */
    if (RELATION_IS_OTHER_TEMP(OldHeap))
    {
        if (OidIsValid(indexOid))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("cannot cluster temporary tables of other sessions")));
        else
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("cannot vacuum temporary tables of other sessions")));
    }

    /*
     * Also check for active uses of the relation in the current
     * transaction, including open scans and pending AFTER trigger events.
     */
    CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");

    /* Check heap and index are valid to cluster on */
    if (OidIsValid(indexOid))
        check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);

    /*
     * Quietly ignore the request if this is a materialized view which has
     * not been populated from its query.  No harm is done because there is
     * no data to deal with, and we don't want to throw an error if this is
     * part of a multi-relation request -- for example, CLUSTER was run on
     * the entire database.
     */
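
    /*
     * (For example, a matview created WITH NO DATA and never refreshed is
     * silently skipped here rather than raising an error.)
     */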
    if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
        !RelationIsPopulated(OldHeap))
    {
        relation_close(OldHeap, AccessExclusiveLock);
        goto out;
    }

    Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
           OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
           OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);

    /*
     * All predicate locks on the tuples or pages are about to be made
     * invalid, because we move tuples around.  Promote them to relation
     * locks.  Predicate locks on indexes will be promoted when they are
     * reindexed.
     */
    TransferPredicateLocksToHeapRelation(OldHeap);

    /* rebuild_relation does all the dirty work */
    rebuild_relation(OldHeap, indexOid, verbose);

    /* NB: rebuild_relation does table_close() on OldHeap */

out:
    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);

    /* Restore userid and security context */
    SetUserIdAndSecContext(save_userid, save_sec_context);

    pgstat_progress_end_command();
}

/*
 * Verify that the specified heap and index are valid to cluster on
 *
 * Side effect: obtains lock on the index.  The caller may
 * in some cases already have AccessExclusiveLock on the table, but
 * not in all cases so we can't rely on the table-level lock for
 * protection here.
 */
void
check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
{
    Relation    OldIndex;

    OldIndex = index_open(indexOid, lockmode);

    /*
     * Check that index is in fact an index on the given relation
     */
    if (OldIndex->rd_index == NULL ||
        OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not an index for table \"%s\"",
                        RelationGetRelationName(OldIndex),
                        RelationGetRelationName(OldHeap))));

    /* Index AM must allow clustering */
    if (!OldIndex->rd_indam->amclusterable)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
                        RelationGetRelationName(OldIndex))));

    /*
     * Disallow clustering on incomplete indexes (those that might not index
     * every row of the relation).  We could relax this by making a separate
     * seqscan pass over the table to copy the missing rows, but that seems
     * expensive and tedious.
     */
    if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot cluster on partial index \"%s\"",
                        RelationGetRelationName(OldIndex))));

    /*
     * Disallow if index is left over from a failed CREATE INDEX
     * CONCURRENTLY; it might well not contain entries for every heap row,
     * or might not even be internally consistent.  (But note that we don't
     * check indcheckxmin; the worst consequence of following broken HOT
     * chains would be that we might put recently-dead tuples out-of-order
     * in the new table, and there is little harm in that.)
     */
    if (!OldIndex->rd_index->indisvalid)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot cluster on invalid index \"%s\"",
                        RelationGetRelationName(OldIndex))));

    /* Drop relcache refcnt on OldIndex, but keep lock */
    index_close(OldIndex, NoLock);
}
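
/*
 * As a point of reference (not something this file depends on): among the
 * in-core index AMs, currently only btree and GiST set amclusterable, so
 * e.g. attempting to cluster on a GIN index fails with the "access method
 * does not support clustering" error above.
 */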
/*
 * mark_index_clustered: mark the specified index as the one clustered on
 *
 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
 */
void
mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
{
    HeapTuple   indexTuple;
    Form_pg_index indexForm;
    Relation    pg_index;
    ListCell   *index;

    /* Disallow applying to a partitioned table */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot mark index clustered in partitioned table")));

    /*
     * If the index is already marked clustered, no need to do anything.
     */
    if (OidIsValid(indexOid))
    {
        if (get_index_isclustered(indexOid))
            return;
    }

    /*
     * Check each index of the relation and set/clear the bit as needed.
     */
    pg_index = table_open(IndexRelationId, RowExclusiveLock);

    foreach(index, RelationGetIndexList(rel))
    {
        Oid         thisIndexOid = lfirst_oid(index);

        indexTuple = SearchSysCacheCopy1(INDEXRELID,
                                         ObjectIdGetDatum(thisIndexOid));
        if (!HeapTupleIsValid(indexTuple))
            elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
        indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

        /*
         * Unset the bit if set.  We know it's wrong because we checked this
         * earlier.
         */
        if (indexForm->indisclustered)
        {
            indexForm->indisclustered = false;
            CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
        }
        else if (thisIndexOid == indexOid)
        {
            /* this was checked earlier, but let's be real sure */
            if (!indexForm->indisvalid)
                elog(ERROR, "cannot cluster on invalid index %u", indexOid);
            indexForm->indisclustered = true;
            CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
        }

        InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
                                     InvalidOid, is_internal);

        heap_freetuple(indexTuple);
    }

    table_close(pg_index, RowExclusiveLock);
}

/*
 * rebuild_relation: rebuild an existing relation in index or physical order
 *
 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
 * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
 *
 * NB: this routine closes OldHeap at the right time; caller should not.
 */
static void
rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
{
    Oid         tableOid = RelationGetRelid(OldHeap);
    Oid         accessMethod = OldHeap->rd_rel->relam;
    Oid         tableSpace = OldHeap->rd_rel->reltablespace;
    Oid         OIDNewHeap;
    char        relpersistence;
    bool        is_system_catalog;
    bool        swap_toast_by_content;
    TransactionId frozenXid;
    MultiXactId cutoffMulti;

    if (OidIsValid(indexOid))
        /* Mark the correct index as clustered */
        mark_index_clustered(OldHeap, indexOid, true);

    /* Remember info about rel before closing OldHeap */
    relpersistence = OldHeap->rd_rel->relpersistence;
    is_system_catalog = IsSystemRelation(OldHeap);

    /* Close relcache entry, but keep lock until transaction commit */
    table_close(OldHeap, NoLock);

    /* Create the transient table that will receive the re-ordered data */
    OIDNewHeap = make_new_heap(tableOid, tableSpace,
                               accessMethod,
                               relpersistence,
                               AccessExclusiveLock);

    /* Copy the heap data into the new table in the desired order */
    copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
                    &swap_toast_by_content, &frozenXid, &cutoffMulti);

    /*
     * Swap the physical files of the target and transient tables, then
     * rebuild the target's indexes and throw away the transient table.
     */
    finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
                     swap_toast_by_content, false, true,
                     frozenXid, cutoffMulti,
                     relpersistence);
}
/*
 * Create the transient table that will be filled with new data during
 * CLUSTER, ALTER TABLE, and similar operations.  The transient table
 * duplicates the logical structure of the OldHeap; but will have the
 * specified physical storage properties NewTableSpace, NewAccessMethod,
 * and relpersistence.
 *
 * After this, the caller should load the new heap with transferred/modified
 * data, then call finish_heap_swap to complete the operation.
 */
Oid
make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
              char relpersistence, LOCKMODE lockmode)
{
    TupleDesc   OldHeapDesc;
    char        NewHeapName[NAMEDATALEN];
    Oid         OIDNewHeap;
    Oid         toastid;
    Relation    OldHeap;
    HeapTuple   tuple;
    Datum       reloptions;
    bool        isNull;
    Oid         namespaceid;

    OldHeap = table_open(OIDOldHeap, lockmode);
    OldHeapDesc = RelationGetDescr(OldHeap);

    /*
     * Note that the NewHeap will not receive any of the defaults or
     * constraints associated with the OldHeap; we don't need 'em, and
     * there's no reason to spend cycles inserting them into the catalogs
     * only to delete them.
     */

    /*
     * But we do want to use reloptions of the old heap for new heap.
     */
    tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
                                 &isNull);
    if (isNull)
        reloptions = (Datum) 0;

    if (relpersistence == RELPERSISTENCE_TEMP)
        namespaceid = LookupCreationNamespace("pg_temp");
    else
        namespaceid = RelationGetNamespace(OldHeap);

    /*
     * Create the new heap, using a temporary name in the same namespace as
     * the existing table.  NOTE: there is some risk of collision with user
     * relnames.  Working around this seems more trouble than it's worth; in
     * particular, we can't create the new heap in a different namespace
     * from the old, or we will have problems with the TEMP status of temp
     * tables.
     *
     * Note: the new heap is not a shared relation, even if we are
     * rebuilding a shared rel.  However, we do make the new heap mapped if
     * the source is mapped.  This simplifies swap_relation_files, and is
     * absolutely necessary for rebuilding pg_class, for reasons explained
     * there.
     */
    snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);

    OIDNewHeap = heap_create_with_catalog(NewHeapName,
                                          namespaceid,
                                          NewTableSpace,
                                          InvalidOid,
                                          InvalidOid,
                                          InvalidOid,
                                          OldHeap->rd_rel->relowner,
                                          NewAccessMethod,
                                          OldHeapDesc,
                                          NIL,
                                          RELKIND_RELATION,
                                          relpersistence,
                                          false,
                                          RelationIsMapped(OldHeap),
                                          ONCOMMIT_NOOP,
                                          reloptions,
                                          false,
                                          true,
                                          true,
                                          OIDOldHeap,
                                          NULL);
    Assert(OIDNewHeap != InvalidOid);

    ReleaseSysCache(tuple);

    /*
     * Advance command counter so that the newly-created relation's catalog
     * tuples will be visible to table_open.
     */
    CommandCounterIncrement();

    /*
     * If necessary, create a TOAST table for the new relation.
     *
     * If the relation doesn't have a TOAST table already, we can't need one
     * for the new relation.  The other way around is possible though: if
     * some wide columns have been dropped, NewHeapCreateToastTable can
     * decide that no TOAST table is needed for the new table.
     *
     * Note that NewHeapCreateToastTable ends with CommandCounterIncrement,
     * so that the TOAST table will be visible for insertion.
     */
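
    /*
     * (If a new TOAST table is made here, it initially gets the usual
     * pg_toast.pg_toast_<OID> name derived from the transient heap's OID;
     * when swapping by links, finish_heap_swap() later renames it to match
     * the original table's OID.)
     */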
    toastid = OldHeap->rd_rel->reltoastrelid;
    if (OidIsValid(toastid))
    {
        /* keep the existing toast table's reloptions, if any */
        tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
        if (!HeapTupleIsValid(tuple))
            elog(ERROR, "cache lookup failed for relation %u", toastid);
        reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
                                     &isNull);
        if (isNull)
            reloptions = (Datum) 0;

        NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);

        ReleaseSysCache(tuple);
    }

    table_close(OldHeap, NoLock);

    return OIDNewHeap;
}

/*
 * Do the physical copying of table data.
 *
 * There are three output parameters:
 * *pSwapToastByContent is set true if toast tables must be swapped by
 * content.
 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
 */
static void
copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
                bool verbose, bool *pSwapToastByContent,
                TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
{
    Relation    NewHeap,
                OldHeap,
                OldIndex;
    Relation    relRelation;
    HeapTuple   reltup;
    Form_pg_class relform;
    TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
    TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
    VacuumParams params;
    struct VacuumCutoffs cutoffs;
    bool        use_sort;
    double      num_tuples = 0,
                tups_vacuumed = 0,
                tups_recently_dead = 0;
    BlockNumber num_pages;
    int         elevel = verbose ? INFO : DEBUG2;
    PGRUsage    ru0;
    char       *nspname;

    pg_rusage_init(&ru0);

    /*
     * Open the relations we need.
     */
    NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
    OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
    if (OidIsValid(OIDOldIndex))
        OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
    else
        OldIndex = NULL;

    /* Store a copy of the namespace name for logging purposes */
    nspname = get_namespace_name(RelationGetNamespace(OldHeap));

    /*
     * Their tuple descriptors should be exactly alike, but here we only
     * need to assume that they have the same number of columns.
     */
    oldTupDesc = RelationGetDescr(OldHeap);
    newTupDesc = RelationGetDescr(NewHeap);
    Assert(newTupDesc->natts == oldTupDesc->natts);

    /*
     * If the OldHeap has a toast table, get lock on the toast table to keep
     * it from being vacuumed.  This is needed because autovacuum processes
     * toast tables independently of their main tables, with no lock on the
     * latter.  If an autovacuum were to start on the toast table after we
     * compute our OldestXmin below, it would use a later OldestXmin, and
     * then possibly remove as DEAD toast tuples belonging to main tuples we
     * think are only RECENTLY_DEAD.  Then we'd fail while trying to copy
     * those tuples.
     *
     * We don't need to open the toast relation here, just lock it.  The
     * lock will be held till end of transaction.
     */
    if (OldHeap->rd_rel->reltoastrelid)
        LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);

    /*
     * If both tables have TOAST tables, perform toast swap by content.  It
     * is possible that the old table has a toast table but the new one
     * doesn't, if toastable columns have been dropped.  In that case we
     * have to do swap by links.  This is okay because swap by content is
     * only essential for system catalogs, and we don't support schema
     * changes for them.
     */
    if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
    {
        *pSwapToastByContent = true;

        /*
         * When doing swap by content, any toast pointers written into
         * NewHeap must use the old toast table's OID, because that's where
         * the toast data will eventually be found.  Set this up by setting
         * rd_toastoid.
         * This also tells toast_save_datum() to preserve the toast value
         * OIDs, which we want so as not to invalidate toast pointers in
         * system catalog caches, and to avoid making multiple copies of a
         * single toast value.
         *
         * Note that we must hold NewHeap open until we are done writing
         * data, since the relcache will not guarantee to remember this
         * setting once the relation is closed.  Also, this technique
         * depends on the fact that no one will try to read from the NewHeap
         * until after we've finished writing it and swapping the rels ---
         * otherwise they could follow the toast pointers to the wrong
         * place.  (It would actually work for values copied over from the
         * old toast table, but not for any values that we toast which were
         * previously not toasted.)
         */
        NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
    }
    else
        *pSwapToastByContent = false;

    /*
     * Compute xids used to freeze and weed out dead tuples and multixacts.
     * Since we're going to rewrite the whole table anyway, there's no
     * reason not to be aggressive about this.
     */
    memset(&params, 0, sizeof(VacuumParams));
    vacuum_get_cutoffs(OldHeap, &params, &cutoffs);

    /*
     * FreezeXid will become the table's new relfrozenxid, and that mustn't
     * go backwards, so take the max.
     */
    if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
        TransactionIdPrecedes(cutoffs.FreezeLimit,
                              OldHeap->rd_rel->relfrozenxid))
        cutoffs.FreezeLimit = OldHeap->rd_rel->relfrozenxid;

    /*
     * MultiXactCutoff, similarly, shouldn't go backwards either.
     */
    if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
        MultiXactIdPrecedes(cutoffs.MultiXactCutoff,
                            OldHeap->rd_rel->relminmxid))
        cutoffs.MultiXactCutoff = OldHeap->rd_rel->relminmxid;

    /*
     * Decide whether to use an indexscan or seqscan-and-optional-sort to
     * scan the OldHeap.  We know how to use a sort to duplicate the
     * ordering of a btree index, and will use seqscan-and-sort for that
     * case if the planner tells us it's cheaper.  Otherwise, always
     * indexscan if an index is provided, else plain seqscan.
     */
    if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
        use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
    else
        use_sort = false;

    /* Log what we're doing */
    if (OldIndex != NULL && !use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
                        nspname,
                        RelationGetRelationName(OldHeap),
                        RelationGetRelationName(OldIndex))));
    else if (use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using sequential scan and sort",
                        nspname,
                        RelationGetRelationName(OldHeap))));
    else
        ereport(elevel,
                (errmsg("vacuuming \"%s.%s\"",
                        nspname,
                        RelationGetRelationName(OldHeap))));

    /*
     * Hand off the actual copying to an AM-specific function; the generic
     * code cannot know how to deal with visibility across AMs.  Note that
     * this routine is allowed to set FreezeXid / MultiXactCutoff to
     * different values (e.g. because the AM doesn't use freezing).
     */
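
    /*
     * (For the core heap AM, this tableam callback resolves to
     * heapam_relation_copy_for_cluster() in access/heap/heapam_handler.c,
     * which performs the seqscan-and-sort or indexscan copy decided above.)
     */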
    table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
                                    cutoffs.OldestXmin,
                                    &cutoffs.FreezeLimit,
                                    &cutoffs.MultiXactCutoff,
                                    &num_tuples, &tups_vacuumed,
                                    &tups_recently_dead);

    /* return selected values to caller, get set as relfrozenxid/minmxid */
    *pFreezeXid = cutoffs.FreezeLimit;
    *pCutoffMulti = cutoffs.MultiXactCutoff;

    /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
    NewHeap->rd_toastoid = InvalidOid;

    num_pages = RelationGetNumberOfBlocks(NewHeap);

    /* Log what we did */
    ereport(elevel,
            (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
                    nspname,
                    RelationGetRelationName(OldHeap),
                    tups_vacuumed, num_tuples,
                    RelationGetNumberOfBlocks(OldHeap)),
             errdetail("%.0f dead row versions cannot be removed yet.\n"
                       "%s.",
                       tups_recently_dead,
                       pg_rusage_show(&ru0))));

    if (OldIndex != NULL)
        index_close(OldIndex, NoLock);
    table_close(OldHeap, NoLock);
    table_close(NewHeap, NoLock);

    /* Update pg_class to reflect the correct values of pages and tuples. */
    relRelation = table_open(RelationRelationId, RowExclusiveLock);

    reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
    if (!HeapTupleIsValid(reltup))
        elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
    relform = (Form_pg_class) GETSTRUCT(reltup);

    relform->relpages = num_pages;
    relform->reltuples = num_tuples;

    /* Don't update the stats for pg_class.  See swap_relation_files. */
    if (OIDOldHeap != RelationRelationId)
        CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    else
        CacheInvalidateRelcacheByTuple(reltup);

    /* Clean up. */
    heap_freetuple(reltup);
    table_close(relRelation, RowExclusiveLock);

    /* Make the update visible */
    CommandCounterIncrement();
}

/*
 * Swap the physical files of two given relations.
 *
 * We swap the physical identity (reltablespace, relfilenumber) while
 * keeping the same logical identities of the two relations.
 * relpersistence is also swapped, which is critical since it determines
 * where buffers live for each relation.
 *
 * We can swap associated TOAST data in either of two ways: recursively swap
 * the physical content of the toast tables (and their indexes), or swap the
 * TOAST links in the given relations' pg_class entries.  The former is
 * needed to manage rewrites of shared catalogs (where we cannot change the
 * pg_class links) while the latter is the only way to handle cases in
 * which a toast table is added or removed altogether.
 *
 * Additionally, the first relation is marked with relfrozenxid set to
 * frozenXid.  It seems a bit ugly to have this here, but the caller would
 * have to do it anyway, so having it here saves a heap_update.  Note: in
 * the swap-toast-links case, we assume we don't need to change the toast
 * table's relfrozenxid: the new version of the toast table should already
 * have relfrozenxid set to RecentXmin, which is good enough.
 *
 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
 * their OIDs are emitted into mapped_tables[].  This is hacky but beats
 * having to look the information up again later in finish_heap_swap.
 */
static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
                    bool swap_toast_by_content,
                    bool is_internal,
                    TransactionId frozenXid,
                    MultiXactId cutoffMulti,
                    Oid *mapped_tables)
{
    Relation    relRelation;
    HeapTuple   reltup1,
                reltup2;
    Form_pg_class relform1,
                relform2;
    RelFileNumber relfilenumber1,
                relfilenumber2;
    RelFileNumber swaptemp;
    char        swptmpchr;
    Oid         relam1,
                relam2;

    /* We need writable copies of both pg_class tuples. */
    relRelation = table_open(RelationRelationId, RowExclusiveLock);

    reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    if (!HeapTupleIsValid(reltup1))
        elog(ERROR, "cache lookup failed for relation %u", r1);
    relform1 = (Form_pg_class) GETSTRUCT(reltup1);

    reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    if (!HeapTupleIsValid(reltup2))
        elog(ERROR, "cache lookup failed for relation %u", r2);
    relform2 = (Form_pg_class) GETSTRUCT(reltup2);

    relfilenumber1 = relform1->relfilenode;
    relfilenumber2 = relform2->relfilenode;
    relam1 = relform1->relam;
    relam2 = relform2->relam;

    if (RelFileNumberIsValid(relfilenumber1) &&
        RelFileNumberIsValid(relfilenumber2))
    {
        /*
         * Normal non-mapped relations: swap relfilenumbers,
         * reltablespaces, relpersistence
         */
        Assert(!target_is_pg_class);

        swaptemp = relform1->relfilenode;
        relform1->relfilenode = relform2->relfilenode;
        relform2->relfilenode = swaptemp;

        swaptemp = relform1->reltablespace;
        relform1->reltablespace = relform2->reltablespace;
        relform2->reltablespace = swaptemp;

        swaptemp = relform1->relam;
        relform1->relam = relform2->relam;
        relform2->relam = swaptemp;

        swptmpchr = relform1->relpersistence;
        relform1->relpersistence = relform2->relpersistence;
        relform2->relpersistence = swptmpchr;

        /* Also swap toast links, if we're swapping by links */
        if (!swap_toast_by_content)
        {
            swaptemp = relform1->reltoastrelid;
            relform1->reltoastrelid = relform2->reltoastrelid;
            relform2->reltoastrelid = swaptemp;
        }
    }
    else
    {
        /*
         * Mapped-relation case.  Here we have to swap the relation mappings
         * instead of modifying the pg_class columns.  Both must be mapped.
         */
        if (RelFileNumberIsValid(relfilenumber1) ||
            RelFileNumberIsValid(relfilenumber2))
            elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
                 NameStr(relform1->relname));

        /*
         * We can't change the tablespace nor persistence of a mapped rel,
         * and we can't handle toast link swapping for one either, because
         * we must not apply any critical changes to its pg_class row.
         * These cases should be prevented by upstream permissions tests, so
         * these checks are non-user-facing emergency backstop.
         */
        if (relform1->reltablespace != relform2->reltablespace)
            elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
                 NameStr(relform1->relname));
        if (relform1->relpersistence != relform2->relpersistence)
            elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
                 NameStr(relform1->relname));
        if (relform1->relam != relform2->relam)
            elog(ERROR, "cannot change access method of mapped relation \"%s\"",
                 NameStr(relform1->relname));
        if (!swap_toast_by_content &&
            (relform1->reltoastrelid || relform2->reltoastrelid))
            elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
                 NameStr(relform1->relname));

        /*
         * Fetch the mappings --- shouldn't fail, but be paranoid
         */
        relfilenumber1 = RelationMapOidToFilenumber(r1,
                                                    relform1->relisshared);
        if (!RelFileNumberIsValid(relfilenumber1))
            elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
                 NameStr(relform1->relname), r1);
        relfilenumber2 = RelationMapOidToFilenumber(r2,
                                                    relform2->relisshared);
        if (!RelFileNumberIsValid(relfilenumber2))
            elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
                 NameStr(relform2->relname), r2);

        /*
         * Send replacement mappings to relmapper.  Note these won't
         * actually take effect until CommandCounterIncrement.
         */
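
        /*
         * (Mapped relations store relfilenode = 0 in pg_class; their
         * physical file numbers live in the relation map file, which is
         * what gets updated here.)
         */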
        RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared,
                             false);
        RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared,
                             false);

        /* Pass OIDs of mapped r2 tables back to caller */
        *mapped_tables++ = r2;
    }

    /*
     * Recognize that rel1's relfilenumber (swapped from rel2) is new in
     * this subtransaction.  The rel2 storage (swapped from rel1) may or may
     * not be new.
     */
    {
        Relation    rel1,
                    rel2;

        rel1 = relation_open(r1, NoLock);
        rel2 = relation_open(r2, NoLock);
        rel2->rd_createSubid = rel1->rd_createSubid;
        rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
        rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
        RelationAssumeNewRelfilelocator(rel1);
        relation_close(rel1, NoLock);
        relation_close(rel2, NoLock);
    }

    /*
     * In the case of a shared catalog, these next few steps will only
     * affect our own database's pg_class row; but that's okay, because they
     * are all noncritical updates.  That's also an important fact for the
     * case of a mapped catalog, because it's possible that we'll commit the
     * map change and then fail to commit the pg_class update.
     */

    /* set rel1's frozen Xid and minimum MultiXid */
    if (relform1->relkind != RELKIND_INDEX)
    {
        Assert(!TransactionIdIsValid(frozenXid) ||
               TransactionIdIsNormal(frozenXid));
        relform1->relfrozenxid = frozenXid;
        relform1->relminmxid = cutoffMulti;
    }

    /* swap size statistics too, since new rel has freshly-updated stats */
    {
        int32       swap_pages;
        float4      swap_tuples;
        int32       swap_allvisible;

        swap_pages = relform1->relpages;
        relform1->relpages = relform2->relpages;
        relform2->relpages = swap_pages;

        swap_tuples = relform1->reltuples;
        relform1->reltuples = relform2->reltuples;
        relform2->reltuples = swap_tuples;

        swap_allvisible = relform1->relallvisible;
        relform1->relallvisible = relform2->relallvisible;
        relform2->relallvisible = swap_allvisible;
    }

    /*
     * Update the tuples in pg_class --- unless the target relation of the
     * swap is pg_class itself.  In that case, there is zero point in making
     * changes because we'd be updating the old data that we're about to
     * throw away.  Because the real work being done here for a mapped
     * relation is just to change the relation map settings, it's all right
     * to not update the pg_class rows in this case.  The most important
     * changes will instead be performed later, in finish_heap_swap()
     * itself.
     */
    if (!target_is_pg_class)
    {
        CatalogIndexState indstate;

        indstate = CatalogOpenIndexes(relRelation);
        CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
                                   indstate);
        CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
                                   indstate);
        CatalogCloseIndexes(indstate);
    }
    else
    {
        /* no update ... but we do still need relcache inval */
        CacheInvalidateRelcacheByTuple(reltup1);
        CacheInvalidateRelcacheByTuple(reltup2);
    }

    /*
     * Now that pg_class has been updated with its relevant information for
     * the swap, update the dependency of the relations to point to their
     * new table AM, if it has changed.
     */
    if (relam1 != relam2)
    {
        if (changeDependencyFor(RelationRelationId,
                                r1,
                                AccessMethodRelationId,
                                relam1,
                                relam2) != 1)
            elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
                 get_namespace_name(get_rel_namespace(r1)),
                 get_rel_name(r1));
        if (changeDependencyFor(RelationRelationId,
                                r2,
                                AccessMethodRelationId,
                                relam2,
                                relam1) != 1)
            elog(ERROR, "failed to change access method dependency for relation \"%s.%s\"",
                 get_namespace_name(get_rel_namespace(r2)),
                 get_rel_name(r2));
    }
    /*
     * Post alter hook for modified relations.  The change to r2 is always
     * internal, but r1 depends on the invocation context.
     */
    InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
                                 InvalidOid, is_internal);
    InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
                                 InvalidOid, true);

    /*
     * If we have toast tables associated with the relations being swapped,
     * deal with them too.
     */
    if (relform1->reltoastrelid || relform2->reltoastrelid)
    {
        if (swap_toast_by_content)
        {
            if (relform1->reltoastrelid && relform2->reltoastrelid)
            {
                /* Recursively swap the contents of the toast tables */
                swap_relation_files(relform1->reltoastrelid,
                                    relform2->reltoastrelid,
                                    target_is_pg_class,
                                    swap_toast_by_content,
                                    is_internal,
                                    frozenXid,
                                    cutoffMulti,
                                    mapped_tables);
            }
            else
            {
                /* caller messed up */
                elog(ERROR, "cannot swap toast files by content when there's only one");
            }
        }
        else
        {
            /*
             * We swapped the ownership links, so we need to change
             * dependency data to match.
             *
             * NOTE: it is possible that only one table has a toast table.
             *
             * NOTE: at present, a TOAST table's only dependency is the one
             * on its owning table.  If more are ever created, we'd need to
             * use something more selective than
             * deleteDependencyRecordsFor() to get rid of just the link we
             * want.
             */
            ObjectAddress baseobject,
                        toastobject;
            long        count;

            /*
             * We disallow this case for system catalogs, to avoid the
             * possibility that the catalog we're rebuilding is one of the
             * ones the dependency changes would change.  It's too late to
             * be making any data changes to the target catalog.
             */
            if (IsSystemClass(r1, relform1))
                elog(ERROR, "cannot swap toast files by links for system catalogs");

            /* Delete old dependencies */
            if (relform1->reltoastrelid)
            {
                count = deleteDependencyRecordsFor(RelationRelationId,
                                                   relform1->reltoastrelid,
                                                   false);
                if (count != 1)
                    elog(ERROR, "expected one dependency record for TOAST table, found %ld",
                         count);
            }
            if (relform2->reltoastrelid)
            {
                count = deleteDependencyRecordsFor(RelationRelationId,
                                                   relform2->reltoastrelid,
                                                   false);
                if (count != 1)
                    elog(ERROR, "expected one dependency record for TOAST table, found %ld",
                         count);
            }

            /* Register new dependencies */
            baseobject.classId = RelationRelationId;
            baseobject.objectSubId = 0;
            toastobject.classId = RelationRelationId;
            toastobject.objectSubId = 0;

            if (relform1->reltoastrelid)
            {
                baseobject.objectId = r1;
                toastobject.objectId = relform1->reltoastrelid;
                recordDependencyOn(&toastobject, &baseobject,
                                   DEPENDENCY_INTERNAL);
            }

            if (relform2->reltoastrelid)
            {
                baseobject.objectId = r2;
                toastobject.objectId = relform2->reltoastrelid;
                recordDependencyOn(&toastobject, &baseobject,
                                   DEPENDENCY_INTERNAL);
            }
        }
    }

    /*
     * If we're swapping two toast tables by content, do the same for their
     * valid index.  The swap can actually be safely done only if the
     * relations have indexes.
     */
    if (swap_toast_by_content &&
        relform1->relkind == RELKIND_TOASTVALUE &&
        relform2->relkind == RELKIND_TOASTVALUE)
    {
        Oid         toastIndex1,
                    toastIndex2;

        /* Get valid index for each relation */
        toastIndex1 = toast_get_valid_index(r1,
                                            AccessExclusiveLock);
        toastIndex2 = toast_get_valid_index(r2,
                                            AccessExclusiveLock);

        swap_relation_files(toastIndex1,
                            toastIndex2,
                            target_is_pg_class,
                            swap_toast_by_content,
                            is_internal,
                            InvalidTransactionId,
                            InvalidMultiXactId,
                            mapped_tables);
    }

    /* Clean up. */
    heap_freetuple(reltup1);
    heap_freetuple(reltup2);

    table_close(relRelation, RowExclusiveLock);

    /*
     * Close both relcache entries' smgr links.  We need this kluge because
     * both links will be invalidated during upcoming CommandCounterIncrement.
     * Whichever of the rels is the second to be cleared will have a
     * dangling reference to the other's smgr entry.  Rather than trying to
     * avoid this by ordering operations just so, it's easiest to close the
     * links first.  (Fortunately, since one of the entries is local in our
     * transaction, it's sufficient to clear out our own relcache this way;
     * the problem cannot arise for other backends when they see our update
     * on the non-transient relation.)
     *
     * Caution: the placement of this step interacts with the decision to
     * handle toast rels by recursion.  When we are trying to rebuild
     * pg_class itself, the smgr close on pg_class must happen after all
     * accesses in this function.
     */
    RelationCloseSmgrByOid(r1);
    RelationCloseSmgrByOid(r2);
}

/*
 * Remove the transient table that was built by make_new_heap, and finish
 * cleaning up (including rebuilding all indexes on the old heap).
 */
void
finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
                 bool is_system_catalog,
                 bool swap_toast_by_content,
                 bool check_constraints,
                 bool is_internal,
                 TransactionId frozenXid,
                 MultiXactId cutoffMulti,
                 char newrelpersistence)
{
    ObjectAddress object;
    Oid         mapped_tables[4];
    int         reindex_flags;
    ReindexParams reindex_params = {0};
    int         i;

    /* Report that we are now swapping relation files */
    pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                 PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);

    /* Zero out possible results from swap_relation_files */
    memset(mapped_tables, 0, sizeof(mapped_tables));

    /*
     * Swap the contents of the heap relations (including any toast tables).
     * Also set old heap's relfrozenxid to frozenXid.
     */
    swap_relation_files(OIDOldHeap, OIDNewHeap,
                        (OIDOldHeap == RelationRelationId),
                        swap_toast_by_content, is_internal,
                        frozenXid, cutoffMulti, mapped_tables);

    /*
     * If it's a system catalog, queue a sinval message to flush all
     * catcaches on the catalog when we reach CommandCounterIncrement.
     */
    if (is_system_catalog)
        CacheInvalidateCatalog(OIDOldHeap);

    /*
     * Rebuild each index on the relation (but not the toast table, which is
     * all-new at this point).  It is important to do this before the DROP
     * step because if we are processing a system catalog that will be used
     * during DROP, we want to have its indexes available.  There is no
     * advantage to the other order anyway because this is all
     * transactional, so no chance to reclaim disk space before commit.  We
     * do not need a final CommandCounterIncrement() because
     * reindex_relation does it.
     *
     * Note: because index_build is called via reindex_relation, it will
     * never set indcheckxmin true for the indexes.  This is OK even though
     * in some sense we are building new indexes rather than rebuilding
     * existing ones, because the new heap won't contain any HOT chains at
     * all, let alone broken ones, so it can't be necessary to set
     * indcheckxmin.
     */
    reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    if (check_constraints)
        reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;

    /*
     * Ensure that the indexes have the same persistence as the parent
     * relation.
     */
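
    /*
     * (For instance, an ALTER TABLE ... SET UNLOGGED rewrite arrives here
     * with newrelpersistence = RELPERSISTENCE_UNLOGGED, so the rebuilt
     * indexes must be forced to match.)
     */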
    if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
        reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
        reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;

    /* Report that we are now reindexing relations */
    pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                 PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);

    reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);

    /* Report that we are now doing clean up */
    pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                 PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);

    /*
     * If the relation being rebuilt is pg_class, swap_relation_files()
     * couldn't update pg_class's own pg_class entry (check comments in
     * swap_relation_files()), thus relfrozenxid was not updated.  That's
     * annoying because a potential reason for doing a VACUUM FULL is an
     * imminent or actual anti-wraparound shutdown.  So, now that we can
     * access the new relation using its indices, update relfrozenxid.
     * pg_class doesn't have a toast relation, so we don't need to update
     * the corresponding toast relation.  Note that there's little point
     * moving all relfrozenxid updates here since swap_relation_files()
     * needs to write to pg_class for non-mapped relations anyway.
     */
    if (OIDOldHeap == RelationRelationId)
    {
        Relation    relRelation;
        HeapTuple   reltup;
        Form_pg_class relform;

        relRelation = table_open(RelationRelationId, RowExclusiveLock);

        reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
        if (!HeapTupleIsValid(reltup))
            elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
        relform = (Form_pg_class) GETSTRUCT(reltup);

        relform->relfrozenxid = frozenXid;
        relform->relminmxid = cutoffMulti;

        CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);

        table_close(relRelation, RowExclusiveLock);
    }

    /* Destroy new heap with old filenumber */
    object.classId = RelationRelationId;
    object.objectId = OIDNewHeap;
    object.objectSubId = 0;

    /*
     * The new relation is local to our transaction and we know nothing
     * depends on it, so DROP_RESTRICT should be OK.
     */
    performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

    /* performDeletion does CommandCounterIncrement at end */

    /*
     * Now we must remove any relation mapping entries that we set up for
     * the transient table, as well as its toast table and toast index if
     * any.  If we fail to do this before commit, the relmapper will
     * complain about new permanent map entries being added post-bootstrap.
     */
    for (i = 0; OidIsValid(mapped_tables[i]); i++)
        RelationMapRemoveMapping(mapped_tables[i]);

    /*
     * At this point, everything is kosher except that, if we did toast swap
     * by links, the toast table's name corresponds to the transient table.
     * The name is irrelevant to the backend because it's referenced by OID,
     * but users looking at the catalogs could be confused.  Rename it to
     * prevent this problem.
     *
     * Note no lock required on the relation, because we already hold an
     * exclusive lock on it.
     */
    if (!swap_toast_by_content)
    {
        Relation    newrel;

        newrel = table_open(OIDOldHeap, NoLock);
        if (OidIsValid(newrel->rd_rel->reltoastrelid))
        {
            Oid         toastidx;
            char        NewToastName[NAMEDATALEN];

            /* Get the associated valid index to be renamed */
            toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
                                             NoLock);

            /* rename the toast table ... */
            snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
                     OIDOldHeap);
            RenameRelationInternal(newrel->rd_rel->reltoastrelid,
                                   NewToastName, true, false);

            /* ... and its valid index too. */
            snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
                     OIDOldHeap);
            RenameRelationInternal(toastidx,
                                   NewToastName, true, true);

            /*
             * Reset the relrewrite for the toast.  The command-counter
             * increment is required here as we are about to update the
             * tuple that is updated as part of RenameRelationInternal.
             */
            CommandCounterIncrement();
            ResetRelRewrite(newrel->rd_rel->reltoastrelid);
        }
        relation_close(newrel, NoLock);
    }

    /* if it's not a catalog table, clear any missing attribute settings */
    if (!is_system_catalog)
    {
        Relation    newrel;

        newrel = table_open(OIDOldHeap, NoLock);
        RelationClearMissing(newrel);
        relation_close(newrel, NoLock);
    }
}

/*
 * Get a list of tables that the current user owns and have indisclustered
 * set.  Return the list in a List * of RelToCluster (stored in the
 * specified memory context), each one giving the tableOid and the indexOid
 * on which the table is already clustered.
 */
static List *
get_tables_to_cluster(MemoryContext cluster_context)
{
    Relation    indRelation;
    TableScanDesc scan;
    ScanKeyData entry;
    HeapTuple   indexTuple;
    Form_pg_index index;
    MemoryContext old_context;
    List       *rtcs = NIL;

    /*
     * Get all indexes that have indisclustered set and are owned by
     * appropriate user.
     */
    indRelation = table_open(IndexRelationId, AccessShareLock);
    ScanKeyInit(&entry,
                Anum_pg_index_indisclustered,
                BTEqualStrategyNumber, F_BOOLEQ,
                BoolGetDatum(true));
    scan = table_beginscan_catalog(indRelation, 1, &entry);
    while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        RelToCluster *rtc;

        index = (Form_pg_index) GETSTRUCT(indexTuple);

        if (!object_ownercheck(RelationRelationId, index->indrelid,
                               GetUserId()))
            continue;

        /* Use a permanent memory context for the result list */
        old_context = MemoryContextSwitchTo(cluster_context);

        rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
        rtc->tableOid = index->indrelid;
        rtc->indexOid = index->indexrelid;
        rtcs = lappend(rtcs, rtc);

        MemoryContextSwitchTo(old_context);
    }
    table_endscan(scan);

    relation_close(indRelation, AccessShareLock);

    return rtcs;
}

/*
 * Given an index on a partitioned table, return a list of RelToCluster for
 * all the leaf tables/indexes among its children.
 *
 * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
 * on the table containing the index.
 */
static List *
get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
{
    List       *inhoids;
    ListCell   *lc;
    List       *rtcs = NIL;
    MemoryContext old_context;

    /* Do not lock the children until they're processed */
    inhoids = find_all_inheritors(indexOid, NoLock, NULL);

    foreach(lc, inhoids)
    {
        Oid         indexrelid = lfirst_oid(lc);
        Oid         relid = IndexGetRelation(indexrelid, false);
        RelToCluster *rtc;

        /* consider only leaf indexes */
        if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
            continue;

        /* Silently skip partitions which the user has no access to. */
        if (!object_ownercheck(RelationRelationId, relid, GetUserId()) &&
            (!object_ownercheck(DatabaseRelationId, MyDatabaseId,
                                GetUserId()) ||
             IsSharedRelation(relid)))
            continue;

        /* Use a permanent memory context for the result list */
        old_context = MemoryContextSwitchTo(cluster_context);

        rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
        rtc->tableOid = relid;
        rtc->indexOid = indexrelid;
        rtcs = lappend(rtcs, rtc);

        MemoryContextSwitchTo(old_context);
    }

    return rtcs;
}