summaryrefslogtreecommitdiffstats
path: root/src/backend/catalog/index.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/catalog/index.c')
-rw-r--r--src/backend/catalog/index.c4178
1 files changed, 4178 insertions, 0 deletions
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
new file mode 100644
index 0000000..a90cc3a
--- /dev/null
+++ b/src/backend/catalog/index.c
@@ -0,0 +1,4178 @@
+/*-------------------------------------------------------------------------
+ *
+ * index.c
+ * code to create and destroy POSTGRES index relations
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/index.c
+ *
+ *
+ * INTERFACE ROUTINES
+ * index_create() - Create a cataloged index relation
+ * index_drop() - Removes index relation from catalogs
+ * BuildIndexInfo() - Prepare to insert index tuples
+ * FormIndexDatum() - Construct datum vector for one index tuple
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "access/amapi.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "access/reloptions.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "access/toast_compression.h"
+#include "access/transam.h"
+#include "access/visibilitymap.h"
+#include "access/xact.h"
+#include "bootstrap/bootstrap.h"
+#include "catalog/binary_upgrade.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_description.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage.h"
+#include "commands/event_trigger.h"
+#include "commands/progress.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "parser/parser.h"
+#include "pgstat.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/predicate.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_rusage.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tuplesort.h"
+
+/* Potentially set by pg_upgrade_support functions */
+Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
+
+/*
+ * Pointer-free representation of variables used when reindexing system
+ * catalogs; we use this to propagate those values to parallel workers.
+ */
+typedef struct
+{
+ Oid currentlyReindexedHeap;
+ Oid currentlyReindexedIndex;
+ int numPendingReindexedIndexes;
+ Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
+} SerializedReindexState;
+
+/* non-export function prototypes */
+static bool relationHasPrimaryKey(Relation rel);
+static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
+ IndexInfo *indexInfo,
+ List *indexColNames,
+ Oid accessMethodObjectId,
+ Oid *collationObjectId,
+ Oid *classObjectId);
+static void InitializeAttributeOids(Relation indexRelation,
+ int numatts, Oid indexoid);
+static void AppendAttributeTuples(Relation indexRelation, Datum *attopts);
+static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
+ Oid parentIndexId,
+ IndexInfo *indexInfo,
+ Oid *collationOids,
+ Oid *classOids,
+ int16 *coloptions,
+ bool primary,
+ bool isexclusion,
+ bool immediate,
+ bool isvalid,
+ bool isready);
+static void index_update_stats(Relation rel,
+ bool hasindex,
+ double reltuples);
+static void IndexCheckExclusion(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo);
+static bool validate_index_callback(ItemPointer itemptr, void *opaque);
+static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
+static void SetReindexProcessing(Oid heapOid, Oid indexOid);
+static void ResetReindexProcessing(void);
+static void SetReindexPending(List *indexes);
+static void RemoveReindexPending(Oid indexOid);
+
+
+/*
+ * relationHasPrimaryKey
+ * See whether an existing relation has a primary key.
+ *
+ * Caller must have suitable lock on the relation.
+ *
+ * Note: we intentionally do not check indisvalid here; that's because this
+ * is used to enforce the rule that there can be only one indisprimary index,
+ * and we want that to be true even if said index is invalid.
+ */
+static bool
+relationHasPrimaryKey(Relation rel)
+{
+ bool result = false;
+ List *indexoidlist;
+ ListCell *indexoidscan;
+
+ /*
+ * Get the list of index OIDs for the table from the relcache, and look up
+ * each one in the pg_index syscache until we find one marked primary key
+ * (hopefully there isn't more than one such).
+ */
+ indexoidlist = RelationGetIndexList(rel);
+
+ foreach(indexoidscan, indexoidlist)
+ {
+ Oid indexoid = lfirst_oid(indexoidscan);
+ HeapTuple indexTuple;
+
+ indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
+ if (!HeapTupleIsValid(indexTuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for index %u", indexoid);
+ result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
+ ReleaseSysCache(indexTuple);
+ if (result)
+ break;
+ }
+
+ list_free(indexoidlist);
+
+ return result;
+}
+
+/*
+ * index_check_primary_key
+ * Apply special checks needed before creating a PRIMARY KEY index
+ *
+ * This processing used to be in DefineIndex(), but has been split out
+ * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
+ *
+ * We check for a pre-existing primary key, and that all columns of the index
+ * are simple column references (not expressions), and that all those
+ * columns are marked NOT NULL. If not, fail.
+ *
+ * We used to automatically change unmarked columns to NOT NULL here by doing
+ * our own local ALTER TABLE command. But that doesn't work well if we're
+ * executing one subcommand of an ALTER TABLE: the operations may not get
+ * performed in the right order overall. Now we expect that the parser
+ * inserted any required ALTER TABLE SET NOT NULL operations before trying
+ * to create a primary-key index.
+ *
+ * Caller had better have at least ShareLock on the table, else the not-null
+ * checking isn't trustworthy.
+ */
+void
+index_check_primary_key(Relation heapRel,
+ IndexInfo *indexInfo,
+ bool is_alter_table,
+ IndexStmt *stmt)
+{
+ int i;
+
+ /*
+ * If ALTER TABLE or CREATE TABLE .. PARTITION OF, check that there isn't
+ * already a PRIMARY KEY. In CREATE TABLE for an ordinary relation, we
+ * have faith that the parser rejected multiple pkey clauses; and CREATE
+ * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
+ */
+ if ((is_alter_table || heapRel->rd_rel->relispartition) &&
+ relationHasPrimaryKey(heapRel))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("multiple primary keys for table \"%s\" are not allowed",
+ RelationGetRelationName(heapRel))));
+ }
+
+ /*
+ * Check that all of the attributes in a primary key are marked as not
+ * null. (We don't really expect to see that; it'd mean the parser messed
+ * up. But it seems wise to check anyway.)
+ */
+ for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+ {
+ AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i];
+ HeapTuple atttuple;
+ Form_pg_attribute attform;
+
+ if (attnum == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("primary keys cannot be expressions")));
+
+ /* System attributes are never null, so no need to check */
+ if (attnum < 0)
+ continue;
+
+ atttuple = SearchSysCache2(ATTNUM,
+ ObjectIdGetDatum(RelationGetRelid(heapRel)),
+ Int16GetDatum(attnum));
+ if (!HeapTupleIsValid(atttuple))
+ elog(ERROR, "cache lookup failed for attribute %d of relation %u",
+ attnum, RelationGetRelid(heapRel));
+ attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+ if (!attform->attnotnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("primary key column \"%s\" is not marked NOT NULL",
+ NameStr(attform->attname))));
+
+ ReleaseSysCache(atttuple);
+ }
+}
+
+/*
+ * ConstructTupleDescriptor
+ *
+ * Build an index tuple descriptor for a new index
+ */
+static TupleDesc
+ConstructTupleDescriptor(Relation heapRelation,
+ IndexInfo *indexInfo,
+ List *indexColNames,
+ Oid accessMethodObjectId,
+ Oid *collationObjectId,
+ Oid *classObjectId)
+{
+ int numatts = indexInfo->ii_NumIndexAttrs;
+ int numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
+ ListCell *colnames_item = list_head(indexColNames);
+ ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
+ IndexAmRoutine *amroutine;
+ TupleDesc heapTupDesc;
+ TupleDesc indexTupDesc;
+ int natts; /* #atts in heap rel --- for error checks */
+ int i;
+
+ /* We need access to the index AM's API struct */
+ amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
+
+ /* ... and to the table's tuple descriptor */
+ heapTupDesc = RelationGetDescr(heapRelation);
+ natts = RelationGetForm(heapRelation)->relnatts;
+
+ /*
+ * allocate the new tuple descriptor
+ */
+ indexTupDesc = CreateTemplateTupleDesc(numatts);
+
+ /*
+ * Fill in the pg_attribute row.
+ */
+ for (i = 0; i < numatts; i++)
+ {
+ AttrNumber atnum = indexInfo->ii_IndexAttrNumbers[i];
+ Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
+ HeapTuple tuple;
+ Form_pg_type typeTup;
+ Form_pg_opclass opclassTup;
+ Oid keyType;
+
+ MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
+ to->attnum = i + 1;
+ to->attstattarget = -1;
+ to->attcacheoff = -1;
+ to->attislocal = true;
+ to->attcollation = (i < numkeyatts) ?
+ collationObjectId[i] : InvalidOid;
+
+ /*
+ * Set the attribute name as specified by caller.
+ */
+ if (colnames_item == NULL) /* shouldn't happen */
+ elog(ERROR, "too few entries in colnames list");
+ namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
+ colnames_item = lnext(indexColNames, colnames_item);
+
+ /*
+ * For simple index columns, we copy some pg_attribute fields from the
+ * parent relation. For expressions we have to look at the expression
+ * result.
+ */
+ if (atnum != 0)
+ {
+ /* Simple index column */
+ const FormData_pg_attribute *from;
+
+ Assert(atnum > 0); /* should've been caught above */
+
+ if (atnum > natts) /* safety check */
+ elog(ERROR, "invalid column number %d", atnum);
+ from = TupleDescAttr(heapTupDesc,
+ AttrNumberGetAttrOffset(atnum));
+
+ to->atttypid = from->atttypid;
+ to->attlen = from->attlen;
+ to->attndims = from->attndims;
+ to->atttypmod = from->atttypmod;
+ to->attbyval = from->attbyval;
+ to->attalign = from->attalign;
+ to->attstorage = from->attstorage;
+ to->attcompression = from->attcompression;
+ }
+ else
+ {
+ /* Expressional index */
+ Node *indexkey;
+
+ if (indexpr_item == NULL) /* shouldn't happen */
+ elog(ERROR, "too few entries in indexprs list");
+ indexkey = (Node *) lfirst(indexpr_item);
+ indexpr_item = lnext(indexInfo->ii_Expressions, indexpr_item);
+
+ /*
+ * Lookup the expression type in pg_type for the type length etc.
+ */
+ keyType = exprType(indexkey);
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for type %u", keyType);
+ typeTup = (Form_pg_type) GETSTRUCT(tuple);
+
+ /*
+ * Assign some of the attributes values. Leave the rest.
+ */
+ to->atttypid = keyType;
+ to->attlen = typeTup->typlen;
+ to->atttypmod = exprTypmod(indexkey);
+ to->attbyval = typeTup->typbyval;
+ to->attalign = typeTup->typalign;
+ to->attstorage = typeTup->typstorage;
+
+ /*
+ * For expression columns, set attcompression invalid, since
+ * there's no table column from which to copy the value. Whenever
+ * we actually need to compress a value, we'll use whatever the
+ * current value of default_toast_compression is at that point in
+ * time.
+ */
+ to->attcompression = InvalidCompressionMethod;
+
+ ReleaseSysCache(tuple);
+
+ /*
+ * Make sure the expression yields a type that's safe to store in
+ * an index. We need this defense because we have index opclasses
+ * for pseudo-types such as "record", and the actually stored type
+ * had better be safe; eg, a named composite type is okay, an
+ * anonymous record type is not. The test is the same as for
+ * whether a table column is of a safe type (which is why we
+ * needn't check for the non-expression case).
+ */
+ CheckAttributeType(NameStr(to->attname),
+ to->atttypid, to->attcollation,
+ NIL, 0);
+ }
+
+ /*
+ * We do not yet have the correct relation OID for the index, so just
+ * set it invalid for now. InitializeAttributeOids() will fix it
+ * later.
+ */
+ to->attrelid = InvalidOid;
+
+ /*
+ * Check the opclass and index AM to see if either provides a keytype
+ * (overriding the attribute type). Opclass (if exists) takes
+ * precedence.
+ */
+ keyType = amroutine->amkeytype;
+
+ if (i < indexInfo->ii_NumIndexKeyAttrs)
+ {
+ tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for opclass %u",
+ classObjectId[i]);
+ opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
+ if (OidIsValid(opclassTup->opckeytype))
+ keyType = opclassTup->opckeytype;
+
+ /*
+ * If keytype is specified as ANYELEMENT, and opcintype is
+ * ANYARRAY, then the attribute type must be an array (else it'd
+ * not have matched this opclass); use its element type.
+ *
+ * We could also allow ANYCOMPATIBLE/ANYCOMPATIBLEARRAY here, but
+ * there seems no need to do so; there's no reason to declare an
+ * opclass as taking ANYCOMPATIBLEARRAY rather than ANYARRAY.
+ */
+ if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
+ {
+ keyType = get_base_element_type(to->atttypid);
+ if (!OidIsValid(keyType))
+ elog(ERROR, "could not get element type of array type %u",
+ to->atttypid);
+ }
+
+ ReleaseSysCache(tuple);
+ }
+
+ /*
+ * If a key type different from the heap value is specified, update
+ * the type-related fields in the index tupdesc.
+ */
+ if (OidIsValid(keyType) && keyType != to->atttypid)
+ {
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for type %u", keyType);
+ typeTup = (Form_pg_type) GETSTRUCT(tuple);
+
+ to->atttypid = keyType;
+ to->atttypmod = -1;
+ to->attlen = typeTup->typlen;
+ to->attbyval = typeTup->typbyval;
+ to->attalign = typeTup->typalign;
+ to->attstorage = typeTup->typstorage;
+ /* As above, use the default compression method in this case */
+ to->attcompression = InvalidCompressionMethod;
+
+ ReleaseSysCache(tuple);
+ }
+ }
+
+ pfree(amroutine);
+
+ return indexTupDesc;
+}
+
+/* ----------------------------------------------------------------
+ * InitializeAttributeOids
+ * ----------------------------------------------------------------
+ */
+static void
+InitializeAttributeOids(Relation indexRelation,
+ int numatts,
+ Oid indexoid)
+{
+ TupleDesc tupleDescriptor;
+ int i;
+
+ tupleDescriptor = RelationGetDescr(indexRelation);
+
+ for (i = 0; i < numatts; i += 1)
+ TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
+}
+
+/* ----------------------------------------------------------------
+ * AppendAttributeTuples
+ * ----------------------------------------------------------------
+ */
+static void
+AppendAttributeTuples(Relation indexRelation, Datum *attopts)
+{
+ Relation pg_attribute;
+ CatalogIndexState indstate;
+ TupleDesc indexTupDesc;
+
+ /*
+ * open the attribute relation and its indexes
+ */
+ pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
+
+ indstate = CatalogOpenIndexes(pg_attribute);
+
+ /*
+ * insert data from new index's tupdesc into pg_attribute
+ */
+ indexTupDesc = RelationGetDescr(indexRelation);
+
+ InsertPgAttributeTuples(pg_attribute, indexTupDesc, InvalidOid, attopts, indstate);
+
+ CatalogCloseIndexes(indstate);
+
+ table_close(pg_attribute, RowExclusiveLock);
+}
+
+/* ----------------------------------------------------------------
+ * UpdateIndexRelation
+ *
+ * Construct and insert a new entry in the pg_index catalog
+ * ----------------------------------------------------------------
+ */
+static void
+UpdateIndexRelation(Oid indexoid,
+ Oid heapoid,
+ Oid parentIndexId,
+ IndexInfo *indexInfo,
+ Oid *collationOids,
+ Oid *classOids,
+ int16 *coloptions,
+ bool primary,
+ bool isexclusion,
+ bool immediate,
+ bool isvalid,
+ bool isready)
+{
+ int2vector *indkey;
+ oidvector *indcollation;
+ oidvector *indclass;
+ int2vector *indoption;
+ Datum exprsDatum;
+ Datum predDatum;
+ Datum values[Natts_pg_index];
+ bool nulls[Natts_pg_index];
+ Relation pg_index;
+ HeapTuple tuple;
+ int i;
+
+ /*
+ * Copy the index key, opclass, and indoption info into arrays (should we
+ * make the caller pass them like this to start with?)
+ */
+ indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
+ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
+ indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
+ indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
+ indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
+
+ /*
+ * Convert the index expressions (if any) to a text datum
+ */
+ if (indexInfo->ii_Expressions != NIL)
+ {
+ char *exprsString;
+
+ exprsString = nodeToString(indexInfo->ii_Expressions);
+ exprsDatum = CStringGetTextDatum(exprsString);
+ pfree(exprsString);
+ }
+ else
+ exprsDatum = (Datum) 0;
+
+ /*
+ * Convert the index predicate (if any) to a text datum. Note we convert
+ * implicit-AND format to normal explicit-AND for storage.
+ */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ char *predString;
+
+ predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
+ predDatum = CStringGetTextDatum(predString);
+ pfree(predString);
+ }
+ else
+ predDatum = (Datum) 0;
+
+
+ /*
+ * open the system catalog index relation
+ */
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ /*
+ * Build a pg_index tuple
+ */
+ MemSet(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
+ values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
+ values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
+ values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
+ values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
+ values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
+ values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
+ values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
+ values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
+ values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
+ values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
+ values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
+ values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
+ values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
+ values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
+ values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
+ values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
+ values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
+ values[Anum_pg_index_indexprs - 1] = exprsDatum;
+ if (exprsDatum == (Datum) 0)
+ nulls[Anum_pg_index_indexprs - 1] = true;
+ values[Anum_pg_index_indpred - 1] = predDatum;
+ if (predDatum == (Datum) 0)
+ nulls[Anum_pg_index_indpred - 1] = true;
+
+ tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
+
+ /*
+ * insert the tuple into the pg_index catalog
+ */
+ CatalogTupleInsert(pg_index, tuple);
+
+ /*
+ * close the relation and free the tuple
+ */
+ table_close(pg_index, RowExclusiveLock);
+ heap_freetuple(tuple);
+}
+
+
+/*
+ * index_create
+ *
+ * heapRelation: table to build index on (suitably locked by caller)
+ * indexRelationName: what it say
+ * indexRelationId: normally, pass InvalidOid to let this routine
+ * generate an OID for the index. During bootstrap this may be
+ * nonzero to specify a preselected OID.
+ * parentIndexRelid: if creating an index partition, the OID of the
+ * parent index; otherwise InvalidOid.
+ * parentConstraintId: if creating a constraint on a partition, the OID
+ * of the constraint in the parent; otherwise InvalidOid.
+ * relFileNode: normally, pass InvalidOid to get new storage. May be
+ * nonzero to attach an existing valid build.
+ * indexInfo: same info executor uses to insert into the index
+ * indexColNames: column names to use for index (List of char *)
+ * accessMethodObjectId: OID of index AM to use
+ * tableSpaceId: OID of tablespace to use
+ * collationObjectId: array of collation OIDs, one per index column
+ * classObjectId: array of index opclass OIDs, one per index column
+ * coloptions: array of per-index-column indoption settings
+ * reloptions: AM-specific options
+ * flags: bitmask that can include any combination of these bits:
+ * INDEX_CREATE_IS_PRIMARY
+ * the index is a primary key
+ * INDEX_CREATE_ADD_CONSTRAINT:
+ * invoke index_constraint_create also
+ * INDEX_CREATE_SKIP_BUILD:
+ * skip the index_build() step for the moment; caller must do it
+ * later (typically via reindex_index())
+ * INDEX_CREATE_CONCURRENT:
+ * do not lock the table against writers. The index will be
+ * marked "invalid" and the caller must take additional steps
+ * to fix it up.
+ * INDEX_CREATE_IF_NOT_EXISTS:
+ * do not throw an error if a relation with the same name
+ * already exists.
+ * INDEX_CREATE_PARTITIONED:
+ * create a partitioned index (table must be partitioned)
+ * constr_flags: flags passed to index_constraint_create
+ * (only if INDEX_CREATE_ADD_CONSTRAINT is set)
+ * allow_system_table_mods: allow table to be a system catalog
+ * is_internal: if true, post creation hook for new index
+ * constraintId: if not NULL, receives OID of created constraint
+ *
+ * Returns the OID of the created index.
+ */
+Oid
+index_create(Relation heapRelation,
+ const char *indexRelationName,
+ Oid indexRelationId,
+ Oid parentIndexRelid,
+ Oid parentConstraintId,
+ Oid relFileNode,
+ IndexInfo *indexInfo,
+ List *indexColNames,
+ Oid accessMethodObjectId,
+ Oid tableSpaceId,
+ Oid *collationObjectId,
+ Oid *classObjectId,
+ int16 *coloptions,
+ Datum reloptions,
+ bits16 flags,
+ bits16 constr_flags,
+ bool allow_system_table_mods,
+ bool is_internal,
+ Oid *constraintId)
+{
+ Oid heapRelationId = RelationGetRelid(heapRelation);
+ Relation pg_class;
+ Relation indexRelation;
+ TupleDesc indexTupDesc;
+ bool shared_relation;
+ bool mapped_relation;
+ bool is_exclusion;
+ Oid namespaceId;
+ int i;
+ char relpersistence;
+ bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
+ bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
+ bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
+ bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
+ char relkind;
+ TransactionId relfrozenxid;
+ MultiXactId relminmxid;
+
+ /* constraint flags can only be set when a constraint is requested */
+ Assert((constr_flags == 0) ||
+ ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
+ /* partitioned indexes must never be "built" by themselves */
+ Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
+
+ relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
+ is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
+
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ /*
+ * The index will be in the same namespace as its parent table, and is
+ * shared across databases if and only if the parent is. Likewise, it
+ * will use the relfilenode map if and only if the parent does; and it
+ * inherits the parent's relpersistence.
+ */
+ namespaceId = RelationGetNamespace(heapRelation);
+ shared_relation = heapRelation->rd_rel->relisshared;
+ mapped_relation = RelationIsMapped(heapRelation);
+ relpersistence = heapRelation->rd_rel->relpersistence;
+
+ /*
+ * check parameters
+ */
+ if (indexInfo->ii_NumIndexAttrs < 1)
+ elog(ERROR, "must index at least one column");
+
+ if (!allow_system_table_mods &&
+ IsSystemRelation(heapRelation) &&
+ IsNormalProcessingMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("user-defined indexes on system catalog tables are not supported")));
+
+ /*
+ * Btree text_pattern_ops uses text_eq as the equality operator, which is
+ * fine as long as the collation is deterministic; text_eq then reduces to
+ * bitwise equality and so it is semantically compatible with the other
+ * operators and functions in that opclass. But with a nondeterministic
+ * collation, text_eq could yield results that are incompatible with the
+ * actual behavior of the index (which is determined by the opclass's
+ * comparison function). We prevent such problems by refusing creation of
+ * an index with that opclass and a nondeterministic collation.
+ *
+ * The same applies to varchar_pattern_ops and bpchar_pattern_ops. If we
+ * find more cases, we might decide to create a real mechanism for marking
+ * opclasses as incompatible with nondeterminism; but for now, this small
+ * hack suffices.
+ *
+ * Another solution is to use a special operator, not text_eq, as the
+ * equality opclass member; but that is undesirable because it would
+ * prevent index usage in many queries that work fine today.
+ */
+ for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+ {
+ Oid collation = collationObjectId[i];
+ Oid opclass = classObjectId[i];
+
+ if (collation)
+ {
+ if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
+ opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
+ opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
+ !get_collation_isdeterministic(collation))
+ {
+ HeapTuple classtup;
+
+ classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
+ if (!HeapTupleIsValid(classtup))
+ elog(ERROR, "cache lookup failed for operator class %u", opclass);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+ NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
+ ReleaseSysCache(classtup);
+ }
+ }
+ }
+
+ /*
+ * Concurrent index build on a system catalog is unsafe because we tend to
+ * release locks before committing in catalogs.
+ */
+ if (concurrent &&
+ IsCatalogRelation(heapRelation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("concurrent index creation on system catalog tables is not supported")));
+
+ /*
+ * This case is currently not supported. There's no way to ask for it in
+ * the grammar with CREATE INDEX, but it can happen with REINDEX.
+ */
+ if (concurrent && is_exclusion)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("concurrent index creation for exclusion constraints is not supported")));
+
+ /*
+ * We cannot allow indexing a shared relation after initdb (because
+ * there's no way to make the entry in other databases' pg_class).
+ */
+ if (shared_relation && !IsBootstrapProcessingMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("shared indexes cannot be created after initdb")));
+
+ /*
+ * Shared relations must be in pg_global, too (last-ditch check)
+ */
+ if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
+ elog(ERROR, "shared relations must be placed in pg_global tablespace");
+
+ /*
+ * Check for duplicate name (both as to the index, and as to the
+ * associated constraint if any). Such cases would fail on the relevant
+ * catalogs' unique indexes anyway, but we prefer to give a friendlier
+ * error message.
+ */
+ if (get_relname_relid(indexRelationName, namespaceId))
+ {
+ if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("relation \"%s\" already exists, skipping",
+ indexRelationName)));
+ table_close(pg_class, RowExclusiveLock);
+ return InvalidOid;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("relation \"%s\" already exists",
+ indexRelationName)));
+ }
+
+ if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
+ ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
+ indexRelationName))
+ {
+ /*
+ * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
+ * conflicting constraint is not an index.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("constraint \"%s\" for relation \"%s\" already exists",
+ indexRelationName, RelationGetRelationName(heapRelation))));
+ }
+
+ /*
+ * construct tuple descriptor for index tuples
+ */
+ indexTupDesc = ConstructTupleDescriptor(heapRelation,
+ indexInfo,
+ indexColNames,
+ accessMethodObjectId,
+ collationObjectId,
+ classObjectId);
+
+ /*
+ * Allocate an OID for the index, unless we were told what to use.
+ *
+ * The OID will be the relfilenode as well, so make sure it doesn't
+ * collide with either pg_class OIDs or existing physical files.
+ */
+ if (!OidIsValid(indexRelationId))
+ {
+ /* Use binary-upgrade override for pg_class.oid/relfilenode? */
+ if (IsBinaryUpgrade)
+ {
+ if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_class index OID value not set when in binary upgrade mode")));
+
+ indexRelationId = binary_upgrade_next_index_pg_class_oid;
+ binary_upgrade_next_index_pg_class_oid = InvalidOid;
+ }
+ else
+ {
+ indexRelationId =
+ GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
+ }
+ }
+
+ /*
+ * create the index relation's relcache entry and, if necessary, the
+ * physical disk file. (If we fail further down, it's the smgr's
+ * responsibility to remove the disk file again, if any.)
+ */
+ indexRelation = heap_create(indexRelationName,
+ namespaceId,
+ tableSpaceId,
+ indexRelationId,
+ relFileNode,
+ accessMethodObjectId,
+ indexTupDesc,
+ relkind,
+ relpersistence,
+ shared_relation,
+ mapped_relation,
+ allow_system_table_mods,
+ &relfrozenxid,
+ &relminmxid);
+
+ Assert(relfrozenxid == InvalidTransactionId);
+ Assert(relminmxid == InvalidMultiXactId);
+ Assert(indexRelationId == RelationGetRelid(indexRelation));
+
+ /*
+ * Obtain exclusive lock on it. Although no other transactions can see it
+ * until we commit, this prevents deadlock-risk complaints from lock
+ * manager in cases such as CLUSTER.
+ */
+ LockRelation(indexRelation, AccessExclusiveLock);
+
+ /*
+ * Fill in fields of the index's pg_class entry that are not set correctly
+ * by heap_create.
+ *
+ * XXX should have a cleaner way to create cataloged indexes
+ */
+ indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
+ indexRelation->rd_rel->relam = accessMethodObjectId;
+ indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
+
+ /*
+ * store index's pg_class entry
+ */
+ InsertPgClassTuple(pg_class, indexRelation,
+ RelationGetRelid(indexRelation),
+ (Datum) 0,
+ reloptions);
+
+ /* done with pg_class */
+ table_close(pg_class, RowExclusiveLock);
+
+ /*
+ * now update the object id's of all the attribute tuple forms in the
+ * index relation's tuple descriptor
+ */
+ InitializeAttributeOids(indexRelation,
+ indexInfo->ii_NumIndexAttrs,
+ indexRelationId);
+
+ /*
+ * append ATTRIBUTE tuples for the index
+ */
+ AppendAttributeTuples(indexRelation, indexInfo->ii_OpclassOptions);
+
+ /* ----------------
+ * update pg_index
+ * (append INDEX tuple)
+ *
+ * Note that this stows away a representation of "predicate".
+ * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
+ * ----------------
+ */
+ UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
+ indexInfo,
+ collationObjectId, classObjectId, coloptions,
+ isprimary, is_exclusion,
+ (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
+ !concurrent && !invalid,
+ !concurrent);
+
+ /*
+ * Register relcache invalidation on the indexes' heap relation, to
+ * maintain consistency of its index list
+ */
+ CacheInvalidateRelcache(heapRelation);
+
+ /* update pg_inherits and the parent's relhassubclass, if needed */
+ if (OidIsValid(parentIndexRelid))
+ {
+ StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
+ SetRelationHasSubclass(parentIndexRelid, true);
+ }
+
+ /*
+ * Register constraint and dependencies for the index.
+ *
+ * If the index is from a CONSTRAINT clause, construct a pg_constraint
+ * entry. The index will be linked to the constraint, which in turn is
+ * linked to the table. If it's not a CONSTRAINT, we need to make a
+ * dependency directly on the table.
+ *
+ * We don't need a dependency on the namespace, because there'll be an
+ * indirect dependency via our parent table.
+ *
+ * During bootstrap we can't register any dependencies, and we don't try
+ * to make a constraint either.
+ */
+ if (!IsBootstrapProcessingMode())
+ {
+ ObjectAddress myself,
+ referenced;
+ ObjectAddresses *addrs;
+
+ ObjectAddressSet(myself, RelationRelationId, indexRelationId);
+
+ if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
+ {
+ char constraintType;
+ ObjectAddress localaddr;
+
+ if (isprimary)
+ constraintType = CONSTRAINT_PRIMARY;
+ else if (indexInfo->ii_Unique)
+ constraintType = CONSTRAINT_UNIQUE;
+ else if (is_exclusion)
+ constraintType = CONSTRAINT_EXCLUSION;
+ else
+ {
+ elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
+ constraintType = 0; /* keep compiler quiet */
+ }
+
+ localaddr = index_constraint_create(heapRelation,
+ indexRelationId,
+ parentConstraintId,
+ indexInfo,
+ indexRelationName,
+ constraintType,
+ constr_flags,
+ allow_system_table_mods,
+ is_internal);
+ if (constraintId)
+ *constraintId = localaddr.objectId;
+ }
+ else
+ {
+ bool have_simple_col = false;
+
+ addrs = new_object_addresses();
+
+ /* Create auto dependencies on simply-referenced columns */
+ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ {
+ if (indexInfo->ii_IndexAttrNumbers[i] != 0)
+ {
+ ObjectAddressSubSet(referenced, RelationRelationId,
+ heapRelationId,
+ indexInfo->ii_IndexAttrNumbers[i]);
+ add_exact_object_address(&referenced, addrs);
+ have_simple_col = true;
+ }
+ }
+
+ /*
+ * If there are no simply-referenced columns, give the index an
+ * auto dependency on the whole table. In most cases, this will
+ * be redundant, but it might not be if the index expressions and
+ * predicate contain no Vars or only whole-row Vars.
+ */
+ if (!have_simple_col)
+ {
+ ObjectAddressSet(referenced, RelationRelationId,
+ heapRelationId);
+ add_exact_object_address(&referenced, addrs);
+ }
+
+ record_object_address_dependencies(&myself, addrs, DEPENDENCY_AUTO);
+ free_object_addresses(addrs);
+ }
+
+ /*
+ * If this is an index partition, create partition dependencies on
+ * both the parent index and the table. (Note: these must be *in
+ * addition to*, not instead of, all other dependencies. Otherwise
+ * we'll be short some dependencies after DETACH PARTITION.)
+ */
+ if (OidIsValid(parentIndexRelid))
+ {
+ ObjectAddressSet(referenced, RelationRelationId, parentIndexRelid);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
+
+ ObjectAddressSet(referenced, RelationRelationId, heapRelationId);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
+ }
+
+ /* placeholder for normal dependencies */
+ addrs = new_object_addresses();
+
+ /* Store dependency on collations */
+
+ /* The default collation is pinned, so don't bother recording it */
+ for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+ {
+ if (OidIsValid(collationObjectId[i]) &&
+ collationObjectId[i] != DEFAULT_COLLATION_OID)
+ {
+ ObjectAddressSet(referenced, CollationRelationId,
+ collationObjectId[i]);
+ add_exact_object_address(&referenced, addrs);
+ }
+ }
+
+ /* Store dependency on operator classes */
+ for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+ {
+ ObjectAddressSet(referenced, OperatorClassRelationId, classObjectId[i]);
+ add_exact_object_address(&referenced, addrs);
+ }
+
+ record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
+ free_object_addresses(addrs);
+
+ /* Store dependencies on anything mentioned in index expressions */
+ if (indexInfo->ii_Expressions)
+ {
+ recordDependencyOnSingleRelExpr(&myself,
+ (Node *) indexInfo->ii_Expressions,
+ heapRelationId,
+ DEPENDENCY_NORMAL,
+ DEPENDENCY_AUTO, false);
+ }
+
+ /* Store dependencies on anything mentioned in predicate */
+ if (indexInfo->ii_Predicate)
+ {
+ recordDependencyOnSingleRelExpr(&myself,
+ (Node *) indexInfo->ii_Predicate,
+ heapRelationId,
+ DEPENDENCY_NORMAL,
+ DEPENDENCY_AUTO, false);
+ }
+ }
+ else
+ {
+ /* Bootstrap mode - assert we weren't asked for constraint support */
+ Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
+ }
+
+ /* Post creation hook for new index */
+ InvokeObjectPostCreateHookArg(RelationRelationId,
+ indexRelationId, 0, is_internal);
+
+ /*
+ * Advance the command counter so that we can see the newly-entered
+ * catalog tuples for the index.
+ */
+ CommandCounterIncrement();
+
+ /*
+ * In bootstrap mode, we have to fill in the index strategy structure with
+ * information from the catalogs. If we aren't bootstrapping, then the
+ * relcache entry has already been rebuilt thanks to sinval update during
+ * CommandCounterIncrement.
+ */
+ if (IsBootstrapProcessingMode())
+ RelationInitIndexAccessInfo(indexRelation);
+ else
+ Assert(indexRelation->rd_indexcxt != NULL);
+
+ indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
+
+ /* Validate opclass-specific options */
+ if (indexInfo->ii_OpclassOptions)
+ for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+ (void) index_opclass_options(indexRelation, i + 1,
+ indexInfo->ii_OpclassOptions[i],
+ true);
+
+ /*
+ * If this is bootstrap (initdb) time, then we don't actually fill in the
+ * index yet. We'll be creating more indexes and classes later, so we
+ * delay filling them in until just before we're done with bootstrapping.
+ * Similarly, if the caller specified to skip the build then filling the
+ * index is delayed till later (ALTER TABLE can save work in some cases
+ * with this). Otherwise, we call the AM routine that constructs the
+ * index.
+ */
+ if (IsBootstrapProcessingMode())
+ {
+ index_register(heapRelationId, indexRelationId, indexInfo);
+ }
+ else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
+ {
+ /*
+ * Caller is responsible for filling the index later on. However,
+ * we'd better make sure that the heap relation is correctly marked as
+ * having an index.
+ */
+ index_update_stats(heapRelation,
+ true,
+ -1.0);
+ /* Make the above update visible */
+ CommandCounterIncrement();
+ }
+ else
+ {
+ index_build(heapRelation, indexRelation, indexInfo, false, true);
+ }
+
+ /*
+ * Close the index; but we keep the lock that we acquired above until end
+ * of transaction. Closing the heap is caller's responsibility.
+ */
+ index_close(indexRelation, NoLock);
+
+ return indexRelationId;
+}
+
+/*
+ * index_concurrently_create_copy
+ *
+ * Create concurrently an index based on the definition of the one provided by
+ * caller. The index is inserted into catalogs and needs to be built later
+ * on. This is called during concurrent reindex processing.
+ *
+ * "tablespaceOid" is the tablespace to use for this index.
+ */
+Oid
+index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId,
+ Oid tablespaceOid, const char *newName)
+{
+ Relation indexRelation;
+ IndexInfo *oldInfo,
+ *newInfo;
+ Oid newIndexId = InvalidOid;
+ HeapTuple indexTuple,
+ classTuple;
+ Datum indclassDatum,
+ colOptionDatum,
+ optionDatum;
+ oidvector *indclass;
+ int2vector *indcoloptions;
+ bool isnull;
+ List *indexColNames = NIL;
+ List *indexExprs = NIL;
+ List *indexPreds = NIL;
+
+ indexRelation = index_open(oldIndexId, RowExclusiveLock);
+
+ /* The new index needs some information from the old index */
+ oldInfo = BuildIndexInfo(indexRelation);
+
+ /*
+ * Concurrent build of an index with exclusion constraints is not
+ * supported.
+ */
+ if (oldInfo->ii_ExclusionOps != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("concurrent index creation for exclusion constraints is not supported")));
+
+ /* Get the array of class and column options IDs from index info */
+ indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", oldIndexId);
+ indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+ colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indoption, &isnull);
+ Assert(!isnull);
+ indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
+
+ /* Fetch options of index if any */
+ classTuple = SearchSysCache1(RELOID, oldIndexId);
+ if (!HeapTupleIsValid(classTuple))
+ elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
+ optionDatum = SysCacheGetAttr(RELOID, classTuple,
+ Anum_pg_class_reloptions, &isnull);
+
+ /*
+ * Fetch the list of expressions and predicates directly from the
+ * catalogs. This cannot rely on the information from IndexInfo of the
+ * old index as these have been flattened for the planner.
+ */
+ if (oldInfo->ii_Expressions != NIL)
+ {
+ Datum exprDatum;
+ char *exprString;
+
+ exprDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indexprs, &isnull);
+ Assert(!isnull);
+ exprString = TextDatumGetCString(exprDatum);
+ indexExprs = (List *) stringToNode(exprString);
+ pfree(exprString);
+ }
+ if (oldInfo->ii_Predicate != NIL)
+ {
+ Datum predDatum;
+ char *predString;
+
+ predDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
+ Anum_pg_index_indpred, &isnull);
+ Assert(!isnull);
+ predString = TextDatumGetCString(predDatum);
+ indexPreds = (List *) stringToNode(predString);
+
+ /* Also convert to implicit-AND format */
+ indexPreds = make_ands_implicit((Expr *) indexPreds);
+ pfree(predString);
+ }
+
+ /*
+ * Build the index information for the new index. Note that rebuild of
+ * indexes with exclusion constraints is not supported, hence there is no
+ * need to fill all the ii_Exclusion* fields.
+ */
+ newInfo = makeIndexInfo(oldInfo->ii_NumIndexAttrs,
+ oldInfo->ii_NumIndexKeyAttrs,
+ oldInfo->ii_Am,
+ indexExprs,
+ indexPreds,
+ oldInfo->ii_Unique,
+ false, /* not ready for inserts */
+ true);
+
+ /*
+ * Extract the list of column names and the column numbers for the new
+ * index information. All this information will be used for the index
+ * creation.
+ */
+ for (int i = 0; i < oldInfo->ii_NumIndexAttrs; i++)
+ {
+ TupleDesc indexTupDesc = RelationGetDescr(indexRelation);
+ Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
+
+ indexColNames = lappend(indexColNames, NameStr(att->attname));
+ newInfo->ii_IndexAttrNumbers[i] = oldInfo->ii_IndexAttrNumbers[i];
+ }
+
+ /* Extract opclass parameters for each attribute, if any */
+ if (oldInfo->ii_OpclassOptions != NULL)
+ {
+ newInfo->ii_OpclassOptions = palloc0(sizeof(Datum) *
+ newInfo->ii_NumIndexAttrs);
+ for (int i = 0; i < newInfo->ii_NumIndexAttrs; i++)
+ newInfo->ii_OpclassOptions[i] = get_attoptions(oldIndexId, i + 1);
+ }
+
+ /*
+ * Now create the new index.
+ *
+ * For a partition index, we adjust the partition dependency later, to
+ * ensure a consistent state at all times. That is why parentIndexRelid
+ * is not set here.
+ */
+ newIndexId = index_create(heapRelation,
+ newName,
+ InvalidOid, /* indexRelationId */
+ InvalidOid, /* parentIndexRelid */
+ InvalidOid, /* parentConstraintId */
+ InvalidOid, /* relFileNode */
+ newInfo,
+ indexColNames,
+ indexRelation->rd_rel->relam,
+ tablespaceOid,
+ indexRelation->rd_indcollation,
+ indclass->values,
+ indcoloptions->values,
+ optionDatum,
+ INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
+ 0,
+ true, /* allow table to be a system catalog? */
+ false, /* is_internal? */
+ NULL);
+
+ /* Close the relations used and clean up */
+ index_close(indexRelation, NoLock);
+ ReleaseSysCache(indexTuple);
+ ReleaseSysCache(classTuple);
+
+ return newIndexId;
+}
+
+/*
+ * index_concurrently_build
+ *
+ * Build index for a concurrent operation. Low-level locks are taken when
+ * this operation is performed to prevent only schema changes, but they need
+ * to be kept until the end of the transaction performing this operation.
+ * 'indexOid' refers to an index relation OID already created as part of
+ * previous processing, and 'heapOid' refers to its parent heap relation.
+ */
+void
+index_concurrently_build(Oid heapRelationId,
+ Oid indexRelationId)
+{
+ Relation heapRel;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+ Relation indexRelation;
+ IndexInfo *indexInfo;
+
+ /* This had better make sure that a snapshot is active */
+ Assert(ActiveSnapshotSet());
+
+ /* Open and lock the parent heap relation */
+ heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heapRel->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ indexRelation = index_open(indexRelationId, RowExclusiveLock);
+
+ /*
+ * We have to re-build the IndexInfo struct, since it was lost in the
+ * commit of the transaction where this concurrent index was created at
+ * the catalog level.
+ */
+ indexInfo = BuildIndexInfo(indexRelation);
+ Assert(!indexInfo->ii_ReadyForInserts);
+ indexInfo->ii_Concurrent = true;
+ indexInfo->ii_BrokenHotChain = false;
+
+ /* Now build the index */
+ index_build(heapRel, indexRelation, indexInfo, false, true);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Close both the relations, but keep the locks */
+ table_close(heapRel, NoLock);
+ index_close(indexRelation, NoLock);
+
+ /*
+ * Update the pg_index row to mark the index as ready for inserts. Once we
+ * commit this transaction, any new transactions that open the table must
+ * insert new entries into the index for insertions and non-HOT updates.
+ */
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+}
+
+/*
+ * index_concurrently_swap
+ *
+ * Swap name, dependencies, and constraints of the old index over to the new
+ * index, while marking the old index as invalid and the new as valid.
+ */
+void
+index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
+{
+ Relation pg_class,
+ pg_index,
+ pg_constraint,
+ pg_trigger;
+ Relation oldClassRel,
+ newClassRel;
+ HeapTuple oldClassTuple,
+ newClassTuple;
+ Form_pg_class oldClassForm,
+ newClassForm;
+ HeapTuple oldIndexTuple,
+ newIndexTuple;
+ Form_pg_index oldIndexForm,
+ newIndexForm;
+ bool isPartition;
+ Oid indexConstraintOid;
+ List *constraintOids = NIL;
+ ListCell *lc;
+
+ /*
+ * Take a necessary lock on the old and new index before swapping them.
+ */
+ oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
+ newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
+
+ /* Now swap names and dependencies of those indexes */
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ oldClassTuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(oldIndexId));
+ if (!HeapTupleIsValid(oldClassTuple))
+ elog(ERROR, "could not find tuple for relation %u", oldIndexId);
+ newClassTuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(newIndexId));
+ if (!HeapTupleIsValid(newClassTuple))
+ elog(ERROR, "could not find tuple for relation %u", newIndexId);
+
+ oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
+ newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
+
+ /* Swap the names */
+ namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
+ namestrcpy(&oldClassForm->relname, oldName);
+
+ /* Swap the partition flags to track inheritance properly */
+ isPartition = newClassForm->relispartition;
+ newClassForm->relispartition = oldClassForm->relispartition;
+ oldClassForm->relispartition = isPartition;
+
+ CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
+ CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
+
+ heap_freetuple(oldClassTuple);
+ heap_freetuple(newClassTuple);
+
+ /* Now swap index info */
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(oldIndexId));
+ if (!HeapTupleIsValid(oldIndexTuple))
+ elog(ERROR, "could not find tuple for relation %u", oldIndexId);
+ newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(newIndexId));
+ if (!HeapTupleIsValid(newIndexTuple))
+ elog(ERROR, "could not find tuple for relation %u", newIndexId);
+
+ oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
+ newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
+
+ /*
+ * Copy constraint flags from the old index. This is safe because the old
+ * index guaranteed uniqueness.
+ */
+ newIndexForm->indisprimary = oldIndexForm->indisprimary;
+ oldIndexForm->indisprimary = false;
+ newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
+ oldIndexForm->indisexclusion = false;
+ newIndexForm->indimmediate = oldIndexForm->indimmediate;
+ oldIndexForm->indimmediate = true;
+
+ /* Preserve indisreplident in the new index */
+ newIndexForm->indisreplident = oldIndexForm->indisreplident;
+
+ /* Preserve indisclustered in the new index */
+ newIndexForm->indisclustered = oldIndexForm->indisclustered;
+
+ /*
+ * Mark the new index as valid, and the old index as invalid similarly to
+ * what index_set_state_flags() does.
+ */
+ newIndexForm->indisvalid = true;
+ oldIndexForm->indisvalid = false;
+ oldIndexForm->indisclustered = false;
+ oldIndexForm->indisreplident = false;
+
+ CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
+ CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
+
+ heap_freetuple(oldIndexTuple);
+ heap_freetuple(newIndexTuple);
+
+ /*
+ * Move constraints and triggers over to the new index
+ */
+
+ constraintOids = get_index_ref_constraints(oldIndexId);
+
+ indexConstraintOid = get_index_constraint(oldIndexId);
+
+ if (OidIsValid(indexConstraintOid))
+ constraintOids = lappend_oid(constraintOids, indexConstraintOid);
+
+ pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
+ pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
+
+ foreach(lc, constraintOids)
+ {
+ HeapTuple constraintTuple,
+ triggerTuple;
+ Form_pg_constraint conForm;
+ ScanKeyData key[1];
+ SysScanDesc scan;
+ Oid constraintOid = lfirst_oid(lc);
+
+ /* Move the constraint from the old to the new index */
+ constraintTuple = SearchSysCacheCopy1(CONSTROID,
+ ObjectIdGetDatum(constraintOid));
+ if (!HeapTupleIsValid(constraintTuple))
+ elog(ERROR, "could not find tuple for constraint %u", constraintOid);
+
+ conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
+
+ if (conForm->conindid == oldIndexId)
+ {
+ conForm->conindid = newIndexId;
+
+ CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
+ }
+
+ heap_freetuple(constraintTuple);
+
+ /* Search for trigger records */
+ ScanKeyInit(&key[0],
+ Anum_pg_trigger_tgconstraint,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(constraintOid));
+
+ scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
+ NULL, 1, key);
+
+ while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
+ {
+ Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
+
+ if (tgForm->tgconstrindid != oldIndexId)
+ continue;
+
+ /* Make a modifiable copy */
+ triggerTuple = heap_copytuple(triggerTuple);
+ tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
+
+ tgForm->tgconstrindid = newIndexId;
+
+ CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
+
+ heap_freetuple(triggerTuple);
+ }
+
+ systable_endscan(scan);
+ }
+
+ /*
+ * Move comment if any
+ */
+ {
+ Relation description;
+ ScanKeyData skey[3];
+ SysScanDesc sd;
+ HeapTuple tuple;
+ Datum values[Natts_pg_description] = {0};
+ bool nulls[Natts_pg_description] = {0};
+ bool replaces[Natts_pg_description] = {0};
+
+ values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
+ replaces[Anum_pg_description_objoid - 1] = true;
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_description_objoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(oldIndexId));
+ ScanKeyInit(&skey[1],
+ Anum_pg_description_classoid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationRelationId));
+ ScanKeyInit(&skey[2],
+ Anum_pg_description_objsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(0));
+
+ description = table_open(DescriptionRelationId, RowExclusiveLock);
+
+ sd = systable_beginscan(description, DescriptionObjIndexId, true,
+ NULL, 3, skey);
+
+ while ((tuple = systable_getnext(sd)) != NULL)
+ {
+ tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
+ values, nulls, replaces);
+ CatalogTupleUpdate(description, &tuple->t_self, tuple);
+
+ break; /* Assume there can be only one match */
+ }
+
+ systable_endscan(sd);
+ table_close(description, NoLock);
+ }
+
+ /*
+ * Swap inheritance relationship with parent index
+ */
+ if (get_rel_relispartition(oldIndexId))
+ {
+ List *ancestors = get_partition_ancestors(oldIndexId);
+ Oid parentIndexRelid = linitial_oid(ancestors);
+
+ DeleteInheritsTuple(oldIndexId, parentIndexRelid, false, NULL);
+ StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
+
+ list_free(ancestors);
+ }
+
+ /*
+ * Swap all dependencies of and on the old index to the new one, and
+ * vice-versa. Note that a call to CommandCounterIncrement() would cause
+ * duplicate entries in pg_depend, so this should not be done.
+ */
+ changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
+ changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
+
+ changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
+ changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
+
+ /*
+ * Copy over statistics from old to new index
+ */
+ {
+ PgStat_StatTabEntry *tabentry;
+
+ tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
+ if (tabentry)
+ {
+ if (newClassRel->pgstat_info)
+ {
+ newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
+ newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
+ newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
+ newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
+ newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
+
+ /*
+ * The data will be sent by the next pgstat_report_stat()
+ * call.
+ */
+ }
+ }
+ }
+
+ /* Copy data of pg_statistic from the old index to the new one */
+ CopyStatistics(oldIndexId, newIndexId);
+
+ /* Copy pg_attribute.attstattarget for each index attribute */
+ {
+ HeapTuple attrTuple;
+ Relation pg_attribute;
+ SysScanDesc scan;
+ ScanKeyData key[1];
+
+ pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
+ ScanKeyInit(&key[0],
+ Anum_pg_attribute_attrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(newIndexId));
+ scan = systable_beginscan(pg_attribute, AttributeRelidNumIndexId,
+ true, NULL, 1, key);
+
+ while (HeapTupleIsValid((attrTuple = systable_getnext(scan))))
+ {
+ Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attrTuple);
+ Datum repl_val[Natts_pg_attribute];
+ bool repl_null[Natts_pg_attribute];
+ bool repl_repl[Natts_pg_attribute];
+ int attstattarget;
+ HeapTuple newTuple;
+
+ /* Ignore dropped columns */
+ if (att->attisdropped)
+ continue;
+
+ /*
+ * Get attstattarget from the old index and refresh the new value.
+ */
+ attstattarget = get_attstattarget(oldIndexId, att->attnum);
+
+ /* no need for a refresh if both match */
+ if (attstattarget == att->attstattarget)
+ continue;
+
+ memset(repl_val, 0, sizeof(repl_val));
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ repl_repl[Anum_pg_attribute_attstattarget - 1] = true;
+ repl_val[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(attstattarget);
+
+ newTuple = heap_modify_tuple(attrTuple,
+ RelationGetDescr(pg_attribute),
+ repl_val, repl_null, repl_repl);
+ CatalogTupleUpdate(pg_attribute, &newTuple->t_self, newTuple);
+
+ heap_freetuple(newTuple);
+ }
+
+ systable_endscan(scan);
+ table_close(pg_attribute, RowExclusiveLock);
+ }
+
+ /* Close relations */
+ table_close(pg_class, RowExclusiveLock);
+ table_close(pg_index, RowExclusiveLock);
+ table_close(pg_constraint, RowExclusiveLock);
+ table_close(pg_trigger, RowExclusiveLock);
+
+ /* The lock taken previously is not released until the end of transaction */
+ relation_close(oldClassRel, NoLock);
+ relation_close(newClassRel, NoLock);
+}
+
+/*
+ * index_concurrently_set_dead
+ *
+ * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
+ * CONCURRENTLY before actually dropping the index. After calling this
+ * function, the index is seen by all the backends as dead. Low-level locks
+ * taken here are kept until the end of the transaction calling this function.
+ */
+void
+index_concurrently_set_dead(Oid heapId, Oid indexId)
+{
+ Relation userHeapRelation;
+ Relation userIndexRelation;
+
+ /*
+ * No more predicate locks will be acquired on this index, and we're about
+ * to stop doing inserts into the index which could show conflicts with
+ * existing predicate locks, so now is the time to move them to the heap
+ * relation.
+ */
+ userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
+ userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
+ TransferPredicateLocksToHeapRelation(userIndexRelation);
+
+ /*
+ * Now we are sure that nobody uses the index for queries; they just might
+ * have it open for updating it. So now we can unset indisready and
+ * indislive, then wait till nobody could be using it at all anymore.
+ */
+ index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
+
+ /*
+ * Invalidate the relcache for the table, so that after this commit all
+ * sessions will refresh the table's index list. Forgetting just the
+ * index's relcache entry is not enough.
+ */
+ CacheInvalidateRelcache(userHeapRelation);
+
+ /*
+ * Close the relations again, though still holding session lock.
+ */
+ table_close(userHeapRelation, NoLock);
+ index_close(userIndexRelation, NoLock);
+}
+
+/*
+ * index_constraint_create
+ *
+ * Set up a constraint associated with an index. Return the new constraint's
+ * address.
+ *
+ * heapRelation: table owning the index (must be suitably locked by caller)
+ * indexRelationId: OID of the index
+ * parentConstraintId: if constraint is on a partition, the OID of the
+ * constraint in the parent.
+ * indexInfo: same info executor uses to insert into the index
+ * constraintName: what it say (generally, should match name of index)
+ * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
+ * CONSTRAINT_EXCLUSION
+ * flags: bitmask that can include any combination of these bits:
+ * INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
+ * INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
+ * INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
+ * INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
+ * INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
+ * of index on table's columns
+ * allow_system_table_mods: allow table to be a system catalog
+ * is_internal: index is constructed due to internal process
+ */
+ObjectAddress
+index_constraint_create(Relation heapRelation,
+ Oid indexRelationId,
+ Oid parentConstraintId,
+ IndexInfo *indexInfo,
+ const char *constraintName,
+ char constraintType,
+ bits16 constr_flags,
+ bool allow_system_table_mods,
+ bool is_internal)
+{
+ Oid namespaceId = RelationGetNamespace(heapRelation);
+ ObjectAddress myself,
+ idxaddr;
+ Oid conOid;
+ bool deferrable;
+ bool initdeferred;
+ bool mark_as_primary;
+ bool islocal;
+ bool noinherit;
+ int inhcount;
+
+ deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
+ initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
+ mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;
+
+ /* constraint creation support doesn't work while bootstrapping */
+ Assert(!IsBootstrapProcessingMode());
+
+ /* enforce system-table restriction */
+ if (!allow_system_table_mods &&
+ IsSystemRelation(heapRelation) &&
+ IsNormalProcessingMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("user-defined indexes on system catalog tables are not supported")));
+
+ /* primary/unique constraints shouldn't have any expressions */
+ if (indexInfo->ii_Expressions &&
+ constraintType != CONSTRAINT_EXCLUSION)
+ elog(ERROR, "constraints cannot have index expressions");
+
+ /*
+ * If we're manufacturing a constraint for a pre-existing index, we need
+ * to get rid of the existing auto dependencies for the index (the ones
+ * that index_create() would have made instead of calling this function).
+ *
+ * Note: this code would not necessarily do the right thing if the index
+ * has any expressions or predicate, but we'd never be turning such an
+ * index into a UNIQUE or PRIMARY KEY constraint.
+ */
+ if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
+ deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
+ RelationRelationId, DEPENDENCY_AUTO);
+
+ if (OidIsValid(parentConstraintId))
+ {
+ islocal = false;
+ inhcount = 1;
+ noinherit = false;
+ }
+ else
+ {
+ islocal = true;
+ inhcount = 0;
+ noinherit = true;
+ }
+
+ /*
+ * Construct a pg_constraint entry.
+ */
+ conOid = CreateConstraintEntry(constraintName,
+ namespaceId,
+ constraintType,
+ deferrable,
+ initdeferred,
+ true,
+ parentConstraintId,
+ RelationGetRelid(heapRelation),
+ indexInfo->ii_IndexAttrNumbers,
+ indexInfo->ii_NumIndexKeyAttrs,
+ indexInfo->ii_NumIndexAttrs,
+ InvalidOid, /* no domain */
+ indexRelationId, /* index OID */
+ InvalidOid, /* no foreign key */
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ 0,
+ ' ',
+ ' ',
+ ' ',
+ indexInfo->ii_ExclusionOps,
+ NULL, /* no check constraint */
+ NULL,
+ islocal,
+ inhcount,
+ noinherit,
+ is_internal);
+
+ /*
+ * Register the index as internally dependent on the constraint.
+ *
+ * Note that the constraint has a dependency on the table, so we don't
+ * need (or want) any direct dependency from the index to the table.
+ */
+ ObjectAddressSet(myself, ConstraintRelationId, conOid);
+ ObjectAddressSet(idxaddr, RelationRelationId, indexRelationId);
+ recordDependencyOn(&idxaddr, &myself, DEPENDENCY_INTERNAL);
+
+ /*
+ * Also, if this is a constraint on a partition, give it partition-type
+ * dependencies on the parent constraint as well as the table.
+ */
+ if (OidIsValid(parentConstraintId))
+ {
+ ObjectAddress referenced;
+
+ ObjectAddressSet(referenced, ConstraintRelationId, parentConstraintId);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
+ ObjectAddressSet(referenced, RelationRelationId,
+ RelationGetRelid(heapRelation));
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
+ }
+
+ /*
+ * If the constraint is deferrable, create the deferred uniqueness
+ * checking trigger. (The trigger will be given an internal dependency on
+ * the constraint by CreateTrigger.)
+ */
+ if (deferrable)
+ {
+ CreateTrigStmt *trigger = makeNode(CreateTrigStmt);
+
+ trigger->replace = false;
+ trigger->isconstraint = true;
+ trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
+ "PK_ConstraintTrigger" :
+ "Unique_ConstraintTrigger";
+ trigger->relation = NULL;
+ trigger->funcname = SystemFuncName("unique_key_recheck");
+ trigger->args = NIL;
+ trigger->row = true;
+ trigger->timing = TRIGGER_TYPE_AFTER;
+ trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
+ trigger->columns = NIL;
+ trigger->whenClause = NULL;
+ trigger->transitionRels = NIL;
+ trigger->deferrable = true;
+ trigger->initdeferred = initdeferred;
+ trigger->constrrel = NULL;
+
+ (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
+ InvalidOid, conOid, indexRelationId, InvalidOid,
+ InvalidOid, NULL, true, false);
+ }
+
+ /*
+ * If needed, mark the index as primary and/or deferred in pg_index.
+ *
+ * Note: When making an existing index into a constraint, caller must have
+ * a table lock that prevents concurrent table updates; otherwise, there
+ * is a risk that concurrent readers of the table will miss seeing this
+ * index at all.
+ */
+ if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
+ (mark_as_primary || deferrable))
+ {
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+ bool dirty = false;
+ bool marked_as_primary = false;
+
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(indexRelationId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexRelationId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ if (mark_as_primary && !indexForm->indisprimary)
+ {
+ indexForm->indisprimary = true;
+ dirty = true;
+ marked_as_primary = true;
+ }
+
+ if (deferrable && indexForm->indimmediate)
+ {
+ indexForm->indimmediate = false;
+ dirty = true;
+ }
+
+ if (dirty)
+ {
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+
+ /*
+ * When we mark an existing index as primary, force a relcache
+ * flush on its parent table, so that all sessions will become
+ * aware that the table now has a primary key. This is important
+ * because it affects some replication behaviors.
+ */
+ if (marked_as_primary)
+ CacheInvalidateRelcache(heapRelation);
+
+ InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
+ InvalidOid, is_internal);
+ }
+
+ heap_freetuple(indexTuple);
+ table_close(pg_index, RowExclusiveLock);
+ }
+
+ return myself;
+}
+
+/*
+ * index_drop
+ *
+ * NOTE: this routine should now only be called through performDeletion(),
+ * else associated dependencies won't be cleaned up.
+ *
+ * If concurrent is true, do a DROP INDEX CONCURRENTLY. If concurrent is
+ * false but concurrent_lock_mode is true, then do a normal DROP INDEX but
+ * take a lock for CONCURRENTLY processing. That is used as part of REINDEX
+ * CONCURRENTLY.
+ */
+void
+index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
+{
+ Oid heapId;
+ Relation userHeapRelation;
+ Relation userIndexRelation;
+ Relation indexRelation;
+ HeapTuple tuple;
+ bool hasexprs;
+ LockRelId heaprelid,
+ indexrelid;
+ LOCKTAG heaplocktag;
+ LOCKMODE lockmode;
+
+ /*
+ * A temporary relation uses a non-concurrent DROP. Other backends can't
+ * access a temporary relation, so there's no harm in grabbing a stronger
+ * lock (see comments in RemoveRelations), and a non-concurrent DROP is
+ * more efficient.
+ */
+ Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
+ (!concurrent && !concurrent_lock_mode));
+
+ /*
+ * To drop an index safely, we must grab exclusive lock on its parent
+ * table. Exclusive lock on the index alone is insufficient because
+ * another backend might be about to execute a query on the parent table.
+ * If it relies on a previously cached list of index OIDs, then it could
+ * attempt to access the just-dropped index. We must therefore take a
+ * table lock strong enough to prevent all queries on the table from
+ * proceeding until we commit and send out a shared-cache-inval notice
+ * that will make them update their index lists.
+ *
+ * In the concurrent case we avoid this requirement by disabling index use
+ * in multiple steps and waiting out any transactions that might be using
+ * the index, so we don't need exclusive lock on the parent table. Instead
+ * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
+ * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
+ * AccessExclusiveLock on the index below, once we're sure nobody else is
+ * using it.)
+ */
+ heapId = IndexGetRelation(indexId, false);
+ lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
+ userHeapRelation = table_open(heapId, lockmode);
+ userIndexRelation = index_open(indexId, lockmode);
+
+ /*
+ * We might still have open queries using it in our own session, which the
+ * above locking won't prevent, so test explicitly.
+ */
+ CheckTableNotInUse(userIndexRelation, "DROP INDEX");
+
+ /*
+ * Drop Index Concurrently is more or less the reverse process of Create
+ * Index Concurrently.
+ *
+ * First we unset indisvalid so queries starting afterwards don't use the
+ * index to answer queries anymore. We have to keep indisready = true so
+ * transactions that are still scanning the index can continue to see
+ * valid index contents. For instance, if they are using READ COMMITTED
+ * mode, and another transaction makes changes and commits, they need to
+ * see those new tuples in the index.
+ *
+ * After all transactions that could possibly have used the index for
+ * queries end, we can unset indisready and indislive, then wait till
+ * nobody could be touching it anymore. (Note: we need indislive because
+ * this state must be distinct from the initial state during CREATE INDEX
+ * CONCURRENTLY, which has indislive true while indisready and indisvalid
+ * are false. That's because in that state, transactions must examine the
+ * index for HOT-safety decisions, while in this state we don't want them
+ * to open it at all.)
+ *
+ * Since all predicate locks on the index are about to be made invalid, we
+ * must promote them to predicate locks on the heap. In the
+ * non-concurrent case we can just do that now. In the concurrent case
+ * it's a bit trickier. The predicate locks must be moved when there are
+ * no index scans in progress on the index and no more can subsequently
+ * start, so that no new predicate locks can be made on the index. Also,
+ * they must be moved before heap inserts stop maintaining the index, else
+ * the conflict with the predicate lock on the index gap could be missed
+ * before the lock on the heap relation is in place to detect a conflict
+ * based on the heap tuple insert.
+ */
+ if (concurrent)
+ {
+ /*
+ * We must commit our transaction in order to make the first pg_index
+ * state update visible to other sessions. If the DROP machinery has
+ * already performed any other actions (removal of other objects,
+ * pg_depend entries, etc), the commit would make those actions
+ * permanent, which would leave us with inconsistent catalog state if
+ * we fail partway through the following sequence. Since DROP INDEX
+ * CONCURRENTLY is restricted to dropping just one index that has no
+ * dependencies, we should get here before anything's been done ---
+ * but let's check that to be sure. We can verify that the current
+ * transaction has not executed any transactional updates by checking
+ * that no XID has been assigned.
+ */
+ if (GetTopTransactionIdIfAny() != InvalidTransactionId)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
+
+ /*
+ * Mark index invalid by updating its pg_index entry
+ */
+ index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
+
+ /*
+ * Invalidate the relcache for the table, so that after this commit
+ * all sessions will refresh any cached plans that might reference the
+ * index.
+ */
+ CacheInvalidateRelcache(userHeapRelation);
+
+ /* save lockrelid and locktag for below, then close but keep locks */
+ heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
+ SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+ indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
+
+ table_close(userHeapRelation, NoLock);
+ index_close(userIndexRelation, NoLock);
+
+ /*
+ * We must commit our current transaction so that the indisvalid
+ * update becomes visible to other transactions; then start another.
+ * Note that any previously-built data structures are lost in the
+ * commit. The only data we keep past here are the relation IDs.
+ *
+ * Before committing, get a session-level lock on the table, to ensure
+ * that neither it nor the index can be dropped before we finish. This
+ * cannot block, even if someone else is waiting for access, because
+ * we already have the same lock within our transaction.
+ */
+ LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+ LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * Now we must wait until no running transaction could be using the
+ * index for a query. Use AccessExclusiveLock here to check for
+ * running transactions that hold locks of any kind on the table. Note
+ * we do not need to worry about xacts that open the table for reading
+ * after this point; they will see the index as invalid when they open
+ * the relation.
+ *
+ * Note: the reason we use actual lock acquisition here, rather than
+ * just checking the ProcArray and sleeping, is that deadlock is
+ * possible if one of the transactions in question is blocked trying
+ * to acquire an exclusive lock on our table. The lock code will
+ * detect deadlock and error out properly.
+ *
+ * Note: we report progress through WaitForLockers() unconditionally
+ * here, even though it will only be used when we're called by REINDEX
+ * CONCURRENTLY and not when called by DROP INDEX CONCURRENTLY.
+ */
+ WaitForLockers(heaplocktag, AccessExclusiveLock, true);
+
+ /* Finish invalidation of index and mark it as dead */
+ index_concurrently_set_dead(heapId, indexId);
+
+ /*
+ * Again, commit the transaction to make the pg_index update visible
+ * to other sessions.
+ */
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /*
+ * Wait till every transaction that saw the old index state has
+ * finished. See above about progress reporting.
+ */
+ WaitForLockers(heaplocktag, AccessExclusiveLock, true);
+
+ /*
+ * Re-open relations to allow us to complete our actions.
+ *
+ * At this point, nothing should be accessing the index, but lets
+ * leave nothing to chance and grab AccessExclusiveLock on the index
+ * before the physical deletion.
+ */
+ userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
+ userIndexRelation = index_open(indexId, AccessExclusiveLock);
+ }
+ else
+ {
+ /* Not concurrent, so just transfer predicate locks and we're good */
+ TransferPredicateLocksToHeapRelation(userIndexRelation);
+ }
+
+ /*
+ * Schedule physical removal of the files (if any)
+ */
+ if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+ RelationDropStorage(userIndexRelation);
+
+ /*
+ * Close and flush the index's relcache entry, to ensure relcache doesn't
+ * try to rebuild it while we're deleting catalog entries. We keep the
+ * lock though.
+ */
+ index_close(userIndexRelation, NoLock);
+
+ RelationForgetRelation(indexId);
+
+ /*
+ * fix INDEX relation, and check for expressional index
+ */
+ indexRelation = table_open(IndexRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+
+ hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
+ RelationGetDescr(indexRelation));
+
+ CatalogTupleDelete(indexRelation, &tuple->t_self);
+
+ ReleaseSysCache(tuple);
+ table_close(indexRelation, RowExclusiveLock);
+
+ /*
+ * if it has any expression columns, we might have stored statistics about
+ * them.
+ */
+ if (hasexprs)
+ RemoveStatistics(indexId, 0);
+
+ /*
+ * fix ATTRIBUTE relation
+ */
+ DeleteAttributeTuples(indexId);
+
+ /*
+ * fix RELATION relation
+ */
+ DeleteRelationTuple(indexId);
+
+ /*
+ * fix INHERITS relation
+ */
+ DeleteInheritsTuple(indexId, InvalidOid, false, NULL);
+
+ /*
+ * We are presently too lazy to attempt to compute the new correct value
+ * of relhasindex (the next VACUUM will fix it if necessary). So there is
+ * no need to update the pg_class tuple for the owning relation. But we
+ * must send out a shared-cache-inval notice on the owning relation to
+ * ensure other backends update their relcache lists of indexes. (In the
+ * concurrent case, this is redundant but harmless.)
+ */
+ CacheInvalidateRelcache(userHeapRelation);
+
+ /*
+ * Close owning rel, but keep lock
+ */
+ table_close(userHeapRelation, NoLock);
+
+ /*
+ * Release the session locks before we go.
+ */
+ if (concurrent)
+ {
+ UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+ UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
+ }
+}
+
+/* ----------------------------------------------------------------
+ * index_build support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * BuildIndexInfo
+ * Construct an IndexInfo record for an open index
+ *
+ * IndexInfo stores the information about the index that's needed by
+ * FormIndexDatum, which is used for both index_build() and later insertion
+ * of individual index tuples. Normally we build an IndexInfo for an index
+ * just once per command, and then use it for (potentially) many tuples.
+ * ----------------
+ */
+IndexInfo *
+BuildIndexInfo(Relation index)
+{
+ IndexInfo *ii;
+ Form_pg_index indexStruct = index->rd_index;
+ int i;
+ int numAtts;
+
+ /* check the number of keys, and copy attr numbers into the IndexInfo */
+ numAtts = indexStruct->indnatts;
+ if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
+ elog(ERROR, "invalid indnatts %d for index %u",
+ numAtts, RelationGetRelid(index));
+
+ /*
+ * Create the node, fetching any expressions needed for expressional
+ * indexes and index predicate if any.
+ */
+ ii = makeIndexInfo(indexStruct->indnatts,
+ indexStruct->indnkeyatts,
+ index->rd_rel->relam,
+ RelationGetIndexExpressions(index),
+ RelationGetIndexPredicate(index),
+ indexStruct->indisunique,
+ indexStruct->indisready,
+ false);
+
+ /* fill in attribute numbers */
+ for (i = 0; i < numAtts; i++)
+ ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
+
+ /* fetch exclusion constraint info if any */
+ if (indexStruct->indisexclusion)
+ {
+ RelationGetExclusionInfo(index,
+ &ii->ii_ExclusionOps,
+ &ii->ii_ExclusionProcs,
+ &ii->ii_ExclusionStrats);
+ }
+
+ ii->ii_OpclassOptions = RelationGetIndexRawAttOptions(index);
+
+ return ii;
+}
+
+/* ----------------
+ * BuildDummyIndexInfo
+ * Construct a dummy IndexInfo record for an open index
+ *
+ * This differs from the real BuildIndexInfo in that it will never run any
+ * user-defined code that might exist in index expressions or predicates.
+ * Instead of the real index expressions, we return null constants that have
+ * the right types/typmods/collations. Predicates and exclusion clauses are
+ * just ignored. This is sufficient for the purpose of truncating an index,
+ * since we will not need to actually evaluate the expressions or predicates;
+ * the only thing that's likely to be done with the data is construction of
+ * a tupdesc describing the index's rowtype.
+ * ----------------
+ */
+IndexInfo *
+BuildDummyIndexInfo(Relation index)
+{
+ IndexInfo *ii;
+ Form_pg_index indexStruct = index->rd_index;
+ int i;
+ int numAtts;
+
+ /* check the number of keys, and copy attr numbers into the IndexInfo */
+ numAtts = indexStruct->indnatts;
+ if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
+ elog(ERROR, "invalid indnatts %d for index %u",
+ numAtts, RelationGetRelid(index));
+
+ /*
+ * Create the node, using dummy index expressions, and pretending there is
+ * no predicate.
+ */
+ ii = makeIndexInfo(indexStruct->indnatts,
+ indexStruct->indnkeyatts,
+ index->rd_rel->relam,
+ RelationGetDummyIndexExpressions(index),
+ NIL,
+ indexStruct->indisunique,
+ indexStruct->indisready,
+ false);
+
+ /* fill in attribute numbers */
+ for (i = 0; i < numAtts; i++)
+ ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
+
+ /* We ignore the exclusion constraint if any */
+
+ return ii;
+}
+
+/*
+ * CompareIndexInfo
+ * Return whether the properties of two indexes (in different tables)
+ * indicate that they have the "same" definitions.
+ *
+ * Note: passing collations and opfamilies separately is a kludge. Adding
+ * them to IndexInfo may result in better coding here and elsewhere.
+ *
+ * Use build_attrmap_by_name(index2, index1) to build the attmap.
+ */
+bool
+CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
+ Oid *collations1, Oid *collations2,
+ Oid *opfamilies1, Oid *opfamilies2,
+ AttrMap *attmap)
+{
+ int i;
+
+ if (info1->ii_Unique != info2->ii_Unique)
+ return false;
+
+ /* indexes are only equivalent if they have the same access method */
+ if (info1->ii_Am != info2->ii_Am)
+ return false;
+
+ /* and same number of attributes */
+ if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
+ return false;
+
+ /* and same number of key attributes */
+ if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
+ return false;
+
+ /*
+ * and columns match through the attribute map (actual attribute numbers
+ * might differ!) Note that this implies that index columns that are
+ * expressions appear in the same positions. We will next compare the
+ * expressions themselves.
+ */
+ for (i = 0; i < info1->ii_NumIndexAttrs; i++)
+ {
+ if (attmap->maplen < info2->ii_IndexAttrNumbers[i])
+ elog(ERROR, "incorrect attribute map");
+
+ /* ignore expressions at this stage */
+ if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
+ (attmap->attnums[info2->ii_IndexAttrNumbers[i] - 1] !=
+ info1->ii_IndexAttrNumbers[i]))
+ return false;
+
+ /* collation and opfamily is not valid for including columns */
+ if (i >= info1->ii_NumIndexKeyAttrs)
+ continue;
+
+ if (collations1[i] != collations2[i])
+ return false;
+ if (opfamilies1[i] != opfamilies2[i])
+ return false;
+ }
+
+ /*
+ * For expression indexes: either both are expression indexes, or neither
+ * is; if they are, make sure the expressions match.
+ */
+ if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
+ return false;
+ if (info1->ii_Expressions != NIL)
+ {
+ bool found_whole_row;
+ Node *mapped;
+
+ mapped = map_variable_attnos((Node *) info2->ii_Expressions,
+ 1, 0, attmap,
+ InvalidOid, &found_whole_row);
+ if (found_whole_row)
+ {
+ /*
+ * we could throw an error here, but seems out of scope for this
+ * routine.
+ */
+ return false;
+ }
+
+ if (!equal(info1->ii_Expressions, mapped))
+ return false;
+ }
+
+ /* Partial index predicates must be identical, if they exist */
+ if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
+ return false;
+ if (info1->ii_Predicate != NULL)
+ {
+ bool found_whole_row;
+ Node *mapped;
+
+ mapped = map_variable_attnos((Node *) info2->ii_Predicate,
+ 1, 0, attmap,
+ InvalidOid, &found_whole_row);
+ if (found_whole_row)
+ {
+ /*
+ * we could throw an error here, but seems out of scope for this
+ * routine.
+ */
+ return false;
+ }
+ if (!equal(info1->ii_Predicate, mapped))
+ return false;
+ }
+
+ /* No support currently for comparing exclusion indexes. */
+ if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
+ return false;
+
+ return true;
+}
+
+/* ----------------
+ * BuildSpeculativeIndexInfo
+ * Add extra state to IndexInfo record
+ *
+ * For unique indexes, we usually don't want to add info to the IndexInfo for
+ * checking uniqueness, since the B-Tree AM handles that directly. However,
+ * in the case of speculative insertion, additional support is required.
+ *
+ * Do this processing here rather than in BuildIndexInfo() to not incur the
+ * overhead in the common non-speculative cases.
+ * ----------------
+ */
+void
+BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
+{
+ int indnkeyatts;
+ int i;
+
+ indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
+
+ /*
+ * fetch info for checking unique indexes
+ */
+ Assert(ii->ii_Unique);
+
+ if (index->rd_rel->relam != BTREE_AM_OID)
+ elog(ERROR, "unexpected non-btree speculative unique index");
+
+ ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
+ ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
+ ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
+
+ /*
+ * We have to look up the operator's strategy number. This provides a
+ * cross-check that the operator does match the index.
+ */
+ /* We need the func OIDs and strategy numbers too */
+ for (i = 0; i < indnkeyatts; i++)
+ {
+ ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
+ ii->ii_UniqueOps[i] =
+ get_opfamily_member(index->rd_opfamily[i],
+ index->rd_opcintype[i],
+ index->rd_opcintype[i],
+ ii->ii_UniqueStrats[i]);
+ if (!OidIsValid(ii->ii_UniqueOps[i]))
+ elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+ ii->ii_UniqueStrats[i], index->rd_opcintype[i],
+ index->rd_opcintype[i], index->rd_opfamily[i]);
+ ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
+ }
+}
+
+/* ----------------
+ * FormIndexDatum
+ * Construct values[] and isnull[] arrays for a new index tuple.
+ *
+ * indexInfo Info about the index
+ * slot Heap tuple for which we must prepare an index entry
+ * estate executor state for evaluating any index expressions
+ * values Array of index Datums (output area)
+ * isnull Array of is-null indicators (output area)
+ *
+ * When there are no index expressions, estate may be NULL. Otherwise it
+ * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
+ * context must point to the heap tuple passed in.
+ *
+ * Notice we don't actually call index_form_tuple() here; we just prepare
+ * its input arrays values[] and isnull[]. This is because the index AM
+ * may wish to alter the data before storage.
+ * ----------------
+ */
+void
+FormIndexDatum(IndexInfo *indexInfo,
+ TupleTableSlot *slot,
+ EState *estate,
+ Datum *values,
+ bool *isnull)
+{
+ ListCell *indexpr_item;
+ int i;
+
+ if (indexInfo->ii_Expressions != NIL &&
+ indexInfo->ii_ExpressionsState == NIL)
+ {
+ /* First time through, set up expression evaluation state */
+ indexInfo->ii_ExpressionsState =
+ ExecPrepareExprList(indexInfo->ii_Expressions, estate);
+ /* Check caller has set up context correctly */
+ Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+ }
+ indexpr_item = list_head(indexInfo->ii_ExpressionsState);
+
+ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ {
+ int keycol = indexInfo->ii_IndexAttrNumbers[i];
+ Datum iDatum;
+ bool isNull;
+
+ if (keycol < 0)
+ iDatum = slot_getsysattr(slot, keycol, &isNull);
+ else if (keycol != 0)
+ {
+ /*
+ * Plain index column; get the value we need directly from the
+ * heap tuple.
+ */
+ iDatum = slot_getattr(slot, keycol, &isNull);
+ }
+ else
+ {
+ /*
+ * Index expression --- need to evaluate it.
+ */
+ if (indexpr_item == NULL)
+ elog(ERROR, "wrong number of index expressions");
+ iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
+ GetPerTupleExprContext(estate),
+ &isNull);
+ indexpr_item = lnext(indexInfo->ii_ExpressionsState, indexpr_item);
+ }
+ values[i] = iDatum;
+ isnull[i] = isNull;
+ }
+
+ if (indexpr_item != NULL)
+ elog(ERROR, "wrong number of index expressions");
+}
+
+
+/*
+ * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
+ *
+ * This routine updates the pg_class row of either an index or its parent
+ * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
+ * to ensure we can do all the necessary work in just one update.
+ *
+ * hasindex: set relhasindex to this value
+ * reltuples: if >= 0, set reltuples to this value; else no change
+ *
+ * If reltuples >= 0, relpages and relallvisible are also updated (using
+ * RelationGetNumberOfBlocks() and visibilitymap_count()).
+ *
+ * NOTE: an important side-effect of this operation is that an SI invalidation
+ * message is sent out to all backends --- including me --- causing relcache
+ * entries to be flushed or updated with the new data. This must happen even
+ * if we find that no change is needed in the pg_class row. When updating
+ * a heap entry, this ensures that other backends find out about the new
+ * index. When updating an index, it's important because some index AMs
+ * expect a relcache flush to occur after REINDEX.
+ */
+static void
+index_update_stats(Relation rel,
+ bool hasindex,
+ double reltuples)
+{
+ Oid relid = RelationGetRelid(rel);
+ Relation pg_class;
+ HeapTuple tuple;
+ Form_pg_class rd_rel;
+ bool dirty;
+
+ /*
+ * We always update the pg_class row using a non-transactional,
+ * overwrite-in-place update. There are several reasons for this:
+ *
+ * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
+ *
+ * 2. We could be reindexing pg_class itself, in which case we can't move
+ * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
+ * not know about all the indexes yet (see reindex_relation).
+ *
+ * 3. Because we execute CREATE INDEX with just share lock on the parent
+ * rel (to allow concurrent index creations), an ordinary update could
+ * suffer a tuple-concurrently-updated failure against another CREATE
+ * INDEX committing at about the same time. We can avoid that by having
+ * them both do nontransactional updates (we assume they will both be
+ * trying to change the pg_class row to the same thing, so it doesn't
+ * matter which goes first).
+ *
+ * It is safe to use a non-transactional update even though our
+ * transaction could still fail before committing. Setting relhasindex
+ * true is safe even if there are no indexes (VACUUM will eventually fix
+ * it). And of course the new relpages and reltuples counts are correct
+ * regardless. However, we don't want to change relpages (or
+ * relallvisible) if the caller isn't providing an updated reltuples
+ * count, because that would bollix the reltuples/relpages ratio which is
+ * what's really important.
+ */
+
+ pg_class = table_open(RelationRelationId, RowExclusiveLock);
+
+ /*
+ * Make a copy of the tuple to update. Normally we use the syscache, but
+ * we can't rely on that during bootstrap or while reindexing pg_class
+ * itself.
+ */
+ if (IsBootstrapProcessingMode() ||
+ ReindexIsProcessingHeap(RelationRelationId))
+ {
+ /* don't assume syscache will work */
+ TableScanDesc pg_class_scan;
+ ScanKeyData key[1];
+
+ ScanKeyInit(&key[0],
+ Anum_pg_class_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+
+ pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
+ tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
+ tuple = heap_copytuple(tuple);
+ table_endscan(pg_class_scan);
+ }
+ else
+ {
+ /* normal case, use syscache */
+ tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+ }
+
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "could not find tuple for relation %u", relid);
+ rd_rel = (Form_pg_class) GETSTRUCT(tuple);
+
+ /* Should this be a more comprehensive test? */
+ Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
+
+ /*
+ * As a special hack, if we are dealing with an empty table and the
+ * existing reltuples is -1, we leave that alone. This ensures that
+ * creating an index as part of CREATE TABLE doesn't cause the table to
+ * prematurely look like it's been vacuumed.
+ */
+ if (reltuples == 0 && rd_rel->reltuples < 0)
+ reltuples = -1;
+
+ /* Apply required updates, if any, to copied tuple */
+
+ dirty = false;
+ if (rd_rel->relhasindex != hasindex)
+ {
+ rd_rel->relhasindex = hasindex;
+ dirty = true;
+ }
+
+ if (reltuples >= 0)
+ {
+ BlockNumber relpages = RelationGetNumberOfBlocks(rel);
+ BlockNumber relallvisible;
+
+ if (rd_rel->relkind != RELKIND_INDEX)
+ visibilitymap_count(rel, &relallvisible, NULL);
+ else /* don't bother for indexes */
+ relallvisible = 0;
+
+ if (rd_rel->relpages != (int32) relpages)
+ {
+ rd_rel->relpages = (int32) relpages;
+ dirty = true;
+ }
+ if (rd_rel->reltuples != (float4) reltuples)
+ {
+ rd_rel->reltuples = (float4) reltuples;
+ dirty = true;
+ }
+ if (rd_rel->relallvisible != (int32) relallvisible)
+ {
+ rd_rel->relallvisible = (int32) relallvisible;
+ dirty = true;
+ }
+ }
+
+ /*
+ * If anything changed, write out the tuple
+ */
+ if (dirty)
+ {
+ heap_inplace_update(pg_class, tuple);
+ /* the above sends a cache inval message */
+ }
+ else
+ {
+ /* no need to change tuple, but force relcache inval anyway */
+ CacheInvalidateRelcacheByTuple(tuple);
+ }
+
+ heap_freetuple(tuple);
+
+ table_close(pg_class, RowExclusiveLock);
+}
+
+
+/*
+ * index_build - invoke access-method-specific index build procedure
+ *
+ * On entry, the index's catalog entries are valid, and its physical disk
+ * file has been created but is empty. We call the AM-specific build
+ * procedure to fill in the index contents. We then update the pg_class
+ * entries of the index and heap relation as needed, using statistics
+ * returned by ambuild as well as data passed by the caller.
+ *
+ * isreindex indicates we are recreating a previously-existing index.
+ * parallel indicates if parallelism may be useful.
+ *
+ * Note: before Postgres 8.2, the passed-in heap and index Relations
+ * were automatically closed by this routine. This is no longer the case.
+ * The caller opened 'em, and the caller should close 'em.
+ */
+void
+index_build(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo,
+ bool isreindex,
+ bool parallel)
+{
+ IndexBuildResult *stats;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ /*
+ * sanity checks
+ */
+ Assert(RelationIsValid(indexRelation));
+ Assert(PointerIsValid(indexRelation->rd_indam));
+ Assert(PointerIsValid(indexRelation->rd_indam->ambuild));
+ Assert(PointerIsValid(indexRelation->rd_indam->ambuildempty));
+
+ /*
+ * Determine worker process details for parallel CREATE INDEX. Currently,
+ * only btree has support for parallel builds.
+ *
+ * Note that planner considers parallel safety for us.
+ */
+ if (parallel && IsNormalProcessingMode() &&
+ indexRelation->rd_rel->relam == BTREE_AM_OID)
+ indexInfo->ii_ParallelWorkers =
+ plan_create_index_workers(RelationGetRelid(heapRelation),
+ RelationGetRelid(indexRelation));
+
+ if (indexInfo->ii_ParallelWorkers == 0)
+ ereport(DEBUG1,
+ (errmsg_internal("building index \"%s\" on table \"%s\" serially",
+ RelationGetRelationName(indexRelation),
+ RelationGetRelationName(heapRelation))));
+ else
+ ereport(DEBUG1,
+ (errmsg_internal("building index \"%s\" on table \"%s\" with request for %d parallel workers",
+ RelationGetRelationName(indexRelation),
+ RelationGetRelationName(heapRelation),
+ indexInfo->ii_ParallelWorkers)));
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ /* Set up initial progress report status */
+ {
+ const int progress_index[] = {
+ PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_SUBPHASE,
+ PROGRESS_CREATEIDX_TUPLES_DONE,
+ PROGRESS_CREATEIDX_TUPLES_TOTAL,
+ PROGRESS_SCAN_BLOCKS_DONE,
+ PROGRESS_SCAN_BLOCKS_TOTAL
+ };
+ const int64 progress_vals[] = {
+ PROGRESS_CREATEIDX_PHASE_BUILD,
+ PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE,
+ 0, 0, 0, 0
+ };
+
+ pgstat_progress_update_multi_param(6, progress_index, progress_vals);
+ }
+
+ /*
+ * Call the access method's build procedure
+ */
+ stats = indexRelation->rd_indam->ambuild(heapRelation, indexRelation,
+ indexInfo);
+ Assert(PointerIsValid(stats));
+
+ /*
+ * If this is an unlogged index, we may need to write out an init fork for
+ * it -- but we must first check whether one already exists. If, for
+ * example, an unlogged relation is truncated in the transaction that
+ * created it, or truncated twice in a subsequent transaction, the
+ * relfilenode won't change, and nothing needs to be done here.
+ */
+ if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
+ !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
+ {
+ RelationOpenSmgr(indexRelation);
+ smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
+ indexRelation->rd_indam->ambuildempty(indexRelation);
+ }
+
+ /*
+ * If we found any potentially broken HOT chains, mark the index as not
+ * being usable until the current transaction is below the event horizon.
+ * See src/backend/access/heap/README.HOT for discussion. Also set this
+ * if early pruning/vacuuming is enabled for the heap relation. While it
+ * might become safe to use the index earlier based on actual cleanup
+ * activity and other active transactions, the test for that would be much
+ * more complex and would require some form of blocking, so keep it simple
+ * and fast by just using the current transaction.
+ *
+ * However, when reindexing an existing index, we should do nothing here.
+ * Any HOT chains that are broken with respect to the index must predate
+ * the index's original creation, so there is no need to change the
+ * index's usability horizon. Moreover, we *must not* try to change the
+ * index's pg_index entry while reindexing pg_index itself, and this
+ * optimization nicely prevents that. The more complex rules needed for a
+ * reindex are handled separately after this function returns.
+ *
+ * We also need not set indcheckxmin during a concurrent index build,
+ * because we won't set indisvalid true until all transactions that care
+ * about the broken HOT chains or early pruning/vacuuming are gone.
+ *
+ * Therefore, this code path can only be taken during non-concurrent
+ * CREATE INDEX. Thus the fact that heap_update will set the pg_index
+ * tuple's xmin doesn't matter, because that tuple was created in the
+ * current transaction anyway. That also means we don't need to worry
+ * about any concurrent readers of the tuple; no other transaction can see
+ * it yet.
+ */
+ if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
+ !isreindex &&
+ !indexInfo->ii_Concurrent)
+ {
+ Oid indexId = RelationGetRelid(indexRelation);
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /* If it's a new index, indcheckxmin shouldn't be set ... */
+ Assert(!indexForm->indcheckxmin);
+
+ indexForm->indcheckxmin = true;
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+
+ heap_freetuple(indexTuple);
+ table_close(pg_index, RowExclusiveLock);
+ }
+
+ /*
+ * Update heap and index pg_class rows
+ */
+ index_update_stats(heapRelation,
+ true,
+ stats->heap_tuples);
+
+ index_update_stats(indexRelation,
+ false,
+ stats->index_tuples);
+
+ /* Make the updated catalog row versions visible */
+ CommandCounterIncrement();
+
+ /*
+ * If it's for an exclusion constraint, make a second pass over the heap
+ * to verify that the constraint is satisfied. We must not do this until
+ * the index is fully valid. (Broken HOT chains shouldn't matter, though;
+ * see comments for IndexCheckExclusion.)
+ */
+ if (indexInfo->ii_ExclusionOps != NULL)
+ IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+}
+
+/*
+ * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
+ *
+ * When creating an exclusion constraint, we first build the index normally
+ * and then rescan the heap to check for conflicts. We assume that we only
+ * need to validate tuples that are live according to an up-to-date snapshot,
+ * and that these were correctly indexed even in the presence of broken HOT
+ * chains. This should be OK since we are holding at least ShareLock on the
+ * table, meaning there can be no uncommitted updates from other transactions.
+ * (Note: that wouldn't necessarily work for system catalogs, since many
+ * operations release write lock early on the system catalogs.)
+ */
+static void
+IndexCheckExclusion(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo)
+{
+ TableScanDesc scan;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ExprState *predicate;
+ TupleTableSlot *slot;
+ EState *estate;
+ ExprContext *econtext;
+ Snapshot snapshot;
+
+ /*
+ * If we are reindexing the target index, mark it as no longer being
+ * reindexed, to forestall an Assert in index_beginscan when we try to use
+ * the index for probes. This is OK because the index is now fully valid.
+ */
+ if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
+ ResetReindexProcessing();
+
+ /*
+ * Need an EState for evaluation of index expressions and partial-index
+ * predicates. Also a slot to hold the current tuple.
+ */
+ estate = CreateExecutorState();
+ econtext = GetPerTupleExprContext(estate);
+ slot = table_slot_create(heapRelation, NULL);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Set up execution state for predicate, if any. */
+ predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+ /*
+ * Scan all live tuples in the base relation.
+ */
+ snapshot = RegisterSnapshot(GetLatestSnapshot());
+ scan = table_beginscan_strat(heapRelation, /* relation */
+ snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK */
+
+ while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * In a partial index, ignore tuples that don't satisfy the predicate.
+ */
+ if (predicate != NULL)
+ {
+ if (!ExecQual(predicate, econtext))
+ continue;
+ }
+
+ /*
+ * Extract index column values, including computing expressions.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ /*
+ * Check that this tuple has no conflicts.
+ */
+ check_exclusion_constraint(heapRelation,
+ indexRelation, indexInfo,
+ &(slot->tts_tid), values, isnull,
+ estate, true);
+
+ MemoryContextReset(econtext->ecxt_per_tuple_memory);
+ }
+
+ table_endscan(scan);
+ UnregisterSnapshot(snapshot);
+
+ ExecDropSingleTupleTableSlot(slot);
+
+ FreeExecutorState(estate);
+
+ /* These may have been pointing to the now-gone estate */
+ indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_PredicateState = NULL;
+}
+
+
+/*
+ * validate_index - support code for concurrent index builds
+ *
+ * We do a concurrent index build by first inserting the catalog entry for the
+ * index via index_create(), marking it not indisready and not indisvalid.
+ * Then we commit our transaction and start a new one, then we wait for all
+ * transactions that could have been modifying the table to terminate. Now
+ * we know that any subsequently-started transactions will see the index and
+ * honor its constraints on HOT updates; so while existing HOT-chains might
+ * be broken with respect to the index, no currently live tuple will have an
+ * incompatible HOT update done to it. We now build the index normally via
+ * index_build(), while holding a weak lock that allows concurrent
+ * insert/update/delete. Also, we index only tuples that are valid
+ * as of the start of the scan (see table_index_build_scan), whereas a normal
+ * build takes care to include recently-dead tuples. This is OK because
+ * we won't mark the index valid until all transactions that might be able
+ * to see those tuples are gone. The reason for doing that is to avoid
+ * bogus unique-index failures due to concurrent UPDATEs (we might see
+ * different versions of the same row as being valid when we pass over them,
+ * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
+ * does not contain any tuples added to the table while we built the index.
+ *
+ * Next, we mark the index "indisready" (but still not "indisvalid") and
+ * commit the second transaction and start a third. Again we wait for all
+ * transactions that could have been modifying the table to terminate. Now
+ * we know that any subsequently-started transactions will see the index and
+ * insert their new tuples into it. We then take a new reference snapshot
+ * which is passed to validate_index(). Any tuples that are valid according
+ * to this snap, but are not in the index, must be added to the index.
+ * (Any tuples committed live after the snap will be inserted into the
+ * index by their originating transaction. Any tuples committed dead before
+ * the snap need not be indexed, because we will wait out all transactions
+ * that might care about them before we mark the index valid.)
+ *
+ * validate_index() works by first gathering all the TIDs currently in the
+ * index, using a bulkdelete callback that just stores the TIDs and doesn't
+ * ever say "delete it". (This should be faster than a plain indexscan;
+ * also, not all index AMs support full-index indexscan.) Then we sort the
+ * TIDs, and finally scan the table doing a "merge join" against the TID list
+ * to see which tuples are missing from the index. Thus we will ensure that
+ * all tuples valid according to the reference snapshot are in the index.
+ *
+ * Building a unique index this way is tricky: we might try to insert a
+ * tuple that is already dead or is in process of being deleted, and we
+ * mustn't have a uniqueness failure against an updated version of the same
+ * row. We could try to check the tuple to see if it's already dead and tell
+ * index_insert() not to do the uniqueness check, but that still leaves us
+ * with a race condition against an in-progress update. To handle that,
+ * we expect the index AM to recheck liveness of the to-be-inserted tuple
+ * before it declares a uniqueness error.
+ *
+ * After completing validate_index(), we wait until all transactions that
+ * were alive at the time of the reference snapshot are gone; this is
+ * necessary to be sure there are none left with a transaction snapshot
+ * older than the reference (and hence possibly able to see tuples we did
+ * not index). Then we mark the index "indisvalid" and commit. Subsequent
+ * transactions will be able to use it for queries.
+ *
+ * Doing two full table scans is a brute-force strategy. We could try to be
+ * cleverer, eg storing new tuples in a special area of the table (perhaps
+ * making the table append-only by setting use_fsm). However that would
+ * add yet more locking issues.
+ */
+void
+validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
+{
+ Relation heapRelation,
+ indexRelation;
+ IndexInfo *indexInfo;
+ IndexVacuumInfo ivinfo;
+ ValidateIndexState state;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ {
+ const int progress_index[] = {
+ PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_TUPLES_DONE,
+ PROGRESS_CREATEIDX_TUPLES_TOTAL,
+ PROGRESS_SCAN_BLOCKS_DONE,
+ PROGRESS_SCAN_BLOCKS_TOTAL
+ };
+ const int64 progress_vals[] = {
+ PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+ 0, 0, 0, 0
+ };
+
+ pgstat_progress_update_multi_param(5, progress_index, progress_vals);
+ }
+
+ /* Open and lock the parent heap relation */
+ heapRelation = table_open(heapId, ShareUpdateExclusiveLock);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ indexRelation = index_open(indexId, RowExclusiveLock);
+
+ /*
+ * Fetch info needed for index_insert. (You might think this should be
+ * passed in from DefineIndex, but its copy is long gone due to having
+ * been built in a previous transaction.)
+ */
+ indexInfo = BuildIndexInfo(indexRelation);
+
+ /* mark build is concurrent just for consistency */
+ indexInfo->ii_Concurrent = true;
+
+ /*
+ * Scan the index and gather up all the TIDs into a tuplesort object.
+ */
+ ivinfo.index = indexRelation;
+ ivinfo.analyze_only = false;
+ ivinfo.report_progress = true;
+ ivinfo.estimated_count = true;
+ ivinfo.message_level = DEBUG2;
+ ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
+ ivinfo.strategy = NULL;
+
+ /*
+ * Encode TIDs as int8 values for the sort, rather than directly sorting
+ * item pointers. This can be significantly faster, primarily because TID
+ * is a pass-by-reference type on all platforms, whereas int8 is
+ * pass-by-value on most platforms.
+ */
+ state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
+ InvalidOid, false,
+ maintenance_work_mem,
+ NULL, false);
+ state.htups = state.itups = state.tups_inserted = 0;
+
+ /* ambulkdelete updates progress metrics */
+ (void) index_bulk_delete(&ivinfo, NULL,
+ validate_index_callback, (void *) &state);
+
+ /* Execute the sort */
+ {
+ const int progress_index[] = {
+ PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_SCAN_BLOCKS_DONE,
+ PROGRESS_SCAN_BLOCKS_TOTAL
+ };
+ const int64 progress_vals[] = {
+ PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT,
+ 0, 0
+ };
+
+ pgstat_progress_update_multi_param(3, progress_index, progress_vals);
+ }
+ tuplesort_performsort(state.tuplesort);
+
+ /*
+ * Now scan the heap and "merge" it with the index
+ */
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PHASE_VALIDATE_TABLESCAN);
+ table_index_validate_scan(heapRelation,
+ indexRelation,
+ indexInfo,
+ snapshot,
+ &state);
+
+ /* Done with tuplesort object */
+ tuplesort_end(state.tuplesort);
+
+ elog(DEBUG2,
+ "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
+ state.htups, state.itups, state.tups_inserted);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Close rels, but keep locks */
+ index_close(indexRelation, NoLock);
+ table_close(heapRelation, NoLock);
+}
+
+/*
+ * validate_index_callback - bulkdelete callback to collect the index TIDs
+ */
+static bool
+validate_index_callback(ItemPointer itemptr, void *opaque)
+{
+ ValidateIndexState *state = (ValidateIndexState *) opaque;
+ int64 encoded = itemptr_encode(itemptr);
+
+ tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
+ state->itups += 1;
+ return false; /* never actually delete anything */
+}
+
+/*
+ * index_set_state_flags - adjust pg_index state flags
+ *
+ * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
+ * flags that denote the index's state.
+ *
+ * Note that CatalogTupleUpdate() sends a cache invalidation message for the
+ * tuple, so other sessions will hear about the update as soon as we commit.
+ */
+void
+index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
+{
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+
+ /* Open pg_index and fetch a writable copy of the index's tuple */
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ /* Perform the requested state change on the copy */
+ switch (action)
+ {
+ case INDEX_CREATE_SET_READY:
+ /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(indexForm->indislive);
+ Assert(!indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisready = true;
+ break;
+ case INDEX_CREATE_SET_VALID:
+ /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
+ Assert(indexForm->indislive);
+ Assert(indexForm->indisready);
+ Assert(!indexForm->indisvalid);
+ indexForm->indisvalid = true;
+ break;
+ case INDEX_DROP_CLEAR_VALID:
+
+ /*
+ * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
+ *
+ * If indisready == true we leave it set so the index still gets
+ * maintained by active transactions. We only need to ensure that
+ * indisvalid is false. (We don't assert that either is initially
+ * true, though, since we want to be able to retry a DROP INDEX
+ * CONCURRENTLY that failed partway through.)
+ *
+ * Note: the CLUSTER logic assumes that indisclustered cannot be
+ * set on any invalid index, so clear that flag too. Similarly,
+ * ALTER TABLE assumes that indisreplident cannot be set for
+ * invalid indexes.
+ */
+ indexForm->indisvalid = false;
+ indexForm->indisclustered = false;
+ indexForm->indisreplident = false;
+ break;
+ case INDEX_DROP_SET_DEAD:
+
+ /*
+ * Clear indisready/indislive during DROP INDEX CONCURRENTLY
+ *
+ * We clear both indisready and indislive, because we not only
+ * want to stop updates, we want to prevent sessions from touching
+ * the index at all.
+ */
+ Assert(!indexForm->indisvalid);
+ Assert(!indexForm->indisclustered);
+ Assert(!indexForm->indisreplident);
+ indexForm->indisready = false;
+ indexForm->indislive = false;
+ break;
+ }
+
+ /* ... and update it */
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+
+ table_close(pg_index, RowExclusiveLock);
+}
+
+
+/*
+ * IndexGetRelation: given an index's relation OID, get the OID of the
+ * relation it is an index on. Uses the system cache.
+ */
+Oid
+IndexGetRelation(Oid indexId, bool missing_ok)
+{
+ HeapTuple tuple;
+ Form_pg_index index;
+ Oid result;
+
+ tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(tuple))
+ {
+ if (missing_ok)
+ return InvalidOid;
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ }
+ index = (Form_pg_index) GETSTRUCT(tuple);
+ Assert(index->indexrelid == indexId);
+
+ result = index->indrelid;
+ ReleaseSysCache(tuple);
+ return result;
+}
+
+/*
+ * reindex_index - This routine is used to recreate a single index
+ */
+void
+reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
+ ReindexParams *params)
+{
+ Relation iRel,
+ heapRelation;
+ Oid heapId;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+ IndexInfo *indexInfo;
+ volatile bool skipped_constraint = false;
+ PGRUsage ru0;
+ bool progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0);
+ bool set_tablespace = false;
+
+ pg_rusage_init(&ru0);
+
+ /*
+ * Open and lock the parent heap relation. ShareLock is sufficient since
+ * we only need to be sure no schema or data changes are going on.
+ */
+ heapId = IndexGetRelation(indexId,
+ (params->options & REINDEXOPT_MISSING_OK) != 0);
+ /* if relation is missing, leave */
+ if (!OidIsValid(heapId))
+ return;
+
+ if ((params->options & REINDEXOPT_MISSING_OK) != 0)
+ heapRelation = try_table_open(heapId, ShareLock);
+ else
+ heapRelation = table_open(heapId, ShareLock);
+
+ /* if relation is gone, leave */
+ if (!heapRelation)
+ return;
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are run
+ * as that user. Also lock down security-restricted operations and
+ * arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+
+ if (progress)
+ {
+ const int progress_cols[] = {
+ PROGRESS_CREATEIDX_COMMAND,
+ PROGRESS_CREATEIDX_INDEX_OID
+ };
+ const int64 progress_vals[] = {
+ PROGRESS_CREATEIDX_COMMAND_REINDEX,
+ indexId
+ };
+
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ heapId);
+ pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
+ }
+
+ /*
+ * Open the target index relation and get an exclusive lock on it, to
+ * ensure that no one else is touching this particular index.
+ */
+ iRel = index_open(indexId, AccessExclusiveLock);
+
+ if (progress)
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
+ iRel->rd_rel->relam);
+
+ /*
+ * Partitioned indexes should never get processed here, as they have no
+ * physical storage.
+ */
+ if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+ elog(ERROR, "cannot reindex partitioned index \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(iRel)),
+ RelationGetRelationName(iRel));
+
+ /*
+ * Don't allow reindex on temp tables of other backends ... their local
+ * buffer manager is not going to cope.
+ */
+ if (RELATION_IS_OTHER_TEMP(iRel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex temporary tables of other sessions")));
+
+ /*
+ * Don't allow reindex of an invalid index on TOAST table. This is a
+ * leftover from a failed REINDEX CONCURRENTLY, and if rebuilt it would
+ * not be possible to drop it anymore.
+ */
+ if (IsToastNamespace(RelationGetNamespace(iRel)) &&
+ !get_index_isvalid(indexId))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex invalid index on TOAST table")));
+
+ /*
+ * System relations cannot be moved even if allow_system_table_mods is
+ * enabled to keep things consistent with the concurrent case where all
+ * the indexes of a relation are processed in series, including indexes of
+ * toast relations.
+ *
+ * Note that this check is not part of CheckRelationTableSpaceMove() as it
+ * gets used for ALTER TABLE SET TABLESPACE that could cascade across
+ * toast relations.
+ */
+ if (OidIsValid(params->tablespaceOid) &&
+ IsSystemRelation(iRel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot move system relation \"%s\"",
+ RelationGetRelationName(iRel))));
+
+ /* Check if the tablespace of this index needs to be changed */
+ if (OidIsValid(params->tablespaceOid) &&
+ CheckRelationTableSpaceMove(iRel, params->tablespaceOid))
+ set_tablespace = true;
+
+ /*
+ * Also check for active uses of the index in the current transaction; we
+ * don't want to reindex underneath an open indexscan.
+ */
+ CheckTableNotInUse(iRel, "REINDEX INDEX");
+
+ /* Set new tablespace, if requested */
+ if (set_tablespace)
+ {
+ /* Update its pg_class row */
+ SetRelationTableSpace(iRel, params->tablespaceOid, InvalidOid);
+
+ /*
+ * Schedule unlinking of the old index storage at transaction commit.
+ */
+ RelationDropStorage(iRel);
+ RelationAssumeNewRelfilenode(iRel);
+
+ /* Make sure the reltablespace change is visible */
+ CommandCounterIncrement();
+ }
+
+ /*
+ * All predicate locks on the index are about to be made invalid. Promote
+ * them to relation locks on the heap.
+ */
+ TransferPredicateLocksToHeapRelation(iRel);
+
+ /* Fetch info needed for index_build */
+ indexInfo = BuildIndexInfo(iRel);
+
+ /* If requested, skip checking uniqueness/exclusion constraints */
+ if (skip_constraint_checks)
+ {
+ if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
+ skipped_constraint = true;
+ indexInfo->ii_Unique = false;
+ indexInfo->ii_ExclusionOps = NULL;
+ indexInfo->ii_ExclusionProcs = NULL;
+ indexInfo->ii_ExclusionStrats = NULL;
+ }
+
+ /* Suppress use of the target index while rebuilding it */
+ SetReindexProcessing(heapId, indexId);
+
+ /* Create a new physical relation for the index */
+ RelationSetNewRelfilenode(iRel, persistence);
+
+ /* Initialize the index and rebuild */
+ /* Note: we do not need to re-establish pkey setting */
+ index_build(heapRelation, iRel, indexInfo, true, true);
+
+ /* Re-allow use of target index */
+ ResetReindexProcessing();
+
+ /*
+ * If the index is marked invalid/not-ready/dead (ie, it's from a failed
+ * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
+ * and we didn't skip a uniqueness check, we can now mark it valid. This
+ * allows REINDEX to be used to clean up in such cases.
+ *
+ * We can also reset indcheckxmin, because we have now done a
+ * non-concurrent index build, *except* in the case where index_build
+ * found some still-broken HOT chains. If it did, and we don't have to
+ * change any of the other flags, we just leave indcheckxmin alone (note
+ * that index_build won't have changed it, because this is a reindex).
+ * This is okay and desirable because not updating the tuple leaves the
+ * index's usability horizon (recorded as the tuple's xmin value) the same
+ * as it was.
+ *
+ * But, if the index was invalid/not-ready/dead and there were broken HOT
+ * chains, we had better force indcheckxmin true, because the normal
+ * argument that the HOT chains couldn't conflict with the index is
+ * suspect for an invalid index. (A conflict is definitely possible if
+ * the index was dead. It probably shouldn't happen otherwise, but let's
+ * be conservative.) In this case advancing the usability horizon is
+ * appropriate.
+ *
+ * Another reason for avoiding unnecessary updates here is that while
+ * reindexing pg_index itself, we must not try to update tuples in it.
+ * pg_index's indexes should always have these flags in their clean state,
+ * so that won't happen.
+ *
+ * If early pruning/vacuuming is enabled for the heap relation, the
+ * usability horizon must be advanced to the current transaction on every
+ * build or rebuild. pg_index is OK in this regard because catalog tables
+ * are not subject to early cleanup.
+ */
+ if (!skipped_constraint)
+ {
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
+ bool index_bad;
+ bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
+
+ pg_index = table_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy1(INDEXRELID,
+ ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ index_bad = (!indexForm->indisvalid ||
+ !indexForm->indisready ||
+ !indexForm->indislive);
+ if (index_bad ||
+ (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
+ early_pruning_enabled)
+ {
+ if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
+ indexForm->indcheckxmin = false;
+ else if (index_bad || early_pruning_enabled)
+ indexForm->indcheckxmin = true;
+ indexForm->indisvalid = true;
+ indexForm->indisready = true;
+ indexForm->indislive = true;
+ CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+
+ /*
+ * Invalidate the relcache for the table, so that after we commit
+ * all sessions will refresh the table's index list. This ensures
+ * that if anyone misses seeing the pg_index row during this
+ * update, they'll refresh their list before attempting any update
+ * on the table.
+ */
+ CacheInvalidateRelcache(heapRelation);
+ }
+
+ table_close(pg_index, RowExclusiveLock);
+ }
+
+ /* Log what we did */
+ if ((params->options & REINDEXOPT_VERBOSE) != 0)
+ ereport(INFO,
+ (errmsg("index \"%s\" was reindexed",
+ get_rel_name(indexId)),
+ errdetail_internal("%s",
+ pg_rusage_show(&ru0))));
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Close rels, but keep locks */
+ index_close(iRel, NoLock);
+ table_close(heapRelation, NoLock);
+
+ if (progress)
+ pgstat_progress_end_command();
+}
+
+/*
+ * reindex_relation - This routine is used to recreate all indexes
+ * of a relation (and optionally its toast relation too, if any).
+ *
+ * "flags" is a bitmask that can include any combination of these bits:
+ *
+ * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
+ *
+ * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
+ * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
+ * indexes are inconsistent with it. This makes things tricky if the relation
+ * is a system catalog that we might consult during the reindexing. To deal
+ * with that case, we mark all of the indexes as pending rebuild so that they
+ * won't be trusted until rebuilt. The caller is required to call us *without*
+ * having made the rebuilt table visible by doing CommandCounterIncrement;
+ * we'll do CCI after having collected the index list. (This way we can still
+ * use catalog indexes while collecting the list.)
+ *
+ * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
+ * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
+ * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
+ * rebuild an index if constraint inconsistency is suspected. For optimal
+ * performance, other callers should include the flag only after transforming
+ * the data in a manner that risks a change in constraint validity.
+ *
+ * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
+ * rebuilt indexes to unlogged.
+ *
+ * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
+ * rebuilt indexes to permanent.
+ *
+ * Returns true if any indexes were rebuilt (including toast table's index
+ * when relevant). Note that a CommandCounterIncrement will occur after each
+ * index rebuild.
+ */
+bool
+reindex_relation(Oid relid, int flags, ReindexParams *params)
+{
+ Relation rel;
+ Oid toast_relid;
+ List *indexIds;
+ char persistence;
+ bool result;
+ ListCell *indexId;
+ int i;
+
+ /*
+ * Open and lock the relation. ShareLock is sufficient since we only need
+ * to prevent schema and data changes in it. The lock level used here
+ * should match ReindexTable().
+ */
+ if ((params->options & REINDEXOPT_MISSING_OK) != 0)
+ rel = try_table_open(relid, ShareLock);
+ else
+ rel = table_open(relid, ShareLock);
+
+ /* if relation is gone, leave */
+ if (!rel)
+ return false;
+
+ /*
+ * Partitioned tables should never get processed here, as they have no
+ * physical storage.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ elog(ERROR, "cannot reindex partitioned table \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel));
+
+ toast_relid = rel->rd_rel->reltoastrelid;
+
+ /*
+ * Get the list of index OIDs for this relation. (We trust to the
+ * relcache to get this with a sequential scan if ignoring system
+ * indexes.)
+ */
+ indexIds = RelationGetIndexList(rel);
+
+ if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
+ {
+ /* Suppress use of all the indexes until they are rebuilt */
+ SetReindexPending(indexIds);
+
+ /*
+ * Make the new heap contents visible --- now things might be
+ * inconsistent!
+ */
+ CommandCounterIncrement();
+ }
+
+ /*
+ * Compute persistence of indexes: same as that of owning rel, unless
+ * caller specified otherwise.
+ */
+ if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
+ persistence = RELPERSISTENCE_UNLOGGED;
+ else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
+ persistence = RELPERSISTENCE_PERMANENT;
+ else
+ persistence = rel->rd_rel->relpersistence;
+
+ /* Reindex all the indexes. */
+ i = 1;
+ foreach(indexId, indexIds)
+ {
+ Oid indexOid = lfirst_oid(indexId);
+ Oid indexNamespaceId = get_rel_namespace(indexOid);
+
+ /*
+ * Skip any invalid indexes on a TOAST table. These can only be
+ * duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
+ * rebuilt it would not be possible to drop them anymore.
+ */
+ if (IsToastNamespace(indexNamespaceId) &&
+ !get_index_isvalid(indexOid))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
+ get_namespace_name(indexNamespaceId),
+ get_rel_name(indexOid))));
+ continue;
+ }
+
+ reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
+ persistence, params);
+
+ CommandCounterIncrement();
+
+ /* Index should no longer be in the pending list */
+ Assert(!ReindexIsProcessingIndex(indexOid));
+
+ /* Set index rebuild count */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
+ i);
+ i++;
+ }
+
+ /*
+ * Close rel, but continue to hold the lock.
+ */
+ table_close(rel, NoLock);
+
+ result = (indexIds != NIL);
+
+ /*
+ * If the relation has a secondary toast rel, reindex that too while we
+ * still hold the lock on the main table.
+ */
+ if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
+ {
+ /*
+ * Note that this should fail if the toast relation is missing, so
+ * reset REINDEXOPT_MISSING_OK. Even if a new tablespace is set for
+ * the parent relation, the indexes on its toast table are not moved.
+ * This rule is enforced by setting tablespaceOid to InvalidOid.
+ */
+ ReindexParams newparams = *params;
+
+ newparams.options &= ~(REINDEXOPT_MISSING_OK);
+ newparams.tablespaceOid = InvalidOid;
+ result |= reindex_relation(toast_relid, flags, &newparams);
+ }
+
+ return result;
+}
+
+
+/* ----------------------------------------------------------------
+ * System index reindexing support
+ *
+ * When we are busy reindexing a system index, this code provides support
+ * for preventing catalog lookups from using that index. We also make use
+ * of this to catch attempted uses of user indexes during reindexing of
+ * those indexes. This information is propagated to parallel workers;
+ * attempting to change it during a parallel operation is not permitted.
+ * ----------------------------------------------------------------
+ */
+
+static Oid currentlyReindexedHeap = InvalidOid;
+static Oid currentlyReindexedIndex = InvalidOid;
+static List *pendingReindexedIndexes = NIL;
+static int reindexingNestLevel = 0;
+
+/*
+ * ReindexIsProcessingHeap
+ * True if heap specified by OID is currently being reindexed.
+ */
+bool
+ReindexIsProcessingHeap(Oid heapOid)
+{
+ return heapOid == currentlyReindexedHeap;
+}
+
+/*
+ * ReindexIsCurrentlyProcessingIndex
+ * True if index specified by OID is currently being reindexed.
+ */
+static bool
+ReindexIsCurrentlyProcessingIndex(Oid indexOid)
+{
+ return indexOid == currentlyReindexedIndex;
+}
+
+/*
+ * ReindexIsProcessingIndex
+ * True if index specified by OID is currently being reindexed,
+ * or should be treated as invalid because it is awaiting reindex.
+ */
+bool
+ReindexIsProcessingIndex(Oid indexOid)
+{
+ return indexOid == currentlyReindexedIndex ||
+ list_member_oid(pendingReindexedIndexes, indexOid);
+}
+
+/*
+ * SetReindexProcessing
+ * Set flag that specified heap/index are being reindexed.
+ */
+static void
+SetReindexProcessing(Oid heapOid, Oid indexOid)
+{
+ Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
+ /* Reindexing is not re-entrant. */
+ if (OidIsValid(currentlyReindexedHeap))
+ elog(ERROR, "cannot reindex while reindexing");
+ currentlyReindexedHeap = heapOid;
+ currentlyReindexedIndex = indexOid;
+ /* Index is no longer "pending" reindex. */
+ RemoveReindexPending(indexOid);
+ /* This may have been set already, but in case it isn't, do so now. */
+ reindexingNestLevel = GetCurrentTransactionNestLevel();
+}
+
+/*
+ * ResetReindexProcessing
+ * Unset reindexing status.
+ */
+static void
+ResetReindexProcessing(void)
+{
+ currentlyReindexedHeap = InvalidOid;
+ currentlyReindexedIndex = InvalidOid;
+ /* reindexingNestLevel remains set till end of (sub)transaction */
+}
+
+/*
+ * SetReindexPending
+ * Mark the given indexes as pending reindex.
+ *
+ * NB: we assume that the current memory context stays valid throughout.
+ */
+static void
+SetReindexPending(List *indexes)
+{
+ /* Reindexing is not re-entrant. */
+ if (pendingReindexedIndexes)
+ elog(ERROR, "cannot reindex while reindexing");
+ if (IsInParallelMode())
+ elog(ERROR, "cannot modify reindex state during a parallel operation");
+ pendingReindexedIndexes = list_copy(indexes);
+ reindexingNestLevel = GetCurrentTransactionNestLevel();
+}
+
+/*
+ * RemoveReindexPending
+ * Remove the given index from the pending list.
+ */
+static void
+RemoveReindexPending(Oid indexOid)
+{
+ if (IsInParallelMode())
+ elog(ERROR, "cannot modify reindex state during a parallel operation");
+ pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
+ indexOid);
+}
+
+/*
+ * ResetReindexState
+ * Clear all reindexing state during (sub)transaction abort.
+ */
+void
+ResetReindexState(int nestLevel)
+{
+ /*
+ * Because reindexing is not re-entrant, we don't need to cope with nested
+ * reindexing states. We just need to avoid messing up the outer-level
+ * state in case a subtransaction fails within a REINDEX. So checking the
+ * current nest level against that of the reindex operation is sufficient.
+ */
+ if (reindexingNestLevel >= nestLevel)
+ {
+ currentlyReindexedHeap = InvalidOid;
+ currentlyReindexedIndex = InvalidOid;
+
+ /*
+ * We needn't try to release the contents of pendingReindexedIndexes;
+ * that list should be in a transaction-lifespan context, so it will
+ * go away automatically.
+ */
+ pendingReindexedIndexes = NIL;
+
+ reindexingNestLevel = 0;
+ }
+}
+
+/*
+ * EstimateReindexStateSpace
+ * Estimate space needed to pass reindex state to parallel workers.
+ */
+Size
+EstimateReindexStateSpace(void)
+{
+ return offsetof(SerializedReindexState, pendingReindexedIndexes)
+ + mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
+}
+
+/*
+ * SerializeReindexState
+ * Serialize reindex state for parallel workers.
+ */
+void
+SerializeReindexState(Size maxsize, char *start_address)
+{
+ SerializedReindexState *sistate = (SerializedReindexState *) start_address;
+ int c = 0;
+ ListCell *lc;
+
+ sistate->currentlyReindexedHeap = currentlyReindexedHeap;
+ sistate->currentlyReindexedIndex = currentlyReindexedIndex;
+ sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
+ foreach(lc, pendingReindexedIndexes)
+ sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
+}
+
+/*
+ * RestoreReindexState
+ * Restore reindex state in a parallel worker.
+ */
+void
+RestoreReindexState(void *reindexstate)
+{
+ SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
+ int c = 0;
+ MemoryContext oldcontext;
+
+ currentlyReindexedHeap = sistate->currentlyReindexedHeap;
+ currentlyReindexedIndex = sistate->currentlyReindexedIndex;
+
+ Assert(pendingReindexedIndexes == NIL);
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+ for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
+ pendingReindexedIndexes =
+ lappend_oid(pendingReindexedIndexes,
+ sistate->pendingReindexedIndexes[c]);
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Note the worker has its own transaction nesting level */
+ reindexingNestLevel = GetCurrentTransactionNestLevel();
+}