diff options
Diffstat (limited to '')
-rw-r--r-- | src/backend/utils/cache/Makefile | 31 | ||||
-rw-r--r-- | src/backend/utils/cache/attoptcache.c | 177 | ||||
-rw-r--r-- | src/backend/utils/cache/catcache.c | 2095 | ||||
-rw-r--r-- | src/backend/utils/cache/evtcache.c | 270 | ||||
-rw-r--r-- | src/backend/utils/cache/inval.c | 1578 | ||||
-rw-r--r-- | src/backend/utils/cache/lsyscache.c | 3580 | ||||
-rw-r--r-- | src/backend/utils/cache/partcache.c | 430 | ||||
-rw-r--r-- | src/backend/utils/cache/plancache.c | 2207 | ||||
-rw-r--r-- | src/backend/utils/cache/relcache.c | 6651 | ||||
-rw-r--r-- | src/backend/utils/cache/relfilenodemap.c | 244 | ||||
-rw-r--r-- | src/backend/utils/cache/relmapper.c | 1045 | ||||
-rw-r--r-- | src/backend/utils/cache/spccache.c | 236 | ||||
-rw-r--r-- | src/backend/utils/cache/syscache.c | 1565 | ||||
-rw-r--r-- | src/backend/utils/cache/ts_cache.c | 652 | ||||
-rw-r--r-- | src/backend/utils/cache/typcache.c | 2883 |
15 files changed, 23644 insertions, 0 deletions
diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile new file mode 100644 index 0000000..38e46d2 --- /dev/null +++ b/src/backend/utils/cache/Makefile @@ -0,0 +1,31 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for utils/cache +# +# IDENTIFICATION +# src/backend/utils/cache/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/utils/cache +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + attoptcache.o \ + catcache.o \ + evtcache.o \ + inval.o \ + lsyscache.o \ + partcache.o \ + plancache.o \ + relcache.o \ + relfilenodemap.o \ + relmapper.o \ + spccache.o \ + syscache.o \ + ts_cache.o \ + typcache.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/cache/attoptcache.c b/src/backend/utils/cache/attoptcache.c new file mode 100644 index 0000000..72d89cb --- /dev/null +++ b/src/backend/utils/cache/attoptcache.c @@ -0,0 +1,177 @@ +/*------------------------------------------------------------------------- + * + * attoptcache.c + * Attribute options cache management. + * + * Attribute options are cached separately from the fixed-size portion of + * pg_attribute entries, which are handled by the relcache. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/attoptcache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/reloptions.h" +#include "utils/attoptcache.h" +#include "utils/catcache.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/syscache.h" + + +/* Hash table for information about each attribute's options */ +static HTAB *AttoptCacheHash = NULL; + +/* attrelid and attnum form the lookup key, and must appear first */ +typedef struct +{ + Oid attrelid; + int attnum; +} AttoptCacheKey; + +typedef struct +{ + AttoptCacheKey key; /* lookup key - must be first */ + AttributeOpts *opts; /* options, or NULL if none */ +} AttoptCacheEntry; + + +/* + * InvalidateAttoptCacheCallback + * Flush all cache entries when pg_attribute is updated. + * + * When pg_attribute is updated, we must flush the cache entry at least + * for that attribute. Currently, we just flush them all. Since attribute + * options are not currently used in performance-critical paths (such as + * query execution), this seems OK. + */ +static void +InvalidateAttoptCacheCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + AttoptCacheEntry *attopt; + + hash_seq_init(&status, AttoptCacheHash); + while ((attopt = (AttoptCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (attopt->opts) + pfree(attopt->opts); + if (hash_search(AttoptCacheHash, + (void *) &attopt->key, + HASH_REMOVE, + NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } +} + +/* + * InitializeAttoptCache + * Initialize the attribute options cache. + */ +static void +InitializeAttoptCache(void) +{ + HASHCTL ctl; + + /* Initialize the hash table. 
*/ + ctl.keysize = sizeof(AttoptCacheKey); + ctl.entrysize = sizeof(AttoptCacheEntry); + AttoptCacheHash = + hash_create("Attopt cache", 256, &ctl, + HASH_ELEM | HASH_BLOBS); + + /* Make sure we've initialized CacheMemoryContext. */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + /* Watch for invalidation events. */ + CacheRegisterSyscacheCallback(ATTNUM, + InvalidateAttoptCacheCallback, + (Datum) 0); +} + +/* + * get_attribute_options + * Fetch attribute options for a specified table OID. + */ +AttributeOpts * +get_attribute_options(Oid attrelid, int attnum) +{ + AttoptCacheKey key; + AttoptCacheEntry *attopt; + AttributeOpts *result; + HeapTuple tp; + + /* Find existing cache entry, if any. */ + if (!AttoptCacheHash) + InitializeAttoptCache(); + memset(&key, 0, sizeof(key)); /* make sure any padding bits are unset */ + key.attrelid = attrelid; + key.attnum = attnum; + attopt = + (AttoptCacheEntry *) hash_search(AttoptCacheHash, + (void *) &key, + HASH_FIND, + NULL); + + /* Not found in Attopt cache. Construct new cache entry. */ + if (!attopt) + { + AttributeOpts *opts; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(attrelid), + Int16GetDatum(attnum)); + + /* + * If we don't find a valid HeapTuple, it must mean someone has + * managed to request attribute details for a non-existent attribute. + * We treat that case as if no options were specified. + */ + if (!HeapTupleIsValid(tp)) + opts = NULL; + else + { + Datum datum; + bool isNull; + + datum = SysCacheGetAttr(ATTNUM, + tp, + Anum_pg_attribute_attoptions, + &isNull); + if (isNull) + opts = NULL; + else + { + bytea *bytea_opts = attribute_reloptions(datum, false); + + opts = MemoryContextAlloc(CacheMemoryContext, + VARSIZE(bytea_opts)); + memcpy(opts, bytea_opts, VARSIZE(bytea_opts)); + } + ReleaseSysCache(tp); + } + + /* + * It's important to create the actual cache entry only after reading + * pg_attribute, since the read could cause a cache flush. 
+ */ + attopt = (AttoptCacheEntry *) hash_search(AttoptCacheHash, + (void *) &key, + HASH_ENTER, + NULL); + attopt->opts = opts; + } + + /* Return results in caller's memory context. */ + if (attopt->opts == NULL) + return NULL; + result = palloc(VARSIZE(attopt->opts)); + memcpy(result, attopt->opts, VARSIZE(attopt->opts)); + return result; +} diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c new file mode 100644 index 0000000..7b44ee1 --- /dev/null +++ b/src/backend/utils/cache/catcache.c @@ -0,0 +1,2095 @@ +/*------------------------------------------------------------------------- + * + * catcache.c + * System catalog cache for tuples matching a key. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/cache/catcache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heaptoast.h" +#include "access/relscan.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/valid.h" +#include "access/xact.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "miscadmin.h" +#ifdef CATCACHE_STATS +#include "storage/ipc.h" /* for on_proc_exit */ +#endif +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/resowner_private.h" +#include "utils/syscache.h" + + + /* #define CACHEDEBUG */ /* turns DEBUG elogs on */ + +/* + * Given a hash value and the size of the hash table, find the bucket + * in which the hash value belongs. Since the hash table must contain + * a power-of-2 number of elements, this is a simple bitmask. 
+ */ +#define HASH_INDEX(h, sz) ((Index) ((h) & ((sz) - 1))) + + +/* + * variables, macros and other stuff + */ + +#ifdef CACHEDEBUG +#define CACHE_elog(...) elog(__VA_ARGS__) +#else +#define CACHE_elog(...) +#endif + +/* Cache management header --- pointer is NULL until created */ +static CatCacheHeader *CacheHdr = NULL; + +static inline HeapTuple SearchCatCacheInternal(CatCache *cache, + int nkeys, + Datum v1, Datum v2, + Datum v3, Datum v4); + +static pg_noinline HeapTuple SearchCatCacheMiss(CatCache *cache, + int nkeys, + uint32 hashValue, + Index hashIndex, + Datum v1, Datum v2, + Datum v3, Datum v4); + +static uint32 CatalogCacheComputeHashValue(CatCache *cache, int nkeys, + Datum v1, Datum v2, Datum v3, Datum v4); +static uint32 CatalogCacheComputeTupleHashValue(CatCache *cache, int nkeys, + HeapTuple tuple); +static inline bool CatalogCacheCompareTuple(const CatCache *cache, int nkeys, + const Datum *cachekeys, + const Datum *searchkeys); + +#ifdef CATCACHE_STATS +static void CatCachePrintStats(int code, Datum arg); +#endif +static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct); +static void CatCacheRemoveCList(CatCache *cache, CatCList *cl); +static void CatalogCacheInitializeCache(CatCache *cache); +static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, + Datum *arguments, + uint32 hashValue, Index hashIndex, + bool negative); + +static void CatCacheFreeKeys(TupleDesc tupdesc, int nkeys, int *attnos, + Datum *keys); +static void CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos, + Datum *srckeys, Datum *dstkeys); + + +/* + * internal support functions + */ + +/* + * Hash and equality functions for system types that are used as cache key + * fields. In some cases, we just call the regular SQL-callable functions for + * the appropriate data type, but that tends to be a little slow, and the + * speed of these functions is performance-critical. 
Therefore, for data + * types that frequently occur as catcache keys, we hard-code the logic here. + * Avoiding the overhead of DirectFunctionCallN(...) is a substantial win, and + * in certain cases (like int4) we can adopt a faster hash algorithm as well. + */ + +static bool +chareqfast(Datum a, Datum b) +{ + return DatumGetChar(a) == DatumGetChar(b); +} + +static uint32 +charhashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetChar(datum)); +} + +static bool +nameeqfast(Datum a, Datum b) +{ + char *ca = NameStr(*DatumGetName(a)); + char *cb = NameStr(*DatumGetName(b)); + + return strncmp(ca, cb, NAMEDATALEN) == 0; +} + +static uint32 +namehashfast(Datum datum) +{ + char *key = NameStr(*DatumGetName(datum)); + + return hash_any((unsigned char *) key, strlen(key)); +} + +static bool +int2eqfast(Datum a, Datum b) +{ + return DatumGetInt16(a) == DatumGetInt16(b); +} + +static uint32 +int2hashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetInt16(datum)); +} + +static bool +int4eqfast(Datum a, Datum b) +{ + return DatumGetInt32(a) == DatumGetInt32(b); +} + +static uint32 +int4hashfast(Datum datum) +{ + return murmurhash32((int32) DatumGetInt32(datum)); +} + +static bool +texteqfast(Datum a, Datum b) +{ + /* + * The use of DEFAULT_COLLATION_OID is fairly arbitrary here. We just + * want to take the fast "deterministic" path in texteq(). + */ + return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b)); +} + +static uint32 +texthashfast(Datum datum) +{ + /* analogously here as in texteqfast() */ + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); +} + +static bool +oidvectoreqfast(Datum a, Datum b) +{ + return DatumGetBool(DirectFunctionCall2(oidvectoreq, a, b)); +} + +static uint32 +oidvectorhashfast(Datum datum) +{ + return DatumGetInt32(DirectFunctionCall1(hashoidvector, datum)); +} + +/* Lookup support functions for a type. 
*/ +static void +GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEqualFN *fasteqfunc) +{ + switch (keytype) + { + case BOOLOID: + *hashfunc = charhashfast; + *fasteqfunc = chareqfast; + *eqfunc = F_BOOLEQ; + break; + case CHAROID: + *hashfunc = charhashfast; + *fasteqfunc = chareqfast; + *eqfunc = F_CHAREQ; + break; + case NAMEOID: + *hashfunc = namehashfast; + *fasteqfunc = nameeqfast; + *eqfunc = F_NAMEEQ; + break; + case INT2OID: + *hashfunc = int2hashfast; + *fasteqfunc = int2eqfast; + *eqfunc = F_INT2EQ; + break; + case INT4OID: + *hashfunc = int4hashfast; + *fasteqfunc = int4eqfast; + *eqfunc = F_INT4EQ; + break; + case TEXTOID: + *hashfunc = texthashfast; + *fasteqfunc = texteqfast; + *eqfunc = F_TEXTEQ; + break; + case OIDOID: + case REGPROCOID: + case REGPROCEDUREOID: + case REGOPEROID: + case REGOPERATOROID: + case REGCLASSOID: + case REGTYPEOID: + case REGCOLLATIONOID: + case REGCONFIGOID: + case REGDICTIONARYOID: + case REGROLEOID: + case REGNAMESPACEOID: + *hashfunc = int4hashfast; + *fasteqfunc = int4eqfast; + *eqfunc = F_OIDEQ; + break; + case OIDVECTOROID: + *hashfunc = oidvectorhashfast; + *fasteqfunc = oidvectoreqfast; + *eqfunc = F_OIDVECTOREQ; + break; + default: + elog(FATAL, "type %u not supported as catcache key", keytype); + *hashfunc = NULL; /* keep compiler quiet */ + + *eqfunc = InvalidOid; + break; + } +} + +/* + * CatalogCacheComputeHashValue + * + * Compute the hash value associated with a given set of lookup keys + */ +static uint32 +CatalogCacheComputeHashValue(CatCache *cache, int nkeys, + Datum v1, Datum v2, Datum v3, Datum v4) +{ + uint32 hashValue = 0; + uint32 oneHash; + CCHashFN *cc_hashfunc = cache->cc_hashfunc; + + CACHE_elog(DEBUG2, "CatalogCacheComputeHashValue %s %d %p", + cache->cc_relname, nkeys, cache); + + switch (nkeys) + { + case 4: + oneHash = (cc_hashfunc[3]) (v4); + + hashValue ^= oneHash << 24; + hashValue ^= oneHash >> 8; + /* FALLTHROUGH */ + case 3: + oneHash = (cc_hashfunc[2]) 
(v3); + + hashValue ^= oneHash << 16; + hashValue ^= oneHash >> 16; + /* FALLTHROUGH */ + case 2: + oneHash = (cc_hashfunc[1]) (v2); + + hashValue ^= oneHash << 8; + hashValue ^= oneHash >> 24; + /* FALLTHROUGH */ + case 1: + oneHash = (cc_hashfunc[0]) (v1); + + hashValue ^= oneHash; + break; + default: + elog(FATAL, "wrong number of hash keys: %d", nkeys); + break; + } + + return hashValue; +} + +/* + * CatalogCacheComputeTupleHashValue + * + * Compute the hash value associated with a given tuple to be cached + */ +static uint32 +CatalogCacheComputeTupleHashValue(CatCache *cache, int nkeys, HeapTuple tuple) +{ + Datum v1 = 0, + v2 = 0, + v3 = 0, + v4 = 0; + bool isNull = false; + int *cc_keyno = cache->cc_keyno; + TupleDesc cc_tupdesc = cache->cc_tupdesc; + + /* Now extract key fields from tuple, insert into scankey */ + switch (nkeys) + { + case 4: + v4 = fastgetattr(tuple, + cc_keyno[3], + cc_tupdesc, + &isNull); + Assert(!isNull); + /* FALLTHROUGH */ + case 3: + v3 = fastgetattr(tuple, + cc_keyno[2], + cc_tupdesc, + &isNull); + Assert(!isNull); + /* FALLTHROUGH */ + case 2: + v2 = fastgetattr(tuple, + cc_keyno[1], + cc_tupdesc, + &isNull); + Assert(!isNull); + /* FALLTHROUGH */ + case 1: + v1 = fastgetattr(tuple, + cc_keyno[0], + cc_tupdesc, + &isNull); + Assert(!isNull); + break; + default: + elog(FATAL, "wrong number of hash keys: %d", nkeys); + break; + } + + return CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); +} + +/* + * CatalogCacheCompareTuple + * + * Compare a tuple to the passed arguments. 
+ */ +static inline bool +CatalogCacheCompareTuple(const CatCache *cache, int nkeys, + const Datum *cachekeys, + const Datum *searchkeys) +{ + const CCFastEqualFN *cc_fastequal = cache->cc_fastequal; + int i; + + for (i = 0; i < nkeys; i++) + { + if (!(cc_fastequal[i]) (cachekeys[i], searchkeys[i])) + return false; + } + return true; +} + + +#ifdef CATCACHE_STATS + +static void +CatCachePrintStats(int code, Datum arg) +{ + slist_iter iter; + long cc_searches = 0; + long cc_hits = 0; + long cc_neg_hits = 0; + long cc_newloads = 0; + long cc_invals = 0; + long cc_lsearches = 0; + long cc_lhits = 0; + + slist_foreach(iter, &CacheHdr->ch_caches) + { + CatCache *cache = slist_container(CatCache, cc_next, iter.cur); + + if (cache->cc_ntup == 0 && cache->cc_searches == 0) + continue; /* don't print unused caches */ + elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits", + cache->cc_relname, + cache->cc_indexoid, + cache->cc_ntup, + cache->cc_searches, + cache->cc_hits, + cache->cc_neg_hits, + cache->cc_hits + cache->cc_neg_hits, + cache->cc_newloads, + cache->cc_searches - cache->cc_hits - cache->cc_neg_hits - cache->cc_newloads, + cache->cc_searches - cache->cc_hits - cache->cc_neg_hits, + cache->cc_invals, + cache->cc_lsearches, + cache->cc_lhits); + cc_searches += cache->cc_searches; + cc_hits += cache->cc_hits; + cc_neg_hits += cache->cc_neg_hits; + cc_newloads += cache->cc_newloads; + cc_invals += cache->cc_invals; + cc_lsearches += cache->cc_lsearches; + cc_lhits += cache->cc_lhits; + } + elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits", + CacheHdr->ch_ntup, + cc_searches, + cc_hits, + cc_neg_hits, + cc_hits + cc_neg_hits, + cc_newloads, + cc_searches - cc_hits - cc_neg_hits - cc_newloads, + cc_searches - cc_hits - cc_neg_hits, + cc_invals, + cc_lsearches, + cc_lhits); +} +#endif /* CATCACHE_STATS */ + + +/* + * CatCacheRemoveCTup 
+ * + * Unlink and delete the given cache entry + * + * NB: if it is a member of a CatCList, the CatCList is deleted too. + * Both the cache entry and the list had better have zero refcount. + */ +static void +CatCacheRemoveCTup(CatCache *cache, CatCTup *ct) +{ + Assert(ct->refcount == 0); + Assert(ct->my_cache == cache); + + if (ct->c_list) + { + /* + * The cleanest way to handle this is to call CatCacheRemoveCList, + * which will recurse back to me, and the recursive call will do the + * work. Set the "dead" flag to make sure it does recurse. + */ + ct->dead = true; + CatCacheRemoveCList(cache, ct->c_list); + return; /* nothing left to do */ + } + + /* delink from linked list */ + dlist_delete(&ct->cache_elem); + + /* + * Free keys when we're dealing with a negative entry, normal entries just + * point into tuple, allocated together with the CatCTup. + */ + if (ct->negative) + CatCacheFreeKeys(cache->cc_tupdesc, cache->cc_nkeys, + cache->cc_keyno, ct->keys); + + pfree(ct); + + --cache->cc_ntup; + --CacheHdr->ch_ntup; +} + +/* + * CatCacheRemoveCList + * + * Unlink and delete the given cache list entry + * + * NB: any dead member entries that become unreferenced are deleted too. + */ +static void +CatCacheRemoveCList(CatCache *cache, CatCList *cl) +{ + int i; + + Assert(cl->refcount == 0); + Assert(cl->my_cache == cache); + + /* delink from member tuples */ + for (i = cl->n_members; --i >= 0;) + { + CatCTup *ct = cl->members[i]; + + Assert(ct->c_list == cl); + ct->c_list = NULL; + /* if the member is dead and now has no references, remove it */ + if ( +#ifndef CATCACHE_FORCE_RELEASE + ct->dead && +#endif + ct->refcount == 0) + CatCacheRemoveCTup(cache, ct); + } + + /* delink from linked list */ + dlist_delete(&cl->cache_elem); + + /* free associated column data */ + CatCacheFreeKeys(cache->cc_tupdesc, cl->nkeys, + cache->cc_keyno, cl->keys); + + pfree(cl); +} + + +/* + * CatCacheInvalidate + * + * Invalidate entries in the specified cache, given a hash value. 
+ * + * We delete cache entries that match the hash value, whether positive + * or negative. We don't care whether the invalidation is the result + * of a tuple insertion or a deletion. + * + * We used to try to match positive cache entries by TID, but that is + * unsafe after a VACUUM FULL on a system catalog: an inval event could + * be queued before VACUUM FULL, and then processed afterwards, when the + * target tuple that has to be invalidated has a different TID than it + * did when the event was created. So now we just compare hash values and + * accept the small risk of unnecessary invalidations due to false matches. + * + * This routine is only quasi-public: it should only be used by inval.c. + */ +void +CatCacheInvalidate(CatCache *cache, uint32 hashValue) +{ + Index hashIndex; + dlist_mutable_iter iter; + + CACHE_elog(DEBUG2, "CatCacheInvalidate: called"); + + /* + * We don't bother to check whether the cache has finished initialization + * yet; if not, there will be no entries in it so no problem. + */ + + /* + * Invalidate *all* CatCLists in this cache; it's too hard to tell which + * searches might still be correct, so just zap 'em all. 
+ */ + dlist_foreach_modify(iter, &cache->cc_lists) + { + CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + + if (cl->refcount > 0) + cl->dead = true; + else + CatCacheRemoveCList(cache, cl); + } + + /* + * inspect the proper hash bucket for tuple matches + */ + hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); + dlist_foreach_modify(iter, &cache->cc_bucket[hashIndex]) + { + CatCTup *ct = dlist_container(CatCTup, cache_elem, iter.cur); + + if (hashValue == ct->hash_value) + { + if (ct->refcount > 0 || + (ct->c_list && ct->c_list->refcount > 0)) + { + ct->dead = true; + /* list, if any, was marked dead above */ + Assert(ct->c_list == NULL || ct->c_list->dead); + } + else + CatCacheRemoveCTup(cache, ct); + CACHE_elog(DEBUG2, "CatCacheInvalidate: invalidated"); +#ifdef CATCACHE_STATS + cache->cc_invals++; +#endif + /* could be multiple matches, so keep looking! */ + } + } +} + +/* ---------------------------------------------------------------- + * public functions + * ---------------------------------------------------------------- + */ + + +/* + * Standard routine for creating cache context if it doesn't exist yet + * + * There are a lot of places (probably far more than necessary) that check + * whether CacheMemoryContext exists yet and want to create it if not. + * We centralize knowledge of exactly how to create it here. + */ +void +CreateCacheMemoryContext(void) +{ + /* + * Purely for paranoia, check that context doesn't exist; caller probably + * did so already. + */ + if (!CacheMemoryContext) + CacheMemoryContext = AllocSetContextCreate(TopMemoryContext, + "CacheMemoryContext", + ALLOCSET_DEFAULT_SIZES); +} + + +/* + * ResetCatalogCache + * + * Reset one catalog cache to empty. + * + * This is not very efficient if the target cache is nearly empty. + * However, it shouldn't need to be efficient; we don't invoke it often. 
+ */ +static void +ResetCatalogCache(CatCache *cache) +{ + dlist_mutable_iter iter; + int i; + + /* Remove each list in this cache, or at least mark it dead */ + dlist_foreach_modify(iter, &cache->cc_lists) + { + CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + + if (cl->refcount > 0) + cl->dead = true; + else + CatCacheRemoveCList(cache, cl); + } + + /* Remove each tuple in this cache, or at least mark it dead */ + for (i = 0; i < cache->cc_nbuckets; i++) + { + dlist_head *bucket = &cache->cc_bucket[i]; + + dlist_foreach_modify(iter, bucket) + { + CatCTup *ct = dlist_container(CatCTup, cache_elem, iter.cur); + + if (ct->refcount > 0 || + (ct->c_list && ct->c_list->refcount > 0)) + { + ct->dead = true; + /* list, if any, was marked dead above */ + Assert(ct->c_list == NULL || ct->c_list->dead); + } + else + CatCacheRemoveCTup(cache, ct); +#ifdef CATCACHE_STATS + cache->cc_invals++; +#endif + } + } +} + +/* + * ResetCatalogCaches + * + * Reset all caches when a shared cache inval event forces it + */ +void +ResetCatalogCaches(void) +{ + slist_iter iter; + + CACHE_elog(DEBUG2, "ResetCatalogCaches called"); + + slist_foreach(iter, &CacheHdr->ch_caches) + { + CatCache *cache = slist_container(CatCache, cc_next, iter.cur); + + ResetCatalogCache(cache); + } + + CACHE_elog(DEBUG2, "end of ResetCatalogCaches call"); +} + +/* + * CatalogCacheFlushCatalog + * + * Flush all catcache entries that came from the specified system catalog. + * This is needed after VACUUM FULL/CLUSTER on the catalog, since the + * tuples very likely now have different TIDs than before. (At one point + * we also tried to force re-execution of CatalogCacheInitializeCache for + * the cache(s) on that catalog. This is a bad idea since it leads to all + * kinds of trouble if a cache flush occurs while loading cache entries. + * We now avoid the need to do it by copying cc_tupdesc out of the relcache, + * rather than relying on the relcache to keep a tupdesc for us. 
Of course + * this assumes the tupdesc of a cachable system table will not change...) + */ +void +CatalogCacheFlushCatalog(Oid catId) +{ + slist_iter iter; + + CACHE_elog(DEBUG2, "CatalogCacheFlushCatalog called for %u", catId); + + slist_foreach(iter, &CacheHdr->ch_caches) + { + CatCache *cache = slist_container(CatCache, cc_next, iter.cur); + + /* Does this cache store tuples of the target catalog? */ + if (cache->cc_reloid == catId) + { + /* Yes, so flush all its contents */ + ResetCatalogCache(cache); + + /* Tell inval.c to call syscache callbacks for this cache */ + CallSyscacheCallbacks(cache->id, 0); + } + } + + CACHE_elog(DEBUG2, "end of CatalogCacheFlushCatalog call"); +} + +/* + * InitCatCache + * + * This allocates and initializes a cache for a system catalog relation. + * Actually, the cache is only partially initialized to avoid opening the + * relation. The relation will be opened and the rest of the cache + * structure initialized on the first access. + */ +#ifdef CACHEDEBUG +#define InitCatCache_DEBUG2 \ +do { \ + elog(DEBUG2, "InitCatCache: rel=%u ind=%u id=%d nkeys=%d size=%d", \ + cp->cc_reloid, cp->cc_indexoid, cp->id, \ + cp->cc_nkeys, cp->cc_nbuckets); \ +} while(0) +#else +#define InitCatCache_DEBUG2 +#endif + +CatCache * +InitCatCache(int id, + Oid reloid, + Oid indexoid, + int nkeys, + const int *key, + int nbuckets) +{ + CatCache *cp; + MemoryContext oldcxt; + size_t sz; + int i; + + /* + * nbuckets is the initial number of hash buckets to use in this catcache. + * It will be enlarged later if it becomes too full. + * + * nbuckets must be a power of two. We check this via Assert rather than + * a full runtime check because the values will be coming from constant + * tables. + * + * If you're confused by the power-of-two check, see comments in + * bitmapset.c for an explanation. 
+ */ + Assert(nbuckets > 0 && (nbuckets & -nbuckets) == nbuckets); + + /* + * first switch to the cache context so our allocations do not vanish at + * the end of a transaction + */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * if first time through, initialize the cache group header + */ + if (CacheHdr == NULL) + { + CacheHdr = (CatCacheHeader *) palloc(sizeof(CatCacheHeader)); + slist_init(&CacheHdr->ch_caches); + CacheHdr->ch_ntup = 0; +#ifdef CATCACHE_STATS + /* set up to dump stats at backend exit */ + on_proc_exit(CatCachePrintStats, 0); +#endif + } + + /* + * Allocate a new cache structure, aligning to a cacheline boundary + * + * Note: we rely on zeroing to initialize all the dlist headers correctly + */ + sz = sizeof(CatCache) + PG_CACHE_LINE_SIZE; + cp = (CatCache *) CACHELINEALIGN(palloc0(sz)); + cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head)); + + /* + * initialize the cache's relation information for the relation + * corresponding to this cache, and initialize some of the new cache's + * other internal fields. But don't open the relation yet. + */ + cp->id = id; + cp->cc_relname = "(not known yet)"; + cp->cc_reloid = reloid; + cp->cc_indexoid = indexoid; + cp->cc_relisshared = false; /* temporary */ + cp->cc_tupdesc = (TupleDesc) NULL; + cp->cc_ntup = 0; + cp->cc_nbuckets = nbuckets; + cp->cc_nkeys = nkeys; + for (i = 0; i < nkeys; ++i) + cp->cc_keyno[i] = key[i]; + + /* + * new cache is initialized as far as we can go for now. print some + * debugging information, if appropriate. + */ + InitCatCache_DEBUG2; + + /* + * add completed cache to top of group header's list + */ + slist_push_head(&CacheHdr->ch_caches, &cp->cc_next); + + /* + * back to the old context before we return... + */ + MemoryContextSwitchTo(oldcxt); + + return cp; +} + +/* + * Enlarge a catcache, doubling the number of buckets. 
+ */ +static void +RehashCatCache(CatCache *cp) +{ + dlist_head *newbucket; + int newnbuckets; + int i; + + elog(DEBUG1, "rehashing catalog cache id %d for %s; %d tups, %d buckets", + cp->id, cp->cc_relname, cp->cc_ntup, cp->cc_nbuckets); + + /* Allocate a new, larger, hash table. */ + newnbuckets = cp->cc_nbuckets * 2; + newbucket = (dlist_head *) MemoryContextAllocZero(CacheMemoryContext, newnbuckets * sizeof(dlist_head)); + + /* Move all entries from old hash table to new. */ + for (i = 0; i < cp->cc_nbuckets; i++) + { + dlist_mutable_iter iter; + + dlist_foreach_modify(iter, &cp->cc_bucket[i]) + { + CatCTup *ct = dlist_container(CatCTup, cache_elem, iter.cur); + int hashIndex = HASH_INDEX(ct->hash_value, newnbuckets); + + dlist_delete(iter.cur); + dlist_push_head(&newbucket[hashIndex], &ct->cache_elem); + } + } + + /* Switch to the new array. */ + pfree(cp->cc_bucket); + cp->cc_nbuckets = newnbuckets; + cp->cc_bucket = newbucket; +} + +/* + * CatalogCacheInitializeCache + * + * This function does final initialization of a catcache: obtain the tuple + * descriptor and set up the hash and equality function links. We assume + * that the relcache entry can be opened at this point! 
+ */ +#ifdef CACHEDEBUG +#define CatalogCacheInitializeCache_DEBUG1 \ + elog(DEBUG2, "CatalogCacheInitializeCache: cache @%p rel=%u", cache, \ + cache->cc_reloid) + +#define CatalogCacheInitializeCache_DEBUG2 \ +do { \ + if (cache->cc_keyno[i] > 0) { \ + elog(DEBUG2, "CatalogCacheInitializeCache: load %d/%d w/%d, %u", \ + i+1, cache->cc_nkeys, cache->cc_keyno[i], \ + TupleDescAttr(tupdesc, cache->cc_keyno[i] - 1)->atttypid); \ + } else { \ + elog(DEBUG2, "CatalogCacheInitializeCache: load %d/%d w/%d", \ + i+1, cache->cc_nkeys, cache->cc_keyno[i]); \ + } \ +} while(0) +#else +#define CatalogCacheInitializeCache_DEBUG1 +#define CatalogCacheInitializeCache_DEBUG2 +#endif + +static void +CatalogCacheInitializeCache(CatCache *cache) +{ + Relation relation; + MemoryContext oldcxt; + TupleDesc tupdesc; + int i; + + CatalogCacheInitializeCache_DEBUG1; + + relation = table_open(cache->cc_reloid, AccessShareLock); + + /* + * switch to the cache context so our allocations do not vanish at the end + * of a transaction + */ + Assert(CacheMemoryContext != NULL); + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * copy the relcache's tuple descriptor to permanent cache storage + */ + tupdesc = CreateTupleDescCopyConstr(RelationGetDescr(relation)); + + /* + * save the relation's name and relisshared flag, too (cc_relname is used + * only for debugging purposes) + */ + cache->cc_relname = pstrdup(RelationGetRelationName(relation)); + cache->cc_relisshared = RelationGetForm(relation)->relisshared; + + /* + * return to the caller's memory context and close the rel + */ + MemoryContextSwitchTo(oldcxt); + + table_close(relation, AccessShareLock); + + CACHE_elog(DEBUG2, "CatalogCacheInitializeCache: %s, %d keys", + cache->cc_relname, cache->cc_nkeys); + + /* + * initialize cache's key information + */ + for (i = 0; i < cache->cc_nkeys; ++i) + { + Oid keytype; + RegProcedure eqfunc; + + CatalogCacheInitializeCache_DEBUG2; + + if (cache->cc_keyno[i] > 0) + { + 
Form_pg_attribute attr = TupleDescAttr(tupdesc, + cache->cc_keyno[i] - 1); + + keytype = attr->atttypid; + /* cache key columns should always be NOT NULL */ + Assert(attr->attnotnull); + } + else + { + if (cache->cc_keyno[i] < 0) + elog(FATAL, "sys attributes are not supported in caches"); + keytype = OIDOID; + } + + GetCCHashEqFuncs(keytype, + &cache->cc_hashfunc[i], + &eqfunc, + &cache->cc_fastequal[i]); + + /* + * Do equality-function lookup (we assume this won't need a catalog + * lookup for any supported type) + */ + fmgr_info_cxt(eqfunc, + &cache->cc_skey[i].sk_func, + CacheMemoryContext); + + /* Initialize sk_attno suitably for HeapKeyTest() and heap scans */ + cache->cc_skey[i].sk_attno = cache->cc_keyno[i]; + + /* Fill in sk_strategy as well --- always standard equality */ + cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber; + cache->cc_skey[i].sk_subtype = InvalidOid; + /* If a catcache key requires a collation, it must be C collation */ + cache->cc_skey[i].sk_collation = C_COLLATION_OID; + + CACHE_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p", + cache->cc_relname, i, cache); + } + + /* + * mark this cache fully initialized + */ + cache->cc_tupdesc = tupdesc; +} + +/* + * InitCatCachePhase2 -- external interface for CatalogCacheInitializeCache + * + * One reason to call this routine is to ensure that the relcache has + * created entries for all the catalogs and indexes referenced by catcaches. + * Therefore, provide an option to open the index as well as fixing the + * cache itself. An exception is the indexes on pg_am, which we don't use + * (cf. IndexScanOK). 
+ */ +void +InitCatCachePhase2(CatCache *cache, bool touch_index) +{ + if (cache->cc_tupdesc == NULL) + CatalogCacheInitializeCache(cache); + + if (touch_index && + cache->id != AMOID && + cache->id != AMNAME) + { + Relation idesc; + + /* + * We must lock the underlying catalog before opening the index to + * avoid deadlock, since index_open could possibly result in reading + * this same catalog, and if anyone else is exclusive-locking this + * catalog and index they'll be doing it in that order. + */ + LockRelationOid(cache->cc_reloid, AccessShareLock); + idesc = index_open(cache->cc_indexoid, AccessShareLock); + + /* + * While we've got the index open, let's check that it's unique (and + * not just deferrable-unique, thank you very much). This is just to + * catch thinkos in definitions of new catcaches, so we don't worry + * about the pg_am indexes not getting tested. + */ + Assert(idesc->rd_index->indisunique && + idesc->rd_index->indimmediate); + + index_close(idesc, AccessShareLock); + UnlockRelationOid(cache->cc_reloid, AccessShareLock); + } +} + + +/* + * IndexScanOK + * + * This function checks for tuples that will be fetched by + * IndexSupportInitialize() during relcache initialization for + * certain system indexes that support critical syscaches. + * We can't use an indexscan to fetch these, else we'll get into + * infinite recursion. A plain heap scan will work, however. + * Once we have completed relcache initialization (signaled by + * criticalRelcachesBuilt), we don't have to worry anymore. + * + * Similarly, during backend startup we have to be able to use the + * pg_authid, pg_auth_members and pg_database syscaches for + * authentication even if we don't yet have relcache entries for those + * catalogs' indexes. 
+ */ +static bool +IndexScanOK(CatCache *cache, ScanKey cur_skey) +{ + switch (cache->id) + { + case INDEXRELID: + + /* + * Rather than tracking exactly which indexes have to be loaded + * before we can use indexscans (which changes from time to time), + * just force all pg_index searches to be heap scans until we've + * built the critical relcaches. + */ + if (!criticalRelcachesBuilt) + return false; + break; + + case AMOID: + case AMNAME: + + /* + * Always do heap scans in pg_am, because it's so small there's + * not much point in an indexscan anyway. We *must* do this when + * initially building critical relcache entries, but we might as + * well just always do it. + */ + return false; + + case AUTHNAME: + case AUTHOID: + case AUTHMEMMEMROLE: + case DATABASEOID: + + /* + * Protect authentication lookups occurring before relcache has + * collected entries for shared indexes. + */ + if (!criticalSharedRelcachesBuilt) + return false; + break; + + default: + break; + } + + /* Normal case, allow index scan */ + return true; +} + +/* + * SearchCatCache + * + * This call searches a system cache for a tuple, opening the relation + * if necessary (on the first access to a particular cache). + * + * The result is NULL if not found, or a pointer to a HeapTuple in + * the cache. The caller must not modify the tuple, and must call + * ReleaseCatCache() when done with it. + * + * The search key values should be expressed as Datums of the key columns' + * datatype(s). (Pass zeroes for any unused parameters.) As a special + * exception, the passed-in key for a NAME column can be just a C string; + * the caller need not go to the trouble of converting it to a fully + * null-padded NAME. + */ +HeapTuple +SearchCatCache(CatCache *cache, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + return SearchCatCacheInternal(cache, cache->cc_nkeys, v1, v2, v3, v4); +} + + +/* + * SearchCatCacheN() are SearchCatCache() versions for a specific number of + * arguments. 
The compiler can inline the body and unroll loops, making them a + * bit faster than SearchCatCache(). + */ + +HeapTuple +SearchCatCache1(CatCache *cache, + Datum v1) +{ + return SearchCatCacheInternal(cache, 1, v1, 0, 0, 0); +} + + +HeapTuple +SearchCatCache2(CatCache *cache, + Datum v1, Datum v2) +{ + return SearchCatCacheInternal(cache, 2, v1, v2, 0, 0); +} + + +HeapTuple +SearchCatCache3(CatCache *cache, + Datum v1, Datum v2, Datum v3) +{ + return SearchCatCacheInternal(cache, 3, v1, v2, v3, 0); +} + + +HeapTuple +SearchCatCache4(CatCache *cache, + Datum v1, Datum v2, Datum v3, Datum v4) +{ + return SearchCatCacheInternal(cache, 4, v1, v2, v3, v4); +} + +/* + * Work-horse for SearchCatCache/SearchCatCacheN. + */ +static inline HeapTuple +SearchCatCacheInternal(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + Datum arguments[CATCACHE_MAXKEYS]; + uint32 hashValue; + Index hashIndex; + dlist_iter iter; + dlist_head *bucket; + CatCTup *ct; + + /* Make sure we're in an xact, even if this ends up being a cache hit */ + Assert(IsTransactionState()); + + Assert(cache->cc_nkeys == nkeys); + + /* + * one-time startup overhead for each cache + */ + if (unlikely(cache->cc_tupdesc == NULL)) + CatalogCacheInitializeCache(cache); + +#ifdef CATCACHE_STATS + cache->cc_searches++; +#endif + + /* Initialize local parameter array */ + arguments[0] = v1; + arguments[1] = v2; + arguments[2] = v3; + arguments[3] = v4; + + /* + * find the hash bucket in which to look for the tuple + */ + hashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); + hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); + + /* + * scan the hash bucket until we find a match or exhaust our tuples + * + * Note: it's okay to use dlist_foreach here, even though we modify the + * dlist within the loop, because we don't continue the loop afterwards. 
+ */ + bucket = &cache->cc_bucket[hashIndex]; + dlist_foreach(iter, bucket) + { + ct = dlist_container(CatCTup, cache_elem, iter.cur); + + if (ct->dead) + continue; /* ignore dead entries */ + + if (ct->hash_value != hashValue) + continue; /* quickly skip entry if wrong hash val */ + + if (!CatalogCacheCompareTuple(cache, nkeys, ct->keys, arguments)) + continue; + + /* + * We found a match in the cache. Move it to the front of the list + * for its hashbucket, in order to speed subsequent searches. (The + * most frequently accessed elements in any hashbucket will tend to be + * near the front of the hashbucket's list.) + */ + dlist_move_head(bucket, &ct->cache_elem); + + /* + * If it's a positive entry, bump its refcount and return it. If it's + * negative, we can report failure to the caller. + */ + if (!ct->negative) + { + ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner); + ct->refcount++; + ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple); + + CACHE_elog(DEBUG2, "SearchCatCache(%s): found in bucket %d", + cache->cc_relname, hashIndex); + +#ifdef CATCACHE_STATS + cache->cc_hits++; +#endif + + return &ct->tuple; + } + else + { + CACHE_elog(DEBUG2, "SearchCatCache(%s): found neg entry in bucket %d", + cache->cc_relname, hashIndex); + +#ifdef CATCACHE_STATS + cache->cc_neg_hits++; +#endif + + return NULL; + } + } + + return SearchCatCacheMiss(cache, nkeys, hashValue, hashIndex, v1, v2, v3, v4); +} + +/* + * Search the actual catalogs, rather than the cache. + * + * This is kept separate from SearchCatCacheInternal() to keep the fast-path + * as small as possible. To avoid that effort being undone by a helpful + * compiler, try to explicitly forbid inlining. 
+ */ +static pg_noinline HeapTuple +SearchCatCacheMiss(CatCache *cache, + int nkeys, + uint32 hashValue, + Index hashIndex, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + Relation relation; + SysScanDesc scandesc; + HeapTuple ntp; + CatCTup *ct; + Datum arguments[CATCACHE_MAXKEYS]; + + /* Initialize local parameter array */ + arguments[0] = v1; + arguments[1] = v2; + arguments[2] = v3; + arguments[3] = v4; + + /* + * Ok, need to make a lookup in the relation, copy the scankey and fill + * out any per-call fields. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * nkeys); + cur_skey[0].sk_argument = v1; + cur_skey[1].sk_argument = v2; + cur_skey[2].sk_argument = v3; + cur_skey[3].sk_argument = v4; + + /* + * Tuple was not found in cache, so we have to try to retrieve it directly + * from the relation. If found, we will add it to the cache; if not + * found, we will add a negative cache entry instead. + * + * NOTE: it is possible for recursive cache lookups to occur while reading + * the relation --- for example, due to shared-cache-inval messages being + * processed during table_open(). This is OK. It's even possible for one + * of those lookups to find and enter the very same tuple we are trying to + * fetch here. If that happens, we will enter a second copy of the tuple + * into the cache. The first copy will never be referenced again, and + * will eventually age out of the cache, so there's no functional problem. + * This case is rare enough that it's not worth expending extra cycles to + * detect. 
+ */ + relation = table_open(cache->cc_reloid, AccessShareLock); + + scandesc = systable_beginscan(relation, + cache->cc_indexoid, + IndexScanOK(cache, cur_skey), + NULL, + nkeys, + cur_skey); + + ct = NULL; + + while (HeapTupleIsValid(ntp = systable_getnext(scandesc))) + { + ct = CatalogCacheCreateEntry(cache, ntp, arguments, + hashValue, hashIndex, + false); + /* immediately set the refcount to 1 */ + ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner); + ct->refcount++; + ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple); + break; /* assume only one match */ + } + + systable_endscan(scandesc); + + table_close(relation, AccessShareLock); + + /* + * If tuple was not found, we need to build a negative cache entry + * containing a fake tuple. The fake tuple has the correct key columns, + * but nulls everywhere else. + * + * In bootstrap mode, we don't build negative entries, because the cache + * invalidation mechanism isn't alive and can't clear them if the tuple + * gets created later. (Bootstrap doesn't do UPDATEs, so it doesn't need + * cache inval for that.) + */ + if (ct == NULL) + { + if (IsBootstrapProcessingMode()) + return NULL; + + ct = CatalogCacheCreateEntry(cache, NULL, arguments, + hashValue, hashIndex, + true); + + CACHE_elog(DEBUG2, "SearchCatCache(%s): Contains %d/%d tuples", + cache->cc_relname, cache->cc_ntup, CacheHdr->ch_ntup); + CACHE_elog(DEBUG2, "SearchCatCache(%s): put neg entry in bucket %d", + cache->cc_relname, hashIndex); + + /* + * We are not returning the negative entry to the caller, so leave its + * refcount zero. 
+ */ + + return NULL; + } + + CACHE_elog(DEBUG2, "SearchCatCache(%s): Contains %d/%d tuples", + cache->cc_relname, cache->cc_ntup, CacheHdr->ch_ntup); + CACHE_elog(DEBUG2, "SearchCatCache(%s): put in bucket %d", + cache->cc_relname, hashIndex); + +#ifdef CATCACHE_STATS + cache->cc_newloads++; +#endif + + return &ct->tuple; +} + +/* + * ReleaseCatCache + * + * Decrement the reference count of a catcache entry (releasing the + * hold grabbed by a successful SearchCatCache). + * + * NOTE: if compiled with -DCATCACHE_FORCE_RELEASE then catcache entries + * will be freed as soon as their refcount goes to zero. In combination + * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test + * to catch references to already-released catcache entries. + */ +void +ReleaseCatCache(HeapTuple tuple) +{ + CatCTup *ct = (CatCTup *) (((char *) tuple) - + offsetof(CatCTup, tuple)); + + /* Safety checks to ensure we were handed a cache entry */ + Assert(ct->ct_magic == CT_MAGIC); + Assert(ct->refcount > 0); + + ct->refcount--; + ResourceOwnerForgetCatCacheRef(CurrentResourceOwner, &ct->tuple); + + if ( +#ifndef CATCACHE_FORCE_RELEASE + ct->dead && +#endif + ct->refcount == 0 && + (ct->c_list == NULL || ct->c_list->refcount == 0)) + CatCacheRemoveCTup(ct->my_cache, ct); +} + + +/* + * GetCatCacheHashValue + * + * Compute the hash value for a given set of search keys. + * + * The reason for exposing this as part of the API is that the hash value is + * exposed in cache invalidation operations, so there are places outside the + * catcache code that need to be able to compute the hash values. 
+ */ +uint32 +GetCatCacheHashValue(CatCache *cache, + Datum v1, + Datum v2, + Datum v3, + Datum v4) +{ + /* + * one-time startup overhead for each cache + */ + if (cache->cc_tupdesc == NULL) + CatalogCacheInitializeCache(cache); + + /* + * calculate the hash value + */ + return CatalogCacheComputeHashValue(cache, cache->cc_nkeys, v1, v2, v3, v4); +} + + +/* + * SearchCatCacheList + * + * Generate a list of all tuples matching a partial key (that is, + * a key specifying just the first K of the cache's N key columns). + * + * It doesn't make any sense to specify all of the cache's key columns + * here: since the key is unique, there could be at most one match, so + * you ought to use SearchCatCache() instead. Hence this function takes + * one fewer Datum argument than SearchCatCache() does. + * + * The caller must not modify the list object or the pointed-to tuples, + * and must call ReleaseCatCacheList() when done with the list. + */ +CatCList * +SearchCatCacheList(CatCache *cache, + int nkeys, + Datum v1, + Datum v2, + Datum v3) +{ + Datum v4 = 0; /* dummy last-column value */ + Datum arguments[CATCACHE_MAXKEYS]; + uint32 lHashValue; + dlist_iter iter; + CatCList *cl; + CatCTup *ct; + List *volatile ctlist; + ListCell *ctlist_item; + int nmembers; + bool ordered; + HeapTuple ntp; + MemoryContext oldcxt; + int i; + + /* + * one-time startup overhead for each cache + */ + if (cache->cc_tupdesc == NULL) + CatalogCacheInitializeCache(cache); + + Assert(nkeys > 0 && nkeys < cache->cc_nkeys); + +#ifdef CATCACHE_STATS + cache->cc_lsearches++; +#endif + + /* Initialize local parameter array */ + arguments[0] = v1; + arguments[1] = v2; + arguments[2] = v3; + arguments[3] = v4; + + /* + * compute a hash value of the given keys for faster search. We don't + * presently divide the CatCList items into buckets, but this still lets + * us skip non-matching items quickly most of the time. 
+ */ + lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); + + /* + * scan the items until we find a match or exhaust our list + * + * Note: it's okay to use dlist_foreach here, even though we modify the + * dlist within the loop, because we don't continue the loop afterwards. + */ + dlist_foreach(iter, &cache->cc_lists) + { + cl = dlist_container(CatCList, cache_elem, iter.cur); + + if (cl->dead) + continue; /* ignore dead entries */ + + if (cl->hash_value != lHashValue) + continue; /* quickly skip entry if wrong hash val */ + + /* + * see if the cached list matches our key. + */ + if (cl->nkeys != nkeys) + continue; + + if (!CatalogCacheCompareTuple(cache, nkeys, cl->keys, arguments)) + continue; + + /* + * We found a matching list. Move the list to the front of the + * cache's list-of-lists, to speed subsequent searches. (We do not + * move the members to the fronts of their hashbucket lists, however, + * since there's no point in that unless they are searched for + * individually.) + */ + dlist_move_head(&cache->cc_lists, &cl->cache_elem); + + /* Bump the list's refcount and return it */ + ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner); + cl->refcount++; + ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl); + + CACHE_elog(DEBUG2, "SearchCatCacheList(%s): found list", + cache->cc_relname); + +#ifdef CATCACHE_STATS + cache->cc_lhits++; +#endif + + return cl; + } + + /* + * List was not found in cache, so we have to build it by reading the + * relation. For each matching tuple found in the relation, use an + * existing cache entry if possible, else build a new one. + * + * We have to bump the member refcounts temporarily to ensure they won't + * get dropped from the cache while loading other members. We use a PG_TRY + * block to ensure we can undo those refcounts if we get an error before + * we finish constructing the CatCList. 
+ */ + ResourceOwnerEnlargeCatCacheListRefs(CurrentResourceOwner); + + ctlist = NIL; + + PG_TRY(); + { + ScanKeyData cur_skey[CATCACHE_MAXKEYS]; + Relation relation; + SysScanDesc scandesc; + + /* + * Ok, need to make a lookup in the relation, copy the scankey and + * fill out any per-call fields. + */ + memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * cache->cc_nkeys); + cur_skey[0].sk_argument = v1; + cur_skey[1].sk_argument = v2; + cur_skey[2].sk_argument = v3; + cur_skey[3].sk_argument = v4; + + relation = table_open(cache->cc_reloid, AccessShareLock); + + scandesc = systable_beginscan(relation, + cache->cc_indexoid, + IndexScanOK(cache, cur_skey), + NULL, + nkeys, + cur_skey); + + /* The list will be ordered iff we are doing an index scan */ + ordered = (scandesc->irel != NULL); + + while (HeapTupleIsValid(ntp = systable_getnext(scandesc))) + { + uint32 hashValue; + Index hashIndex; + bool found = false; + dlist_head *bucket; + + /* + * See if there's an entry for this tuple already. 
+ */ + ct = NULL; + hashValue = CatalogCacheComputeTupleHashValue(cache, cache->cc_nkeys, ntp); + hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets); + + bucket = &cache->cc_bucket[hashIndex]; + dlist_foreach(iter, bucket) + { + ct = dlist_container(CatCTup, cache_elem, iter.cur); + + if (ct->dead || ct->negative) + continue; /* ignore dead and negative entries */ + + if (ct->hash_value != hashValue) + continue; /* quickly skip entry if wrong hash val */ + + if (!ItemPointerEquals(&(ct->tuple.t_self), &(ntp->t_self))) + continue; /* not same tuple */ + + /* + * Found a match, but can't use it if it belongs to another + * list already + */ + if (ct->c_list) + continue; + + found = true; + break; /* A-OK */ + } + + if (!found) + { + /* We didn't find a usable entry, so make a new one */ + ct = CatalogCacheCreateEntry(cache, ntp, arguments, + hashValue, hashIndex, + false); + } + + /* Careful here: add entry to ctlist, then bump its refcount */ + /* This way leaves state correct if lappend runs out of memory */ + ctlist = lappend(ctlist, ct); + ct->refcount++; + } + + systable_endscan(scandesc); + + table_close(relation, AccessShareLock); + + /* Now we can build the CatCList entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + nmembers = list_length(ctlist); + cl = (CatCList *) + palloc(offsetof(CatCList, members) + nmembers * sizeof(CatCTup *)); + + /* Extract key values */ + CatCacheCopyKeys(cache->cc_tupdesc, nkeys, cache->cc_keyno, + arguments, cl->keys); + MemoryContextSwitchTo(oldcxt); + + /* + * We are now past the last thing that could trigger an elog before we + * have finished building the CatCList and remembering it in the + * resource owner. So it's OK to fall out of the PG_TRY, and indeed + * we'd better do so before we start marking the members as belonging + * to the list. 
+ */ + + } + PG_CATCH(); + { + foreach(ctlist_item, ctlist) + { + ct = (CatCTup *) lfirst(ctlist_item); + Assert(ct->c_list == NULL); + Assert(ct->refcount > 0); + ct->refcount--; + if ( +#ifndef CATCACHE_FORCE_RELEASE + ct->dead && +#endif + ct->refcount == 0 && + (ct->c_list == NULL || ct->c_list->refcount == 0)) + CatCacheRemoveCTup(cache, ct); + } + + PG_RE_THROW(); + } + PG_END_TRY(); + + cl->cl_magic = CL_MAGIC; + cl->my_cache = cache; + cl->refcount = 0; /* for the moment */ + cl->dead = false; + cl->ordered = ordered; + cl->nkeys = nkeys; + cl->hash_value = lHashValue; + cl->n_members = nmembers; + + i = 0; + foreach(ctlist_item, ctlist) + { + cl->members[i++] = ct = (CatCTup *) lfirst(ctlist_item); + Assert(ct->c_list == NULL); + ct->c_list = cl; + /* release the temporary refcount on the member */ + Assert(ct->refcount > 0); + ct->refcount--; + /* mark list dead if any members already dead */ + if (ct->dead) + cl->dead = true; + } + Assert(i == nmembers); + + dlist_push_head(&cache->cc_lists, &cl->cache_elem); + + /* Finally, bump the list's refcount and return it */ + cl->refcount++; + ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl); + + CACHE_elog(DEBUG2, "SearchCatCacheList(%s): made list of %d members", + cache->cc_relname, nmembers); + + return cl; +} + +/* + * ReleaseCatCacheList + * + * Decrement the reference count of a catcache list. + */ +void +ReleaseCatCacheList(CatCList *list) +{ + /* Safety checks to ensure we were handed a cache entry */ + Assert(list->cl_magic == CL_MAGIC); + Assert(list->refcount > 0); + list->refcount--; + ResourceOwnerForgetCatCacheListRef(CurrentResourceOwner, list); + + if ( +#ifndef CATCACHE_FORCE_RELEASE + list->dead && +#endif + list->refcount == 0) + CatCacheRemoveCList(list->my_cache, list); +} + + +/* + * CatalogCacheCreateEntry + * Create a new CatCTup entry, copying the given HeapTuple and other + * supplied data into it. The new entry initially has refcount 0. 
+ */ +static CatCTup * +CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, + uint32 hashValue, Index hashIndex, + bool negative) +{ + CatCTup *ct; + HeapTuple dtp; + MemoryContext oldcxt; + + /* negative entries have no tuple associated */ + if (ntp) + { + int i; + + Assert(!negative); + + /* + * If there are any out-of-line toasted fields in the tuple, expand + * them in-line. This saves cycles during later use of the catcache + * entry, and also protects us against the possibility of the toast + * tuples being freed before we attempt to fetch them, in case of + * something using a slightly stale catcache entry. + */ + if (HeapTupleHasExternal(ntp)) + dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc); + else + dtp = ntp; + + /* Allocate memory for CatCTup and the cached tuple in one go */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + ct = (CatCTup *) palloc(sizeof(CatCTup) + + MAXIMUM_ALIGNOF + dtp->t_len); + ct->tuple.t_len = dtp->t_len; + ct->tuple.t_self = dtp->t_self; + ct->tuple.t_tableOid = dtp->t_tableOid; + ct->tuple.t_data = (HeapTupleHeader) + MAXALIGN(((char *) ct) + sizeof(CatCTup)); + /* copy tuple contents */ + memcpy((char *) ct->tuple.t_data, + (const char *) dtp->t_data, + dtp->t_len); + MemoryContextSwitchTo(oldcxt); + + if (dtp != ntp) + heap_freetuple(dtp); + + /* extract keys - they'll point into the tuple if not by-value */ + for (i = 0; i < cache->cc_nkeys; i++) + { + Datum atp; + bool isnull; + + atp = heap_getattr(&ct->tuple, + cache->cc_keyno[i], + cache->cc_tupdesc, + &isnull); + Assert(!isnull); + ct->keys[i] = atp; + } + } + else + { + Assert(negative); + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + ct = (CatCTup *) palloc(sizeof(CatCTup)); + + /* + * Store keys - they'll point into separately allocated memory if not + * by-value. 
+ */ + CatCacheCopyKeys(cache->cc_tupdesc, cache->cc_nkeys, cache->cc_keyno, + arguments, ct->keys); + MemoryContextSwitchTo(oldcxt); + } + + /* + * Finish initializing the CatCTup header, and add it to the cache's + * linked list and counts. + */ + ct->ct_magic = CT_MAGIC; + ct->my_cache = cache; + ct->c_list = NULL; + ct->refcount = 0; /* for the moment */ + ct->dead = false; + ct->negative = negative; + ct->hash_value = hashValue; + + dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem); + + cache->cc_ntup++; + CacheHdr->ch_ntup++; + + /* + * If the hash table has become too full, enlarge the buckets array. Quite + * arbitrarily, we enlarge when fill factor > 2. + */ + if (cache->cc_ntup > cache->cc_nbuckets * 2) + RehashCatCache(cache); + + return ct; +} + +/* + * Helper routine that frees keys stored in the keys array. + */ +static void +CatCacheFreeKeys(TupleDesc tupdesc, int nkeys, int *attnos, Datum *keys) +{ + int i; + + for (i = 0; i < nkeys; i++) + { + int attnum = attnos[i]; + Form_pg_attribute att; + + /* system attribute are not supported in caches */ + Assert(attnum > 0); + + att = TupleDescAttr(tupdesc, attnum - 1); + + if (!att->attbyval) + pfree(DatumGetPointer(keys[i])); + } +} + +/* + * Helper routine that copies the keys in the srckeys array into the dstkeys + * one, guaranteeing that the datums are fully allocated in the current memory + * context. + */ +static void +CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos, + Datum *srckeys, Datum *dstkeys) +{ + int i; + + /* + * XXX: memory and lookup performance could possibly be improved by + * storing all keys in one allocation. + */ + + for (i = 0; i < nkeys; i++) + { + int attnum = attnos[i]; + Form_pg_attribute att = TupleDescAttr(tupdesc, attnum - 1); + Datum src = srckeys[i]; + NameData srcname; + + /* + * Must be careful in case the caller passed a C string where a NAME + * is wanted: convert the given argument to a correctly padded NAME. 
+ * Otherwise the memcpy() done by datumCopy() could fall off the end + * of memory. + */ + if (att->atttypid == NAMEOID) + { + namestrcpy(&srcname, DatumGetCString(src)); + src = NameGetDatum(&srcname); + } + + dstkeys[i] = datumCopy(src, + att->attbyval, + att->attlen); + } + +} + +/* + * PrepareToInvalidateCacheTuple() + * + * This is part of a rather subtle chain of events, so pay attention: + * + * When a tuple is inserted or deleted, it cannot be flushed from the + * catcaches immediately, for reasons explained at the top of cache/inval.c. + * Instead we have to add entry(s) for the tuple to a list of pending tuple + * invalidations that will be done at the end of the command or transaction. + * + * The lists of tuples that need to be flushed are kept by inval.c. This + * routine is a helper routine for inval.c. Given a tuple belonging to + * the specified relation, find all catcaches it could be in, compute the + * correct hash value for each such catcache, and call the specified + * function to record the cache id and hash value in inval.c's lists. + * SysCacheInvalidate will be called later, if appropriate, + * using the recorded information. + * + * For an insert or delete, tuple is the target tuple and newtuple is NULL. + * For an update, we are called just once, with tuple being the old tuple + * version and newtuple the new version. We should make two list entries + * if the tuple's hash value changed, but only one if it didn't. + * + * Note that it is irrelevant whether the given tuple is actually loaded + * into the catcache at the moment. Even if it's not there now, it might + * be by the end of the command, or there might be a matching negative entry + * to flush --- or other backends' caches might have such entries --- so + * we have to make list entries to flush it later. + * + * Also note that it's not an error if there are no catcaches for the + * specified relation. 
inval.c doesn't know exactly which rels have + * catcaches --- it will call this routine for any tuple that's in a + * system relation. + */ +void +PrepareToInvalidateCacheTuple(Relation relation, + HeapTuple tuple, + HeapTuple newtuple, + void (*function) (int, uint32, Oid)) +{ + slist_iter iter; + Oid reloid; + + CACHE_elog(DEBUG2, "PrepareToInvalidateCacheTuple: called"); + + /* + * sanity checks + */ + Assert(RelationIsValid(relation)); + Assert(HeapTupleIsValid(tuple)); + Assert(PointerIsValid(function)); + Assert(CacheHdr != NULL); + + reloid = RelationGetRelid(relation); + + /* ---------------- + * for each cache + * if the cache contains tuples from the specified relation + * compute the tuple's hash value(s) in this cache, + * and call the passed function to register the information. + * ---------------- + */ + + slist_foreach(iter, &CacheHdr->ch_caches) + { + CatCache *ccp = slist_container(CatCache, cc_next, iter.cur); + uint32 hashvalue; + Oid dbid; + + if (ccp->cc_reloid != reloid) + continue; + + /* Just in case cache hasn't finished initialization yet... */ + if (ccp->cc_tupdesc == NULL) + CatalogCacheInitializeCache(ccp); + + hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple); + dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId; + + (*function) (ccp->id, hashvalue, dbid); + + if (newtuple) + { + uint32 newhashvalue; + + newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, newtuple); + + if (newhashvalue != hashvalue) + (*function) (ccp->id, newhashvalue, dbid); + } + } +} + + +/* + * Subroutines for warning about reference leaks. These are exported so + * that resowner.c can call them. 
+ */ +void +PrintCatCacheLeakWarning(HeapTuple tuple) +{ + CatCTup *ct = (CatCTup *) (((char *) tuple) - + offsetof(CatCTup, tuple)); + + /* Safety check to ensure we were handed a cache entry */ + Assert(ct->ct_magic == CT_MAGIC); + + elog(WARNING, "cache reference leak: cache %s (%d), tuple %u/%u has count %d", + ct->my_cache->cc_relname, ct->my_cache->id, + ItemPointerGetBlockNumber(&(tuple->t_self)), + ItemPointerGetOffsetNumber(&(tuple->t_self)), + ct->refcount); +} + +void +PrintCatCacheListLeakWarning(CatCList *list) +{ + elog(WARNING, "cache reference leak: cache %s (%d), list %p has count %d", + list->my_cache->cc_relname, list->my_cache->id, + list, list->refcount); +} diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c new file mode 100644 index 0000000..460b720 --- /dev/null +++ b/src/backend/utils/cache/evtcache.c @@ -0,0 +1,270 @@ +/*------------------------------------------------------------------------- + * + * evtcache.c + * Special-purpose cache for event trigger data. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/evtcache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/relation.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_type.h" +#include "commands/trigger.h" +#include "tcop/cmdtag.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/evtcache.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + +typedef enum +{ + ETCS_NEEDS_REBUILD, + ETCS_REBUILD_STARTED, + ETCS_VALID +} EventTriggerCacheStateType; + +typedef struct +{ + EventTriggerEvent event; + List *triggerlist; +} EventTriggerCacheEntry; + +static HTAB *EventTriggerCache; +static MemoryContext EventTriggerCacheContext; +static EventTriggerCacheStateType EventTriggerCacheState = ETCS_NEEDS_REBUILD; + +static void BuildEventTriggerCache(void); +static void InvalidateEventCacheCallback(Datum arg, + int cacheid, uint32 hashvalue); +static Bitmapset *DecodeTextArrayToBitmapset(Datum array); + +/* + * Search the event cache by trigger event. + * + * Note that the caller had better copy any data it wants to keep around + * across any operation that might touch a system catalog into some other + * memory context, since a cache reset could blow the return value away. + */ +List * +EventCacheLookup(EventTriggerEvent event) +{ + EventTriggerCacheEntry *entry; + + if (EventTriggerCacheState != ETCS_VALID) + BuildEventTriggerCache(); + entry = hash_search(EventTriggerCache, &event, HASH_FIND, NULL); + return entry != NULL ? entry->triggerlist : NIL; +} + +/* + * Rebuild the event trigger cache. 
+ */ +static void +BuildEventTriggerCache(void) +{ + HASHCTL ctl; + HTAB *cache; + MemoryContext oldcontext; + Relation rel; + Relation irel; + SysScanDesc scan; + + if (EventTriggerCacheContext != NULL) + { + /* + * Free up any memory already allocated in EventTriggerCacheContext. + * This can happen either because a previous rebuild failed, or + * because an invalidation happened before the rebuild was complete. + */ + MemoryContextResetAndDeleteChildren(EventTriggerCacheContext); + } + else + { + /* + * This is our first time attempting to build the cache, so we need to + * set up the memory context and register a syscache callback to + * capture future invalidation events. + */ + if (CacheMemoryContext == NULL) + CreateCacheMemoryContext(); + EventTriggerCacheContext = + AllocSetContextCreate(CacheMemoryContext, + "EventTriggerCache", + ALLOCSET_DEFAULT_SIZES); + CacheRegisterSyscacheCallback(EVENTTRIGGEROID, + InvalidateEventCacheCallback, + (Datum) 0); + } + + /* Switch to correct memory context. */ + oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext); + + /* Prevent the memory context from being nuked while we're rebuilding. */ + EventTriggerCacheState = ETCS_REBUILD_STARTED; + + /* Create new hash table. */ + ctl.keysize = sizeof(EventTriggerEvent); + ctl.entrysize = sizeof(EventTriggerCacheEntry); + ctl.hcxt = EventTriggerCacheContext; + cache = hash_create("Event Trigger Cache", 32, &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* + * Prepare to scan pg_event_trigger in name order. + */ + rel = relation_open(EventTriggerRelationId, AccessShareLock); + irel = index_open(EventTriggerNameIndexId, AccessShareLock); + scan = systable_beginscan_ordered(rel, irel, NULL, 0, NULL); + + /* + * Build a cache item for each pg_event_trigger tuple, and append each one + * to the appropriate cache entry. 
+ */ + for (;;) + { + HeapTuple tup; + Form_pg_event_trigger form; + char *evtevent; + EventTriggerEvent event; + EventTriggerCacheItem *item; + Datum evttags; + bool evttags_isnull; + EventTriggerCacheEntry *entry; + bool found; + + /* Get next tuple. */ + tup = systable_getnext_ordered(scan, ForwardScanDirection); + if (!HeapTupleIsValid(tup)) + break; + + /* Skip trigger if disabled. */ + form = (Form_pg_event_trigger) GETSTRUCT(tup); + if (form->evtenabled == TRIGGER_DISABLED) + continue; + + /* Decode event name. */ + evtevent = NameStr(form->evtevent); + if (strcmp(evtevent, "ddl_command_start") == 0) + event = EVT_DDLCommandStart; + else if (strcmp(evtevent, "ddl_command_end") == 0) + event = EVT_DDLCommandEnd; + else if (strcmp(evtevent, "sql_drop") == 0) + event = EVT_SQLDrop; + else if (strcmp(evtevent, "table_rewrite") == 0) + event = EVT_TableRewrite; + else + continue; + + /* Allocate new cache item. */ + item = palloc0(sizeof(EventTriggerCacheItem)); + item->fnoid = form->evtfoid; + item->enabled = form->evtenabled; + + /* Decode and sort tags array. */ + evttags = heap_getattr(tup, Anum_pg_event_trigger_evttags, + RelationGetDescr(rel), &evttags_isnull); + if (!evttags_isnull) + item->tagset = DecodeTextArrayToBitmapset(evttags); + + /* Add to cache entry. */ + entry = hash_search(cache, &event, HASH_ENTER, &found); + if (found) + entry->triggerlist = lappend(entry->triggerlist, item); + else + entry->triggerlist = list_make1(item); + } + + /* Done with pg_event_trigger scan. */ + systable_endscan_ordered(scan); + index_close(irel, AccessShareLock); + relation_close(rel, AccessShareLock); + + /* Restore previous memory context. */ + MemoryContextSwitchTo(oldcontext); + + /* Install new cache. 
*/ + EventTriggerCache = cache; + + /* + * If the cache has been invalidated since we entered this routine, we + * still use and return the cache we just finished constructing, to avoid + * infinite loops, but we leave the cache marked stale so that we'll + * rebuild it again on next access. Otherwise, we mark the cache valid. + */ + if (EventTriggerCacheState == ETCS_REBUILD_STARTED) + EventTriggerCacheState = ETCS_VALID; +} + +/* + * Decode text[] to a Bitmapset of CommandTags. + * + * We could avoid a bit of overhead here if we were willing to duplicate some + * of the logic from deconstruct_array, but it doesn't seem worth the code + * complexity. + */ +static Bitmapset * +DecodeTextArrayToBitmapset(Datum array) +{ + ArrayType *arr = DatumGetArrayTypeP(array); + Datum *elems; + Bitmapset *bms; + int i; + int nelems; + + if (ARR_NDIM(arr) != 1 || ARR_HASNULL(arr) || ARR_ELEMTYPE(arr) != TEXTOID) + elog(ERROR, "expected 1-D text array"); + deconstruct_array(arr, TEXTOID, -1, false, TYPALIGN_INT, + &elems, NULL, &nelems); + + for (bms = NULL, i = 0; i < nelems; ++i) + { + char *str = TextDatumGetCString(elems[i]); + + bms = bms_add_member(bms, GetCommandTagEnum(str)); + pfree(str); + } + + pfree(elems); + + return bms; +} + +/* + * Flush all cache entries when pg_event_trigger is updated. + * + * This should be rare enough that we don't need to be very granular about + * it, so we just blow away everything, which also avoids the possibility of + * memory leaks. + */ +static void +InvalidateEventCacheCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + /* + * If the cache isn't valid, then there might be a rebuild in progress, so + * we can't immediately blow it away. But it's advantageous to do this + * when possible, so as to immediately free memory. + */ + if (EventTriggerCacheState == ETCS_VALID) + { + MemoryContextResetAndDeleteChildren(EventTriggerCacheContext); + EventTriggerCache = NULL; + } + + /* Mark cache for rebuild. 
*/ + EventTriggerCacheState = ETCS_NEEDS_REBUILD; +} diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c new file mode 100644 index 0000000..e281a45 --- /dev/null +++ b/src/backend/utils/cache/inval.c @@ -0,0 +1,1578 @@ +/*------------------------------------------------------------------------- + * + * inval.c + * POSTGRES cache invalidation dispatcher code. + * + * This is subtle stuff, so pay attention: + * + * When a tuple is updated or deleted, our standard visibility rules + * consider that it is *still valid* so long as we are in the same command, + * ie, until the next CommandCounterIncrement() or transaction commit. + * (See access/heap/heapam_visibility.c, and note that system catalogs are + * generally scanned under the most current snapshot available, rather than + * the transaction snapshot.) At the command boundary, the old tuple stops + * being valid and the new version, if any, becomes valid. Therefore, + * we cannot simply flush a tuple from the system caches during heap_update() + * or heap_delete(). The tuple is still good at that point; what's more, + * even if we did flush it, it might be reloaded into the caches by a later + * request in the same command. So the correct behavior is to keep a list + * of outdated (updated/deleted) tuples and then do the required cache + * flushes at the next command boundary. We must also keep track of + * inserted tuples so that we can flush "negative" cache entries that match + * the new tuples; again, that mustn't happen until end of command. + * + * Once we have finished the command, we still need to remember inserted + * tuples (including new versions of updated tuples), so that we can flush + * them from the caches if we abort the transaction. Similarly, we'd better + * be able to flush "negative" cache entries that may have been loaded in + * place of deleted tuples, so we still need the deleted ones too. 
+ * + * If we successfully complete the transaction, we have to broadcast all + * these invalidation events to other backends (via the SI message queue) + * so that they can flush obsolete entries from their caches. Note we have + * to record the transaction commit before sending SI messages, otherwise + * the other backends won't see our updated tuples as good. + * + * When a subtransaction aborts, we can process and discard any events + * it has queued. When a subtransaction commits, we just add its events + * to the pending lists of the parent transaction. + * + * In short, we need to remember until xact end every insert or delete + * of a tuple that might be in the system caches. Updates are treated as + * two events, delete + insert, for simplicity. (If the update doesn't + * change the tuple hash value, catcache.c optimizes this into one event.) + * + * We do not need to register EVERY tuple operation in this way, just those + * on tuples in relations that have associated catcaches. We do, however, + * have to register every operation on every tuple that *could* be in a + * catcache, whether or not it currently is in our cache. Also, if the + * tuple is in a relation that has multiple catcaches, we need to register + * an invalidation message for each such catcache. catcache.c's + * PrepareToInvalidateCacheTuple() routine provides the knowledge of which + * catcaches may need invalidation for a given tuple. + * + * Also, whenever we see an operation on a pg_class, pg_attribute, or + * pg_index tuple, we register a relcache flush operation for the relation + * described by that tuple (as specified in CacheInvalidateHeapTuple()). + * Likewise for pg_constraint tuples for foreign keys on relations. + * + * We keep the relcache flush requests in lists separate from the catcache + * tuple flush requests. 
This allows us to issue all the pending catcache + * flushes before we issue relcache flushes, which saves us from loading + * a catcache tuple during relcache load only to flush it again right away. + * Also, we avoid queuing multiple relcache flush requests for the same + * relation, since a relcache flush is relatively expensive to do. + * (XXX is it worth testing likewise for duplicate catcache flush entries? + * Probably not.) + * + * Many subsystems own higher-level caches that depend on relcache and/or + * catcache, and they register callbacks here to invalidate their caches. + * While building a higher-level cache entry, a backend may receive a + * callback for the being-built entry or one of its dependencies. This + * implies the new higher-level entry would be born stale, and it might + * remain stale for the life of the backend. Many caches do not prevent + * that. They rely on DDL for can't-miss catalog changes taking + * AccessExclusiveLock on suitable objects. (For a change made with less + * locking, backends might never read the change.) The relation cache, + * however, needs to reflect changes from CREATE INDEX CONCURRENTLY no later + * than the beginning of the next transaction. Hence, when a relevant + * invalidation callback arrives during a build, relcache.c reattempts that + * build. Caches with similar needs could do likewise. + * + * If a relcache flush is issued for a system relation that we preload + * from the relcache init file, we must also delete the init file so that + * it will be rebuilt during the next backend restart. The actual work of + * manipulating the init file is in relcache.c, but we keep track of the + * need for it here. + * + * The request lists proper are kept in CurTransactionContext of their + * creating (sub)transaction, since they can be forgotten on abort of that + * transaction but must be kept till top-level commit otherwise. For + * simplicity we keep the controlling list-of-lists in TopTransactionContext. 
+ * + * Currently, inval messages are sent without regard for the possibility + * that the object described by the catalog tuple might be a session-local + * object such as a temporary table. This is because (1) this code has + * no practical way to tell the difference, and (2) it is not certain that + * other backends don't have catalog cache or even relcache entries for + * such tables, anyway; there is nothing that prevents that. It might be + * worth trying to avoid sending such inval traffic in the future, if those + * problems can be overcome cheaply. + * + * When wal_level=logical, write invalidations into WAL at each command end to + * support the decoding of the in-progress transactions. See + * CommandEndInvalidationMessages. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/inval.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/pg_constraint.h" +#include "miscadmin.h" +#include "storage/sinval.h" +#include "storage/smgr.h" +#include "utils/catcache.h" +#include "utils/guc.h" +#include "utils/inval.h" +#include "utils/memdebug.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/relmapper.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* + * To minimize palloc traffic, we keep pending requests in successively- + * larger chunks (a slightly more sophisticated version of an expansible + * array). All request types can be stored as SharedInvalidationMessage + * records. The ordering of requests within a list is never significant. 
+ */ +typedef struct InvalidationChunk +{ + struct InvalidationChunk *next; /* list link */ + int nitems; /* # items currently stored in chunk */ + int maxitems; /* size of allocated array in this chunk */ + SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; +} InvalidationChunk; + +typedef struct InvalidationListHeader +{ + InvalidationChunk *cclist; /* list of chunks holding catcache msgs */ + InvalidationChunk *rclist; /* list of chunks holding relcache msgs */ +} InvalidationListHeader; + +/*---------------- + * Invalidation info is divided into two lists: + * 1) events so far in current command, not yet reflected to caches. + * 2) events in previous commands of current transaction; these have + * been reflected to local caches, and must be either broadcast to + * other backends or rolled back from local cache when we commit + * or abort the transaction. + * Actually, we need two such lists for each level of nested transaction, + * so that we can discard events from an aborted subtransaction. When + * a subtransaction commits, we append its lists to the parent's lists. + * + * The relcache-file-invalidated flag can just be a simple boolean, + * since we only act on it at transaction commit; we don't care which + * command of the transaction set it. + *---------------- + */ + +typedef struct TransInvalidationInfo +{ + /* Back link to parent transaction's info */ + struct TransInvalidationInfo *parent; + + /* Subtransaction nesting depth */ + int my_level; + + /* head of current-command event list */ + InvalidationListHeader CurrentCmdInvalidMsgs; + + /* head of previous-commands event list */ + InvalidationListHeader PriorCmdInvalidMsgs; + + /* init file must be invalidated? 
*/ + bool RelcacheInitFileInval; +} TransInvalidationInfo; + +static TransInvalidationInfo *transInvalInfo = NULL; + +static SharedInvalidationMessage *SharedInvalidMessagesArray; +static int numSharedInvalidMessagesArray; +static int maxSharedInvalidMessagesArray; + +/* GUC storage */ +int debug_discard_caches = 0; + +/* + * Dynamically-registered callback functions. Current implementation + * assumes there won't be enough of these to justify a dynamically resizable + * array; it'd be easy to improve that if needed. + * + * To avoid searching in CallSyscacheCallbacks, all callbacks for a given + * syscache are linked into a list pointed to by syscache_callback_links[id]. + * The link values are syscache_callback_list[] index plus 1, or 0 for none. + */ + +#define MAX_SYSCACHE_CALLBACKS 64 +#define MAX_RELCACHE_CALLBACKS 10 + +static struct SYSCACHECALLBACK +{ + int16 id; /* cache number */ + int16 link; /* next callback index+1 for same cache */ + SyscacheCallbackFunction function; + Datum arg; +} syscache_callback_list[MAX_SYSCACHE_CALLBACKS]; + +static int16 syscache_callback_links[SysCacheSize]; + +static int syscache_callback_count = 0; + +static struct RELCACHECALLBACK +{ + RelcacheCallbackFunction function; + Datum arg; +} relcache_callback_list[MAX_RELCACHE_CALLBACKS]; + +static int relcache_callback_count = 0; + +/* ---------------------------------------------------------------- + * Invalidation list support functions + * + * These three routines encapsulate processing of the "chunked" + * representation of what is logically just a list of messages. + * ---------------------------------------------------------------- + */ + +/* + * AddInvalidationMessage + * Add an invalidation message to a list (of chunks). + * + * Note that we do not pay any great attention to maintaining the original + * ordering of the messages. 
+ */ +static void +AddInvalidationMessage(InvalidationChunk **listHdr, + SharedInvalidationMessage *msg) +{ + InvalidationChunk *chunk = *listHdr; + + if (chunk == NULL) + { + /* First time through; create initial chunk */ +#define FIRSTCHUNKSIZE 32 + chunk = (InvalidationChunk *) + MemoryContextAlloc(CurTransactionContext, + offsetof(InvalidationChunk, msgs) + + FIRSTCHUNKSIZE * sizeof(SharedInvalidationMessage)); + chunk->nitems = 0; + chunk->maxitems = FIRSTCHUNKSIZE; + chunk->next = *listHdr; + *listHdr = chunk; + } + else if (chunk->nitems >= chunk->maxitems) + { + /* Need another chunk; double size of last chunk */ + int chunksize = 2 * chunk->maxitems; + + chunk = (InvalidationChunk *) + MemoryContextAlloc(CurTransactionContext, + offsetof(InvalidationChunk, msgs) + + chunksize * sizeof(SharedInvalidationMessage)); + chunk->nitems = 0; + chunk->maxitems = chunksize; + chunk->next = *listHdr; + *listHdr = chunk; + } + /* Okay, add message to current chunk */ + chunk->msgs[chunk->nitems] = *msg; + chunk->nitems++; +} + +/* + * Append one list of invalidation message chunks to another, resetting + * the source chunk-list pointer to NULL. + */ +static void +AppendInvalidationMessageList(InvalidationChunk **destHdr, + InvalidationChunk **srcHdr) +{ + InvalidationChunk *chunk = *srcHdr; + + if (chunk == NULL) + return; /* nothing to do */ + + while (chunk->next != NULL) + chunk = chunk->next; + + chunk->next = *destHdr; + + *destHdr = *srcHdr; + + *srcHdr = NULL; +} + +/* + * Process a list of invalidation messages. + * + * This is a macro that executes the given code fragment for each message in + * a message chunk list. The fragment should refer to the message as *msg. 
+ */ +#define ProcessMessageList(listHdr, codeFragment) \ + do { \ + InvalidationChunk *_chunk; \ + for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \ + { \ + int _cindex; \ + for (_cindex = 0; _cindex < _chunk->nitems; _cindex++) \ + { \ + SharedInvalidationMessage *msg = &_chunk->msgs[_cindex]; \ + codeFragment; \ + } \ + } \ + } while (0) + +/* + * Process a list of invalidation messages group-wise. + * + * As above, but the code fragment can handle an array of messages. + * The fragment should refer to the messages as msgs[], with n entries. + */ +#define ProcessMessageListMulti(listHdr, codeFragment) \ + do { \ + InvalidationChunk *_chunk; \ + for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \ + { \ + SharedInvalidationMessage *msgs = _chunk->msgs; \ + int n = _chunk->nitems; \ + codeFragment; \ + } \ + } while (0) + + +/* ---------------------------------------------------------------- + * Invalidation set support functions + * + * These routines understand about the division of a logical invalidation + * list into separate physical lists for catcache and relcache entries. + * ---------------------------------------------------------------- + */ + +/* + * Add a catcache inval entry + */ +static void +AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, + int id, uint32 hashValue, Oid dbId) +{ + SharedInvalidationMessage msg; + + Assert(id < CHAR_MAX); + msg.cc.id = (int8) id; + msg.cc.dbId = dbId; + msg.cc.hashValue = hashValue; + + /* + * Define padding bytes in SharedInvalidationMessage structs to be + * defined. Otherwise the sinvaladt.c ringbuffer, which is accessed by + * multiple processes, will cause spurious valgrind warnings about + * undefined memory being used. 
That's because valgrind remembers the + * undefined bytes from the last local process's store, not realizing that + * another process has written since, filling the previously uninitialized + * bytes + */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + AddInvalidationMessage(&hdr->cclist, &msg); +} + +/* + * Add a whole-catalog inval entry + */ +static void +AddCatalogInvalidationMessage(InvalidationListHeader *hdr, + Oid dbId, Oid catId) +{ + SharedInvalidationMessage msg; + + msg.cat.id = SHAREDINVALCATALOG_ID; + msg.cat.dbId = dbId; + msg.cat.catId = catId; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + AddInvalidationMessage(&hdr->cclist, &msg); +} + +/* + * Add a relcache inval entry + */ +static void +AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, + Oid dbId, Oid relId) +{ + SharedInvalidationMessage msg; + + /* + * Don't add a duplicate item. We assume dbId need not be checked because + * it will never change. InvalidOid for relId means all relations so we + * don't need to add individual ones when it is present. 
+ */ + ProcessMessageList(hdr->rclist, + if (msg->rc.id == SHAREDINVALRELCACHE_ID && + (msg->rc.relId == relId || + msg->rc.relId == InvalidOid)) + return); + + /* OK, add the item */ + msg.rc.id = SHAREDINVALRELCACHE_ID; + msg.rc.dbId = dbId; + msg.rc.relId = relId; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + AddInvalidationMessage(&hdr->rclist, &msg); +} + +/* + * Add a snapshot inval entry + */ +static void +AddSnapshotInvalidationMessage(InvalidationListHeader *hdr, + Oid dbId, Oid relId) +{ + SharedInvalidationMessage msg; + + /* Don't add a duplicate item */ + /* We assume dbId need not be checked because it will never change */ + ProcessMessageList(hdr->rclist, + if (msg->sn.id == SHAREDINVALSNAPSHOT_ID && + msg->sn.relId == relId) + return); + + /* OK, add the item */ + msg.sn.id = SHAREDINVALSNAPSHOT_ID; + msg.sn.dbId = dbId; + msg.sn.relId = relId; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + AddInvalidationMessage(&hdr->rclist, &msg); +} + +/* + * Append one list of invalidation messages to another, resetting + * the source list to empty. + */ +static void +AppendInvalidationMessages(InvalidationListHeader *dest, + InvalidationListHeader *src) +{ + AppendInvalidationMessageList(&dest->cclist, &src->cclist); + AppendInvalidationMessageList(&dest->rclist, &src->rclist); +} + +/* + * Execute the given function for all the messages in an invalidation list. + * The list is not altered. + * + * catcache entries are processed first, for reasons mentioned above. + */ +static void +ProcessInvalidationMessages(InvalidationListHeader *hdr, + void (*func) (SharedInvalidationMessage *msg)) +{ + ProcessMessageList(hdr->cclist, func(msg)); + ProcessMessageList(hdr->rclist, func(msg)); +} + +/* + * As above, but the function is able to process an array of messages + * rather than just one at a time. 
+ */ +static void +ProcessInvalidationMessagesMulti(InvalidationListHeader *hdr, + void (*func) (const SharedInvalidationMessage *msgs, int n)) +{ + ProcessMessageListMulti(hdr->cclist, func(msgs, n)); + ProcessMessageListMulti(hdr->rclist, func(msgs, n)); +} + +/* ---------------------------------------------------------------- + * private support functions + * ---------------------------------------------------------------- + */ + +/* + * RegisterCatcacheInvalidation + * + * Register an invalidation event for a catcache tuple entry. + */ +static void +RegisterCatcacheInvalidation(int cacheId, + uint32 hashValue, + Oid dbId) +{ + AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + cacheId, hashValue, dbId); +} + +/* + * RegisterCatalogInvalidation + * + * Register an invalidation event for all catcache entries from a catalog. + */ +static void +RegisterCatalogInvalidation(Oid dbId, Oid catId) +{ + AddCatalogInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + dbId, catId); +} + +/* + * RegisterRelcacheInvalidation + * + * As above, but register a relcache invalidation event. + */ +static void +RegisterRelcacheInvalidation(Oid dbId, Oid relId) +{ + AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + dbId, relId); + + /* + * Most of the time, relcache invalidation is associated with system + * catalog updates, but there are a few cases where it isn't. Quick hack + * to ensure that the next CommandCounterIncrement() will think that we + * need to do CommandEndInvalidationMessages(). + */ + (void) GetCurrentCommandId(true); + + /* + * If the relation being invalidated is one of those cached in a relcache + * init file, mark that we need to zap that file at commit. For simplicity + * invalidations for a specific database always invalidate the shared file + * as well. Also zap when we are invalidating whole relcache. 
+ */ + if (relId == InvalidOid || RelationIdIsInInitFile(relId)) + transInvalInfo->RelcacheInitFileInval = true; +} + +/* + * RegisterSnapshotInvalidation + * + * Register an invalidation event for MVCC scans against a given catalog. + * Only needed for catalogs that don't have catcaches. + */ +static void +RegisterSnapshotInvalidation(Oid dbId, Oid relId) +{ + AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + dbId, relId); +} + +/* + * LocalExecuteInvalidationMessage + * + * Process a single invalidation message (which could be of any type). + * Only the local caches are flushed; this does not transmit the message + * to other backends. + */ +void +LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) +{ + if (msg->id >= 0) + { + if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == InvalidOid) + { + InvalidateCatalogSnapshot(); + + SysCacheInvalidate(msg->cc.id, msg->cc.hashValue); + + CallSyscacheCallbacks(msg->cc.id, msg->cc.hashValue); + } + } + else if (msg->id == SHAREDINVALCATALOG_ID) + { + if (msg->cat.dbId == MyDatabaseId || msg->cat.dbId == InvalidOid) + { + InvalidateCatalogSnapshot(); + + CatalogCacheFlushCatalog(msg->cat.catId); + + /* CatalogCacheFlushCatalog calls CallSyscacheCallbacks as needed */ + } + } + else if (msg->id == SHAREDINVALRELCACHE_ID) + { + if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) + { + int i; + + if (msg->rc.relId == InvalidOid) + RelationCacheInvalidate(false); + else + RelationCacheInvalidateEntry(msg->rc.relId); + + for (i = 0; i < relcache_callback_count; i++) + { + struct RELCACHECALLBACK *ccitem = relcache_callback_list + i; + + ccitem->function(ccitem->arg, msg->rc.relId); + } + } + } + else if (msg->id == SHAREDINVALSMGR_ID) + { + /* + * We could have smgr entries for relations of other databases, so no + * short-circuit test is possible here. 
+ */ + RelFileNodeBackend rnode; + + rnode.node = msg->sm.rnode; + rnode.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo; + smgrclosenode(rnode); + } + else if (msg->id == SHAREDINVALRELMAP_ID) + { + /* We only care about our own database and shared catalogs */ + if (msg->rm.dbId == InvalidOid) + RelationMapInvalidate(true); + else if (msg->rm.dbId == MyDatabaseId) + RelationMapInvalidate(false); + } + else if (msg->id == SHAREDINVALSNAPSHOT_ID) + { + /* We only care about our own database and shared catalogs */ + if (msg->sn.dbId == InvalidOid) + InvalidateCatalogSnapshot(); + else if (msg->sn.dbId == MyDatabaseId) + InvalidateCatalogSnapshot(); + } + else + elog(FATAL, "unrecognized SI message ID: %d", msg->id); +} + +/* + * InvalidateSystemCaches + * + * This blows away all tuples in the system catalog caches and + * all the cached relation descriptors and smgr cache entries. + * Relation descriptors that have positive refcounts are then rebuilt. + * + * We call this when we see a shared-inval-queue overflow signal, + * since that tells us we've lost some shared-inval messages and hence + * don't know what needs to be invalidated. 
+ */ +void +InvalidateSystemCaches(void) +{ + InvalidateSystemCachesExtended(false); +} + +void +InvalidateSystemCachesExtended(bool debug_discard) +{ + int i; + + InvalidateCatalogSnapshot(); + ResetCatalogCaches(); + RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */ + + for (i = 0; i < syscache_callback_count; i++) + { + struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i; + + ccitem->function(ccitem->arg, ccitem->id, 0); + } + + for (i = 0; i < relcache_callback_count; i++) + { + struct RELCACHECALLBACK *ccitem = relcache_callback_list + i; + + ccitem->function(ccitem->arg, InvalidOid); + } +} + + +/* ---------------------------------------------------------------- + * public functions + * ---------------------------------------------------------------- + */ + +/* + * AcceptInvalidationMessages + * Read and process invalidation messages from the shared invalidation + * message queue. + * + * Note: + * This should be called as the first step in processing a transaction. + */ +void +AcceptInvalidationMessages(void) +{ + ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage, + InvalidateSystemCaches); + + /*---------- + * Test code to force cache flushes anytime a flush could happen. + * + * This helps detect intermittent faults caused by code that reads a cache + * entry and then performs an action that could invalidate the entry, but + * rarely actually does so. This can spot issues that would otherwise + * only arise with badly timed concurrent DDL, for example. + * + * The default debug_discard_caches = 0 does no forced cache flushes. + * + * If used with CLOBBER_FREED_MEMORY, + * debug_discard_caches = 1 (formerly known as CLOBBER_CACHE_ALWAYS) + * provides a fairly thorough test that the system contains no cache-flush + * hazards. However, it also makes the system unbelievably slow --- the + * regression tests take about 100 times longer than normal. 
+ * + * If you're a glutton for punishment, try + * debug_discard_caches = 3 (formerly known as CLOBBER_CACHE_RECURSIVELY). + * This slows things by at least a factor of 10000, so I wouldn't suggest + * trying to run the entire regression tests that way. It's useful to try + * a few simple tests, to make sure that cache reload isn't subject to + * internal cache-flush hazards, but after you've done a few thousand + * recursive reloads it's unlikely you'll learn more. + *---------- + */ +#ifdef DISCARD_CACHES_ENABLED + { + static int recursion_depth = 0; + + if (recursion_depth < debug_discard_caches) + { + recursion_depth++; + InvalidateSystemCachesExtended(true); + recursion_depth--; + } + } +#endif +} + +/* + * PrepareInvalidationState + * Initialize inval lists for the current (sub)transaction. + */ +static void +PrepareInvalidationState(void) +{ + TransInvalidationInfo *myInfo; + + if (transInvalInfo != NULL && + transInvalInfo->my_level == GetCurrentTransactionNestLevel()) + return; + + myInfo = (TransInvalidationInfo *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(TransInvalidationInfo)); + myInfo->parent = transInvalInfo; + myInfo->my_level = GetCurrentTransactionNestLevel(); + + /* + * If there's any previous entry, this one should be for a deeper nesting + * level. + */ + Assert(transInvalInfo == NULL || + myInfo->my_level > transInvalInfo->my_level); + + transInvalInfo = myInfo; +} + +/* + * PostPrepare_Inval + * Clean up after successful PREPARE. + * + * Here, we want to act as though the transaction aborted, so that we will + * undo any syscache changes it made, thereby bringing us into sync with the + * outside world, which doesn't believe the transaction committed yet. + * + * If the prepared transaction is later aborted, there is nothing more to + * do; if it commits, we will receive the consequent inval messages just + * like everyone else. 
+ */ +void +PostPrepare_Inval(void) +{ + AtEOXact_Inval(false); +} + +/* + * Collect invalidation messages into SharedInvalidMessagesArray array. + */ +static void +MakeSharedInvalidMessagesArray(const SharedInvalidationMessage *msgs, int n) +{ + /* + * Initialise array first time through in each commit + */ + if (SharedInvalidMessagesArray == NULL) + { + maxSharedInvalidMessagesArray = FIRSTCHUNKSIZE; + numSharedInvalidMessagesArray = 0; + + /* + * Although this is being palloc'd we don't actually free it directly. + * We're so close to EOXact that we now we're going to lose it anyhow. + */ + SharedInvalidMessagesArray = palloc(maxSharedInvalidMessagesArray + * sizeof(SharedInvalidationMessage)); + } + + if ((numSharedInvalidMessagesArray + n) > maxSharedInvalidMessagesArray) + { + while ((numSharedInvalidMessagesArray + n) > maxSharedInvalidMessagesArray) + maxSharedInvalidMessagesArray *= 2; + + SharedInvalidMessagesArray = repalloc(SharedInvalidMessagesArray, + maxSharedInvalidMessagesArray + * sizeof(SharedInvalidationMessage)); + } + + /* + * Append the next chunk onto the array + */ + memcpy(SharedInvalidMessagesArray + numSharedInvalidMessagesArray, + msgs, n * sizeof(SharedInvalidationMessage)); + numSharedInvalidMessagesArray += n; +} + +/* + * xactGetCommittedInvalidationMessages() is executed by + * RecordTransactionCommit() to add invalidation messages onto the + * commit record. This applies only to commit message types, never to + * abort records. Must always run before AtEOXact_Inval(), since that + * removes the data we need to see. + * + * Remember that this runs before we have officially committed, so we + * must not do anything here to change what might occur *if* we should + * fail between here and the actual commit. 
+ * + * see also xact_redo_commit() and xact_desc_commit() + */ +int +xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, + bool *RelcacheInitFileInval) +{ + MemoryContext oldcontext; + + /* Quick exit if we haven't done anything with invalidation messages. */ + if (transInvalInfo == NULL) + { + *RelcacheInitFileInval = false; + *msgs = NULL; + return 0; + } + + /* Must be at top of stack */ + Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); + + /* + * Relcache init file invalidation requires processing both before and + * after we send the SI messages. However, we need not do anything unless + * we committed. + */ + *RelcacheInitFileInval = transInvalInfo->RelcacheInitFileInval; + + /* + * Walk through TransInvalidationInfo to collect all the messages into a + * single contiguous array of invalidation messages. It must be contiguous + * so we can copy directly into WAL message. Maintain the order that they + * would be processed in by AtEOXact_Inval(), to ensure emulated behaviour + * in redo is as similar as possible to original. We want the same bugs, + * if any, not new ones. + */ + oldcontext = MemoryContextSwitchTo(CurTransactionContext); + + ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs, + MakeSharedInvalidMessagesArray); + ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs, + MakeSharedInvalidMessagesArray); + MemoryContextSwitchTo(oldcontext); + + Assert(!(numSharedInvalidMessagesArray > 0 && + SharedInvalidMessagesArray == NULL)); + + *msgs = SharedInvalidMessagesArray; + + return numSharedInvalidMessagesArray; +} + +/* + * ProcessCommittedInvalidationMessages is executed by xact_redo_commit() or + * standby_redo() to process invalidation messages. Currently that happens + * only at end-of-xact. + * + * Relcache init file invalidation requires processing both + * before and after we send the SI messages. 
See AtEOXact_Inval() + */ +void +ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, + int nmsgs, bool RelcacheInitFileInval, + Oid dbid, Oid tsid) +{ + if (nmsgs <= 0) + return; + + elog(trace_recovery(DEBUG4), "replaying commit with %d messages%s", nmsgs, + (RelcacheInitFileInval ? " and relcache file invalidation" : "")); + + if (RelcacheInitFileInval) + { + elog(trace_recovery(DEBUG4), "removing relcache init files for database %u", + dbid); + + /* + * RelationCacheInitFilePreInvalidate, when the invalidation message + * is for a specific database, requires DatabasePath to be set, but we + * should not use SetDatabasePath during recovery, since it is + * intended to be used only once by normal backends. Hence, a quick + * hack: set DatabasePath directly then unset after use. + */ + if (OidIsValid(dbid)) + DatabasePath = GetDatabasePath(dbid, tsid); + + RelationCacheInitFilePreInvalidate(); + + if (OidIsValid(dbid)) + { + pfree(DatabasePath); + DatabasePath = NULL; + } + } + + SendSharedInvalidMessages(msgs, nmsgs); + + if (RelcacheInitFileInval) + RelationCacheInitFilePostInvalidate(); +} + +/* + * AtEOXact_Inval + * Process queued-up invalidation messages at end of main transaction. + * + * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list + * to the shared invalidation message queue. Note that these will be read + * not only by other backends, but also by our own backend at the next + * transaction start (via AcceptInvalidationMessages). This means that + * we can skip immediate local processing of anything that's still in + * CurrentCmdInvalidMsgs, and just send that list out too. + * + * If not isCommit, we are aborting, and must locally process the messages + * in PriorCmdInvalidMsgs. No messages need be sent to other backends, + * since they'll not have seen our changed tuples anyway. We can forget + * about CurrentCmdInvalidMsgs too, since those changes haven't touched + * the caches yet. 
+ * + * In any case, reset the various lists to empty. We need not physically + * free memory here, since TopTransactionContext is about to be emptied + * anyway. + * + * Note: + * This should be called as the last step in processing a transaction. + */ +void +AtEOXact_Inval(bool isCommit) +{ + /* Quick exit if no messages */ + if (transInvalInfo == NULL) + return; + + /* Must be at top of stack */ + Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); + + if (isCommit) + { + /* + * Relcache init file invalidation requires processing both before and + * after we send the SI messages. However, we need not do anything + * unless we committed. + */ + if (transInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePreInvalidate(); + + AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + &transInvalInfo->CurrentCmdInvalidMsgs); + + ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs, + SendSharedInvalidMessages); + + if (transInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePostInvalidate(); + } + else + { + ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + } + + /* Need not free anything explicitly */ + transInvalInfo = NULL; + SharedInvalidMessagesArray = NULL; + numSharedInvalidMessagesArray = 0; +} + +/* + * AtEOSubXact_Inval + * Process queued-up invalidation messages at end of subtransaction. + * + * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't), + * and then attach both CurrentCmdInvalidMsgs and PriorCmdInvalidMsgs to the + * parent's PriorCmdInvalidMsgs list. + * + * If not isCommit, we are aborting, and must locally process the messages + * in PriorCmdInvalidMsgs. No messages need be sent to other backends. + * We can forget about CurrentCmdInvalidMsgs too, since those changes haven't + * touched the caches yet. + * + * In any case, pop the transaction stack. 
We need not physically free memory + * here, since CurTransactionContext is about to be emptied anyway + * (if aborting). Beware of the possibility of aborting the same nesting + * level twice, though. + */ +void +AtEOSubXact_Inval(bool isCommit) +{ + int my_level; + TransInvalidationInfo *myInfo = transInvalInfo; + + /* Quick exit if no messages. */ + if (myInfo == NULL) + return; + + /* Also bail out quickly if messages are not for this level. */ + my_level = GetCurrentTransactionNestLevel(); + if (myInfo->my_level != my_level) + { + Assert(myInfo->my_level < my_level); + return; + } + + if (isCommit) + { + /* If CurrentCmdInvalidMsgs still has anything, fix it */ + CommandEndInvalidationMessages(); + + /* + * We create invalidation stack entries lazily, so the parent might + * not have one. Instead of creating one, moving all the data over, + * and then freeing our own, we can just adjust the level of our own + * entry. + */ + if (myInfo->parent == NULL || myInfo->parent->my_level < my_level - 1) + { + myInfo->my_level--; + return; + } + + /* Pass up my inval messages to parent */ + AppendInvalidationMessages(&myInfo->parent->PriorCmdInvalidMsgs, + &myInfo->PriorCmdInvalidMsgs); + + /* Pending relcache inval becomes parent's problem too */ + if (myInfo->RelcacheInitFileInval) + myInfo->parent->RelcacheInitFileInval = true; + + /* Pop the transaction state stack */ + transInvalInfo = myInfo->parent; + + /* Need not free anything else explicitly */ + pfree(myInfo); + } + else + { + ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + + /* Pop the transaction state stack */ + transInvalInfo = myInfo->parent; + + /* Need not free anything else explicitly */ + pfree(myInfo); + } +} + +/* + * CommandEndInvalidationMessages + * Process queued-up invalidation messages at end of one command + * in a transaction. + * + * Here, we send no messages to the shared queue, since we don't know yet if + * we will commit. 
We do need to locally process the CurrentCmdInvalidMsgs + * list, so as to flush our caches of any entries we have outdated in the + * current command. We then move the current-cmd list over to become part + * of the prior-cmds list. + * + * Note: + * This should be called during CommandCounterIncrement(), + * after we have advanced the command ID. + */ +void +CommandEndInvalidationMessages(void) +{ + /* + * You might think this shouldn't be called outside any transaction, but + * bootstrap does it, and also ABORT issued when not in a transaction. So + * just quietly return if no state to work on. + */ + if (transInvalInfo == NULL) + return; + + ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + + /* WAL Log per-command invalidation messages for wal_level=logical */ + if (XLogLogicalInfoActive()) + LogLogicalInvalidations(); + + AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + &transInvalInfo->CurrentCmdInvalidMsgs); +} + + +/* + * CacheInvalidateHeapTuple + * Register the given tuple for invalidation at end of command + * (ie, current command is creating or outdating this tuple). + * Also, detect whether a relcache invalidation is implied. + * + * For an insert or delete, tuple is the target tuple and newtuple is NULL. + * For an update, we are called just once, with tuple being the old tuple + * version and newtuple the new version. This allows avoidance of duplicate + * effort during an update. + */ +void +CacheInvalidateHeapTuple(Relation relation, + HeapTuple tuple, + HeapTuple newtuple) +{ + Oid tupleRelId; + Oid databaseId; + Oid relationId; + + /* Do nothing during bootstrap */ + if (IsBootstrapProcessingMode()) + return; + + /* + * We only need to worry about invalidation for tuples that are in system + * catalogs; user-relation tuples are never in catcaches and can't affect + * the relcache either. 
+ */ + if (!IsCatalogRelation(relation)) + return; + + /* + * IsCatalogRelation() will return true for TOAST tables of system + * catalogs, but we don't care about those, either. + */ + if (IsToastRelation(relation)) + return; + + /* + * If we're not prepared to queue invalidation messages for this + * subtransaction level, get ready now. + */ + PrepareInvalidationState(); + + /* + * First let the catcache do its thing + */ + tupleRelId = RelationGetRelid(relation); + if (RelationInvalidatesSnapshotsOnly(tupleRelId)) + { + databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId; + RegisterSnapshotInvalidation(databaseId, tupleRelId); + } + else + PrepareToInvalidateCacheTuple(relation, tuple, newtuple, + RegisterCatcacheInvalidation); + + /* + * Now, is this tuple one of the primary definers of a relcache entry? See + * comments in file header for deeper explanation. + * + * Note we ignore newtuple here; we assume an update cannot move a tuple + * from being part of one relcache entry to being part of another. + */ + if (tupleRelId == RelationRelationId) + { + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple); + + relationId = classtup->oid; + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + } + else if (tupleRelId == AttributeRelationId) + { + Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple); + + relationId = atttup->attrelid; + + /* + * KLUGE ALERT: we always send the relcache event with MyDatabaseId, + * even if the rel in question is shared (which we can't easily tell). + * This essentially means that only backends in this same database + * will react to the relcache flush request. This is in fact + * appropriate, since only those backends could see our pg_attribute + * change anyway. It looks a bit ugly though. (In practice, shared + * relations can't have schema changes after bootstrap, so we should + * never come here for a shared rel anyway.) 
+ */ + databaseId = MyDatabaseId; + } + else if (tupleRelId == IndexRelationId) + { + Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple); + + /* + * When a pg_index row is updated, we should send out a relcache inval + * for the index relation. As above, we don't know the shared status + * of the index, but in practice it doesn't matter since indexes of + * shared catalogs can't have such updates. + */ + relationId = indextup->indexrelid; + databaseId = MyDatabaseId; + } + else if (tupleRelId == ConstraintRelationId) + { + Form_pg_constraint constrtup = (Form_pg_constraint) GETSTRUCT(tuple); + + /* + * Foreign keys are part of relcache entries, too, so send out an + * inval for the table that the FK applies to. + */ + if (constrtup->contype == CONSTRAINT_FOREIGN && + OidIsValid(constrtup->conrelid)) + { + relationId = constrtup->conrelid; + databaseId = MyDatabaseId; + } + else + return; + } + else + return; + + /* + * Yes. We need to register a relcache invalidation event. + */ + RegisterRelcacheInvalidation(databaseId, relationId); +} + +/* + * CacheInvalidateCatalog + * Register invalidation of the whole content of a system catalog. + * + * This is normally used in VACUUM FULL/CLUSTER, where we haven't so much + * changed any tuples as moved them around. Some uses of catcache entries + * expect their TIDs to be correct, so we have to blow away the entries. + * + * Note: we expect caller to verify that the rel actually is a system + * catalog. If it isn't, no great harm is done, just a wasted sinval message. + */ +void +CacheInvalidateCatalog(Oid catalogId) +{ + Oid databaseId; + + PrepareInvalidationState(); + + if (IsSharedRelation(catalogId)) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + + RegisterCatalogInvalidation(databaseId, catalogId); +} + +/* + * CacheInvalidateRelcache + * Register invalidation of the specified relation's relcache entry + * at end of command. 
+ * + * This is used in places that need to force relcache rebuild but aren't + * changing any of the tuples recognized as contributors to the relcache + * entry by CacheInvalidateHeapTuple. (An example is dropping an index.) + */ +void +CacheInvalidateRelcache(Relation relation) +{ + Oid databaseId; + Oid relationId; + + PrepareInvalidationState(); + + relationId = RelationGetRelid(relation); + if (relation->rd_rel->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + + RegisterRelcacheInvalidation(databaseId, relationId); +} + +/* + * CacheInvalidateRelcacheAll + * Register invalidation of the whole relcache at the end of command. + * + * This is used by alter publication as changes in publications may affect + * large number of tables. + */ +void +CacheInvalidateRelcacheAll(void) +{ + PrepareInvalidationState(); + + RegisterRelcacheInvalidation(InvalidOid, InvalidOid); +} + +/* + * CacheInvalidateRelcacheByTuple + * As above, but relation is identified by passing its pg_class tuple. + */ +void +CacheInvalidateRelcacheByTuple(HeapTuple classTuple) +{ + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple); + Oid databaseId; + Oid relationId; + + PrepareInvalidationState(); + + relationId = classtup->oid; + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + RegisterRelcacheInvalidation(databaseId, relationId); +} + +/* + * CacheInvalidateRelcacheByRelid + * As above, but relation is identified by passing its OID. + * This is the least efficient of the three options; use one of + * the above routines if you have a Relation or pg_class tuple. 
+ */ +void +CacheInvalidateRelcacheByRelid(Oid relid) +{ + HeapTuple tup; + + PrepareInvalidationState(); + + tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for relation %u", relid); + CacheInvalidateRelcacheByTuple(tup); + ReleaseSysCache(tup); +} + + +/* + * CacheInvalidateSmgr + * Register invalidation of smgr references to a physical relation. + * + * Sending this type of invalidation msg forces other backends to close open + * smgr entries for the rel. This should be done to flush dangling open-file + * references when the physical rel is being dropped or truncated. Because + * these are nontransactional (i.e., not-rollback-able) operations, we just + * send the inval message immediately without any queuing. + * + * Note: in most cases there will have been a relcache flush issued against + * the rel at the logical level. We need a separate smgr-level flush because + * it is possible for backends to have open smgr entries for rels they don't + * have a relcache entry for, e.g. because the only thing they ever did with + * the rel is write out dirty shared buffers. + * + * Note: because these messages are nontransactional, they won't be captured + * in commit/abort WAL entries. Instead, calls to CacheInvalidateSmgr() + * should happen in low-level smgr.c routines, which are executed while + * replaying WAL as well as when creating it. + * + * Note: In order to avoid bloating SharedInvalidationMessage, we store only + * three bytes of the backend ID using what would otherwise be padding space. + * Thus, the maximum possible backend ID is 2^23-1. 
+ */ +void +CacheInvalidateSmgr(RelFileNodeBackend rnode) +{ + SharedInvalidationMessage msg; + + msg.sm.id = SHAREDINVALSMGR_ID; + msg.sm.backend_hi = rnode.backend >> 16; + msg.sm.backend_lo = rnode.backend & 0xffff; + msg.sm.rnode = rnode.node; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + SendSharedInvalidMessages(&msg, 1); +} + +/* + * CacheInvalidateRelmap + * Register invalidation of the relation mapping for a database, + * or for the shared catalogs if databaseId is zero. + * + * Sending this type of invalidation msg forces other backends to re-read + * the indicated relation mapping file. It is also necessary to send a + * relcache inval for the specific relations whose mapping has been altered, + * else the relcache won't get updated with the new filenode data. + * + * Note: because these messages are nontransactional, they won't be captured + * in commit/abort WAL entries. Instead, calls to CacheInvalidateRelmap() + * should happen in low-level relmapper.c routines, which are executed while + * replaying WAL as well as when creating it. + */ +void +CacheInvalidateRelmap(Oid databaseId) +{ + SharedInvalidationMessage msg; + + msg.rm.id = SHAREDINVALRELMAP_ID; + msg.rm.dbId = databaseId; + /* check AddCatcacheInvalidationMessage() for an explanation */ + VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg)); + + SendSharedInvalidMessages(&msg, 1); +} + + +/* + * CacheRegisterSyscacheCallback + * Register the specified function to be called for all future + * invalidation events in the specified cache. The cache ID and the + * hash value of the tuple being invalidated will be passed to the + * function. + * + * NOTE: Hash value zero will be passed if a cache reset request is received. + * In this case the called routines should flush all cached state. 
+ * Yes, there's a possibility of a false match to zero, but it doesn't seem + * worth troubling over, especially since most of the current callees just + * flush all cached state anyway. + */ +void +CacheRegisterSyscacheCallback(int cacheid, + SyscacheCallbackFunction func, + Datum arg) +{ + if (cacheid < 0 || cacheid >= SysCacheSize) + elog(FATAL, "invalid cache ID: %d", cacheid); + if (syscache_callback_count >= MAX_SYSCACHE_CALLBACKS) + elog(FATAL, "out of syscache_callback_list slots"); + + if (syscache_callback_links[cacheid] == 0) + { + /* first callback for this cache */ + syscache_callback_links[cacheid] = syscache_callback_count + 1; + } + else + { + /* add to end of chain, so that older callbacks are called first */ + int i = syscache_callback_links[cacheid] - 1; + + while (syscache_callback_list[i].link > 0) + i = syscache_callback_list[i].link - 1; + syscache_callback_list[i].link = syscache_callback_count + 1; + } + + syscache_callback_list[syscache_callback_count].id = cacheid; + syscache_callback_list[syscache_callback_count].link = 0; + syscache_callback_list[syscache_callback_count].function = func; + syscache_callback_list[syscache_callback_count].arg = arg; + + ++syscache_callback_count; +} + +/* + * CacheRegisterRelcacheCallback + * Register the specified function to be called for all future + * relcache invalidation events. The OID of the relation being + * invalidated will be passed to the function. + * + * NOTE: InvalidOid will be passed if a cache reset request is received. + * In this case the called routines should flush all cached state. 
+ */ +void +CacheRegisterRelcacheCallback(RelcacheCallbackFunction func, + Datum arg) +{ + if (relcache_callback_count >= MAX_RELCACHE_CALLBACKS) + elog(FATAL, "out of relcache_callback_list slots"); + + relcache_callback_list[relcache_callback_count].function = func; + relcache_callback_list[relcache_callback_count].arg = arg; + + ++relcache_callback_count; +} + +/* + * CallSyscacheCallbacks + * + * This is exported so that CatalogCacheFlushCatalog can call it, saving + * this module from knowing which catcache IDs correspond to which catalogs. + */ +void +CallSyscacheCallbacks(int cacheid, uint32 hashvalue) +{ + int i; + + if (cacheid < 0 || cacheid >= SysCacheSize) + elog(ERROR, "invalid cache ID: %d", cacheid); + + i = syscache_callback_links[cacheid] - 1; + while (i >= 0) + { + struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i; + + Assert(ccitem->id == cacheid); + ccitem->function(ccitem->arg, cacheid, hashvalue); + i = ccitem->link - 1; + } +} + +/* + * LogLogicalInvalidations + * + * Emit WAL for invalidations. This is currently only used for logging + * invalidations at the command end or at commit time if any invalidations + * are pending. + */ +void +LogLogicalInvalidations() +{ + xl_xact_invals xlrec; + SharedInvalidationMessage *invalMessages; + int nmsgs = 0; + + /* Quick exit if we haven't done anything with invalidation messages. 
*/ + if (transInvalInfo == NULL) + return; + + ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs, + MakeSharedInvalidMessagesArray); + + Assert(!(numSharedInvalidMessagesArray > 0 && + SharedInvalidMessagesArray == NULL)); + + invalMessages = SharedInvalidMessagesArray; + nmsgs = numSharedInvalidMessagesArray; + SharedInvalidMessagesArray = NULL; + numSharedInvalidMessagesArray = 0; + + if (nmsgs > 0) + { + /* prepare record */ + memset(&xlrec, 0, MinSizeOfXactInvals); + xlrec.nmsgs = nmsgs; + + /* perform insertion */ + XLogBeginInsert(); + XLogRegisterData((char *) (&xlrec), MinSizeOfXactInvals); + XLogRegisterData((char *) invalMessages, + nmsgs * sizeof(SharedInvalidationMessage)); + XLogInsert(RM_XACT_ID, XLOG_XACT_INVALIDATIONS); + + pfree(invalMessages); + } +} diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c new file mode 100644 index 0000000..5564b53 --- /dev/null +++ b/src/backend/utils/cache/lsyscache.c @@ -0,0 +1,3580 @@ +/*------------------------------------------------------------------------- + * + * lsyscache.c + * Convenience routines for common queries in the system catalog cache. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/lsyscache.c + * + * NOTES + * Eventually, the index information should go through here, too. 
+ *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "bootstrap/bootstrap.h" +#include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_range.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_transform.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +/* Hook for plugins to get control in get_attavgwidth() */ +get_attavgwidth_hook_type get_attavgwidth_hook = NULL; + + +/* ---------- AMOP CACHES ---------- */ + +/* + * op_in_opfamily + * + * Return t iff operator 'opno' is in operator family 'opfamily'. + * + * This function only considers search operators, not ordering operators. + */ +bool +op_in_opfamily(Oid opno, Oid opfamily) +{ + return SearchSysCacheExists3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(AMOP_SEARCH), + ObjectIdGetDatum(opfamily)); +} + +/* + * get_op_opfamily_strategy + * + * Get the operator's strategy number within the specified opfamily, + * or 0 if it's not a member of the opfamily. + * + * This function only considers search operators, not ordering operators. 
+ */ +int +get_op_opfamily_strategy(Oid opno, Oid opfamily) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + int result; + + tp = SearchSysCache3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(AMOP_SEARCH), + ObjectIdGetDatum(opfamily)); + if (!HeapTupleIsValid(tp)) + return 0; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + result = amop_tup->amopstrategy; + ReleaseSysCache(tp); + return result; +} + +/* + * get_op_opfamily_sortfamily + * + * If the operator is an ordering operator within the specified opfamily, + * return its amopsortfamily OID; else return InvalidOid. + */ +Oid +get_op_opfamily_sortfamily(Oid opno, Oid opfamily) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid result; + + tp = SearchSysCache3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(AMOP_ORDER), + ObjectIdGetDatum(opfamily)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + result = amop_tup->amopsortfamily; + ReleaseSysCache(tp); + return result; +} + +/* + * get_op_opfamily_properties + * + * Get the operator's strategy number and declared input data types + * within the specified opfamily. + * + * Caller should already have verified that opno is a member of opfamily, + * therefore we raise an error if the tuple is not found. + */ +void +get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, + int *strategy, + Oid *lefttype, + Oid *righttype) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + + tp = SearchSysCache3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(ordering_op ? 
AMOP_ORDER : AMOP_SEARCH), + ObjectIdGetDatum(opfamily)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "operator %u is not a member of opfamily %u", + opno, opfamily); + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + *strategy = amop_tup->amopstrategy; + *lefttype = amop_tup->amoplefttype; + *righttype = amop_tup->amoprighttype; + ReleaseSysCache(tp); +} + +/* + * get_opfamily_member + * Get the OID of the operator that implements the specified strategy + * with the specified datatypes for the specified opfamily. + * + * Returns InvalidOid if there is no pg_amop entry for the given keys. + */ +Oid +get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, + int16 strategy) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid result; + + tp = SearchSysCache4(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(lefttype), + ObjectIdGetDatum(righttype), + Int16GetDatum(strategy)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + result = amop_tup->amopopr; + ReleaseSysCache(tp); + return result; +} + +/* + * get_ordering_op_properties + * Given the OID of an ordering operator (a btree "<" or ">" operator), + * determine its opfamily, its declared input datatype, and its + * strategy number (BTLessStrategyNumber or BTGreaterStrategyNumber). + * + * Returns true if successful, false if no matching pg_amop entry exists. + * (This indicates that the operator is not a valid ordering operator.) + * + * Note: the operator could be registered in multiple families, for example + * if someone were to build a "reverse sort" opfamily. This would result in + * uncertainty as to whether "ORDER BY USING op" would default to NULLS FIRST + * or NULLS LAST, as well as inefficient planning due to failure to match up + * pathkeys that should be the same. So we want a determinate result here. 
+ * Because of the way the syscache search works, we'll use the interpretation + * associated with the opfamily with smallest OID, which is probably + * determinate enough. Since there is no longer any particularly good reason + * to build reverse-sort opfamilies, it doesn't seem worth expending any + * additional effort on ensuring consistency. + */ +bool +get_ordering_op_properties(Oid opno, + Oid *opfamily, Oid *opcintype, int16 *strategy) +{ + bool result = false; + CatCList *catlist; + int i; + + /* ensure outputs are initialized on failure */ + *opfamily = InvalidOid; + *opcintype = InvalidOid; + *strategy = 0; + + /* + * Search pg_amop to see if the target operator is registered as the "<" + * or ">" operator of any btree opfamily. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree */ + if (aform->amopmethod != BTREE_AM_OID) + continue; + + if (aform->amopstrategy == BTLessStrategyNumber || + aform->amopstrategy == BTGreaterStrategyNumber) + { + /* Found it ... should have consistent input types */ + if (aform->amoplefttype == aform->amoprighttype) + { + /* Found a suitable opfamily, return info */ + *opfamily = aform->amopfamily; + *opcintype = aform->amoplefttype; + *strategy = aform->amopstrategy; + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_equality_op_for_ordering_op + * Get the OID of the datatype-specific btree equality operator + * associated with an ordering operator (a "<" or ">" operator). + * + * If "reverse" isn't NULL, also set *reverse to false if the operator is "<", + * true if it's ">" + * + * Returns InvalidOid if no matching equality operator can be found. + * (This indicates that the operator is not a valid ordering operator.) 
+ */ +Oid +get_equality_op_for_ordering_op(Oid opno, bool *reverse) +{ + Oid result = InvalidOid; + Oid opfamily; + Oid opcintype; + int16 strategy; + + /* Find the operator in pg_amop */ + if (get_ordering_op_properties(opno, + &opfamily, &opcintype, &strategy)) + { + /* Found a suitable opfamily, get matching equality operator */ + result = get_opfamily_member(opfamily, + opcintype, + opcintype, + BTEqualStrategyNumber); + if (reverse) + *reverse = (strategy == BTGreaterStrategyNumber); + } + + return result; +} + +/* + * get_ordering_op_for_equality_op + * Get the OID of a datatype-specific btree ordering operator + * associated with an equality operator. (If there are multiple + * possibilities, assume any one will do.) + * + * This function is used when we have to sort data before unique-ifying, + * and don't much care which sorting op is used as long as it's compatible + * with the intended equality operator. Since we need a sorting operator, + * it should be single-data-type even if the given operator is cross-type. + * The caller specifies whether to find an op for the LHS or RHS data type. + * + * Returns InvalidOid if no matching ordering operator can be found. + */ +Oid +get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type) +{ + Oid result = InvalidOid; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any btree opfamily. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree */ + if (aform->amopmethod != BTREE_AM_OID) + continue; + + if (aform->amopstrategy == BTEqualStrategyNumber) + { + /* Found a suitable opfamily, get matching ordering operator */ + Oid typid; + + typid = use_lhs_type ? 
aform->amoplefttype : aform->amoprighttype; + result = get_opfamily_member(aform->amopfamily, + typid, typid, + BTLessStrategyNumber); + if (OidIsValid(result)) + break; + /* failure probably shouldn't happen, but keep looking if so */ + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_mergejoin_opfamilies + * Given a putatively mergejoinable operator, return a list of the OIDs + * of the btree opfamilies in which it represents equality. + * + * It is possible (though at present unusual) for an operator to be equality + * in more than one opfamily, hence the result is a list. This also lets us + * return NIL if the operator is not found in any opfamilies. + * + * The planner currently uses simple equal() tests to compare the lists + * returned by this function, which makes the list order relevant, though + * strictly speaking it should not be. Because of the way syscache list + * searches are handled, in normal operation the result will be sorted by OID + * so everything works fine. If running with system index usage disabled, + * the result ordering is unspecified and hence the planner might fail to + * recognize optimization opportunities ... but that's hardly a scenario in + * which performance is good anyway, so there's no point in expending code + * or cycles here to guarantee the ordering in that case. + */ +List * +get_mergejoin_opfamilies(Oid opno) +{ + List *result = NIL; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any btree opfamily. 
+ */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree equality */ + if (aform->amopmethod == BTREE_AM_OID && + aform->amopstrategy == BTEqualStrategyNumber) + result = lappend_oid(result, aform->amopfamily); + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_compatible_hash_operators + * Get the OID(s) of hash equality operator(s) compatible with the given + * operator, but operating on its LHS and/or RHS datatype. + * + * An operator for the LHS type is sought and returned into *lhs_opno if + * lhs_opno isn't NULL. Similarly, an operator for the RHS type is sought + * and returned into *rhs_opno if rhs_opno isn't NULL. + * + * If the given operator is not cross-type, the results should be the same + * operator, but in cross-type situations they will be different. + * + * Returns true if able to find the requested operator(s), false if not. + * (This indicates that the operator should not have been marked oprcanhash.) + */ +bool +get_compatible_hash_operators(Oid opno, + Oid *lhs_opno, Oid *rhs_opno) +{ + bool result = false; + CatCList *catlist; + int i; + + /* Ensure output args are initialized on failure */ + if (lhs_opno) + *lhs_opno = InvalidOid; + if (rhs_opno) + *rhs_opno = InvalidOid; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any hash opfamily. If the operator is registered in + * multiple opfamilies, assume we can use any one. 
+ */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + if (aform->amopmethod == HASH_AM_OID && + aform->amopstrategy == HTEqualStrategyNumber) + { + /* No extra lookup needed if given operator is single-type */ + if (aform->amoplefttype == aform->amoprighttype) + { + if (lhs_opno) + *lhs_opno = opno; + if (rhs_opno) + *rhs_opno = opno; + result = true; + break; + } + + /* + * Get the matching single-type operator(s). Failure probably + * shouldn't happen --- it implies a bogus opfamily --- but + * continue looking if so. + */ + if (lhs_opno) + { + *lhs_opno = get_opfamily_member(aform->amopfamily, + aform->amoplefttype, + aform->amoplefttype, + HTEqualStrategyNumber); + if (!OidIsValid(*lhs_opno)) + continue; + /* Matching LHS found, done if caller doesn't want RHS */ + if (!rhs_opno) + { + result = true; + break; + } + } + if (rhs_opno) + { + *rhs_opno = get_opfamily_member(aform->amopfamily, + aform->amoprighttype, + aform->amoprighttype, + HTEqualStrategyNumber); + if (!OidIsValid(*rhs_opno)) + { + /* Forget any LHS operator from this opfamily */ + if (lhs_opno) + *lhs_opno = InvalidOid; + continue; + } + /* Matching RHS found, so done */ + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_op_hash_functions + * Get the OID(s) of the standard hash support function(s) compatible with + * the given operator, operating on its LHS and/or RHS datatype as required. + * + * A function for the LHS type is sought and returned into *lhs_procno if + * lhs_procno isn't NULL. Similarly, a function for the RHS type is sought + * and returned into *rhs_procno if rhs_procno isn't NULL. + * + * If the given operator is not cross-type, the results should be the same + * function, but in cross-type situations they will be different. 
+ * + * Returns true if able to find the requested function(s), false if not. + * (This indicates that the operator should not have been marked oprcanhash.) + */ +bool +get_op_hash_functions(Oid opno, + RegProcedure *lhs_procno, RegProcedure *rhs_procno) +{ + bool result = false; + CatCList *catlist; + int i; + + /* Ensure output args are initialized on failure */ + if (lhs_procno) + *lhs_procno = InvalidOid; + if (rhs_procno) + *rhs_procno = InvalidOid; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any hash opfamily. If the operator is registered in + * multiple opfamilies, assume we can use any one. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + if (aform->amopmethod == HASH_AM_OID && + aform->amopstrategy == HTEqualStrategyNumber) + { + /* + * Get the matching support function(s). Failure probably + * shouldn't happen --- it implies a bogus opfamily --- but + * continue looking if so. 
+ */ + if (lhs_procno) + { + *lhs_procno = get_opfamily_proc(aform->amopfamily, + aform->amoplefttype, + aform->amoplefttype, + HASHSTANDARD_PROC); + if (!OidIsValid(*lhs_procno)) + continue; + /* Matching LHS found, done if caller doesn't want RHS */ + if (!rhs_procno) + { + result = true; + break; + } + /* Only one lookup needed if given operator is single-type */ + if (aform->amoplefttype == aform->amoprighttype) + { + *rhs_procno = *lhs_procno; + result = true; + break; + } + } + if (rhs_procno) + { + *rhs_procno = get_opfamily_proc(aform->amopfamily, + aform->amoprighttype, + aform->amoprighttype, + HASHSTANDARD_PROC); + if (!OidIsValid(*rhs_procno)) + { + /* Forget any LHS function from this opfamily */ + if (lhs_procno) + *lhs_procno = InvalidOid; + continue; + } + /* Matching RHS found, so done */ + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * get_op_btree_interpretation + * Given an operator's OID, find out which btree opfamilies it belongs to, + * and what properties it has within each one. The results are returned + * as a palloc'd list of OpBtreeInterpretation structs. + * + * In addition to the normal btree operators, we consider a <> operator to be + * a "member" of an opfamily if its negator is an equality operator of the + * opfamily. ROWCOMPARE_NE is returned as the strategy number for this case. + */ +List * +get_op_btree_interpretation(Oid opno) +{ + List *result = NIL; + OpBtreeInterpretation *thisresult; + CatCList *catlist; + int i; + + /* + * Find all the pg_amop entries containing the operator. 
+ */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + StrategyNumber op_strategy; + + /* must be btree */ + if (op_form->amopmethod != BTREE_AM_OID) + continue; + + /* Get the operator's btree strategy number */ + op_strategy = (StrategyNumber) op_form->amopstrategy; + Assert(op_strategy >= 1 && op_strategy <= 5); + + thisresult = (OpBtreeInterpretation *) + palloc(sizeof(OpBtreeInterpretation)); + thisresult->opfamily_id = op_form->amopfamily; + thisresult->strategy = op_strategy; + thisresult->oplefttype = op_form->amoplefttype; + thisresult->oprighttype = op_form->amoprighttype; + result = lappend(result, thisresult); + } + + ReleaseSysCacheList(catlist); + + /* + * If we didn't find any btree opfamily containing the operator, perhaps + * it is a <> operator. See if it has a negator that is in an opfamily. 
+ */ + if (result == NIL) + { + Oid op_negator = get_negator(opno); + + if (OidIsValid(op_negator)) + { + catlist = SearchSysCacheList1(AMOPOPID, + ObjectIdGetDatum(op_negator)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + StrategyNumber op_strategy; + + /* must be btree */ + if (op_form->amopmethod != BTREE_AM_OID) + continue; + + /* Get the operator's btree strategy number */ + op_strategy = (StrategyNumber) op_form->amopstrategy; + Assert(op_strategy >= 1 && op_strategy <= 5); + + /* Only consider negators that are = */ + if (op_strategy != BTEqualStrategyNumber) + continue; + + /* OK, report it with "strategy" ROWCOMPARE_NE */ + thisresult = (OpBtreeInterpretation *) + palloc(sizeof(OpBtreeInterpretation)); + thisresult->opfamily_id = op_form->amopfamily; + thisresult->strategy = ROWCOMPARE_NE; + thisresult->oplefttype = op_form->amoplefttype; + thisresult->oprighttype = op_form->amoprighttype; + result = lappend(result, thisresult); + } + + ReleaseSysCacheList(catlist); + } + } + + return result; +} + +/* + * equality_ops_are_compatible + * Return true if the two given equality operators have compatible + * semantics. + * + * This is trivially true if they are the same operator. Otherwise, + * we look to see if they can be found in the same btree or hash opfamily. + * Either finding allows us to assume that they have compatible notions + * of equality. (The reason we need to do these pushups is that one might + * be a cross-type operator; for instance int24eq vs int4eq.) + */ +bool +equality_ops_are_compatible(Oid opno1, Oid opno2) +{ + bool result; + CatCList *catlist; + int i; + + /* Easy if they're the same operator */ + if (opno1 == opno2) + return true; + + /* + * We search through all the pg_amop entries for opno1. 
+ */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno1)); + + result = false; + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + + /* must be btree or hash */ + if (op_form->amopmethod == BTREE_AM_OID || + op_form->amopmethod == HASH_AM_OID) + { + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + +/* + * comparison_ops_are_compatible + * Return true if the two given comparison operators have compatible + * semantics. + * + * This is trivially true if they are the same operator. Otherwise, + * we look to see if they can be found in the same btree opfamily. + * For example, '<' and '>=' ops match if they belong to the same family. + * + * (This is identical to equality_ops_are_compatible(), except that we + * don't bother to examine hash opclasses.) + */ +bool +comparison_ops_are_compatible(Oid opno1, Oid opno2) +{ + bool result; + CatCList *catlist; + int i; + + /* Easy if they're the same operator */ + if (opno1 == opno2) + return true; + + /* + * We search through all the pg_amop entries for opno1. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno1)); + + result = false; + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple op_tuple = &catlist->members[i]->tuple; + Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); + + if (op_form->amopmethod == BTREE_AM_OID) + { + if (op_in_opfamily(opno2, op_form->amopfamily)) + { + result = true; + break; + } + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + + +/* ---------- AMPROC CACHES ---------- */ + +/* + * get_opfamily_proc + * Get the OID of the specified support function + * for the specified opfamily and datatypes. + * + * Returns InvalidOid if there is no pg_amproc entry for the given keys. 
+ */ +Oid +get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum) +{ + HeapTuple tp; + Form_pg_amproc amproc_tup; + RegProcedure result; + + tp = SearchSysCache4(AMPROCNUM, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(lefttype), + ObjectIdGetDatum(righttype), + Int16GetDatum(procnum)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amproc_tup = (Form_pg_amproc) GETSTRUCT(tp); + result = amproc_tup->amproc; + ReleaseSysCache(tp); + return result; +} + + +/* ---------- ATTRIBUTE CACHES ---------- */ + +/* + * get_attname + * Given the relation id and the attribute number, return the "attname" + * field from the attribute relation as a palloc'ed string. + * + * If no such attribute exists and missing_ok is true, NULL is returned; + * otherwise a not-intended-for-user-consumption error is thrown. + */ +char * +get_attname(Oid relid, AttrNumber attnum, bool missing_ok) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(att_tup->attname)); + ReleaseSysCache(tp); + return result; + } + + if (!missing_ok) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + return NULL; +} + +/* + * get_attnum + * + * Given the relation id and the attribute name, + * return the "attnum" field from the attribute relation. + * + * Returns InvalidAttrNumber if the attr doesn't exist (or is dropped). 
+ */ +AttrNumber +get_attnum(Oid relid, const char *attname) +{ + HeapTuple tp; + + tp = SearchSysCacheAttName(relid, attname); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + AttrNumber result; + + result = att_tup->attnum; + ReleaseSysCache(tp); + return result; + } + else + return InvalidAttrNumber; +} + +/* + * get_attstattarget + * + * Given the relation id and the attribute number, + * return the "attstattarget" field from the attribute relation. + * + * Errors if not found. + */ +int +get_attstattarget(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + Form_pg_attribute att_tup; + int result; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = att_tup->attstattarget; + ReleaseSysCache(tp); + return result; +} + +/* + * get_attgenerated + * + * Given the relation id and the attribute number, + * return the "attgenerated" field from the attribute relation. + * + * Errors if not found. + * + * Since not generated is represented by '\0', this can also be used as a + * Boolean test. + */ +char +get_attgenerated(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + Form_pg_attribute att_tup; + char result; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + att_tup = (Form_pg_attribute) GETSTRUCT(tp); + result = att_tup->attgenerated; + ReleaseSysCache(tp); + return result; +} + +/* + * get_atttype + * + * Given the relation OID and the attribute number with the relation, + * return the attribute type OID. 
+ */ +Oid +get_atttype(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + Oid result; + + result = att_tup->atttypid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_atttypetypmodcoll + * + * A three-fer: given the relation id and the attribute number, + * fetch atttypid, atttypmod, and attcollation in a single cache lookup. + * + * Unlike the otherwise-similar get_atttype, this routine + * raises an error if it can't obtain the information. + */ +void +get_atttypetypmodcoll(Oid relid, AttrNumber attnum, + Oid *typid, int32 *typmod, Oid *collid) +{ + HeapTuple tp; + Form_pg_attribute att_tup; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + att_tup = (Form_pg_attribute) GETSTRUCT(tp); + + *typid = att_tup->atttypid; + *typmod = att_tup->atttypmod; + *collid = att_tup->attcollation; + ReleaseSysCache(tp); +} + +/* + * get_attoptions + * + * Given the relation id and the attribute number, + * return the attribute options text[] datum, if any. 
+ */ +Datum +get_attoptions(Oid relid, int16 attnum) +{ + HeapTuple tuple; + Datum attopts; + Datum result; + bool isnull; + + tuple = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + + attopts = SysCacheGetAttr(ATTNAME, tuple, Anum_pg_attribute_attoptions, + &isnull); + + if (isnull) + result = (Datum) 0; + else + result = datumCopy(attopts, false, -1); /* text[] */ + + ReleaseSysCache(tuple); + + return result; +} + +/* ---------- PG_CAST CACHE ---------- */ + +/* + * get_cast_oid - given two type OIDs, look up a cast OID + * + * If missing_ok is false, throw an error if the cast is not found. If + * true, just return InvalidOid. + */ +Oid +get_cast_oid(Oid sourcetypeid, Oid targettypeid, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid2(CASTSOURCETARGET, Anum_pg_cast_oid, + ObjectIdGetDatum(sourcetypeid), + ObjectIdGetDatum(targettypeid)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("cast from type %s to type %s does not exist", + format_type_be(sourcetypeid), + format_type_be(targettypeid)))); + return oid; +} + +/* ---------- COLLATION CACHE ---------- */ + +/* + * get_collation_name + * Returns the name of a given pg_collation entry. + * + * Returns a palloc'd copy of the string, or NULL if no such collation. + * + * NOTE: since collation name is not unique, be wary of code that uses this + * for anything except preparing error messages. 
+ */ +char * +get_collation_name(Oid colloid) +{ + HeapTuple tp; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_collation colltup = (Form_pg_collation) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(colltup->collname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +bool +get_collation_isdeterministic(Oid colloid) +{ + HeapTuple tp; + Form_pg_collation colltup; + bool result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", colloid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + result = colltup->collisdeterministic; + ReleaseSysCache(tp); + return result; +} + +/* ---------- CONSTRAINT CACHE ---------- */ + +/* + * get_constraint_name + * Returns the name of a given pg_constraint entry. + * + * Returns a palloc'd copy of the string, or NULL if no such constraint. + * + * NOTE: since constraint name is not unique, be wary of code that uses this + * for anything except preparing error messages. + */ +char * +get_constraint_name(Oid conoid) +{ + HeapTuple tp; + + tp = SearchSysCache1(CONSTROID, ObjectIdGetDatum(conoid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(contup->conname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_constraint_index + * Given the OID of a unique, primary-key, or exclusion constraint, + * return the OID of the underlying index. + * + * Returns InvalidOid if the constraint could not be found or is of + * the wrong type. + * + * The intent of this function is to return the index "owned" by the + * specified constraint. Therefore we must check contype, since some + * pg_constraint entries (e.g. for foreign-key constraints) store the + * OID of an index that is referenced but not owned by the constraint. 
+ */ +Oid +get_constraint_index(Oid conoid) +{ + HeapTuple tp; + + tp = SearchSysCache1(CONSTROID, ObjectIdGetDatum(conoid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(tp); + Oid result; + + if (contup->contype == CONSTRAINT_UNIQUE || + contup->contype == CONSTRAINT_PRIMARY || + contup->contype == CONSTRAINT_EXCLUSION) + result = contup->conindid; + else + result = InvalidOid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* ---------- LANGUAGE CACHE ---------- */ + +char * +get_language_name(Oid langoid, bool missing_ok) +{ + HeapTuple tp; + + tp = SearchSysCache1(LANGOID, ObjectIdGetDatum(langoid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_language lantup = (Form_pg_language) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(lantup->lanname)); + ReleaseSysCache(tp); + return result; + } + + if (!missing_ok) + elog(ERROR, "cache lookup failed for language %u", + langoid); + return NULL; +} + +/* ---------- OPCLASS CACHE ---------- */ + +/* + * get_opclass_family + * + * Returns the OID of the operator family the opclass belongs to. + */ +Oid +get_opclass_family(Oid opclass) +{ + HeapTuple tp; + Form_pg_opclass cla_tup; + Oid result; + + tp = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + cla_tup = (Form_pg_opclass) GETSTRUCT(tp); + + result = cla_tup->opcfamily; + ReleaseSysCache(tp); + return result; +} + +/* + * get_opclass_input_type + * + * Returns the OID of the datatype the opclass indexes. 
+ */ +Oid +get_opclass_input_type(Oid opclass) +{ + HeapTuple tp; + Form_pg_opclass cla_tup; + Oid result; + + tp = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for opclass %u", opclass); + cla_tup = (Form_pg_opclass) GETSTRUCT(tp); + + result = cla_tup->opcintype; + ReleaseSysCache(tp); + return result; +} + +/* + * get_opclass_opfamily_and_input_type + * + * Returns the OID of the operator family the opclass belongs to, + * the OID of the datatype the opclass indexes + */ +bool +get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype) +{ + HeapTuple tp; + Form_pg_opclass cla_tup; + + tp = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(tp)) + return false; + + cla_tup = (Form_pg_opclass) GETSTRUCT(tp); + + *opfamily = cla_tup->opcfamily; + *opcintype = cla_tup->opcintype; + + ReleaseSysCache(tp); + + return true; +} + +/* ---------- OPERATOR CACHE ---------- */ + +/* + * get_opcode + * + * Returns the regproc id of the routine used to implement an + * operator given the operator oid. + */ +RegProcedure +get_opcode(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + RegProcedure result; + + result = optup->oprcode; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; +} + +/* + * get_opname + * returns the name of the operator with the given opno + * + * Note: returns a palloc'd copy of the string, or NULL if no such operator. 
+ */ +char * +get_opname(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(optup->oprname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_op_rettype + * Given operator oid, return the operator's result type. + */ +Oid +get_op_rettype(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + Oid result; + + result = optup->oprresult; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * op_input_types + * + * Returns the left and right input datatypes for an operator + * (InvalidOid if not relevant). + */ +void +op_input_types(Oid opno, Oid *lefttype, Oid *righttype) +{ + HeapTuple tp; + Form_pg_operator optup; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (!HeapTupleIsValid(tp)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for operator %u", opno); + optup = (Form_pg_operator) GETSTRUCT(tp); + *lefttype = optup->oprleft; + *righttype = optup->oprright; + ReleaseSysCache(tp); +} + +/* + * op_mergejoinable + * + * Returns true if the operator is potentially mergejoinable. (The planner + * will fail to find any mergejoin plans unless there are suitable btree + * opfamily entries for this operator and associated sortops. The pg_operator + * flag is just a hint to tell the planner whether to bother looking.) + * + * In some cases (currently only array_eq and record_eq), mergejoinability + * depends on the specific input data type the operator is invoked for, so + * that must be passed as well. We currently assume that only one input's type + * is needed to check this --- by convention, pass the left input's data type. 
+ */ +bool +op_mergejoinable(Oid opno, Oid inputtype) +{ + bool result = false; + HeapTuple tp; + TypeCacheEntry *typentry; + + /* + * For array_eq or record_eq, we can sort if the element or field types + * are all sortable. We could implement all the checks for that here, but + * the typcache already does that and caches the results too, so let's + * rely on the typcache. + */ + if (opno == ARRAY_EQ_OP) + { + typentry = lookup_type_cache(inputtype, TYPECACHE_CMP_PROC); + if (typentry->cmp_proc == F_BTARRAYCMP) + result = true; + } + else if (opno == RECORD_EQ_OP) + { + typentry = lookup_type_cache(inputtype, TYPECACHE_CMP_PROC); + if (typentry->cmp_proc == F_BTRECORDCMP) + result = true; + } + else + { + /* For all other operators, rely on pg_operator.oprcanmerge */ + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + + result = optup->oprcanmerge; + ReleaseSysCache(tp); + } + } + return result; +} + +/* + * op_hashjoinable + * + * Returns true if the operator is hashjoinable. (There must be a suitable + * hash opfamily entry for this operator if it is so marked.) + * + * In some cases (currently only array_eq), hashjoinability depends on the + * specific input data type the operator is invoked for, so that must be + * passed as well. We currently assume that only one input's type is needed + * to check this --- by convention, pass the left input's data type. 
+ */ +bool +op_hashjoinable(Oid opno, Oid inputtype) +{ + bool result = false; + HeapTuple tp; + TypeCacheEntry *typentry; + + /* As in op_mergejoinable, let the typcache handle the hard cases */ + if (opno == ARRAY_EQ_OP) + { + typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC); + if (typentry->hash_proc == F_HASH_ARRAY) + result = true; + } + else if (opno == RECORD_EQ_OP) + { + typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC); + if (typentry->hash_proc == F_HASH_RECORD) + result = true; + } + else + { + /* For all other operators, rely on pg_operator.oprcanhash */ + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + + result = optup->oprcanhash; + ReleaseSysCache(tp); + } + } + return result; +} + +/* + * op_strict + * + * Get the proisstrict flag for the operator's underlying function. + */ +bool +op_strict(Oid opno) +{ + RegProcedure funcid = get_opcode(opno); + + if (funcid == (RegProcedure) InvalidOid) + elog(ERROR, "operator %u does not exist", opno); + + return func_strict((Oid) funcid); +} + +/* + * op_volatile + * + * Get the provolatile flag for the operator's underlying function. + */ +char +op_volatile(Oid opno) +{ + RegProcedure funcid = get_opcode(opno); + + if (funcid == (RegProcedure) InvalidOid) + elog(ERROR, "operator %u does not exist", opno); + + return func_volatile((Oid) funcid); +} + +/* + * get_commutator + * + * Returns the corresponding commutator of an operator. + */ +Oid +get_commutator(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + Oid result; + + result = optup->oprcom; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_negator + * + * Returns the corresponding negator of an operator. 
+ */ +Oid +get_negator(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + Oid result; + + result = optup->oprnegate; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_oprrest + * + * Returns procedure id for computing selectivity of an operator. + */ +RegProcedure +get_oprrest(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + RegProcedure result; + + result = optup->oprrest; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; +} + +/* + * get_oprjoin + * + * Returns procedure id for computing selectivity of a join. + */ +RegProcedure +get_oprjoin(Oid opno) +{ + HeapTuple tp; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tp)) + { + Form_pg_operator optup = (Form_pg_operator) GETSTRUCT(tp); + RegProcedure result; + + result = optup->oprjoin; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; +} + +/* ---------- FUNCTION CACHE ---------- */ + +/* + * get_func_name + * returns the name of the function with the given funcid + * + * Note: returns a palloc'd copy of the string, or NULL if no such function. + */ +char * +get_func_name(Oid funcid) +{ + HeapTuple tp; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(functup->proname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_func_namespace + * + * Returns the pg_namespace OID associated with a given function. 
+ */ +Oid +get_func_namespace(Oid funcid) +{ + HeapTuple tp; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + Oid result; + + result = functup->pronamespace; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_func_rettype + * Given procedure id, return the function's result type. + */ +Oid +get_func_rettype(Oid funcid) +{ + HeapTuple tp; + Oid result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->prorettype; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_nargs + * Given procedure id, return the number of arguments. + */ +int +get_func_nargs(Oid funcid) +{ + HeapTuple tp; + int result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->pronargs; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_signature + * Given procedure id, return the function's argument and result types. + * (The return value is the result type.) + * + * The arguments are returned as a palloc'd array. 
+ */ +Oid +get_func_signature(Oid funcid, Oid **argtypes, int *nargs) +{ + HeapTuple tp; + Form_pg_proc procstruct; + Oid result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + procstruct = (Form_pg_proc) GETSTRUCT(tp); + + result = procstruct->prorettype; + *nargs = (int) procstruct->pronargs; + Assert(*nargs == procstruct->proargtypes.dim1); + *argtypes = (Oid *) palloc(*nargs * sizeof(Oid)); + memcpy(*argtypes, procstruct->proargtypes.values, *nargs * sizeof(Oid)); + + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_variadictype + * Given procedure id, return the function's provariadic field. + */ +Oid +get_func_variadictype(Oid funcid) +{ + HeapTuple tp; + Oid result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->provariadic; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_retset + * Given procedure id, return the function's proretset flag. + */ +bool +get_func_retset(Oid funcid) +{ + HeapTuple tp; + bool result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->proretset; + ReleaseSysCache(tp); + return result; +} + +/* + * func_strict + * Given procedure id, return the function's proisstrict flag. + */ +bool +func_strict(Oid funcid) +{ + HeapTuple tp; + bool result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->proisstrict; + ReleaseSysCache(tp); + return result; +} + +/* + * func_volatile + * Given procedure id, return the function's provolatile flag. 
+ */ +char +func_volatile(Oid funcid) +{ + HeapTuple tp; + char result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->provolatile; + ReleaseSysCache(tp); + return result; +} + +/* + * func_parallel + * Given procedure id, return the function's proparallel flag. + */ +char +func_parallel(Oid funcid) +{ + HeapTuple tp; + char result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->proparallel; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_prokind + * Given procedure id, return the routine kind. + */ +char +get_func_prokind(Oid funcid) +{ + HeapTuple tp; + char result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->prokind; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_leakproof + * Given procedure id, return the function's leakproof field. + */ +bool +get_func_leakproof(Oid funcid) +{ + HeapTuple tp; + bool result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->proleakproof; + ReleaseSysCache(tp); + return result; +} + +/* + * get_func_support + * + * Returns the support function OID associated with a given function, + * or InvalidOid if there is none. 
+ */ +RegProcedure +get_func_support(Oid funcid) +{ + HeapTuple tp; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + RegProcedure result; + + result = functup->prosupport; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; +} + +/* ---------- RELATION CACHE ---------- */ + +/* + * get_relname_relid + * Given name and namespace of a relation, look up the OID. + * + * Returns InvalidOid if there is no such relation. + */ +Oid +get_relname_relid(const char *relname, Oid relnamespace) +{ + return GetSysCacheOid2(RELNAMENSP, Anum_pg_class_oid, + PointerGetDatum(relname), + ObjectIdGetDatum(relnamespace)); +} + +#ifdef NOT_USED +/* + * get_relnatts + * + * Returns the number of attributes for a given relation. + */ +int +get_relnatts(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + int result; + + result = reltup->relnatts; + ReleaseSysCache(tp); + return result; + } + else + return InvalidAttrNumber; +} +#endif + +/* + * get_rel_name + * Returns the name of a given relation. + * + * Returns a palloc'd copy of the string, or NULL if no such relation. + * + * NOTE: since relation name is not unique, be wary of code that uses this + * for anything except preparing error messages. + */ +char * +get_rel_name(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(reltup->relname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_rel_namespace + * + * Returns the pg_namespace OID associated with a given relation. 
+ */ +Oid +get_rel_namespace(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + Oid result; + + result = reltup->relnamespace; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_rel_type_id + * + * Returns the pg_type OID associated with a given relation. + * + * Note: not all pg_class entries have associated pg_type OIDs; so be + * careful to check for InvalidOid result. + */ +Oid +get_rel_type_id(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + Oid result; + + result = reltup->reltype; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_rel_relkind + * + * Returns the relkind associated with a given relation. + */ +char +get_rel_relkind(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + char result; + + result = reltup->relkind; + ReleaseSysCache(tp); + return result; + } + else + return '\0'; +} + +/* + * get_rel_relispartition + * + * Returns the relispartition flag associated with a given relation. + */ +bool +get_rel_relispartition(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + bool result; + + result = reltup->relispartition; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + +/* + * get_rel_tablespace + * + * Returns the pg_tablespace OID associated with a given relation. + * + * Note: InvalidOid might mean either that we couldn't find the relation, + * or that it is in the database's default tablespace. 
+ */ +Oid +get_rel_tablespace(Oid relid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_class reltup = (Form_pg_class) GETSTRUCT(tp); + Oid result; + + result = reltup->reltablespace; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_rel_persistence + * + * Returns the relpersistence associated with a given relation. + */ +char +get_rel_persistence(Oid relid) +{ + HeapTuple tp; + Form_pg_class reltup; + char result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); + result = reltup->relpersistence; + ReleaseSysCache(tp); + + return result; +} + + +/* ---------- TRANSFORM CACHE ---------- */ + +Oid +get_transform_fromsql(Oid typid, Oid langid, List *trftypes) +{ + HeapTuple tup; + + if (!list_member_oid(trftypes, typid)) + return InvalidOid; + + tup = SearchSysCache2(TRFTYPELANG, typid, langid); + if (HeapTupleIsValid(tup)) + { + Oid funcid; + + funcid = ((Form_pg_transform) GETSTRUCT(tup))->trffromsql; + ReleaseSysCache(tup); + return funcid; + } + else + return InvalidOid; +} + +Oid +get_transform_tosql(Oid typid, Oid langid, List *trftypes) +{ + HeapTuple tup; + + if (!list_member_oid(trftypes, typid)) + return InvalidOid; + + tup = SearchSysCache2(TRFTYPELANG, typid, langid); + if (HeapTupleIsValid(tup)) + { + Oid funcid; + + funcid = ((Form_pg_transform) GETSTRUCT(tup))->trftosql; + ReleaseSysCache(tup); + return funcid; + } + else + return InvalidOid; +} + + +/* ---------- TYPE CACHE ---------- */ + +/* + * get_typisdefined + * + * Given the type OID, determine whether the type is defined + * (if not, it's only a shell). 
+ */ +bool +get_typisdefined(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + bool result; + + result = typtup->typisdefined; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + +/* + * get_typlen + * + * Given the type OID, return the length of the type. + */ +int16 +get_typlen(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + int16 result; + + result = typtup->typlen; + ReleaseSysCache(tp); + return result; + } + else + return 0; +} + +/* + * get_typbyval + * + * Given the type OID, determine whether the type is returned by value or + * not. Returns true if by value, false if by reference. + */ +bool +get_typbyval(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + bool result; + + result = typtup->typbyval; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + +/* + * get_typlenbyval + * + * A two-fer: given the type OID, return both typlen and typbyval. + * + * Since both pieces of info are needed to know how to copy a Datum, + * many places need both. Might as well get them with one cache lookup + * instead of two. Also, this routine raises an error instead of + * returning a bogus value when given a bad type OID. 
+ */ +void +get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval) +{ + HeapTuple tp; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", typid); + typtup = (Form_pg_type) GETSTRUCT(tp); + *typlen = typtup->typlen; + *typbyval = typtup->typbyval; + ReleaseSysCache(tp); +} + +/* + * get_typlenbyvalalign + * + * A three-fer: given the type OID, return typlen, typbyval, typalign. + */ +void +get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, + char *typalign) +{ + HeapTuple tp; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", typid); + typtup = (Form_pg_type) GETSTRUCT(tp); + *typlen = typtup->typlen; + *typbyval = typtup->typbyval; + *typalign = typtup->typalign; + ReleaseSysCache(tp); +} + +/* + * getTypeIOParam + * Given a pg_type row, select the type OID to pass to I/O functions + * + * Formerly, all I/O functions were passed pg_type.typelem as their second + * parameter, but we now have a more complex rule about what to pass. + * This knowledge is intended to be centralized here --- direct references + * to typelem elsewhere in the code are wrong, if they are associated with + * I/O calls and not with actual subscripting operations! (But see + * bootstrap.c's boot_get_type_io_data() if you need to change this.) + * + * As of PostgreSQL 8.1, output functions receive only the value itself + * and not any auxiliary parameters, so the name of this routine is now + * a bit of a misnomer ... it should be getTypeInputParam. + */ +Oid +getTypeIOParam(HeapTuple typeTuple) +{ + Form_pg_type typeStruct = (Form_pg_type) GETSTRUCT(typeTuple); + + /* + * Array types get their typelem as parameter; everybody else gets their + * own type OID as parameter. 
+ */ + if (OidIsValid(typeStruct->typelem)) + return typeStruct->typelem; + else + return typeStruct->oid; +} + +/* + * get_type_io_data + * + * A six-fer: given the type OID, return typlen, typbyval, typalign, + * typdelim, typioparam, and IO function OID. The IO function + * returned is controlled by IOFuncSelector + */ +void +get_type_io_data(Oid typid, + IOFuncSelector which_func, + int16 *typlen, + bool *typbyval, + char *typalign, + char *typdelim, + Oid *typioparam, + Oid *func) +{ + HeapTuple typeTuple; + Form_pg_type typeStruct; + + /* + * In bootstrap mode, pass it off to bootstrap.c. This hack allows us to + * use array_in and array_out during bootstrap. + */ + if (IsBootstrapProcessingMode()) + { + Oid typinput; + Oid typoutput; + + boot_get_type_io_data(typid, + typlen, + typbyval, + typalign, + typdelim, + typioparam, + &typinput, + &typoutput); + switch (which_func) + { + case IOFunc_input: + *func = typinput; + break; + case IOFunc_output: + *func = typoutput; + break; + default: + elog(ERROR, "binary I/O not supported during bootstrap"); + break; + } + return; + } + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + typeStruct = (Form_pg_type) GETSTRUCT(typeTuple); + + *typlen = typeStruct->typlen; + *typbyval = typeStruct->typbyval; + *typalign = typeStruct->typalign; + *typdelim = typeStruct->typdelim; + *typioparam = getTypeIOParam(typeTuple); + switch (which_func) + { + case IOFunc_input: + *func = typeStruct->typinput; + break; + case IOFunc_output: + *func = typeStruct->typoutput; + break; + case IOFunc_receive: + *func = typeStruct->typreceive; + break; + case IOFunc_send: + *func = typeStruct->typsend; + break; + } + ReleaseSysCache(typeTuple); +} + +#ifdef NOT_USED +char +get_typalign(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = 
(Form_pg_type) GETSTRUCT(tp); + char result; + + result = typtup->typalign; + ReleaseSysCache(tp); + return result; + } + else + return TYPALIGN_INT; +} +#endif + +char +get_typstorage(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + char result; + + result = typtup->typstorage; + ReleaseSysCache(tp); + return result; + } + else + return TYPSTORAGE_PLAIN; +} + +/* + * get_typdefault + * Given a type OID, return the type's default value, if any. + * + * The result is a palloc'd expression node tree, or NULL if there + * is no defined default for the datatype. + * + * NB: caller should be prepared to coerce result to correct datatype; + * the returned expression tree might produce something of the wrong type. + */ +Node * +get_typdefault(Oid typid) +{ + HeapTuple typeTuple; + Form_pg_type type; + Datum datum; + bool isNull; + Node *expr; + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + type = (Form_pg_type) GETSTRUCT(typeTuple); + + /* + * typdefault and typdefaultbin are potentially null, so don't try to + * access 'em as struct fields. Must do it the hard way with + * SysCacheGetAttr. 
+ */ + datum = SysCacheGetAttr(TYPEOID, + typeTuple, + Anum_pg_type_typdefaultbin, + &isNull); + + if (!isNull) + { + /* We have an expression default */ + expr = stringToNode(TextDatumGetCString(datum)); + } + else + { + /* Perhaps we have a plain literal default */ + datum = SysCacheGetAttr(TYPEOID, + typeTuple, + Anum_pg_type_typdefault, + &isNull); + + if (!isNull) + { + char *strDefaultVal; + + /* Convert text datum to C string */ + strDefaultVal = TextDatumGetCString(datum); + /* Convert C string to a value of the given type */ + datum = OidInputFunctionCall(type->typinput, strDefaultVal, + getTypeIOParam(typeTuple), -1); + /* Build a Const node containing the value */ + expr = (Node *) makeConst(typid, + -1, + type->typcollation, + type->typlen, + datum, + false, + type->typbyval); + pfree(strDefaultVal); + } + else + { + /* No default */ + expr = NULL; + } + } + + ReleaseSysCache(typeTuple); + + return expr; +} + +/* + * getBaseType + * If the given type is a domain, return its base type; + * otherwise return the type's own OID. + */ +Oid +getBaseType(Oid typid) +{ + int32 typmod = -1; + + return getBaseTypeAndTypmod(typid, &typmod); +} + +/* + * getBaseTypeAndTypmod + * If the given type is a domain, return its base type and typmod; + * otherwise return the type's own OID, and leave *typmod unchanged. + * + * Note that the "applied typmod" should be -1 for every domain level + * above the bottommost; therefore, if the passed-in typid is indeed + * a domain, *typmod should be -1. + */ +Oid +getBaseTypeAndTypmod(Oid typid, int32 *typmod) +{ + /* + * We loop to find the bottom base type in a stack of domains. 
+ */ + for (;;) + { + HeapTuple tup; + Form_pg_type typTup; + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typid); + typTup = (Form_pg_type) GETSTRUCT(tup); + if (typTup->typtype != TYPTYPE_DOMAIN) + { + /* Not a domain, so done */ + ReleaseSysCache(tup); + break; + } + + Assert(*typmod == -1); + typid = typTup->typbasetype; + *typmod = typTup->typtypmod; + + ReleaseSysCache(tup); + } + + return typid; +} + +/* + * get_typavgwidth + * + * Given a type OID and a typmod value (pass -1 if typmod is unknown), + * estimate the average width of values of the type. This is used by + * the planner, which doesn't require absolutely correct results; + * it's OK (and expected) to guess if we don't know for sure. + */ +int32 +get_typavgwidth(Oid typid, int32 typmod) +{ + int typlen = get_typlen(typid); + int32 maxwidth; + + /* + * Easy if it's a fixed-width type + */ + if (typlen > 0) + return typlen; + + /* + * type_maximum_size knows the encoding of typmod for some datatypes; + * don't duplicate that knowledge here. + */ + maxwidth = type_maximum_size(typid, typmod); + if (maxwidth > 0) + { + /* + * For BPCHAR, the max width is also the only width. Otherwise we + * need to guess about the typical data width given the max. A sliding + * scale for percentage of max width seems reasonable. + */ + if (typid == BPCHAROID) + return maxwidth; + if (maxwidth <= 32) + return maxwidth; /* assume full width */ + if (maxwidth < 1000) + return 32 + (maxwidth - 32) / 2; /* assume 50% */ + + /* + * Beyond 1000, assume we're looking at something like + * "varchar(10000)" where the limit isn't actually reached often, and + * use a fixed estimate. + */ + return 32 + (1000 - 32) / 2; + } + + /* + * Oops, we have no idea ... wild guess time. + */ + return 32; +} + +/* + * get_typtype + * + * Given the type OID, find if it is a basic type, a complex type, etc. 
+ * It returns the null char if the cache lookup fails... + */ +char +get_typtype(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + char result; + + result = typtup->typtype; + ReleaseSysCache(tp); + return result; + } + else + return '\0'; +} + +/* + * type_is_rowtype + * + * Convenience function to determine whether a type OID represents + * a "rowtype" type --- either RECORD or a named composite type + * (including a domain over a named composite type). + */ +bool +type_is_rowtype(Oid typid) +{ + if (typid == RECORDOID) + return true; /* easy case */ + switch (get_typtype(typid)) + { + case TYPTYPE_COMPOSITE: + return true; + case TYPTYPE_DOMAIN: + if (get_typtype(getBaseType(typid)) == TYPTYPE_COMPOSITE) + return true; + break; + default: + break; + } + return false; +} + +/* + * type_is_enum + * Returns true if the given type is an enum type. + */ +bool +type_is_enum(Oid typid) +{ + return (get_typtype(typid) == TYPTYPE_ENUM); +} + +/* + * type_is_range + * Returns true if the given type is a range type. + */ +bool +type_is_range(Oid typid) +{ + return (get_typtype(typid) == TYPTYPE_RANGE); +} + +/* + * type_is_multirange + * Returns true if the given type is a multirange type. + */ +bool +type_is_multirange(Oid typid) +{ + return (get_typtype(typid) == TYPTYPE_MULTIRANGE); +} + +/* + * get_type_category_preferred + * + * Given the type OID, fetch its category and preferred-type status. + * Throws error on failure. 
+ */ +void +get_type_category_preferred(Oid typid, char *typcategory, bool *typispreferred) +{ + HeapTuple tp; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", typid); + typtup = (Form_pg_type) GETSTRUCT(tp); + *typcategory = typtup->typcategory; + *typispreferred = typtup->typispreferred; + ReleaseSysCache(tp); +} + +/* + * get_typ_typrelid + * + * Given the type OID, get the typrelid (InvalidOid if not a complex + * type). + */ +Oid +get_typ_typrelid(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + result = typtup->typrelid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_element_type + * + * Given the type OID, get the typelem (InvalidOid if not an array type). + * + * NB: this only succeeds for "true" arrays having array_subscript_handler + * as typsubscript. For other types, InvalidOid is returned independently + * of whether they have typelem or typsubscript set. + */ +Oid +get_element_type(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + if (IsTrueArrayType(typtup)) + result = typtup->typelem; + else + result = InvalidOid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_array_type + * + * Given the type OID, get the corresponding "true" array type. + * Returns InvalidOid if no array type can be found. 
+ */ +Oid +get_array_type(Oid typid) +{ + HeapTuple tp; + Oid result = InvalidOid; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + result = ((Form_pg_type) GETSTRUCT(tp))->typarray; + ReleaseSysCache(tp); + } + return result; +} + +/* + * get_promoted_array_type + * + * The "promoted" type is what you'd get from an ARRAY(SELECT ...) + * construct, that is, either the corresponding "true" array type + * if the input is a scalar type that has such an array type, + * or the same type if the input is already a "true" array type. + * Returns InvalidOid if neither rule is satisfied. + */ +Oid +get_promoted_array_type(Oid typid) +{ + Oid array_type = get_array_type(typid); + + if (OidIsValid(array_type)) + return array_type; + if (OidIsValid(get_element_type(typid))) + return typid; + return InvalidOid; +} + +/* + * get_base_element_type + * Given the type OID, get the typelem, looking "through" any domain + * to its underlying array type. + * + * This is equivalent to get_element_type(getBaseType(typid)), but avoids + * an extra cache lookup. Note that it fails to provide any information + * about the typmod of the array. + */ +Oid +get_base_element_type(Oid typid) +{ + /* + * We loop to find the bottom base type in a stack of domains. 
+ */ + for (;;) + { + HeapTuple tup; + Form_pg_type typTup; + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (!HeapTupleIsValid(tup)) + break; + typTup = (Form_pg_type) GETSTRUCT(tup); + if (typTup->typtype != TYPTYPE_DOMAIN) + { + /* Not a domain, so stop descending */ + Oid result; + + /* This test must match get_element_type */ + if (IsTrueArrayType(typTup)) + result = typTup->typelem; + else + result = InvalidOid; + ReleaseSysCache(tup); + return result; + } + + typid = typTup->typbasetype; + ReleaseSysCache(tup); + } + + /* Like get_element_type, silently return InvalidOid for bogus input */ + return InvalidOid; +} + +/* + * getTypeInputInfo + * + * Get info needed for converting values of a type to internal form + */ +void +getTypeInputInfo(Oid type, Oid *typInput, Oid *typIOParam) +{ + HeapTuple typeTuple; + Form_pg_type pt; + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", type); + pt = (Form_pg_type) GETSTRUCT(typeTuple); + + if (!pt->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type %s is only a shell", + format_type_be(type)))); + if (!OidIsValid(pt->typinput)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no input function available for type %s", + format_type_be(type)))); + + *typInput = pt->typinput; + *typIOParam = getTypeIOParam(typeTuple); + + ReleaseSysCache(typeTuple); +} + +/* + * getTypeOutputInfo + * + * Get info needed for printing values of a type + */ +void +getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena) +{ + HeapTuple typeTuple; + Form_pg_type pt; + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", type); + pt = (Form_pg_type) GETSTRUCT(typeTuple); + + if (!pt->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type %s is only a 
shell", + format_type_be(type)))); + if (!OidIsValid(pt->typoutput)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no output function available for type %s", + format_type_be(type)))); + + *typOutput = pt->typoutput; + *typIsVarlena = (!pt->typbyval) && (pt->typlen == -1); + + ReleaseSysCache(typeTuple); +} + +/* + * getTypeBinaryInputInfo + * + * Get info needed for binary input of values of a type + */ +void +getTypeBinaryInputInfo(Oid type, Oid *typReceive, Oid *typIOParam) +{ + HeapTuple typeTuple; + Form_pg_type pt; + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", type); + pt = (Form_pg_type) GETSTRUCT(typeTuple); + + if (!pt->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type %s is only a shell", + format_type_be(type)))); + if (!OidIsValid(pt->typreceive)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary input function available for type %s", + format_type_be(type)))); + + *typReceive = pt->typreceive; + *typIOParam = getTypeIOParam(typeTuple); + + ReleaseSysCache(typeTuple); +} + +/* + * getTypeBinaryOutputInfo + * + * Get info needed for binary output of values of a type + */ +void +getTypeBinaryOutputInfo(Oid type, Oid *typSend, bool *typIsVarlena) +{ + HeapTuple typeTuple; + Form_pg_type pt; + + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", type); + pt = (Form_pg_type) GETSTRUCT(typeTuple); + + if (!pt->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type %s is only a shell", + format_type_be(type)))); + if (!OidIsValid(pt->typsend)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("no binary output function available for type %s", + format_type_be(type)))); + + *typSend = pt->typsend; + *typIsVarlena = (!pt->typbyval) && 
(pt->typlen == -1); + + ReleaseSysCache(typeTuple); +} + +/* + * get_typmodin + * + * Given the type OID, return the type's typmodin procedure, if any. + */ +Oid +get_typmodin(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + result = typtup->typmodin; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +#ifdef NOT_USED +/* + * get_typmodout + * + * Given the type OID, return the type's typmodout procedure, if any. + */ +Oid +get_typmodout(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + result = typtup->typmodout; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} +#endif /* NOT_USED */ + +/* + * get_typcollation + * + * Given the type OID, return the type's typcollation attribute. + */ +Oid +get_typcollation(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + result = typtup->typcollation; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + + +/* + * type_is_collatable + * + * Return whether the type cares about collations + */ +bool +type_is_collatable(Oid typid) +{ + return OidIsValid(get_typcollation(typid)); +} + + +/* + * get_typsubscript + * + * Given the type OID, return the type's subscripting handler's OID, + * if it has one. + * + * If typelemp isn't NULL, we also store the type's typelem value there. + * This saves some callers an extra catalog lookup. 
+ */ +RegProcedure +get_typsubscript(Oid typid, Oid *typelemp) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typform = (Form_pg_type) GETSTRUCT(tp); + RegProcedure handler = typform->typsubscript; + + if (typelemp) + *typelemp = typform->typelem; + ReleaseSysCache(tp); + return handler; + } + else + { + if (typelemp) + *typelemp = InvalidOid; + return InvalidOid; + } +} + +/* + * getSubscriptingRoutines + * + * Given the type OID, fetch the type's subscripting methods struct. + * Return NULL if type is not subscriptable. + * + * If typelemp isn't NULL, we also store the type's typelem value there. + * This saves some callers an extra catalog lookup. + */ +const struct SubscriptRoutines * +getSubscriptingRoutines(Oid typid, Oid *typelemp) +{ + RegProcedure typsubscript = get_typsubscript(typid, typelemp); + + if (!OidIsValid(typsubscript)) + return NULL; + + return (const struct SubscriptRoutines *) + DatumGetPointer(OidFunctionCall0(typsubscript)); +} + + +/* ---------- STATISTICS CACHE ---------- */ + +/* + * get_attavgwidth + * + * Given the table and attribute number of a column, get the average + * width of entries in the column. Return zero if no data available. + * + * Currently this is only consulted for individual tables, not for inheritance + * trees, so we don't need an "inh" parameter. + * + * Calling a hook at this point looks somewhat strange, but is required + * because the optimizer calls this function without any other way for + * plug-ins to control the result. 
+ */ +int32 +get_attavgwidth(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + int32 stawidth; + + if (get_attavgwidth_hook) + { + stawidth = (*get_attavgwidth_hook) (relid, attnum); + if (stawidth > 0) + return stawidth; + } + tp = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum), + BoolGetDatum(false)); + if (HeapTupleIsValid(tp)) + { + stawidth = ((Form_pg_statistic) GETSTRUCT(tp))->stawidth; + ReleaseSysCache(tp); + if (stawidth > 0) + return stawidth; + } + return 0; +} + +/* + * get_attstatsslot + * + * Extract the contents of a "slot" of a pg_statistic tuple. + * Returns true if requested slot type was found, else false. + * + * Unlike other routines in this file, this takes a pointer to an + * already-looked-up tuple in the pg_statistic cache. We do this since + * most callers will want to extract more than one value from the cache + * entry, and we don't want to repeat the cache lookup unnecessarily. + * Also, this API allows this routine to be used with statistics tuples + * that have been provided by a stats hook and didn't really come from + * pg_statistic. + * + * sslot: pointer to output area (typically, a local variable in the caller). + * statstuple: pg_statistic tuple to be examined. + * reqkind: STAKIND code for desired statistics slot kind. + * reqop: STAOP value wanted, or InvalidOid if don't care. + * flags: bitmask of ATTSTATSSLOT_VALUES and/or ATTSTATSSLOT_NUMBERS. + * + * If a matching slot is found, true is returned, and *sslot is filled thus: + * staop: receives the actual STAOP value. + * stacoll: receives the actual STACOLL value. + * valuetype: receives actual datatype of the elements of stavalues. + * values: receives pointer to an array of the slot's stavalues. + * nvalues: receives number of stavalues. + * numbers: receives pointer to an array of the slot's stanumbers (as float4). + * nnumbers: receives number of stanumbers. 
+ * + * valuetype/values/nvalues are InvalidOid/NULL/0 if ATTSTATSSLOT_VALUES + * wasn't specified. Likewise, numbers/nnumbers are NULL/0 if + * ATTSTATSSLOT_NUMBERS wasn't specified. + * + * If no matching slot is found, false is returned, and *sslot is zeroed. + * + * Note that the current API doesn't allow for searching for a slot with + * a particular collation. If we ever actually support recording more than + * one collation, we'll have to extend the API, but for now simple is good. + * + * The data referred to by the fields of sslot is locally palloc'd and + * is independent of the original pg_statistic tuple. When the caller + * is done with it, call free_attstatsslot to release the palloc'd data. + * + * If it's desirable to call free_attstatsslot when get_attstatsslot might + * not have been called, memset'ing sslot to zeroes will allow that. + */ +bool +get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, + int reqkind, Oid reqop, int flags) +{ + Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple); + int i; + Datum val; + bool isnull; + ArrayType *statarray; + Oid arrayelemtype; + int narrayelem; + HeapTuple typeTuple; + Form_pg_type typeForm; + + /* initialize *sslot properly */ + memset(sslot, 0, sizeof(AttStatsSlot)); + + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + if ((&stats->stakind1)[i] == reqkind && + (reqop == InvalidOid || (&stats->staop1)[i] == reqop)) + break; + } + if (i >= STATISTIC_NUM_SLOTS) + return false; /* not there */ + + sslot->staop = (&stats->staop1)[i]; + sslot->stacoll = (&stats->stacoll1)[i]; + + if (flags & ATTSTATSSLOT_VALUES) + { + val = SysCacheGetAttr(STATRELATTINH, statstuple, + Anum_pg_statistic_stavalues1 + i, + &isnull); + if (isnull) + elog(ERROR, "stavalues is null"); + + /* + * Detoast the array if needed, and in any case make a copy that's + * under control of this AttStatsSlot. 
+ */ + statarray = DatumGetArrayTypePCopy(val); + + /* + * Extract the actual array element type, and pass it back in case the + * caller needs it. + */ + sslot->valuetype = arrayelemtype = ARR_ELEMTYPE(statarray); + + /* Need info about element type */ + typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(arrayelemtype)); + if (!HeapTupleIsValid(typeTuple)) + elog(ERROR, "cache lookup failed for type %u", arrayelemtype); + typeForm = (Form_pg_type) GETSTRUCT(typeTuple); + + /* Deconstruct array into Datum elements; NULLs not expected */ + deconstruct_array(statarray, + arrayelemtype, + typeForm->typlen, + typeForm->typbyval, + typeForm->typalign, + &sslot->values, NULL, &sslot->nvalues); + + /* + * If the element type is pass-by-reference, we now have a bunch of + * Datums that are pointers into the statarray, so we need to keep + * that until free_attstatsslot. Otherwise, all the useful info is in + * sslot->values[], so we can free the array object immediately. + */ + if (!typeForm->typbyval) + sslot->values_arr = statarray; + else + pfree(statarray); + + ReleaseSysCache(typeTuple); + } + + if (flags & ATTSTATSSLOT_NUMBERS) + { + val = SysCacheGetAttr(STATRELATTINH, statstuple, + Anum_pg_statistic_stanumbers1 + i, + &isnull); + if (isnull) + elog(ERROR, "stanumbers is null"); + + /* + * Detoast the array if needed, and in any case make a copy that's + * under control of this AttStatsSlot. + */ + statarray = DatumGetArrayTypePCopy(val); + + /* + * We expect the array to be a 1-D float4 array; verify that. We don't + * need to use deconstruct_array() since the array data is just going + * to look like a C array of float4 values. 
+ */ + narrayelem = ARR_DIMS(statarray)[0]; + if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 || + ARR_HASNULL(statarray) || + ARR_ELEMTYPE(statarray) != FLOAT4OID) + elog(ERROR, "stanumbers is not a 1-D float4 array"); + + /* Give caller a pointer directly into the statarray */ + sslot->numbers = (float4 *) ARR_DATA_PTR(statarray); + sslot->nnumbers = narrayelem; + + /* We'll free the statarray in free_attstatsslot */ + sslot->numbers_arr = statarray; + } + + return true; +} + +/* + * free_attstatsslot + * Free data allocated by get_attstatsslot + */ +void +free_attstatsslot(AttStatsSlot *sslot) +{ + /* The values[] array was separately palloc'd by deconstruct_array */ + if (sslot->values) + pfree(sslot->values); + /* The numbers[] array points into numbers_arr, do not pfree it */ + /* Free the detoasted array objects, if any */ + if (sslot->values_arr) + pfree(sslot->values_arr); + if (sslot->numbers_arr) + pfree(sslot->numbers_arr); +} + +/* ---------- PG_NAMESPACE CACHE ---------- */ + +/* + * get_namespace_name + * Returns the name of a given namespace + * + * Returns a palloc'd copy of the string, or NULL if no such namespace. + */ +char * +get_namespace_name(Oid nspid) +{ + HeapTuple tp; + + tp = SearchSysCache1(NAMESPACEOID, ObjectIdGetDatum(nspid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_namespace nsptup = (Form_pg_namespace) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(nsptup->nspname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_namespace_name_or_temp + * As above, but if it is this backend's temporary namespace, return + * "pg_temp" instead. + */ +char * +get_namespace_name_or_temp(Oid nspid) +{ + if (isTempNamespace(nspid)) + return "pg_temp"; + else + return get_namespace_name(nspid); +} + +/* ---------- PG_RANGE CACHES ---------- */ + +/* + * get_range_subtype + * Returns the subtype of a given range type + * + * Returns InvalidOid if the type is not a range type. 
+ */ +Oid +get_range_subtype(Oid rangeOid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RANGETYPE, ObjectIdGetDatum(rangeOid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_range rngtup = (Form_pg_range) GETSTRUCT(tp); + Oid result; + + result = rngtup->rngsubtype; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_range_collation + * Returns the collation of a given range type + * + * Returns InvalidOid if the type is not a range type, + * or if its subtype is not collatable. + */ +Oid +get_range_collation(Oid rangeOid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RANGETYPE, ObjectIdGetDatum(rangeOid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_range rngtup = (Form_pg_range) GETSTRUCT(tp); + Oid result; + + result = rngtup->rngcollation; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_range_multirange + * Returns the multirange type of a given range type + * + * Returns InvalidOid if the type is not a range type. + */ +Oid +get_range_multirange(Oid rangeOid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RANGETYPE, ObjectIdGetDatum(rangeOid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_range rngtup = (Form_pg_range) GETSTRUCT(tp); + Oid result; + + result = rngtup->rngmultitypid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* + * get_multirange_range + * Returns the range type of a given multirange + * + * Returns InvalidOid if the type is not a multirange. 
+ */ +Oid +get_multirange_range(Oid multirangeOid) +{ + HeapTuple tp; + + tp = SearchSysCache1(RANGEMULTIRANGE, ObjectIdGetDatum(multirangeOid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_range rngtup = (Form_pg_range) GETSTRUCT(tp); + Oid result; + + result = rngtup->rngtypid; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* ---------- PG_INDEX CACHE ---------- */ + +/* + * get_index_column_opclass + * + * Given the index OID and column number, + * return opclass of the index column + * or InvalidOid if the index was not found + * or column is non-key one. + */ +Oid +get_index_column_opclass(Oid index_oid, int attno) +{ + HeapTuple tuple; + Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY; + Datum datum; + bool isnull; + oidvector *indclass; + Oid opclass; + + /* First we need to know the column's opclass. */ + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + return InvalidOid; + + rd_index = (Form_pg_index) GETSTRUCT(tuple); + + /* caller is supposed to guarantee this */ + Assert(attno > 0 && attno <= rd_index->indnatts); + + /* Non-key attributes don't have an opclass */ + if (attno > rd_index->indnkeyatts) + { + ReleaseSysCache(tuple); + return InvalidOid; + } + + datum = SysCacheGetAttr(INDEXRELID, tuple, + Anum_pg_index_indclass, &isnull); + Assert(!isnull); + + indclass = ((oidvector *) DatumGetPointer(datum)); + + Assert(attno <= indclass->dim1); + opclass = indclass->values[attno - 1]; + + ReleaseSysCache(tuple); + + return opclass; +} + +/* + * get_index_isreplident + * + * Given the index OID, return pg_index.indisreplident. 
+ */ +bool +get_index_isreplident(Oid index_oid) +{ + HeapTuple tuple; + Form_pg_index rd_index; + bool result; + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + return false; + + rd_index = (Form_pg_index) GETSTRUCT(tuple); + result = rd_index->indisreplident; + ReleaseSysCache(tuple); + + return result; +} + +/* + * get_index_isvalid + * + * Given the index OID, return pg_index.indisvalid. + */ +bool +get_index_isvalid(Oid index_oid) +{ + bool isvalid; + HeapTuple tuple; + Form_pg_index rd_index; + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for index %u", index_oid); + + rd_index = (Form_pg_index) GETSTRUCT(tuple); + isvalid = rd_index->indisvalid; + ReleaseSysCache(tuple); + + return isvalid; +} + +/* + * get_index_isclustered + * + * Given the index OID, return pg_index.indisclustered. + */ +bool +get_index_isclustered(Oid index_oid) +{ + bool isclustered; + HeapTuple tuple; + Form_pg_index rd_index; + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for index %u", index_oid); + + rd_index = (Form_pg_index) GETSTRUCT(tuple); + isclustered = rd_index->indisclustered; + ReleaseSysCache(tuple); + + return isclustered; +} diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c new file mode 100644 index 0000000..21e60f0 --- /dev/null +++ b/src/backend/utils/cache/partcache.c @@ -0,0 +1,430 @@ +/*------------------------------------------------------------------------- + * + * partcache.c + * Support routines for manipulating partition information cached in + * relcache + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/partcache.c + * + 
*------------------------------------------------------------------------- +*/ +#include "postgres.h" + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/relation.h" +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "partitioning/partbounds.h" +#include "rewrite/rewriteHandler.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +static void RelationBuildPartitionKey(Relation relation); +static List *generate_partition_qual(Relation rel); + +/* + * RelationGetPartitionKey -- get partition key, if relation is partitioned + * + * Note: partition keys are not allowed to change after the partitioned rel + * is created. RelationClearRelation knows this and preserves rd_partkey + * across relcache rebuilds, as long as the relation is open. Therefore, + * even though we hand back a direct pointer into the relcache entry, it's + * safe for callers to continue to use that pointer as long as they hold + * the relation open. + */ +PartitionKey +RelationGetPartitionKey(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + return NULL; + + if (unlikely(rel->rd_partkey == NULL)) + RelationBuildPartitionKey(rel); + + return rel->rd_partkey; +} + +/* + * RelationBuildPartitionKey + * Build partition key data of relation, and attach to relcache + * + * Partitioning key data is a complex structure; to avoid complicated logic to + * free individual elements whenever the relcache entry is flushed, we give it + * its own memory context, a child of CacheMemoryContext, which can easily be + * deleted on its own. 
To avoid leaking memory in that context in case of an + * error partway through this function, the context is initially created as a + * child of CurTransactionContext and only re-parented to CacheMemoryContext + * at the end, when no further errors are possible. Also, we don't make this + * context the current context except in very brief code sections, out of fear + * that some of our callees allocate memory on their own which would be leaked + * permanently. + */ +static void +RelationBuildPartitionKey(Relation relation) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + bool isnull; + int i; + PartitionKey key; + AttrNumber *attrs; + oidvector *opclass; + oidvector *collation; + ListCell *partexprs_item; + Datum datum; + MemoryContext partkeycxt, + oldcxt; + int16 procnum; + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(relation))); + + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of relation %u", + RelationGetRelid(relation)); + + partkeycxt = AllocSetContextCreate(CurTransactionContext, + "partition key", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(partkeycxt, + RelationGetRelationName(relation)); + + key = (PartitionKey) MemoryContextAllocZero(partkeycxt, + sizeof(PartitionKeyData)); + + /* Fixed-length attributes */ + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + key->strategy = form->partstrat; + key->partnatts = form->partnatts; + + /* + * We can rely on the first variable-length attribute being mapped to the + * relevant field of the catalog's C struct, because all previous + * attributes are non-nullable and fixed-length. 
+ */ + attrs = form->partattrs.values; + + /* But use the hard way to retrieve further variable-length attributes */ + /* Operator class */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + opclass = (oidvector *) DatumGetPointer(datum); + + /* Collation */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + collation = (oidvector *) DatumGetPointer(datum); + + /* Expressions */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + if (!isnull) + { + char *exprString; + Node *expr; + + exprString = TextDatumGetCString(datum); + expr = stringToNode(exprString); + pfree(exprString); + + /* + * Run the expressions through const-simplification since the planner + * will be comparing them to similarly-processed qual clause operands, + * and may fail to detect valid matches without this step; fix + * opfuncids while at it. We don't need to bother with + * canonicalize_qual() though, because partition expressions should be + * in canonical form already (ie, no need for OR-merging or constant + * elimination). 
+ */ + expr = eval_const_expressions(NULL, expr); + fix_opfuncids(expr); + + oldcxt = MemoryContextSwitchTo(partkeycxt); + key->partexprs = (List *) copyObject(expr); + MemoryContextSwitchTo(oldcxt); + } + + /* Allocate assorted arrays in the partkeycxt, which we'll fill below */ + oldcxt = MemoryContextSwitchTo(partkeycxt); + key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); + key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); + + key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); + key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); + key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); + key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); + key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + MemoryContextSwitchTo(oldcxt); + + /* determine support function number to search for */ + procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? 
+ HASHEXTENDED_PROC : BTORDER_PROC; + + /* Copy partattrs and fill other per-attribute info */ + memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + AttrNumber attno = key->partattrs[i]; + HeapTuple opclasstup; + Form_pg_opclass opclassform; + Oid funcid; + + /* Collect opfamily information */ + opclasstup = SearchSysCache1(CLAOID, + ObjectIdGetDatum(opclass->values[i])); + if (!HeapTupleIsValid(opclasstup)) + elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); + + opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); + key->partopfamily[i] = opclassform->opcfamily; + key->partopcintype[i] = opclassform->opcintype; + + /* Get a support function for the specified opfamily and datatypes */ + funcid = get_opfamily_proc(opclassform->opcfamily, + opclassform->opcintype, + opclassform->opcintype, + procnum); + if (!OidIsValid(funcid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s", + NameStr(opclassform->opcname), + (key->strategy == PARTITION_STRATEGY_HASH) ? 
+ "hash" : "btree", + procnum, + format_type_be(opclassform->opcintype)))); + + fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt); + + /* Collation */ + key->partcollation[i] = collation->values[i]; + + /* Collect type information */ + if (attno != 0) + { + Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1); + + key->parttypid[i] = att->atttypid; + key->parttypmod[i] = att->atttypmod; + key->parttypcoll[i] = att->attcollation; + } + else + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + key->parttypid[i] = exprType(lfirst(partexprs_item)); + key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); + key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); + + partexprs_item = lnext(key->partexprs, partexprs_item); + } + get_typlenbyvalalign(key->parttypid[i], + &key->parttyplen[i], + &key->parttypbyval[i], + &key->parttypalign[i]); + + ReleaseSysCache(opclasstup); + } + + ReleaseSysCache(tuple); + + /* Assert that we're not leaking any old data during assignments below */ + Assert(relation->rd_partkeycxt == NULL); + Assert(relation->rd_partkey == NULL); + + /* + * Success --- reparent our context and make the relcache point to the + * newly constructed key + */ + MemoryContextSetParent(partkeycxt, CacheMemoryContext); + relation->rd_partkeycxt = partkeycxt; + relation->rd_partkey = key; +} + +/* + * RelationGetPartitionQual + * + * Returns a list of partition quals + */ +List * +RelationGetPartitionQual(Relation rel) +{ + /* Quick exit */ + if (!rel->rd_rel->relispartition) + return NIL; + + return generate_partition_qual(rel); +} + +/* + * get_partition_qual_relid + * + * Returns an expression tree describing the passed-in relation's partition + * constraint. + * + * If the relation is not found, or is not a partition, or there is no + * partition constraint, return NULL. We must guard against the first two + * cases because this supports a SQL function that could be passed any OID. 
+ * The last case can happen even if relispartition is true, when a default + * partition is the only partition. + */ +Expr * +get_partition_qual_relid(Oid relid) +{ + Expr *result = NULL; + + /* Do the work only if this relation exists and is a partition. */ + if (get_rel_relispartition(relid)) + { + Relation rel = relation_open(relid, AccessShareLock); + List *and_args; + + and_args = generate_partition_qual(rel); + + /* Convert implicit-AND list format to boolean expression */ + if (and_args == NIL) + result = NULL; + else if (list_length(and_args) > 1) + result = makeBoolExpr(AND_EXPR, and_args, -1); + else + result = linitial(and_args); + + /* Keep the lock, to allow safe deparsing against the rel by caller. */ + relation_close(rel, NoLock); + } + + return result; +} + +/* + * generate_partition_qual + * + * Generate partition predicate from rel's partition bound expression. The + * function returns a NIL list if there is no predicate. + * + * We cache a copy of the result in the relcache entry, after constructing + * it using the caller's context. This approach avoids leaking any data + * into long-lived cache contexts, especially if we fail partway through. + */ +static List * +generate_partition_qual(Relation rel) +{ + HeapTuple tuple; + MemoryContext oldcxt; + Datum boundDatum; + bool isnull; + List *my_qual = NIL, + *result = NIL; + Oid parentrelid; + Relation parent; + + /* Guard against stack overflow due to overly deep partition tree */ + check_stack_depth(); + + /* If we already cached the result, just return a copy */ + if (rel->rd_partcheckvalid) + return copyObject(rel->rd_partcheck); + + /* + * Grab at least an AccessShareLock on the parent table. Must do this + * even if the partition has been partially detached, because transactions + * concurrent with the detach might still be trying to use a partition + * descriptor that includes it. 
+ */ + parentrelid = get_partition_parent(RelationGetRelid(rel), true); + parent = relation_open(parentrelid, AccessShareLock); + + /* Get pg_class.relpartbound */ + tuple = SearchSysCache1(RELOID, RelationGetRelid(rel)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + if (!isnull) + { + PartitionBoundSpec *bound; + + bound = castNode(PartitionBoundSpec, + stringToNode(TextDatumGetCString(boundDatum))); + + my_qual = get_qual_from_partbound(rel, parent, bound); + } + + ReleaseSysCache(tuple); + + /* Add the parent's quals to the list (if any) */ + if (parent->rd_rel->relispartition) + result = list_concat(generate_partition_qual(parent), my_qual); + else + result = my_qual; + + /* + * Change Vars to have partition's attnos instead of the parent's. We do + * this after we concatenate the parent's quals, because we want every Var + * in it to bear this relation's attnos. It's safe to assume varno = 1 + * here. + */ + result = map_partition_varattnos(result, 1, rel, parent); + + /* Assert that we're not leaking any old data during assignments below */ + Assert(rel->rd_partcheckcxt == NULL); + Assert(rel->rd_partcheck == NIL); + + /* + * Save a copy in the relcache. The order of these operations is fairly + * critical to avoid memory leaks and ensure that we don't leave a corrupt + * relcache entry if we fail partway through copyObject. + * + * If, as is definitely possible, the partcheck list is NIL, then we do + * not need to make a context to hold it. 
+ */ + if (result != NIL) + { + rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext, + "partition constraint", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt, + RelationGetRelationName(rel)); + oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt); + rel->rd_partcheck = copyObject(result); + MemoryContextSwitchTo(oldcxt); + } + else + rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = true; + + /* Keep the parent locked until commit */ + relation_close(parent, NoLock); + + /* Return the working copy to the caller */ + return result; +} diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c new file mode 100644 index 0000000..6767eae --- /dev/null +++ b/src/backend/utils/cache/plancache.c @@ -0,0 +1,2207 @@ +/*------------------------------------------------------------------------- + * + * plancache.c + * Plan cache management. + * + * The plan cache manager has two principal responsibilities: deciding when + * to use a generic plan versus a custom (parameter-value-specific) plan, + * and tracking whether cached plans need to be invalidated because of schema + * changes in the objects they depend on. + * + * The logic for choosing generic or custom plans is in choose_custom_plan, + * which see for comments. + * + * Cache invalidation is driven off sinval events. Any CachedPlanSource + * that matches the event is marked invalid, as is its generic CachedPlan + * if it has one. When (and if) the next demand for a cached plan occurs, + * parse analysis and rewrite is repeated to build a new valid query tree, + * and then planning is performed as normal. We also force re-analysis and + * re-planning if the active search_path is different from the previous time + * or, if RLS is involved, if the user changes or the RLS environment changes. 
+ * + * Note that if the sinval was a result of user DDL actions, parse analysis + * could throw an error, for example if a column referenced by the query is + * no longer present. Another possibility is for the query's output tupdesc + * to change (for instance "SELECT *" might expand differently than before). + * The creator of a cached plan can specify whether it is allowable for the + * query to change output tupdesc on replan --- if so, it's up to the + * caller to notice changes and cope with them. + * + * Currently, we track exactly the dependencies of plans on relations, + * user-defined functions, and domains. On relcache invalidation events or + * pg_proc or pg_type syscache invalidation events, we invalidate just those + * plans that depend on the particular object being modified. (Note: this + * scheme assumes that any table modification that requires replanning will + * generate a relcache inval event.) We also watch for inval events on + * certain other system catalogs, such as pg_namespace; but for them, our + * response is just to invalidate all plans. We expect updates on those + * catalogs to be infrequent enough that more-detailed tracking is not worth + * the effort. + * + * In addition to full-fledged query plans, we provide a facility for + * detecting invalidations of simple scalar expressions. This is fairly + * bare-bones; it's the caller's responsibility to build a new expression + * if the old one gets invalidated. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/plancache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/transam.h" +#include "catalog/namespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "parser/analyze.h" +#include "parser/parsetree.h" +#include "storage/lmgr.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/resowner_private.h" +#include "utils/rls.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* + * We must skip "overhead" operations that involve database access when the + * cached plan's subject statement is a transaction control command. + */ +#define IsTransactionStmtPlan(plansource) \ + ((plansource)->raw_parse_tree && \ + IsA((plansource)->raw_parse_tree->stmt, TransactionStmt)) + +/* + * This is the head of the backend's list of "saved" CachedPlanSources (i.e., + * those that are in long-lived storage and are examined for sinval events). + * We use a dlist instead of separate List cells so that we can guarantee + * to save a CachedPlanSource without error. + */ +static dlist_head saved_plan_list = DLIST_STATIC_INIT(saved_plan_list); + +/* + * This is the head of the backend's list of CachedExpressions. 
+ */ +static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_list); + +static void ReleaseGenericPlan(CachedPlanSource *plansource); +static List *RevalidateCachedQuery(CachedPlanSource *plansource, + QueryEnvironment *queryEnv); +static bool CheckCachedPlan(CachedPlanSource *plansource); +static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, + ParamListInfo boundParams, QueryEnvironment *queryEnv); +static bool choose_custom_plan(CachedPlanSource *plansource, + ParamListInfo boundParams); +static double cached_plan_cost(CachedPlan *plan, bool include_planner); +static Query *QueryListGetPrimaryStmt(List *stmts); +static void AcquireExecutorLocks(List *stmt_list, bool acquire); +static void AcquirePlannerLocks(List *stmt_list, bool acquire); +static void ScanQueryForLocks(Query *parsetree, bool acquire); +static bool ScanQueryWalker(Node *node, bool *acquire); +static TupleDesc PlanCacheComputeResultDesc(List *stmt_list); +static void PlanCacheRelCallback(Datum arg, Oid relid); +static void PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue); +static void PlanCacheSysCallback(Datum arg, int cacheid, uint32 hashvalue); + +/* GUC parameter */ +int plan_cache_mode; + +/* + * InitPlanCache: initialize module during InitPostgres. + * + * All we need to do is hook into inval.c's callback lists. 
+ */ +void +InitPlanCache(void) +{ + CacheRegisterRelcacheCallback(PlanCacheRelCallback, (Datum) 0); + CacheRegisterSyscacheCallback(PROCOID, PlanCacheObjectCallback, (Datum) 0); + CacheRegisterSyscacheCallback(TYPEOID, PlanCacheObjectCallback, (Datum) 0); + CacheRegisterSyscacheCallback(NAMESPACEOID, PlanCacheSysCallback, (Datum) 0); + CacheRegisterSyscacheCallback(OPEROID, PlanCacheSysCallback, (Datum) 0); + CacheRegisterSyscacheCallback(AMOPOPID, PlanCacheSysCallback, (Datum) 0); + CacheRegisterSyscacheCallback(FOREIGNSERVEROID, PlanCacheSysCallback, (Datum) 0); + CacheRegisterSyscacheCallback(FOREIGNDATAWRAPPEROID, PlanCacheSysCallback, (Datum) 0); +} + +/* + * CreateCachedPlan: initially create a plan cache entry. + * + * Creation of a cached plan is divided into two steps, CreateCachedPlan and + * CompleteCachedPlan. CreateCachedPlan should be called after running the + * query through raw_parser, but before doing parse analysis and rewrite; + * CompleteCachedPlan is called after that. The reason for this arrangement + * is that it can save one round of copying of the raw parse tree, since + * the parser will normally scribble on the raw parse tree. Callers would + * otherwise need to make an extra copy of the parse tree to ensure they + * still had a clean copy to present at plan cache creation time. + * + * All arguments presented to CreateCachedPlan are copied into a memory + * context created as a child of the call-time CurrentMemoryContext, which + * should be a reasonably short-lived working context that will go away in + * event of an error. This ensures that the cached plan data structure will + * likewise disappear if an error occurs before we have fully constructed it. + * Once constructed, the cached plan can be made longer-lived, if needed, + * by calling SaveCachedPlan. 
+ * + * raw_parse_tree: output of raw_parser(), or NULL if empty query + * query_string: original query text + * commandTag: command tag for query, or UNKNOWN if empty query + */ +CachedPlanSource * +CreateCachedPlan(RawStmt *raw_parse_tree, + const char *query_string, + CommandTag commandTag) +{ + CachedPlanSource *plansource; + MemoryContext source_context; + MemoryContext oldcxt; + + Assert(query_string != NULL); /* required as of 8.4 */ + + /* + * Make a dedicated memory context for the CachedPlanSource and its + * permanent subsidiary data. It's probably not going to be large, but + * just in case, allow it to grow large. Initially it's a child of the + * caller's context (which we assume to be transient), so that it will be + * cleaned up on error. + */ + source_context = AllocSetContextCreate(CurrentMemoryContext, + "CachedPlanSource", + ALLOCSET_START_SMALL_SIZES); + + /* + * Create and fill the CachedPlanSource struct within the new context. + * Most fields are just left empty for the moment. 
+ */ + oldcxt = MemoryContextSwitchTo(source_context); + + plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); + plansource->magic = CACHEDPLANSOURCE_MAGIC; + plansource->raw_parse_tree = copyObject(raw_parse_tree); + plansource->query_string = pstrdup(query_string); + MemoryContextSetIdentifier(source_context, plansource->query_string); + plansource->commandTag = commandTag; + plansource->param_types = NULL; + plansource->num_params = 0; + plansource->parserSetup = NULL; + plansource->parserSetupArg = NULL; + plansource->cursor_options = 0; + plansource->fixed_result = false; + plansource->resultDesc = NULL; + plansource->context = source_context; + plansource->query_list = NIL; + plansource->relationOids = NIL; + plansource->invalItems = NIL; + plansource->search_path = NULL; + plansource->query_context = NULL; + plansource->rewriteRoleId = InvalidOid; + plansource->rewriteRowSecurity = false; + plansource->dependsOnRLS = false; + plansource->gplan = NULL; + plansource->is_oneshot = false; + plansource->is_complete = false; + plansource->is_saved = false; + plansource->is_valid = false; + plansource->generation = 0; + plansource->generic_cost = -1; + plansource->total_custom_cost = 0; + plansource->num_generic_plans = 0; + plansource->num_custom_plans = 0; + + MemoryContextSwitchTo(oldcxt); + + return plansource; +} + +/* + * CreateOneShotCachedPlan: initially create a one-shot plan cache entry. + * + * This variant of CreateCachedPlan creates a plan cache entry that is meant + * to be used only once. No data copying occurs: all data structures remain + * in the caller's memory context (which typically should get cleared after + * completing execution). The CachedPlanSource struct itself is also created + * in that context. + * + * A one-shot plan cannot be saved or copied, since we make no effort to + * preserve the raw parse tree unmodified. 
There is also no support for + * invalidation, so plan use must be completed in the current transaction, + * and DDL that might invalidate the querytree_list must be avoided as well. + * + * raw_parse_tree: output of raw_parser(), or NULL if empty query + * query_string: original query text + * commandTag: command tag for query, or NULL if empty query + */ +CachedPlanSource * +CreateOneShotCachedPlan(RawStmt *raw_parse_tree, + const char *query_string, + CommandTag commandTag) +{ + CachedPlanSource *plansource; + + Assert(query_string != NULL); /* required as of 8.4 */ + + /* + * Create and fill the CachedPlanSource struct within the caller's memory + * context. Most fields are just left empty for the moment. + */ + plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); + plansource->magic = CACHEDPLANSOURCE_MAGIC; + plansource->raw_parse_tree = raw_parse_tree; + plansource->query_string = query_string; + plansource->commandTag = commandTag; + plansource->param_types = NULL; + plansource->num_params = 0; + plansource->parserSetup = NULL; + plansource->parserSetupArg = NULL; + plansource->cursor_options = 0; + plansource->fixed_result = false; + plansource->resultDesc = NULL; + plansource->context = CurrentMemoryContext; + plansource->query_list = NIL; + plansource->relationOids = NIL; + plansource->invalItems = NIL; + plansource->search_path = NULL; + plansource->query_context = NULL; + plansource->rewriteRoleId = InvalidOid; + plansource->rewriteRowSecurity = false; + plansource->dependsOnRLS = false; + plansource->gplan = NULL; + plansource->is_oneshot = true; + plansource->is_complete = false; + plansource->is_saved = false; + plansource->is_valid = false; + plansource->generation = 0; + plansource->generic_cost = -1; + plansource->total_custom_cost = 0; + plansource->num_generic_plans = 0; + plansource->num_custom_plans = 0; + + return plansource; +} + +/* + * CompleteCachedPlan: second step of creating a plan cache entry. 
+ *
+ * Pass in the analyzed-and-rewritten form of the query, as well as the
+ * required subsidiary data about parameters and such. All passed values will
+ * be copied into the CachedPlanSource's memory, except as specified below.
+ * After this is called, GetCachedPlan can be called to obtain a plan, and
+ * optionally the CachedPlanSource can be saved using SaveCachedPlan.
+ *
+ * If querytree_context is not NULL, the querytree_list must be stored in that
+ * context (but the other parameters need not be). The querytree_list is not
+ * copied, rather the given context is kept as the initial query_context of
+ * the CachedPlanSource. (It should have been created as a child of the
+ * caller's working memory context, but it will now be reparented to belong
+ * to the CachedPlanSource.) The querytree_context is normally the context in
+ * which the caller did raw parsing and parse analysis. This approach saves
+ * one tree copying step compared to passing NULL, but leaves lots of extra
+ * cruft in the query_context, namely whatever extraneous stuff parse analysis
+ * created, as well as whatever went unused from the raw parse tree. Using
+ * this option is a space-for-time tradeoff that is appropriate if the
+ * CachedPlanSource is not expected to survive long.
+ *
+ * plancache.c cannot know how to copy the data referenced by parserSetupArg,
+ * and it would often be inappropriate to do so anyway. When using that
+ * option, it is caller's responsibility that the referenced data remains
+ * valid for as long as the CachedPlanSource exists.
+ *
+ * If the CachedPlanSource is a "oneshot" plan, then no querytree copying
+ * occurs at all, and querytree_context is ignored; it is caller's
+ * responsibility that the passed querytree_list is sufficiently long-lived.
+ *
+ * plansource: structure returned by CreateCachedPlan
+ * querytree_list: analyzed-and-rewritten form of query (list of Query nodes)
+ * querytree_context: memory context containing querytree_list,
+ *					  or NULL to copy querytree_list into a fresh context
+ * param_types: array of fixed parameter type OIDs, or NULL if none
+ * num_params: number of fixed parameters
+ * parserSetup: alternate method for handling query parameters
+ * parserSetupArg: data to pass to parserSetup
+ * cursor_options: options bitmask to pass to planner
+ * fixed_result: true to disallow future changes in query's result tupdesc
+ */
+void
+CompleteCachedPlan(CachedPlanSource *plansource,
+				   List *querytree_list,
+				   MemoryContext querytree_context,
+				   Oid *param_types,
+				   int num_params,
+				   ParserSetupHook parserSetup,
+				   void *parserSetupArg,
+				   int cursor_options,
+				   bool fixed_result)
+{
+	MemoryContext source_context = plansource->context;
+	MemoryContext oldcxt = CurrentMemoryContext;
+
+	/* Assert caller is doing things in a sane order */
+	Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC);
+	Assert(!plansource->is_complete);
+
+	/*
+	 * If caller supplied a querytree_context, reparent it underneath the
+	 * CachedPlanSource's context; otherwise, create a suitable context and
+	 * copy the querytree_list into it. But no data copying should be done
+	 * for one-shot plans; for those, assume the passed querytree_list is
+	 * sufficiently long-lived.
+	 *
+	 * In both non-one-shot branches we also switch into querytree_context,
+	 * so that is the current context when the dependency data is gathered
+	 * below.
+	 */
+	if (plansource->is_oneshot)
+	{
+		querytree_context = CurrentMemoryContext;	/* no copy, no context switch */
+	}
+	else if (querytree_context != NULL)
+	{
+		MemoryContextSetParent(querytree_context, source_context);
+		MemoryContextSwitchTo(querytree_context);
+	}
+	else
+	{
+		/* Again, it's a good bet the querytree_context can be small */
+		querytree_context = AllocSetContextCreate(source_context,
+												  "CachedPlanQuery",
+												  ALLOCSET_START_SMALL_SIZES);
+		MemoryContextSwitchTo(querytree_context);
+		querytree_list = copyObject(querytree_list);
+	}
+
+	plansource->query_context = querytree_context;
+	plansource->query_list = querytree_list;
+
+	if (!plansource->is_oneshot && !IsTransactionStmtPlan(plansource))
+	{
+		/*
+		 * Use the planner machinery to extract dependencies. Data is saved
+		 * in query_context. (We assume that not a lot of extra cruft is
+		 * created by this call.) We can skip this for one-shot plans, and
+		 * transaction control commands have no such dependencies anyway.
+		 */
+		extract_query_dependencies((Node *) querytree_list,
+								   &plansource->relationOids,
+								   &plansource->invalItems,
+								   &plansource->dependsOnRLS);
+
+		/* Update RLS info as well. */
+		plansource->rewriteRoleId = GetUserId();
+		plansource->rewriteRowSecurity = row_security;
+
+		/*
+		 * Also save the current search_path in the query_context. (This
+		 * should not generate much extra cruft either, since almost certainly
+		 * the path is already valid.) Again, we don't really need this for
+		 * one-shot plans; and we *must* skip this for transaction control
+		 * commands, because this could result in catalog accesses.
+		 */
+		plansource->search_path = GetOverrideSearchPath(querytree_context);
+	}
+
+	/*
+	 * Save the final parameter types (or other parameter specification data)
+	 * into the source_context, as well as our other parameters. Also save
+	 * the result tuple descriptor.
+	 *
+	 * The type array is duplicated only when num_params > 0; otherwise
+	 * param_types is set to NULL whatever pointer was passed in. The
+	 * parserSetupArg pointer is stored as given, not copied.
+	 */
+	MemoryContextSwitchTo(source_context);
+
+	if (num_params > 0)
+	{
+		plansource->param_types = (Oid *) palloc(num_params * sizeof(Oid));
+		memcpy(plansource->param_types, param_types, num_params * sizeof(Oid));
+	}
+	else
+		plansource->param_types = NULL;
+	plansource->num_params = num_params;
+	plansource->parserSetup = parserSetup;
+	plansource->parserSetupArg = parserSetupArg;
+	plansource->cursor_options = cursor_options;
+	plansource->fixed_result = fixed_result;
+	plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list);
+
+	MemoryContextSwitchTo(oldcxt);	/* back to the context current on entry */
+
+	plansource->is_complete = true;
+	plansource->is_valid = true;
+}
+
+/*
+ * SaveCachedPlan: save a cached plan permanently
+ *
+ * This function moves the cached plan underneath CacheMemoryContext (making
+ * it live for the life of the backend, unless explicitly dropped), and adds
+ * it to the list of cached plans that are checked for invalidation when an
+ * sinval event occurs.
+ *
+ * This is guaranteed not to throw error, except for the caller-error case
+ * of trying to save a one-shot plan. Callers typically depend on that
+ * since this is called just before or just after adding a pointer to the
+ * CachedPlanSource to some permanent data structure of their own. Up until
+ * this is done, a CachedPlanSource is just transient data that will go away
+ * automatically on transaction abort.
+ *
+ * plansource: a completed CachedPlanSource that has not been saved yet
+ * (both conditions are asserted). Any generic plan it already holds is
+ * released, not carried over.
+ */
+void
+SaveCachedPlan(CachedPlanSource *plansource)
+{
+	/* Assert caller is doing things in a sane order */
+	Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC);
+	Assert(plansource->is_complete);
+	Assert(!plansource->is_saved);
+
+	/* This seems worth a real test, though */
+	if (plansource->is_oneshot)
+		elog(ERROR, "cannot save one-shot cached plan");
+
+	/*
+	 * In typical use, this function would be called before generating any
+	 * plans from the CachedPlanSource. If there is a generic plan, moving it
+	 * into CacheMemoryContext would be pretty risky since it's unclear
+	 * whether the caller has taken suitable care with making references
+	 * long-lived. Best thing to do seems to be to discard the plan.
+	 */
+	ReleaseGenericPlan(plansource);
+
+	/*
+	 * Reparent the source memory context under CacheMemoryContext so that it
+	 * will live indefinitely. The query_context follows along since it's
+	 * already a child of the other one.
+	 */
+	MemoryContextSetParent(plansource->context, CacheMemoryContext);
+
+	/*
+	 * Add the entry to the global list of cached plans.
+	 */
+	dlist_push_tail(&saved_plan_list, &plansource->node);
+
+	plansource->is_saved = true;
+}
+
+/*
+ * DropCachedPlan: destroy a cached plan.
+ *
+ * Actually this only destroys the CachedPlanSource: any referenced CachedPlan
+ * is released, but not destroyed until its refcount goes to zero. That
+ * handles the situation where DropCachedPlan is called while the plan is
+ * still in use.
+ *
+ * plansource: the CachedPlanSource to destroy. Unless it is one-shot, its
+ * memory context is deleted here, so the caller must not touch the pointer
+ * afterwards.
+ */
+void
+DropCachedPlan(CachedPlanSource *plansource)
+{
+	Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC);
+
+	/* If it's been saved, remove it from the list */
+	if (plansource->is_saved)
+	{
+		dlist_delete(&plansource->node);
+		plansource->is_saved = false;
+	}
+
+	/* Decrement generic CachedPlan's refcount and drop if no longer needed */
+	ReleaseGenericPlan(plansource);
+
+	/* Mark it no longer valid */
+	plansource->magic = 0;
+
+	/*
+	 * Remove the CachedPlanSource and all subsidiary data (including the
+	 * query_context if any). But if it's a one-shot we can't free anything.
+	 */
+	if (!plansource->is_oneshot)
+		MemoryContextDelete(plansource->context);
+}
+
+/*
+ * ReleaseGenericPlan: release a CachedPlanSource's generic plan, if any.
+ */
+static void
+ReleaseGenericPlan(CachedPlanSource *plansource)
+{
+	/*
+	 * Be paranoid about the possibility that ReleaseCachedPlan fails: clear
+	 * the plansource's link first, then drop the reference.
+	 */
+	if (plansource->gplan)
+	{
+		CachedPlan *plan = plansource->gplan;
+
+		Assert(plan->magic == CACHEDPLAN_MAGIC);
+		plansource->gplan = NULL;
+		ReleaseCachedPlan(plan, NULL);	/* NULL: no ResourceOwner involved */
+	}
+}
+
+/*
+ * RevalidateCachedQuery: ensure validity of analyzed-and-rewritten query tree.
+ *
+ * What we do here is re-acquire locks and redo parse analysis if necessary.
+ * On return, the query_list is valid and we have sufficient locks to begin
+ * planning.
+ *
+ * If any parse analysis activity is required, the caller's memory context is
+ * used for that work.
+ *
+ * The result value is the transient analyzed-and-rewritten query tree if we
+ * had to do re-analysis, and NIL otherwise. (This is returned just to save
+ * a tree copying step in a subsequent BuildCachedPlan call.)
+ *
+ * plansource: the completed CachedPlanSource to check
+ * queryEnv: passed through to the parse analysis routines when re-analysis
+ * is needed
+ *
+ * Whenever re-analysis happens, the plansource's generic plan reference is
+ * dropped as well.
+ */
+static List *
+RevalidateCachedQuery(CachedPlanSource *plansource,
+					  QueryEnvironment *queryEnv)
+{
+	bool		snapshot_set;
+	RawStmt    *rawtree;
+	List	   *tlist;			/* transient query-tree list */
+	List	   *qlist;			/* permanent query-tree list */
+	TupleDesc	resultDesc;
+	MemoryContext querytree_context;
+	MemoryContext oldcxt;
+
+	/*
+	 * For one-shot plans, we do not support revalidation checking; it's
+	 * assumed the query is parsed, planned, and executed in one transaction,
+	 * so that no lock re-acquisition is necessary. Also, there is never any
+	 * need to revalidate plans for transaction control commands (and we
+	 * mustn't risk any catalog accesses when handling those).
+	 */
+	if (plansource->is_oneshot || IsTransactionStmtPlan(plansource))
+	{
+		Assert(plansource->is_valid);
+		return NIL;
+	}
+
+	/*
+	 * If the query is currently valid, we should have a saved search_path ---
+	 * check to see if that matches the current environment. If not, we want
+	 * to force replan.
+	 */
+	if (plansource->is_valid)
+	{
+		Assert(plansource->search_path != NULL);
+		if (!OverrideSearchPathMatchesCurrent(plansource->search_path))
+		{
+			/* Invalidate the querytree and generic plan */
+			plansource->is_valid = false;
+			if (plansource->gplan)
+				plansource->gplan->is_valid = false;
+		}
+	}
+
+	/*
+	 * If the query rewrite phase had a possible RLS dependency, we must redo
+	 * it if either the role or the row_security setting has changed.
+	 */
+	if (plansource->is_valid && plansource->dependsOnRLS &&
+		(plansource->rewriteRoleId != GetUserId() ||
+		 plansource->rewriteRowSecurity != row_security))
+		plansource->is_valid = false;
+
+	/*
+	 * If the query is currently valid, acquire locks on the referenced
+	 * objects; then check again. We need to do it this way to cover the race
+	 * condition that an invalidation message arrives before we get the locks.
+	 */
+	if (plansource->is_valid)
+	{
+		AcquirePlannerLocks(plansource->query_list, true);
+
+		/*
+		 * By now, if any invalidation has happened, the inval callback
+		 * functions will have marked the query invalid.
+		 */
+		if (plansource->is_valid)
+		{
+			/* Successfully revalidated and locked the query. */
+			return NIL;
+		}
+
+		/* Oops, the race case happened. Release useless locks. */
+		AcquirePlannerLocks(plansource->query_list, false);
+	}
+
+	/*
+	 * Discard the no-longer-useful query tree. (Note: we don't want to do
+	 * this any earlier, else we'd not have been able to release locks
+	 * correctly in the race condition case.)
+	 */
+	plansource->is_valid = false;
+	plansource->query_list = NIL;
+	plansource->relationOids = NIL;
+	plansource->invalItems = NIL;
+	plansource->search_path = NULL;
+
+	/*
+	 * Free the query_context. We don't really expect MemoryContextDelete to
+	 * fail, but just in case, make sure the CachedPlanSource is left in a
+	 * reasonably sane state. (The generic plan won't get unlinked yet, but
+	 * that's acceptable.)
+	 */
+	if (plansource->query_context)
+	{
+		MemoryContext qcxt = plansource->query_context;
+
+		plansource->query_context = NULL;
+		MemoryContextDelete(qcxt);
+	}
+
+	/* Drop the generic plan reference if any */
+	ReleaseGenericPlan(plansource);
+
+	/*
+	 * Now re-do parse analysis and rewrite. This not incidentally acquires
+	 * the locks we need to do planning safely.
+	 */
+	Assert(plansource->is_complete);
+
+	/*
+	 * If a snapshot is already set (the normal case), we can just use that
+	 * for parsing/planning. But if it isn't, install one. Note: no point in
+	 * checking whether parse analysis requires a snapshot; utility commands
+	 * don't have invalidatable plans, so we'd not get here for such a
+	 * command.
+	 */
+	snapshot_set = false;
+	if (!ActiveSnapshotSet())
+	{
+		PushActiveSnapshot(GetTransactionSnapshot());
+		snapshot_set = true;
+	}
+
+	/*
+	 * Run parse analysis and rule rewriting. The parser tends to scribble on
+	 * its input, so we must copy the raw parse tree to prevent corruption of
+	 * the cache.
+	 *
+	 * A NULL raw parse tree (empty query) yields an empty list. Otherwise
+	 * the parserSetup hook, when present, takes precedence over the fixed
+	 * param_types array.
+	 */
+	rawtree = copyObject(plansource->raw_parse_tree);
+	if (rawtree == NULL)
+		tlist = NIL;
+	else if (plansource->parserSetup != NULL)
+		tlist = pg_analyze_and_rewrite_params(rawtree,
+											  plansource->query_string,
+											  plansource->parserSetup,
+											  plansource->parserSetupArg,
+											  queryEnv);
+	else
+		tlist = pg_analyze_and_rewrite(rawtree,
+									   plansource->query_string,
+									   plansource->param_types,
+									   plansource->num_params,
+									   queryEnv);
+
+	/* Release snapshot if we got one */
+	if (snapshot_set)
+		PopActiveSnapshot();
+
+	/*
+	 * Check or update the result tupdesc. XXX should we use a weaker
+	 * condition than equalTupleDescs() here?
+	 *
+	 * We assume the parameter types didn't change from the first time, so no
+	 * need to update that.
+	 *
+	 * If the descriptor did change and fixed_result is not set, the new one
+	 * is copied into plansource->context and the old one is freed.
+	 */
+	resultDesc = PlanCacheComputeResultDesc(tlist);
+	if (resultDesc == NULL && plansource->resultDesc == NULL)
+	{
+		/* OK, doesn't return tuples */
+	}
+	else if (resultDesc == NULL || plansource->resultDesc == NULL ||
+			 !equalTupleDescs(resultDesc, plansource->resultDesc))
+	{
+		/* can we give a better error message? */
+		if (plansource->fixed_result)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cached plan must not change result type")));
+		oldcxt = MemoryContextSwitchTo(plansource->context);
+		if (resultDesc)
+			resultDesc = CreateTupleDescCopy(resultDesc);
+		if (plansource->resultDesc)
+			FreeTupleDesc(plansource->resultDesc);
+		plansource->resultDesc = resultDesc;
+		MemoryContextSwitchTo(oldcxt);
+	}
+
+	/*
+	 * Allocate new query_context and copy the completed querytree into it.
+	 * It's transient until we complete the copying and dependency extraction.
+	 */
+	querytree_context = AllocSetContextCreate(CurrentMemoryContext,
+											  "CachedPlanQuery",
+											  ALLOCSET_START_SMALL_SIZES);
+	oldcxt = MemoryContextSwitchTo(querytree_context);
+
+	qlist = copyObject(tlist);
+
+	/*
+	 * Use the planner machinery to extract dependencies. Data is saved in
+	 * query_context. (We assume that not a lot of extra cruft is created by
+	 * this call.)
+	 */
+	extract_query_dependencies((Node *) qlist,
+							   &plansource->relationOids,
+							   &plansource->invalItems,
+							   &plansource->dependsOnRLS);
+
+	/* Update RLS info as well. */
+	plansource->rewriteRoleId = GetUserId();
+	plansource->rewriteRowSecurity = row_security;
+
+	/*
+	 * Also save the current search_path in the query_context. (This should
+	 * not generate much extra cruft either, since almost certainly the path
+	 * is already valid.)
+	 */
+	plansource->search_path = GetOverrideSearchPath(querytree_context);
+
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Now reparent the finished query_context and save the links */
+	MemoryContextSetParent(querytree_context, plansource->context);
+
+	plansource->query_context = querytree_context;
+	plansource->query_list = qlist;
+
+	/*
+	 * Note: we do not reset generic_cost or total_custom_cost, although we
+	 * could choose to do so. If the DDL or statistics change that prompted
+	 * the invalidation meant a significant change in the cost estimates, it
+	 * would be better to reset those variables and start fresh; but often it
+	 * doesn't, and we're better retaining our hard-won knowledge about the
+	 * relative costs.
+	 */
+
+	plansource->is_valid = true;
+
+	/* Return transient copy of querytrees for possible use in planning */
+	return tlist;
+}
+
+/*
+ * CheckCachedPlan: see if the CachedPlanSource's generic plan is valid.
+ *
+ * Caller must have already called RevalidateCachedQuery to verify that the
+ * querytree is up to date.
+ *
+ * On a "true" return, we have acquired the locks needed to run the plan.
+ * (We must do this for the "true" result to be race-condition-free.)
+ *
+ * On a "false" return there is no usable generic plan: either none existed,
+ * or the stale one has been unlinked from the plansource and released.
+ */
+static bool
+CheckCachedPlan(CachedPlanSource *plansource)
+{
+	CachedPlan *plan = plansource->gplan;
+
+	/* Assert that caller checked the querytree */
+	Assert(plansource->is_valid);
+
+	/* If there's no generic plan, just say "false" */
+	if (!plan)
+		return false;
+
+	Assert(plan->magic == CACHEDPLAN_MAGIC);
+	/* Generic plans are never one-shot */
+	Assert(!plan->is_oneshot);
+
+	/*
+	 * If plan isn't valid for current role, we can't use it.
+	 */
+	if (plan->is_valid && plan->dependsOnRole &&
+		plan->planRoleId != GetUserId())
+		plan->is_valid = false;
+
+	/*
+	 * If it appears valid, acquire locks and recheck; this is much the same
+	 * logic as in RevalidateCachedQuery, but for a plan.
+	 */
+	if (plan->is_valid)
+	{
+		/*
+		 * Plan must have positive refcount because it is referenced by
+		 * plansource; so no need to fear it disappears under us here.
+		 */
+		Assert(plan->refcount > 0);
+
+		AcquireExecutorLocks(plan->stmt_list, true);
+
+		/*
+		 * If plan was transient, check to see if TransactionXmin has
+		 * advanced, and if so invalidate it.
+		 */
+		if (plan->is_valid &&
+			TransactionIdIsValid(plan->saved_xmin) &&
+			!TransactionIdEquals(plan->saved_xmin, TransactionXmin))
+			plan->is_valid = false;
+
+		/*
+		 * By now, if any invalidation has happened, the inval callback
+		 * functions will have marked the plan invalid.
+		 */
+		if (plan->is_valid)
+		{
+			/* Successfully revalidated and locked the query. */
+			return true;
+		}
+
+		/* Oops, the race case happened. Release useless locks. */
+		AcquireExecutorLocks(plan->stmt_list, false);
+	}
+
+	/*
+	 * Plan has been invalidated, so unlink it from the parent and release it.
+	 */
+	ReleaseGenericPlan(plansource);
+
+	return false;
+}
+
+/*
+ * BuildCachedPlan: construct a new CachedPlan from a CachedPlanSource.
+ *
+ * qlist should be the result value from a previous RevalidateCachedQuery,
+ * or it can be set to NIL if we need to re-copy the plansource's query_list.
+ *
+ * To build a generic, parameter-value-independent plan, pass NULL for
+ * boundParams. To build a custom plan, pass the actual parameter values via
+ * boundParams. For best effect, the PARAM_FLAG_CONST flag should be set on
+ * each parameter value; otherwise the planner will treat the value as a
+ * hint rather than a hard constant.
+ *
+ * Planning work is done in the caller's memory context. The finished plan
+ * is in a child memory context, which typically should get reparented
+ * (unless this is a one-shot plan, in which case we don't copy the plan).
+ */
+static CachedPlan *
+BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
+				ParamListInfo boundParams, QueryEnvironment *queryEnv)
+{
+	CachedPlan *plan;
+	List	   *plist;
+	bool		snapshot_set;
+	bool		is_transient;
+	MemoryContext plan_context;
+	MemoryContext oldcxt = CurrentMemoryContext;
+	ListCell   *lc;
+
+	/*
+	 * Normally the querytree should be valid already, but if it's not,
+	 * rebuild it.
+	 *
+	 * NOTE: GetCachedPlan should have called RevalidateCachedQuery first, so
+	 * we ought to be holding sufficient locks to prevent any invalidation.
+	 * However, if we're building a custom plan after having built and
+	 * rejected a generic plan, it's possible to reach here with is_valid
+	 * false due to an invalidation while making the generic plan. In theory
+	 * the invalidation must be a false positive, perhaps a consequence of an
+	 * sinval reset event or the debug_discard_caches code. But for safety,
+	 * let's treat it as real and redo the RevalidateCachedQuery call.
+	 */
+	if (!plansource->is_valid)
+		qlist = RevalidateCachedQuery(plansource, queryEnv);
+
+	/*
+	 * If we don't already have a copy of the querytree list that can be
+	 * scribbled on by the planner, make one. For a one-shot plan, we assume
+	 * it's okay to scribble on the original query_list.
+	 */
+	if (qlist == NIL)
+	{
+		if (!plansource->is_oneshot)
+			qlist = copyObject(plansource->query_list);
+		else
+			qlist = plansource->query_list;
+	}
+
+	/*
+	 * If a snapshot is already set (the normal case), we can just use that
+	 * for planning. But if it isn't, and we need one, install one.
+	 */
+	snapshot_set = false;
+	if (!ActiveSnapshotSet() &&
+		plansource->raw_parse_tree &&
+		analyze_requires_snapshot(plansource->raw_parse_tree))
+	{
+		PushActiveSnapshot(GetTransactionSnapshot());
+		snapshot_set = true;
+	}
+
+	/*
+	 * Generate the plan.
+	 */
+	plist = pg_plan_queries(qlist, plansource->query_string,
+							plansource->cursor_options, boundParams);
+
+	/* Release snapshot if we got one */
+	if (snapshot_set)
+		PopActiveSnapshot();
+
+	/*
+	 * Normally we make a dedicated memory context for the CachedPlan and its
+	 * subsidiary data. (It's probably not going to be large, but just in
+	 * case, allow it to grow large. It's transient for the moment.) But for
+	 * a one-shot plan, we just leave it in the caller's memory context.
+	 */
+	if (!plansource->is_oneshot)
+	{
+		plan_context = AllocSetContextCreate(CurrentMemoryContext,
+											 "CachedPlan",
+											 ALLOCSET_START_SMALL_SIZES);
+		MemoryContextCopyAndSetIdentifier(plan_context, plansource->query_string);
+
+		/*
+		 * Copy plan into the new context.
+		 */
+		MemoryContextSwitchTo(plan_context);
+
+		plist = copyObject(plist);
+	}
+	else
+		plan_context = CurrentMemoryContext;
+
+	/*
+	 * Create and fill the CachedPlan struct within the new context. (For a
+	 * one-shot plan no switch happened above, so this is still the context
+	 * that was current on entry.)
+	 */
+	plan = (CachedPlan *) palloc(sizeof(CachedPlan));
+	plan->magic = CACHEDPLAN_MAGIC;
+	plan->stmt_list = plist;
+
+	/*
+	 * CachedPlan is dependent on role either if RLS affected the rewrite
+	 * phase or if a role dependency was injected during planning. And it's
+	 * transient if any plan is marked so.
+	 */
+	plan->planRoleId = GetUserId();
+	plan->dependsOnRole = plansource->dependsOnRLS;
+	is_transient = false;
+	foreach(lc, plist)
+	{
+		PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc);
+
+		if (plannedstmt->commandType == CMD_UTILITY)
+			continue;			/* Ignore utility statements */
+
+		if (plannedstmt->transientPlan)
+			is_transient = true;
+		if (plannedstmt->dependsOnRole)
+			plan->dependsOnRole = true;
+	}
+	if (is_transient)
+	{
+		Assert(TransactionIdIsNormal(TransactionXmin));
+		plan->saved_xmin = TransactionXmin;
+	}
+	else
+		plan->saved_xmin = InvalidTransactionId;
+	plan->refcount = 0;			/* references are counted by the caller */
+	plan->context = plan_context;
+	plan->is_oneshot = plansource->is_oneshot;
+	plan->is_saved = false;
+	plan->is_valid = true;
+
+	/* assign generation number to new plan */
+	plan->generation = ++(plansource->generation);
+
+	MemoryContextSwitchTo(oldcxt);	/* back to the context current on entry */
+
+	return plan;
+}
+
+/*
+ * choose_custom_plan: choose whether to use custom or generic plan
+ *
+ * This defines the policy followed by GetCachedPlan.
+ *
+ * Returns true if a custom plan should be built, false if a generic plan
+ * should be used. The tests below are applied in order and the first one
+ * that matches decides.
+ */
+static bool
+choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams)
+{
+	double		avg_custom_cost;
+
+	/* One-shot plans will always be considered custom */
+	if (plansource->is_oneshot)
+		return true;
+
+	/* Otherwise, never any point in a custom plan if there's no parameters */
+	if (boundParams == NULL)
+		return false;
+	/* ... nor for transaction control statements */
+	if (IsTransactionStmtPlan(plansource))
+		return false;
+
+	/* Let settings force the decision */
+	if (plan_cache_mode == PLAN_CACHE_MODE_FORCE_GENERIC_PLAN)
+		return false;
+	if (plan_cache_mode == PLAN_CACHE_MODE_FORCE_CUSTOM_PLAN)
+		return true;
+
+	/* See if caller wants to force the decision */
+	if (plansource->cursor_options & CURSOR_OPT_GENERIC_PLAN)
+		return false;
+	if (plansource->cursor_options & CURSOR_OPT_CUSTOM_PLAN)
+		return true;
+
+	/* Generate custom plans until we have done at least 5 (arbitrary) */
+	if (plansource->num_custom_plans < 5)
+		return true;
+
+	avg_custom_cost = plansource->total_custom_cost / plansource->num_custom_plans;
+
+	/*
+	 * Prefer generic plan if it's less expensive than the average custom
+	 * plan. (Because we include a charge for cost of planning in the
+	 * custom-plan costs, this means the generic plan only has to be less
+	 * expensive than the execution cost plus replan cost of the custom
+	 * plans.)
+	 *
+	 * Note that if generic_cost is -1 (indicating we've not yet determined
+	 * the generic plan cost), we'll always prefer generic at this point.
+	 */
+	if (plansource->generic_cost < avg_custom_cost)
+		return false;
+
+	return true;
+}
+
+/*
+ * cached_plan_cost: calculate estimated cost of a plan
+ *
+ * If include_planner is true, also include the estimated cost of constructing
+ * the plan. (We must factor that into the cost of using a custom plan, but
+ * we don't count it for a generic plan.)
+ */
+static double
+cached_plan_cost(CachedPlan *plan, bool include_planner)
+{
+	double		total = 0;
+	ListCell   *cell;
+
+	foreach(cell, plan->stmt_list)
+	{
+		PlannedStmt *stmt = lfirst_node(PlannedStmt, cell);
+		int			nrels;
+
+		/* Utility statements contribute nothing to the estimate */
+		if (stmt->commandType == CMD_UTILITY)
+			continue;
+
+		/* Start from the total_cost recorded on the statement's plan tree */
+		total += stmt->planTree->total_cost;
+
+		/* Done with this statement unless planning effort is wanted too */
+		if (!include_planner)
+			continue;
+
+		/*
+		 * Planning effort is estimated very crudely from the number of
+		 * entries in the finished plan's rangetable.  Real join planning
+		 * effort grows much faster than linearly with the number of
+		 * relations, but only until the join collapse limits kick in; and
+		 * inheritance children add planning work without making the join
+		 * problem worse.  The true shape of the curve is therefore unclear,
+		 * and a better estimate would take considerable work that is not
+		 * obviously worthwhile yet.
+		 *
+		 * We also lack a good model of how planning cost relates to
+		 * execution cost.  The multiplier of 1000 * cpu_operator_cost is
+		 * probably on the low side, but is being tried for now before any
+		 * more aggressive correction is made.
+		 *
+		 * A more elaborate estimator, if one is ever written, probably
+		 * belongs in src/backend/optimizer/ rather than here.
+		 */
+		nrels = list_length(stmt->rtable);
+		total += 1000.0 * cpu_operator_cost * (nrels + 1);
+	}
+
+	return total;
+}
+
+/*
+ * GetCachedPlan: get a cached plan from a CachedPlanSource.
+ *
+ * This function hides the logic that decides whether to use a generic
+ * plan or a custom plan for the given parameters: the caller does not know
+ * which it will get.
+ *
+ * On return, the plan is valid and we have sufficient locks to begin
+ * execution.
+ *
+ * On return, the refcount of the plan has been incremented; a later
+ * ReleaseCachedPlan() call is expected. If "owner" is not NULL then
+ * the refcount has been reported to that ResourceOwner (note that this
+ * is only supported for "saved" CachedPlanSources).
+ *
+ * Note: if any replanning activity is required, the caller's memory context
+ * is used for that work.
+ */
+CachedPlan *
+GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams,
+			  ResourceOwner owner, QueryEnvironment *queryEnv)
+{
+	CachedPlan *plan = NULL;
+	List	   *qlist;
+	bool		customplan;
+
+	/* Assert caller is doing things in a sane order */
+	Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC);
+	Assert(plansource->is_complete);
+	/* This seems worth a real test, though */
+	if (owner && !plansource->is_saved)
+		elog(ERROR, "cannot apply ResourceOwner to non-saved cached plan");
+
+	/* Make sure the querytree list is valid and we have parse-time locks */
+	qlist = RevalidateCachedQuery(plansource, queryEnv);
+
+	/* Decide whether to use a custom plan */
+	customplan = choose_custom_plan(plansource, boundParams);
+
+	if (!customplan)
+	{
+		if (CheckCachedPlan(plansource))
+		{
+			/* We want a generic plan, and we already have a valid one */
+			plan = plansource->gplan;
+			Assert(plan->magic == CACHEDPLAN_MAGIC);
+		}
+		else
+		{
+			/* Build a new generic plan (NULL boundParams requests that) */
+			plan = BuildCachedPlan(plansource, qlist, NULL, queryEnv);
+			/* Just make real sure plansource->gplan is clear */
+			ReleaseGenericPlan(plansource);
+			/* Link the new generic plan into the plansource */
+			plansource->gplan = plan;
+			plan->refcount++;	/* the reference held via plansource->gplan */
+			/* Immediately reparent into appropriate context */
+			if (plansource->is_saved)
+			{
+				/* saved plans all live under CacheMemoryContext */
+				MemoryContextSetParent(plan->context, CacheMemoryContext);
+				plan->is_saved = true;
+			}
+			else
+			{
+				/* otherwise, it should be a sibling of the plansource */
+				MemoryContextSetParent(plan->context,
+									   MemoryContextGetParent(plansource->context));
+			}
+			/* Update generic_cost whenever we make a new generic plan */
+			plansource->generic_cost = cached_plan_cost(plan, false);
+
+			/*
+			 * If, based on the now-known value of generic_cost, we'd not have
+			 * chosen to use a generic plan, then forget it and make a custom
+			 * plan. This is a bit of a wart but is necessary to avoid a
+			 * glitch in behavior when the custom plans are consistently big
+			 * winners; at some point we'll experiment with a generic plan and
+			 * find it's a loser, but we don't want to actually execute that
+			 * plan.
+			 *
+			 * The generic plan just built stays linked in plansource->gplan
+			 * either way; only the choice for this call changes.
+			 */
+			customplan = choose_custom_plan(plansource, boundParams);
+
+			/*
+			 * If we choose to plan again, we need to re-copy the query_list,
+			 * since the planner probably scribbled on it. We can force
+			 * BuildCachedPlan to do that by passing NIL.
+			 */
+			qlist = NIL;
+		}
+	}
+
+	if (customplan)
+	{
+		/* Build a custom plan */
+		plan = BuildCachedPlan(plansource, qlist, boundParams, queryEnv);
+		/* Accumulate total costs of custom plans */
+		plansource->total_custom_cost += cached_plan_cost(plan, true);
+
+		plansource->num_custom_plans++;
+	}
+	else
+	{
+		plansource->num_generic_plans++;
+	}
+
+	Assert(plan != NULL);
+
+	/*
+	 * Flag the plan as in use by caller. With an owner, its reference array
+	 * is enlarged before the refcount is bumped and the reference is recorded
+	 * afterwards (presumably so that recording cannot fail once the count
+	 * has been incremented --- confirm against the ResourceOwner code).
+	 */
+	if (owner)
+		ResourceOwnerEnlargePlanCacheRefs(owner);
+	plan->refcount++;
+	if (owner)
+		ResourceOwnerRememberPlanCacheRef(owner, plan);
+
+	/*
+	 * Saved plans should be under CacheMemoryContext so they will not go away
+	 * until their reference count goes to zero. In the generic-plan cases we
+	 * already took care of that, but for a custom plan, do it as soon as we
+	 * have created a reference-counted link.
+	 */
+	if (customplan && plansource->is_saved)
+	{
+		MemoryContextSetParent(plan->context, CacheMemoryContext);
+		plan->is_saved = true;
+	}
+
+	return plan;
+}
+
+/*
+ * ReleaseCachedPlan: release active use of a cached plan.
+ *
+ * This decrements the reference count, and frees the plan if the count
+ * has thereby gone to zero. If "owner" is not NULL, it is assumed that
+ * the reference count is managed by that ResourceOwner.
+ *
+ * Note: owner == NULL is used for releasing references that are in
+ * persistent data structures, such as the parent CachedPlanSource or a
+ * Portal. Transient references should be protected by a resource owner.
+ */
+void
+ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner)
+{
+	Assert(plan->magic == CACHEDPLAN_MAGIC);
+
+	/* An owner-tracked reference is forgotten by its owner first */
+	if (owner != NULL)
+	{
+		Assert(plan->is_saved);
+		ResourceOwnerForgetPlanCacheRef(owner, plan);
+	}
+
+	Assert(plan->refcount > 0);
+	plan->refcount--;
+
+	/* Somebody else still holds a reference: nothing more to do */
+	if (plan->refcount != 0)
+		return;
+
+	/* That was the last reference, so mark the plan no longer valid */
+	plan->magic = 0;
+
+	/* A one-shot plan does not own its context, so it cannot be freed here */
+	if (!plan->is_oneshot)
+		MemoryContextDelete(plan->context);
+}
+
+/*
+ * CachedPlanAllowsSimpleValidityCheck: can we use CachedPlanIsSimplyValid?
+ *
+ * This function, together with CachedPlanIsSimplyValid, provides a fast path
+ * for revalidating "simple" generic plans. The core requirement to be simple
+ * is that the plan must not require taking any locks, which translates to
+ * not touching any tables; this happens to match up well with an important
+ * use-case in PL/pgSQL. This function tests whether that's true, along
+ * with checking some other corner cases that we'd rather not bother with
+ * handling in the fast path. (Note that it's still possible for such a plan
+ * to be invalidated, for example due to a change in a function that was
+ * inlined into the plan.)
+ *
+ * If the plan is simply valid, and "owner" is not NULL, record a refcount on
+ * the plan in that resowner before returning. It is caller's responsibility
+ * to be sure that a refcount is held on any plan that's being actively used.
+ *
+ * This must only be called on known-valid generic plans (eg, ones just
+ * returned by GetCachedPlan).
If it returns true, the caller may re-use + * the cached plan as long as CachedPlanIsSimplyValid returns true; that + * check is much cheaper than the full revalidation done by GetCachedPlan. + * Nonetheless, no required checks are omitted. + */ +bool +CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, + CachedPlan *plan, ResourceOwner owner) +{ + ListCell *lc; + + /* + * Sanity-check that the caller gave us a validated generic plan. Notice + * that we *don't* assert plansource->is_valid as you might expect; that's + * because it's possible that that's already false when GetCachedPlan + * returns, e.g. because ResetPlanCache happened partway through. We + * should accept the plan as long as plan->is_valid is true, and expect to + * replan after the next CachedPlanIsSimplyValid call. + */ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + Assert(plan->magic == CACHEDPLAN_MAGIC); + Assert(plan->is_valid); + Assert(plan == plansource->gplan); + Assert(plansource->search_path != NULL); + Assert(OverrideSearchPathMatchesCurrent(plansource->search_path)); + + /* We don't support oneshot plans here. */ + if (plansource->is_oneshot) + return false; + Assert(!plan->is_oneshot); + + /* + * If the plan is dependent on RLS considerations, or it's transient, + * reject. These things probably can't ever happen for table-free + * queries, but for safety's sake let's check. + */ + if (plansource->dependsOnRLS) + return false; + if (plan->dependsOnRole) + return false; + if (TransactionIdIsValid(plan->saved_xmin)) + return false; + + /* + * Reject if AcquirePlannerLocks would have anything to do. This is + * simplistic, but there's no need to inquire any more carefully; indeed, + * for current callers it shouldn't even be possible to hit any of these + * checks. 
+ */ + foreach(lc, plansource->query_list) + { + Query *query = lfirst_node(Query, lc); + + if (query->commandType == CMD_UTILITY) + return false; + if (query->rtable || query->cteList || query->hasSubLinks) + return false; + } + + /* + * Reject if AcquireExecutorLocks would have anything to do. This is + * probably unnecessary given the previous check, but let's be safe. + */ + foreach(lc, plan->stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc); + ListCell *lc2; + + if (plannedstmt->commandType == CMD_UTILITY) + return false; + + /* + * We have to grovel through the rtable because it's likely to contain + * an RTE_RESULT relation, rather than being totally empty. + */ + foreach(lc2, plannedstmt->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + + if (rte->rtekind == RTE_RELATION) + return false; + } + } + + /* + * Okay, it's simple. Note that what we've primarily established here is + * that no locks need be taken before checking the plan's is_valid flag. + */ + + /* Bump refcount if requested. */ + if (owner) + { + ResourceOwnerEnlargePlanCacheRefs(owner); + plan->refcount++; + ResourceOwnerRememberPlanCacheRef(owner, plan); + } + + return true; +} + +/* + * CachedPlanIsSimplyValid: quick check for plan still being valid + * + * This function must not be used unless CachedPlanAllowsSimpleValidityCheck + * previously said it was OK. + * + * If the plan is valid, and "owner" is not NULL, record a refcount on + * the plan in that resowner before returning. It is caller's responsibility + * to be sure that a refcount is held on any plan that's being actively used. + * + * The code here is unconditionally safe as long as the only use of this + * CachedPlanSource is in connection with the particular CachedPlan pointer + * that's passed in. 
If the plansource were being used for other purposes, + * it's possible that its generic plan could be invalidated and regenerated + * while the current caller wasn't looking, and then there could be a chance + * collision of address between this caller's now-stale plan pointer and the + * actual address of the new generic plan. For current uses, that scenario + * can't happen; but with a plansource shared across multiple uses, it'd be + * advisable to also save plan->generation and verify that that still matches. + */ +bool +CachedPlanIsSimplyValid(CachedPlanSource *plansource, CachedPlan *plan, + ResourceOwner owner) +{ + /* + * Careful here: since the caller doesn't necessarily hold a refcount on + * the plan to start with, it's possible that "plan" is a dangling + * pointer. Don't dereference it until we've verified that it still + * matches the plansource's gplan (which is either valid or NULL). + */ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + + /* + * Has cache invalidation fired on this plan? We can check this right + * away since there are no locks that we'd need to acquire first. Note + * that here we *do* check plansource->is_valid, so as to force plan + * rebuild if that's become false. + */ + if (!plansource->is_valid || plan != plansource->gplan || !plan->is_valid) + return false; + + Assert(plan->magic == CACHEDPLAN_MAGIC); + + /* Is the search_path still the same as when we made it? */ + Assert(plansource->search_path != NULL); + if (!OverrideSearchPathMatchesCurrent(plansource->search_path)) + return false; + + /* It's still good. Bump refcount if requested. */ + if (owner) + { + ResourceOwnerEnlargePlanCacheRefs(owner); + plan->refcount++; + ResourceOwnerRememberPlanCacheRef(owner, plan); + } + + return true; +} + +/* + * CachedPlanSetParentContext: move a CachedPlanSource to a new memory context + * + * This can only be applied to unsaved plans; once saved, a plan always + * lives underneath CacheMemoryContext. 
+ */ +void +CachedPlanSetParentContext(CachedPlanSource *plansource, + MemoryContext newcontext) +{ + /* Assert caller is doing things in a sane order */ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + Assert(plansource->is_complete); + + /* These seem worth real tests, though */ + if (plansource->is_saved) + elog(ERROR, "cannot move a saved cached plan to another context"); + if (plansource->is_oneshot) + elog(ERROR, "cannot move a one-shot cached plan to another context"); + + /* OK, let the caller keep the plan where he wishes */ + MemoryContextSetParent(plansource->context, newcontext); + + /* + * The query_context needs no special handling, since it's a child of + * plansource->context. But if there's a generic plan, it should be + * maintained as a sibling of plansource->context. + */ + if (plansource->gplan) + { + Assert(plansource->gplan->magic == CACHEDPLAN_MAGIC); + MemoryContextSetParent(plansource->gplan->context, newcontext); + } +} + +/* + * CopyCachedPlan: make a copy of a CachedPlanSource + * + * This is a convenience routine that does the equivalent of + * CreateCachedPlan + CompleteCachedPlan, using the data stored in the + * input CachedPlanSource. The result is therefore "unsaved" (regardless + * of the state of the source), and we don't copy any generic plan either. + * The result will be currently valid, or not, the same as the source. + */ +CachedPlanSource * +CopyCachedPlan(CachedPlanSource *plansource) +{ + CachedPlanSource *newsource; + MemoryContext source_context; + MemoryContext querytree_context; + MemoryContext oldcxt; + + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + Assert(plansource->is_complete); + + /* + * One-shot plans can't be copied, because we haven't taken care that + * parsing/planning didn't scribble on the raw parse tree or querytrees. 
+ */ + if (plansource->is_oneshot) + elog(ERROR, "cannot copy a one-shot cached plan"); + + source_context = AllocSetContextCreate(CurrentMemoryContext, + "CachedPlanSource", + ALLOCSET_START_SMALL_SIZES); + + oldcxt = MemoryContextSwitchTo(source_context); + + newsource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); + newsource->magic = CACHEDPLANSOURCE_MAGIC; + newsource->raw_parse_tree = copyObject(plansource->raw_parse_tree); + newsource->query_string = pstrdup(plansource->query_string); + MemoryContextSetIdentifier(source_context, newsource->query_string); + newsource->commandTag = plansource->commandTag; + if (plansource->num_params > 0) + { + newsource->param_types = (Oid *) + palloc(plansource->num_params * sizeof(Oid)); + memcpy(newsource->param_types, plansource->param_types, + plansource->num_params * sizeof(Oid)); + } + else + newsource->param_types = NULL; + newsource->num_params = plansource->num_params; + newsource->parserSetup = plansource->parserSetup; + newsource->parserSetupArg = plansource->parserSetupArg; + newsource->cursor_options = plansource->cursor_options; + newsource->fixed_result = plansource->fixed_result; + if (plansource->resultDesc) + newsource->resultDesc = CreateTupleDescCopy(plansource->resultDesc); + else + newsource->resultDesc = NULL; + newsource->context = source_context; + + querytree_context = AllocSetContextCreate(source_context, + "CachedPlanQuery", + ALLOCSET_START_SMALL_SIZES); + MemoryContextSwitchTo(querytree_context); + newsource->query_list = copyObject(plansource->query_list); + newsource->relationOids = copyObject(plansource->relationOids); + newsource->invalItems = copyObject(plansource->invalItems); + if (plansource->search_path) + newsource->search_path = CopyOverrideSearchPath(plansource->search_path); + newsource->query_context = querytree_context; + newsource->rewriteRoleId = plansource->rewriteRoleId; + newsource->rewriteRowSecurity = plansource->rewriteRowSecurity; + newsource->dependsOnRLS = 
plansource->dependsOnRLS; + + newsource->gplan = NULL; + + newsource->is_oneshot = false; + newsource->is_complete = true; + newsource->is_saved = false; + newsource->is_valid = plansource->is_valid; + newsource->generation = plansource->generation; + + /* We may as well copy any acquired cost knowledge */ + newsource->generic_cost = plansource->generic_cost; + newsource->total_custom_cost = plansource->total_custom_cost; + newsource->num_generic_plans = plansource->num_generic_plans; + newsource->num_custom_plans = plansource->num_custom_plans; + + MemoryContextSwitchTo(oldcxt); + + return newsource; +} + +/* + * CachedPlanIsValid: test whether the rewritten querytree within a + * CachedPlanSource is currently valid (that is, not marked as being in need + * of revalidation). + * + * This result is only trustworthy (ie, free from race conditions) if + * the caller has acquired locks on all the relations used in the plan. + */ +bool +CachedPlanIsValid(CachedPlanSource *plansource) +{ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + return plansource->is_valid; +} + +/* + * CachedPlanGetTargetList: return tlist, if any, describing plan's output + * + * The result is guaranteed up-to-date. However, it is local storage + * within the cached plan, and may disappear next time the plan is updated. 
+ */ +List * +CachedPlanGetTargetList(CachedPlanSource *plansource, + QueryEnvironment *queryEnv) +{ + Query *pstmt; + + /* Assert caller is doing things in a sane order */ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + Assert(plansource->is_complete); + + /* + * No work needed if statement doesn't return tuples (we assume this + * feature cannot be changed by an invalidation) + */ + if (plansource->resultDesc == NULL) + return NIL; + + /* Make sure the querytree list is valid and we have parse-time locks */ + RevalidateCachedQuery(plansource, queryEnv); + + /* Get the primary statement and find out what it returns */ + pstmt = QueryListGetPrimaryStmt(plansource->query_list); + + return FetchStatementTargetList((Node *) pstmt); +} + +/* + * GetCachedExpression: construct a CachedExpression for an expression. + * + * This performs the same transformations on the expression as + * expression_planner(), ie, convert an expression as emitted by parse + * analysis to be ready to pass to the executor. + * + * The result is stashed in a private, long-lived memory context. + * (Note that this might leak a good deal of memory in the caller's + * context before that.) The passed-in expr tree is not modified. + */ +CachedExpression * +GetCachedExpression(Node *expr) +{ + CachedExpression *cexpr; + List *relationOids; + List *invalItems; + MemoryContext cexpr_context; + MemoryContext oldcxt; + + /* + * Pass the expression through the planner, and collect dependencies. + * Everything built here is leaked in the caller's context; that's + * intentional to minimize the size of the permanent data structure. + */ + expr = (Node *) expression_planner_with_deps((Expr *) expr, + &relationOids, + &invalItems); + + /* + * Make a private memory context, and copy what we need into that. To + * avoid leaking a long-lived context if we fail while copying data, we + * initially make the context under the caller's context. 
+ */ + cexpr_context = AllocSetContextCreate(CurrentMemoryContext, + "CachedExpression", + ALLOCSET_SMALL_SIZES); + + oldcxt = MemoryContextSwitchTo(cexpr_context); + + cexpr = (CachedExpression *) palloc(sizeof(CachedExpression)); + cexpr->magic = CACHEDEXPR_MAGIC; + cexpr->expr = copyObject(expr); + cexpr->is_valid = true; + cexpr->relationOids = copyObject(relationOids); + cexpr->invalItems = copyObject(invalItems); + cexpr->context = cexpr_context; + + MemoryContextSwitchTo(oldcxt); + + /* + * Reparent the expr's memory context under CacheMemoryContext so that it + * will live indefinitely. + */ + MemoryContextSetParent(cexpr_context, CacheMemoryContext); + + /* + * Add the entry to the global list of cached expressions. + */ + dlist_push_tail(&cached_expression_list, &cexpr->node); + + return cexpr; +} + +/* + * FreeCachedExpression + * Delete a CachedExpression. + */ +void +FreeCachedExpression(CachedExpression *cexpr) +{ + /* Sanity check */ + Assert(cexpr->magic == CACHEDEXPR_MAGIC); + /* Unlink from global list */ + dlist_delete(&cexpr->node); + /* Free all storage associated with CachedExpression */ + MemoryContextDelete(cexpr->context); +} + +/* + * QueryListGetPrimaryStmt + * Get the "primary" stmt within a list, ie, the one marked canSetTag. + * + * Returns NULL if no such stmt. If multiple queries within the list are + * marked canSetTag, returns the first one. Neither of these cases should + * occur in present usages of this function. + */ +static Query * +QueryListGetPrimaryStmt(List *stmts) +{ + ListCell *lc; + + foreach(lc, stmts) + { + Query *stmt = lfirst_node(Query, lc); + + if (stmt->canSetTag) + return stmt; + } + return NULL; +} + +/* + * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; + * or release them if acquire is false. 
+ */ +static void +AcquireExecutorLocks(List *stmt_list, bool acquire) +{ + ListCell *lc1; + + foreach(lc1, stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); + ListCell *lc2; + + if (plannedstmt->commandType == CMD_UTILITY) + { + /* + * Ignore utility statements, except those (such as EXPLAIN) that + * contain a parsed-but-not-planned query. Note: it's okay to use + * ScanQueryForLocks, even though the query hasn't been through + * rule rewriting, because rewriting doesn't change the query + * representation. + */ + Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); + + if (query) + ScanQueryForLocks(query, acquire); + continue; + } + + foreach(lc2, plannedstmt->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + + if (rte->rtekind != RTE_RELATION) + continue; + + /* + * Acquire the appropriate type of lock on each relation OID. Note + * that we don't actually try to open the rel, and hence will not + * fail if it's been dropped entirely --- we'll just transiently + * acquire a non-conflicting lock. + */ + if (acquire) + LockRelationOid(rte->relid, rte->rellockmode); + else + UnlockRelationOid(rte->relid, rte->rellockmode); + } + } +} + +/* + * AcquirePlannerLocks: acquire locks needed for planning of a querytree list; + * or release them if acquire is false. + * + * Note that we don't actually try to open the relations, and hence will not + * fail if one has been dropped entirely --- we'll just transiently acquire + * a non-conflicting lock. 
+ */ +static void +AcquirePlannerLocks(List *stmt_list, bool acquire) +{ + ListCell *lc; + + foreach(lc, stmt_list) + { + Query *query = lfirst_node(Query, lc); + + if (query->commandType == CMD_UTILITY) + { + /* Ignore utility statements, unless they contain a Query */ + query = UtilityContainsQuery(query->utilityStmt); + if (query) + ScanQueryForLocks(query, acquire); + continue; + } + + ScanQueryForLocks(query, acquire); + } +} + +/* + * ScanQueryForLocks: recursively scan one Query for AcquirePlannerLocks. + */ +static void +ScanQueryForLocks(Query *parsetree, bool acquire) +{ + ListCell *lc; + + /* Shouldn't get called on utility commands */ + Assert(parsetree->commandType != CMD_UTILITY); + + /* + * First, process RTEs of the current query level. + */ + foreach(lc, parsetree->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + switch (rte->rtekind) + { + case RTE_RELATION: + /* Acquire or release the appropriate type of lock */ + if (acquire) + LockRelationOid(rte->relid, rte->rellockmode); + else + UnlockRelationOid(rte->relid, rte->rellockmode); + break; + + case RTE_SUBQUERY: + /* Recurse into subquery-in-FROM */ + ScanQueryForLocks(rte->subquery, acquire); + break; + + default: + /* ignore other types of RTEs */ + break; + } + } + + /* Recurse into subquery-in-WITH */ + foreach(lc, parsetree->cteList) + { + CommonTableExpr *cte = lfirst_node(CommonTableExpr, lc); + + ScanQueryForLocks(castNode(Query, cte->ctequery), acquire); + } + + /* + * Recurse into sublink subqueries, too. But we already did the ones in + * the rtable and cteList. 
+ */ + if (parsetree->hasSubLinks) + { + query_tree_walker(parsetree, ScanQueryWalker, + (void *) &acquire, + QTW_IGNORE_RC_SUBQUERIES); + } +} + +/* + * Walker to find sublink subqueries for ScanQueryForLocks + */ +static bool +ScanQueryWalker(Node *node, bool *acquire) +{ + if (node == NULL) + return false; + if (IsA(node, SubLink)) + { + SubLink *sub = (SubLink *) node; + + /* Do what we came for */ + ScanQueryForLocks(castNode(Query, sub->subselect), *acquire); + /* Fall through to process lefthand args of SubLink */ + } + + /* + * Do NOT recurse into Query nodes, because ScanQueryForLocks already + * processed subselects of subselects for us. + */ + return expression_tree_walker(node, ScanQueryWalker, + (void *) acquire); +} + +/* + * PlanCacheComputeResultDesc: given a list of analyzed-and-rewritten Queries, + * determine the result tupledesc it will produce. Returns NULL if the + * execution will not return tuples. + * + * Note: the result is created or copied into current memory context. + */ +static TupleDesc +PlanCacheComputeResultDesc(List *stmt_list) +{ + Query *query; + + switch (ChoosePortalStrategy(stmt_list)) + { + case PORTAL_ONE_SELECT: + case PORTAL_ONE_MOD_WITH: + query = linitial_node(Query, stmt_list); + return ExecCleanTypeFromTL(query->targetList); + + case PORTAL_ONE_RETURNING: + query = QueryListGetPrimaryStmt(stmt_list); + Assert(query->returningList); + return ExecCleanTypeFromTL(query->returningList); + + case PORTAL_UTIL_SELECT: + query = linitial_node(Query, stmt_list); + Assert(query->utilityStmt); + return UtilityTupleDescriptor(query->utilityStmt); + + case PORTAL_MULTI_QUERY: + /* will not return tuples */ + break; + } + return NULL; +} + +/* + * PlanCacheRelCallback + * Relcache inval callback function + * + * Invalidate all plans mentioning the given rel, or all plans mentioning + * any rel at all if relid == InvalidOid. 
+ */ +static void +PlanCacheRelCallback(Datum arg, Oid relid) +{ + dlist_iter iter; + + dlist_foreach(iter, &saved_plan_list) + { + CachedPlanSource *plansource = dlist_container(CachedPlanSource, + node, iter.cur); + + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + + /* No work if it's already invalidated */ + if (!plansource->is_valid) + continue; + + /* Never invalidate transaction control commands */ + if (IsTransactionStmtPlan(plansource)) + continue; + + /* + * Check the dependency list for the rewritten querytree. + */ + if ((relid == InvalidOid) ? plansource->relationOids != NIL : + list_member_oid(plansource->relationOids, relid)) + { + /* Invalidate the querytree and generic plan */ + plansource->is_valid = false; + if (plansource->gplan) + plansource->gplan->is_valid = false; + } + + /* + * The generic plan, if any, could have more dependencies than the + * querytree does, so we have to check it too. + */ + if (plansource->gplan && plansource->gplan->is_valid) + { + ListCell *lc; + + foreach(lc, plansource->gplan->stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc); + + if (plannedstmt->commandType == CMD_UTILITY) + continue; /* Ignore utility statements */ + if ((relid == InvalidOid) ? plannedstmt->relationOids != NIL : + list_member_oid(plannedstmt->relationOids, relid)) + { + /* Invalidate the generic plan only */ + plansource->gplan->is_valid = false; + break; /* out of stmt_list scan */ + } + } + } + } + + /* Likewise check cached expressions */ + dlist_foreach(iter, &cached_expression_list) + { + CachedExpression *cexpr = dlist_container(CachedExpression, + node, iter.cur); + + Assert(cexpr->magic == CACHEDEXPR_MAGIC); + + /* No work if it's already invalidated */ + if (!cexpr->is_valid) + continue; + + if ((relid == InvalidOid) ? 
cexpr->relationOids != NIL : + list_member_oid(cexpr->relationOids, relid)) + { + cexpr->is_valid = false; + } + } +} + +/* + * PlanCacheObjectCallback + * Syscache inval callback function for PROCOID and TYPEOID caches + * + * Invalidate all plans mentioning the object with the specified hash value, + * or all plans mentioning any member of this cache if hashvalue == 0. + */ +static void +PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + dlist_iter iter; + + dlist_foreach(iter, &saved_plan_list) + { + CachedPlanSource *plansource = dlist_container(CachedPlanSource, + node, iter.cur); + ListCell *lc; + + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + + /* No work if it's already invalidated */ + if (!plansource->is_valid) + continue; + + /* Never invalidate transaction control commands */ + if (IsTransactionStmtPlan(plansource)) + continue; + + /* + * Check the dependency list for the rewritten querytree. + */ + foreach(lc, plansource->invalItems) + { + PlanInvalItem *item = (PlanInvalItem *) lfirst(lc); + + if (item->cacheId != cacheid) + continue; + if (hashvalue == 0 || + item->hashValue == hashvalue) + { + /* Invalidate the querytree and generic plan */ + plansource->is_valid = false; + if (plansource->gplan) + plansource->gplan->is_valid = false; + break; + } + } + + /* + * The generic plan, if any, could have more dependencies than the + * querytree does, so we have to check it too. 
+ */ + if (plansource->gplan && plansource->gplan->is_valid) + { + foreach(lc, plansource->gplan->stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc); + ListCell *lc3; + + if (plannedstmt->commandType == CMD_UTILITY) + continue; /* Ignore utility statements */ + foreach(lc3, plannedstmt->invalItems) + { + PlanInvalItem *item = (PlanInvalItem *) lfirst(lc3); + + if (item->cacheId != cacheid) + continue; + if (hashvalue == 0 || + item->hashValue == hashvalue) + { + /* Invalidate the generic plan only */ + plansource->gplan->is_valid = false; + break; /* out of invalItems scan */ + } + } + if (!plansource->gplan->is_valid) + break; /* out of stmt_list scan */ + } + } + } + + /* Likewise check cached expressions */ + dlist_foreach(iter, &cached_expression_list) + { + CachedExpression *cexpr = dlist_container(CachedExpression, + node, iter.cur); + ListCell *lc; + + Assert(cexpr->magic == CACHEDEXPR_MAGIC); + + /* No work if it's already invalidated */ + if (!cexpr->is_valid) + continue; + + foreach(lc, cexpr->invalItems) + { + PlanInvalItem *item = (PlanInvalItem *) lfirst(lc); + + if (item->cacheId != cacheid) + continue; + if (hashvalue == 0 || + item->hashValue == hashvalue) + { + cexpr->is_valid = false; + break; + } + } + } +} + +/* + * PlanCacheSysCallback + * Syscache inval callback function for other caches + * + * Just invalidate everything... + */ +static void +PlanCacheSysCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + ResetPlanCache(); +} + +/* + * ResetPlanCache: invalidate all cached plans. 
+ */ +void +ResetPlanCache(void) +{ + dlist_iter iter; + + dlist_foreach(iter, &saved_plan_list) + { + CachedPlanSource *plansource = dlist_container(CachedPlanSource, + node, iter.cur); + ListCell *lc; + + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + + /* No work if it's already invalidated */ + if (!plansource->is_valid) + continue; + + /* + * We *must not* mark transaction control statements as invalid, + * particularly not ROLLBACK, because they may need to be executed in + * aborted transactions when we can't revalidate them (cf bug #5269). + */ + if (IsTransactionStmtPlan(plansource)) + continue; + + /* + * In general there is no point in invalidating utility statements + * since they have no plans anyway. So invalidate it only if it + * contains at least one non-utility statement, or contains a utility + * statement that contains a pre-analyzed query (which could have + * dependencies.) + */ + foreach(lc, plansource->query_list) + { + Query *query = lfirst_node(Query, lc); + + if (query->commandType != CMD_UTILITY || + UtilityContainsQuery(query->utilityStmt)) + { + /* non-utility statement, so invalidate */ + plansource->is_valid = false; + if (plansource->gplan) + plansource->gplan->is_valid = false; + /* no need to look further */ + break; + } + } + } + + /* Likewise invalidate cached expressions */ + dlist_foreach(iter, &cached_expression_list) + { + CachedExpression *cexpr = dlist_container(CachedExpression, + node, iter.cur); + + Assert(cexpr->magic == CACHEDEXPR_MAGIC); + + cexpr->is_valid = false; + } +} diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c new file mode 100644 index 0000000..dcf56d4 --- /dev/null +++ b/src/backend/utils/cache/relcache.c @@ -0,0 +1,6651 @@ +/*------------------------------------------------------------------------- + * + * relcache.c + * POSTGRES relation descriptor cache code + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright 
(c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/cache/relcache.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * RelationCacheInitialize - initialize relcache (to empty) + * RelationCacheInitializePhase2 - initialize shared-catalog entries + * RelationCacheInitializePhase3 - finish initializing relcache + * RelationIdGetRelation - get a reldesc by relation id + * RelationClose - close an open relation + * + * NOTES + * The following code contains many undocumented hacks. Please be + * careful.... + */ +#include "postgres.h" + +#include <sys/file.h> +#include <fcntl.h> +#include <unistd.h> + +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/nbtree.h" +#include "access/parallel.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/tupdesc_details.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/partition.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_attrdef.h" +#include "catalog/pg_auth_members.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_database.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_rewrite.h" +#include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" +#include "catalog/schemapg.h" +#include "catalog/storage.h" +#include "commands/policy.h" +#include "commands/trigger.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" 
+#include "optimizer/optimizer.h" +#include "rewrite/rewriteDefine.h" +#include "rewrite/rowsecurity.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/relmapper.h" +#include "utils/resowner_private.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + +#define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */ + +/* + * Whether to bother checking if relation cache memory needs to be freed + * eagerly. See also RelationBuildDesc() and pg_config_manual.h. + */ +#if defined(RECOVER_RELATION_BUILD_MEMORY) && (RECOVER_RELATION_BUILD_MEMORY != 0) +#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1 +#else +#define RECOVER_RELATION_BUILD_MEMORY 0 +#ifdef DISCARD_CACHES_ENABLED +#define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1 +#endif +#endif + +/* + * hardcoded tuple descriptors, contents generated by genbki.pl + */ +static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class}; +static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute}; +static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc}; +static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type}; +static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database}; +static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid}; +static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members}; +static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index}; +static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel}; +static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription}; + +/* + * 
 * Hash tables that index the relation cache
 *
 * We used to index the cache by both name and OID, but now there
 * is only an index by OID.
 */
typedef struct relidcacheent
{
	Oid			reloid;			/* relation OID; the cache macros in this
								 * file look entries up by rd_id */
	Relation	reldesc;		/* the cached relation descriptor */
} RelIdCacheEnt;

/* The OID-indexed hash table holding RelIdCacheEnt entries */
static HTAB *RelationIdCache;

/*
 * This flag is false until we have prepared the critical relcache entries
 * that are needed to do indexscans on the tables read by relcache building.
 */
bool		criticalRelcachesBuilt = false;

/*
 * This flag is false until we have prepared the critical relcache entries
 * for shared catalogs (which are the tables needed for login).
 */
bool		criticalSharedRelcachesBuilt = false;

/*
 * This counter counts relcache inval events received since backend startup
 * (but only for rels that are actually in cache).  Presently, we use it only
 * to detect whether data about to be written by write_relcache_init_file()
 * might already be obsolete.
 */
static long relcacheInvalsReceived = 0L;

/*
 * in_progress_list is a stack of ongoing RelationBuildDesc() calls.  CREATE
 * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
 * It critically relies on each backend absorbing those changes no later than
 * next transaction start.  Hence, RelationBuildDesc() loops until it finishes
 * without accepting a relevant invalidation.  (Most invalidation consumers
 * don't do this.)
 */
typedef struct inprogressent
{
	Oid			reloid;			/* OID of relation being built */
	bool		invalidated;	/* whether an invalidation arrived for it */
} InProgressEnt;

/*
 * Storage for the stack described above.
 * NOTE(review): presumably _len is the number of entries in use and _maxlen
 * the allocated capacity; confirm against the code that pushes entries.
 */
static InProgressEnt *in_progress_list;
static int	in_progress_list_len;
static int	in_progress_list_maxlen;

/*
 * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
 * cleanup work.  This list intentionally has limited size; if it overflows,
 * we fall back to scanning the whole hashtable.
There is no value in a very + * large list because (1) at some point, a hash_seq_search scan is faster than + * retail lookups, and (2) the value of this is to reduce EOXact work for + * short transactions, which can't have dirtied all that many tables anyway. + * EOXactListAdd() does not bother to prevent duplicate list entries, so the + * cleanup processing must be idempotent. + */ +#define MAX_EOXACT_LIST 32 +static Oid eoxact_list[MAX_EOXACT_LIST]; +static int eoxact_list_len = 0; +static bool eoxact_list_overflowed = false; + +#define EOXactListAdd(rel) \ + do { \ + if (eoxact_list_len < MAX_EOXACT_LIST) \ + eoxact_list[eoxact_list_len++] = (rel)->rd_id; \ + else \ + eoxact_list_overflowed = true; \ + } while (0) + +/* + * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact + * cleanup work. The array expands as needed; there is no hashtable because + * we don't need to access individual items except at EOXact. + */ +static TupleDesc *EOXactTupleDescArray; +static int NextEOXactTupleDescNum = 0; +static int EOXactTupleDescArrayLen = 0; + +/* + * macros to manipulate the lookup hashtable + */ +#define RelationCacheInsert(RELATION, replace_allowed) \ +do { \ + RelIdCacheEnt *hentry; bool found; \ + hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \ + (void *) &((RELATION)->rd_id), \ + HASH_ENTER, &found); \ + if (found) \ + { \ + /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \ + Relation _old_rel = hentry->reldesc; \ + Assert(replace_allowed); \ + hentry->reldesc = (RELATION); \ + if (RelationHasReferenceCountZero(_old_rel)) \ + RelationDestroyRelation(_old_rel, false); \ + else if (!IsBootstrapProcessingMode()) \ + elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \ + RelationGetRelationName(_old_rel)); \ + } \ + else \ + hentry->reldesc = (RELATION); \ +} while(0) + +#define RelationIdCacheLookup(ID, RELATION) \ +do { \ + RelIdCacheEnt *hentry; \ + hentry = (RelIdCacheEnt *) 
hash_search(RelationIdCache, \ + (void *) &(ID), \ + HASH_FIND, NULL); \ + if (hentry) \ + RELATION = hentry->reldesc; \ + else \ + RELATION = NULL; \ +} while(0) + +#define RelationCacheDelete(RELATION) \ +do { \ + RelIdCacheEnt *hentry; \ + hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \ + (void *) &((RELATION)->rd_id), \ + HASH_REMOVE, NULL); \ + if (hentry == NULL) \ + elog(WARNING, "failed to delete relcache entry for OID %u", \ + (RELATION)->rd_id); \ +} while(0) + + +/* + * Special cache for opclass-related information + * + * Note: only default support procs get cached, ie, those with + * lefttype = righttype = opcintype. + */ +typedef struct opclasscacheent +{ + Oid opclassoid; /* lookup key: OID of opclass */ + bool valid; /* set true after successful fill-in */ + StrategyNumber numSupport; /* max # of support procs (from pg_am) */ + Oid opcfamily; /* OID of opclass's family */ + Oid opcintype; /* OID of opclass's declared input type */ + RegProcedure *supportProcs; /* OIDs of support procedures */ +} OpClassCacheEnt; + +static HTAB *OpClassCache = NULL; + + +/* non-export function prototypes */ + +static void RelationDestroyRelation(Relation relation, bool remember_tupdesc); +static void RelationClearRelation(Relation relation, bool rebuild); + +static void RelationReloadIndexInfo(Relation relation); +static void RelationReloadNailed(Relation relation); +static void RelationFlushRelation(Relation relation); +static void RememberToFreeTupleDescAtEOX(TupleDesc td); +#ifdef USE_ASSERT_CHECKING +static void AssertPendingSyncConsistency(Relation relation); +#endif +static void AtEOXact_cleanup(Relation relation, bool isCommit); +static void AtEOSubXact_cleanup(Relation relation, bool isCommit, + SubTransactionId mySubid, SubTransactionId parentSubid); +static bool load_relcache_init_file(bool shared); +static void write_relcache_init_file(bool shared); +static void write_item(const void *data, Size len, FILE *fp); + +static void formrdesc(const char 
*relationName, Oid relationReltype, + bool isshared, int natts, const FormData_pg_attribute *attrs); + +static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic); +static Relation AllocateRelationDesc(Form_pg_class relp); +static void RelationParseRelOptions(Relation relation, HeapTuple tuple); +static void RelationBuildTupleDesc(Relation relation); +static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); +static void RelationInitPhysicalAddr(Relation relation); +static void load_critical_index(Oid indexoid, Oid heapoid); +static TupleDesc GetPgClassDescriptor(void); +static TupleDesc GetPgIndexDescriptor(void); +static void AttrDefaultFetch(Relation relation, int ndef); +static int AttrDefaultCmp(const void *a, const void *b); +static void CheckConstraintFetch(Relation relation); +static int CheckConstraintCmp(const void *a, const void *b); +static void InitIndexAmRoutine(Relation relation); +static void IndexSupportInitialize(oidvector *indclass, + RegProcedure *indexSupport, + Oid *opFamily, + Oid *opcInType, + StrategyNumber maxSupportNumber, + AttrNumber maxAttributeNumber); +static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, + StrategyNumber numSupport); +static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); +static void unlink_initfile(const char *initfilename, int elevel); + + +/* + * ScanPgRelation + * + * This is used by RelationBuildDesc to find a pg_class + * tuple matching targetRelId. The caller must hold at least + * AccessShareLock on the target relid to prevent concurrent-update + * scenarios; it isn't guaranteed that all scans used to build the + * relcache entry will use the same snapshot. If, for example, + * an attribute were to be added after scanning pg_class and before + * scanning pg_attribute, relnatts wouldn't match. + * + * NB: the returned tuple has been copied into palloc'd storage + * and must eventually be freed with heap_freetuple. 
+ */ +static HeapTuple +ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic) +{ + HeapTuple pg_class_tuple; + Relation pg_class_desc; + SysScanDesc pg_class_scan; + ScanKeyData key[1]; + Snapshot snapshot = NULL; + + /* + * If something goes wrong during backend startup, we might find ourselves + * trying to read pg_class before we've selected a database. That ain't + * gonna work, so bail out with a useful error message. If this happens, + * it probably means a relcache entry that needs to be nailed isn't. + */ + if (!OidIsValid(MyDatabaseId)) + elog(FATAL, "cannot read pg_class without having selected a database"); + + /* + * form a scan key + */ + ScanKeyInit(&key[0], + Anum_pg_class_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(targetRelId)); + + /* + * Open pg_class and fetch a tuple. Force heap scan if we haven't yet + * built the critical relcache entries (this includes initdb and startup + * without a pg_internal.init file). The caller can also force a heap + * scan by setting indexOK == false. + */ + pg_class_desc = table_open(RelationRelationId, AccessShareLock); + + /* + * The caller might need a tuple that's newer than the one the historic + * snapshot; currently the only case requiring to do so is looking up the + * relfilenode of non mapped system relations during decoding. That + * snapshot can't change in the midst of a relcache build, so there's no + * need to register the snapshot. + */ + if (force_non_historic) + snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId); + + pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId, + indexOK && criticalRelcachesBuilt, + snapshot, + 1, key); + + pg_class_tuple = systable_getnext(pg_class_scan); + + /* + * Must copy tuple before releasing buffer. 
+ */ + if (HeapTupleIsValid(pg_class_tuple)) + pg_class_tuple = heap_copytuple(pg_class_tuple); + + /* all done */ + systable_endscan(pg_class_scan); + table_close(pg_class_desc, AccessShareLock); + + return pg_class_tuple; +} + +/* + * AllocateRelationDesc + * + * This is used to allocate memory for a new relation descriptor + * and initialize the rd_rel field from the given pg_class tuple. + */ +static Relation +AllocateRelationDesc(Form_pg_class relp) +{ + Relation relation; + MemoryContext oldcxt; + Form_pg_class relationForm; + + /* Relcache entries must live in CacheMemoryContext */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * allocate and zero space for new relation descriptor + */ + relation = (Relation) palloc0(sizeof(RelationData)); + + /* make sure relation is marked as having no open file yet */ + relation->rd_smgr = NULL; + + /* + * Copy the relation tuple form + * + * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The + * variable-length fields (relacl, reloptions) are NOT stored in the + * relcache --- there'd be little point in it, since we don't copy the + * tuple's nulls bitmap and hence wouldn't know if the values are valid. + * Bottom line is that relacl *cannot* be retrieved from the relcache. Get + * it from the syscache if you need it. The same goes for the original + * form of reloptions (however, we do store the parsed form of reloptions + * in rd_options). 
+ */ + relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE); + + memcpy(relationForm, relp, CLASS_TUPLE_SIZE); + + /* initialize relation tuple form */ + relation->rd_rel = relationForm; + + /* and allocate attribute tuple form storage */ + relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts); + /* which we mark as a reference-counted tupdesc */ + relation->rd_att->tdrefcount = 1; + + MemoryContextSwitchTo(oldcxt); + + return relation; +} + +/* + * RelationParseRelOptions + * Convert pg_class.reloptions into pre-parsed rd_options + * + * tuple is the real pg_class tuple (not rd_rel!) for relation + * + * Note: rd_rel and (if an index) rd_indam must be valid already + */ +static void +RelationParseRelOptions(Relation relation, HeapTuple tuple) +{ + bytea *options; + amoptions_function amoptsfn; + + relation->rd_options = NULL; + + /* + * Look up any AM-specific parse function; fall out if relkind should not + * have options. + */ + switch (relation->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_TOASTVALUE: + case RELKIND_VIEW: + case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: + amoptsfn = NULL; + break; + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + amoptsfn = relation->rd_indam->amoptions; + break; + default: + return; + } + + /* + * Fetch reloptions from tuple; have to use a hardwired descriptor because + * we might not have any other for pg_class yet (consider executing this + * code for pg_class itself) + */ + options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn); + + /* + * Copy parsed data into CacheMemoryContext. To guard against the + * possibility of leaks in the reloptions code, we want to do the actual + * parsing in the caller's memory context and copy the results into + * CacheMemoryContext after the fact. 
+ */ + if (options) + { + relation->rd_options = MemoryContextAlloc(CacheMemoryContext, + VARSIZE(options)); + memcpy(relation->rd_options, options, VARSIZE(options)); + pfree(options); + } +} + +/* + * RelationBuildTupleDesc + * + * Form the relation's tuple descriptor from information in + * the pg_attribute, pg_attrdef & pg_constraint system catalogs. + */ +static void +RelationBuildTupleDesc(Relation relation) +{ + HeapTuple pg_attribute_tuple; + Relation pg_attribute_desc; + SysScanDesc pg_attribute_scan; + ScanKeyData skey[2]; + int need; + TupleConstr *constr; + AttrMissing *attrmiss = NULL; + int ndef = 0; + + /* fill rd_att's type ID fields (compare heap.c's AddNewRelationTuple) */ + relation->rd_att->tdtypeid = + relation->rd_rel->reltype ? relation->rd_rel->reltype : RECORDOID; + relation->rd_att->tdtypmod = -1; /* just to be sure */ + + constr = (TupleConstr *) MemoryContextAllocZero(CacheMemoryContext, + sizeof(TupleConstr)); + constr->has_not_null = false; + constr->has_generated_stored = false; + + /* + * Form a scan key that selects only user attributes (attnum > 0). + * (Eliminating system attribute rows at the index level is lots faster + * than fetching them.) + */ + ScanKeyInit(&skey[0], + Anum_pg_attribute_attrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + ScanKeyInit(&skey[1], + Anum_pg_attribute_attnum, + BTGreaterStrategyNumber, F_INT2GT, + Int16GetDatum(0)); + + /* + * Open pg_attribute and begin a scan. Force heap scan if we haven't yet + * built the critical relcache entries (this includes initdb and startup + * without a pg_internal.init file). 
+ */ + pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock); + pg_attribute_scan = systable_beginscan(pg_attribute_desc, + AttributeRelidNumIndexId, + criticalRelcachesBuilt, + NULL, + 2, skey); + + /* + * add attribute data to relation->rd_att + */ + need = RelationGetNumberOfAttributes(relation); + + while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan))) + { + Form_pg_attribute attp; + int attnum; + + attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple); + + attnum = attp->attnum; + if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation)) + elog(ERROR, "invalid attribute number %d for relation \"%s\"", + attp->attnum, RelationGetRelationName(relation)); + + memcpy(TupleDescAttr(relation->rd_att, attnum - 1), + attp, + ATTRIBUTE_FIXED_PART_SIZE); + + /* Update constraint/default info */ + if (attp->attnotnull) + constr->has_not_null = true; + if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED) + constr->has_generated_stored = true; + if (attp->atthasdef) + ndef++; + + /* If the column has a "missing" value, put it in the attrmiss array */ + if (attp->atthasmissing) + { + Datum missingval; + bool missingNull; + + /* Do we have a missing value? 
*/ + missingval = heap_getattr(pg_attribute_tuple, + Anum_pg_attribute_attmissingval, + pg_attribute_desc->rd_att, + &missingNull); + if (!missingNull) + { + /* Yes, fetch from the array */ + MemoryContext oldcxt; + bool is_null; + int one = 1; + Datum missval; + + if (attrmiss == NULL) + attrmiss = (AttrMissing *) + MemoryContextAllocZero(CacheMemoryContext, + relation->rd_rel->relnatts * + sizeof(AttrMissing)); + + missval = array_get_element(missingval, + 1, + &one, + -1, + attp->attlen, + attp->attbyval, + attp->attalign, + &is_null); + Assert(!is_null); + if (attp->attbyval) + { + /* for copy by val just copy the datum direct */ + attrmiss[attnum - 1].am_value = missval; + } + else + { + /* otherwise copy in the correct context */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + attrmiss[attnum - 1].am_value = datumCopy(missval, + attp->attbyval, + attp->attlen); + MemoryContextSwitchTo(oldcxt); + } + attrmiss[attnum - 1].am_present = true; + } + } + need--; + if (need == 0) + break; + } + + /* + * end the scan and close the attribute relation + */ + systable_endscan(pg_attribute_scan); + table_close(pg_attribute_desc, AccessShareLock); + + if (need != 0) + elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u", + need, RelationGetRelid(relation)); + + /* + * The attcacheoff values we read from pg_attribute should all be -1 + * ("unknown"). Verify this if assert checking is on. They will be + * computed when and if needed during tuple access. + */ +#ifdef USE_ASSERT_CHECKING + { + int i; + + for (i = 0; i < RelationGetNumberOfAttributes(relation); i++) + Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1); + } +#endif + + /* + * However, we can easily set the attcacheoff value for the first + * attribute: it must be zero. This eliminates the need for special cases + * for attnum=1 that used to exist in fastgetattr() and index_getattr(). 
+ */ + if (RelationGetNumberOfAttributes(relation) > 0) + TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0; + + /* + * Set up constraint/default info + */ + if (constr->has_not_null || + constr->has_generated_stored || + ndef > 0 || + attrmiss || + relation->rd_rel->relchecks > 0) + { + relation->rd_att->constr = constr; + + if (ndef > 0) /* DEFAULTs */ + AttrDefaultFetch(relation, ndef); + else + constr->num_defval = 0; + + constr->missing = attrmiss; + + if (relation->rd_rel->relchecks > 0) /* CHECKs */ + CheckConstraintFetch(relation); + else + constr->num_check = 0; + } + else + { + pfree(constr); + relation->rd_att->constr = NULL; + } +} + +/* + * RelationBuildRuleLock + * + * Form the relation's rewrite rules from information in + * the pg_rewrite system catalog. + * + * Note: The rule parsetrees are potentially very complex node structures. + * To allow these trees to be freed when the relcache entry is flushed, + * we make a private memory context to hold the RuleLock information for + * each relcache entry that has associated rules. The context is used + * just for rule info, not for any other subsidiary data of the relcache + * entry, because that keeps the update logic in RelationClearRelation() + * manageable. The other subsidiary data structures are simple enough + * to be easy to free explicitly, anyway. + */ +static void +RelationBuildRuleLock(Relation relation) +{ + MemoryContext rulescxt; + MemoryContext oldcxt; + HeapTuple rewrite_tuple; + Relation rewrite_desc; + TupleDesc rewrite_tupdesc; + SysScanDesc rewrite_scan; + ScanKeyData key; + RuleLock *rulelock; + int numlocks; + RewriteRule **rules; + int maxlocks; + + /* + * Make the private context. Assume it'll not contain much data. 
+ */ + rulescxt = AllocSetContextCreate(CacheMemoryContext, + "relation rules", + ALLOCSET_SMALL_SIZES); + relation->rd_rulescxt = rulescxt; + MemoryContextCopyAndSetIdentifier(rulescxt, + RelationGetRelationName(relation)); + + /* + * allocate an array to hold the rewrite rules (the array is extended if + * necessary) + */ + maxlocks = 4; + rules = (RewriteRule **) + MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks); + numlocks = 0; + + /* + * form a scan key + */ + ScanKeyInit(&key, + Anum_pg_rewrite_ev_class, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + /* + * open pg_rewrite and begin a scan + * + * Note: since we scan the rules using RewriteRelRulenameIndexId, we will + * be reading the rules in name order, except possibly during + * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn + * ensures that rules will be fired in name order. + */ + rewrite_desc = table_open(RewriteRelationId, AccessShareLock); + rewrite_tupdesc = RelationGetDescr(rewrite_desc); + rewrite_scan = systable_beginscan(rewrite_desc, + RewriteRelRulenameIndexId, + true, NULL, + 1, &key); + + while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan))) + { + Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple); + bool isnull; + Datum rule_datum; + char *rule_str; + RewriteRule *rule; + + rule = (RewriteRule *) MemoryContextAlloc(rulescxt, + sizeof(RewriteRule)); + + rule->ruleId = rewrite_form->oid; + + rule->event = rewrite_form->ev_type - '0'; + rule->enabled = rewrite_form->ev_enabled; + rule->isInstead = rewrite_form->is_instead; + + /* + * Must use heap_getattr to fetch ev_action and ev_qual. Also, the + * rule strings are often large enough to be toasted. To avoid + * leaking memory in the caller's context, do the detoasting here so + * we can free the detoasted version. 
+ */ + rule_datum = heap_getattr(rewrite_tuple, + Anum_pg_rewrite_ev_action, + rewrite_tupdesc, + &isnull); + Assert(!isnull); + rule_str = TextDatumGetCString(rule_datum); + oldcxt = MemoryContextSwitchTo(rulescxt); + rule->actions = (List *) stringToNode(rule_str); + MemoryContextSwitchTo(oldcxt); + pfree(rule_str); + + rule_datum = heap_getattr(rewrite_tuple, + Anum_pg_rewrite_ev_qual, + rewrite_tupdesc, + &isnull); + Assert(!isnull); + rule_str = TextDatumGetCString(rule_datum); + oldcxt = MemoryContextSwitchTo(rulescxt); + rule->qual = (Node *) stringToNode(rule_str); + MemoryContextSwitchTo(oldcxt); + pfree(rule_str); + + /* + * We want the rule's table references to be checked as though by the + * table owner, not the user referencing the rule. Therefore, scan + * through the rule's actions and set the checkAsUser field on all + * rtable entries. We have to look at the qual as well, in case it + * contains sublinks. + * + * The reason for doing this when the rule is loaded, rather than when + * it is stored, is that otherwise ALTER TABLE OWNER would have to + * grovel through stored rules to update checkAsUser fields. Scanning + * the rule tree during load is relatively cheap (compared to + * constructing it in the first place), so we do it here. 
+ */ + setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner); + setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner); + + if (numlocks >= maxlocks) + { + maxlocks *= 2; + rules = (RewriteRule **) + repalloc(rules, sizeof(RewriteRule *) * maxlocks); + } + rules[numlocks++] = rule; + } + + /* + * end the scan and close the attribute relation + */ + systable_endscan(rewrite_scan); + table_close(rewrite_desc, AccessShareLock); + + /* + * there might not be any rules (if relhasrules is out-of-date) + */ + if (numlocks == 0) + { + relation->rd_rules = NULL; + relation->rd_rulescxt = NULL; + MemoryContextDelete(rulescxt); + return; + } + + /* + * form a RuleLock and insert into relation + */ + rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock)); + rulelock->numLocks = numlocks; + rulelock->rules = rules; + + relation->rd_rules = rulelock; +} + +/* + * equalRuleLocks + * + * Determine whether two RuleLocks are equivalent + * + * Probably this should be in the rules code someplace... + */ +static bool +equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2) +{ + int i; + + /* + * As of 7.3 we assume the rule ordering is repeatable, because + * RelationBuildRuleLock should read 'em in a consistent order. So just + * compare corresponding slots. 
+ */ + if (rlock1 != NULL) + { + if (rlock2 == NULL) + return false; + if (rlock1->numLocks != rlock2->numLocks) + return false; + for (i = 0; i < rlock1->numLocks; i++) + { + RewriteRule *rule1 = rlock1->rules[i]; + RewriteRule *rule2 = rlock2->rules[i]; + + if (rule1->ruleId != rule2->ruleId) + return false; + if (rule1->event != rule2->event) + return false; + if (rule1->enabled != rule2->enabled) + return false; + if (rule1->isInstead != rule2->isInstead) + return false; + if (!equal(rule1->qual, rule2->qual)) + return false; + if (!equal(rule1->actions, rule2->actions)) + return false; + } + } + else if (rlock2 != NULL) + return false; + return true; +} + +/* + * equalPolicy + * + * Determine whether two policies are equivalent + */ +static bool +equalPolicy(RowSecurityPolicy *policy1, RowSecurityPolicy *policy2) +{ + int i; + Oid *r1, + *r2; + + if (policy1 != NULL) + { + if (policy2 == NULL) + return false; + + if (policy1->polcmd != policy2->polcmd) + return false; + if (policy1->hassublinks != policy2->hassublinks) + return false; + if (strcmp(policy1->policy_name, policy2->policy_name) != 0) + return false; + if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0]) + return false; + + r1 = (Oid *) ARR_DATA_PTR(policy1->roles); + r2 = (Oid *) ARR_DATA_PTR(policy2->roles); + + for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++) + { + if (r1[i] != r2[i]) + return false; + } + + if (!equal(policy1->qual, policy2->qual)) + return false; + if (!equal(policy1->with_check_qual, policy2->with_check_qual)) + return false; + } + else if (policy2 != NULL) + return false; + + return true; +} + +/* + * equalRSDesc + * + * Determine whether two RowSecurityDesc's are equivalent + */ +static bool +equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2) +{ + ListCell *lc, + *rc; + + if (rsdesc1 == NULL && rsdesc2 == NULL) + return true; + + if ((rsdesc1 != NULL && rsdesc2 == NULL) || + (rsdesc1 == NULL && rsdesc2 != NULL)) + return false; + + if 
(list_length(rsdesc1->policies) != list_length(rsdesc2->policies)) + return false; + + /* RelationBuildRowSecurity should build policies in order */ + forboth(lc, rsdesc1->policies, rc, rsdesc2->policies) + { + RowSecurityPolicy *l = (RowSecurityPolicy *) lfirst(lc); + RowSecurityPolicy *r = (RowSecurityPolicy *) lfirst(rc); + + if (!equalPolicy(l, r)) + return false; + } + + return true; +} + +/* + * RelationBuildDesc + * + * Build a relation descriptor. The caller must hold at least + * AccessShareLock on the target relid. + * + * The new descriptor is inserted into the hash table if insertIt is true. + * + * Returns NULL if no pg_class row could be found for the given relid + * (suggesting we are trying to access a just-deleted relation). + * Any other error is reported via elog. + */ +static Relation +RelationBuildDesc(Oid targetRelId, bool insertIt) +{ + int in_progress_offset; + Relation relation; + Oid relid; + HeapTuple pg_class_tuple; + Form_pg_class relp; + + /* + * This function and its subroutines can allocate a good deal of transient + * data in CurrentMemoryContext. Traditionally we've just leaked that + * data, reasoning that the caller's context is at worst of transaction + * scope, and relcache loads shouldn't happen so often that it's essential + * to recover transient data before end of statement/transaction. However + * that's definitely not true when debug_discard_caches is active, and + * perhaps it's not true in other cases. + * + * When debug_discard_caches is active or when forced to by + * RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a + * temporary context that we'll free before returning. Make it a child of + * caller's context so that it will get cleaned up appropriately if we + * error out partway through. 
+ */ +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + MemoryContext tmpcxt = NULL; + MemoryContext oldcxt = NULL; + + if (RECOVER_RELATION_BUILD_MEMORY || debug_discard_caches > 0) + { + tmpcxt = AllocSetContextCreate(CurrentMemoryContext, + "RelationBuildDesc workspace", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(tmpcxt); + } +#endif + + /* Register to catch invalidation messages */ + if (in_progress_list_len >= in_progress_list_maxlen) + { + int allocsize; + + allocsize = in_progress_list_maxlen * 2; + in_progress_list = repalloc(in_progress_list, + allocsize * sizeof(*in_progress_list)); + in_progress_list_maxlen = allocsize; + } + in_progress_offset = in_progress_list_len++; + in_progress_list[in_progress_offset].reloid = targetRelId; +retry: + in_progress_list[in_progress_offset].invalidated = false; + + /* + * find the tuple in pg_class corresponding to the given relation id + */ + pg_class_tuple = ScanPgRelation(targetRelId, true, false); + + /* + * if no such tuple exists, return NULL + */ + if (!HeapTupleIsValid(pg_class_tuple)) + { +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + if (tmpcxt) + { + /* Return to caller's context, and blow away the temporary context */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(tmpcxt); + } +#endif + Assert(in_progress_offset + 1 == in_progress_list_len); + in_progress_list_len--; + return NULL; + } + + /* + * get information from the pg_class_tuple + */ + relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); + relid = relp->oid; + Assert(relid == targetRelId); + + /* + * allocate storage for the relation descriptor, and copy pg_class_tuple + * to relation->rd_rel. + */ + relation = AllocateRelationDesc(relp); + + /* + * initialize the relation's relation id (relation->rd_id) + */ + RelationGetRelid(relation) = relid; + + /* + * Normal relations are not nailed into the cache. Since we don't flush + * new relations, it won't be new. It could be temp though. 
+ */ + relation->rd_refcnt = 0; + relation->rd_isnailed = false; + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; + switch (relation->rd_rel->relpersistence) + { + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + relation->rd_backend = InvalidBackendId; + relation->rd_islocaltemp = false; + break; + case RELPERSISTENCE_TEMP: + if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace)) + { + relation->rd_backend = BackendIdForTempRelations(); + relation->rd_islocaltemp = true; + } + else + { + /* + * If it's a temp table, but not one of ours, we have to use + * the slow, grotty method to figure out the owning backend. + * + * Note: it's possible that rd_backend gets set to MyBackendId + * here, in case we are looking at a pg_class entry left over + * from a crashed backend that coincidentally had the same + * BackendId we're using. We should *not* consider such a + * table to be "ours"; this is why we need the separate + * rd_islocaltemp flag. The pg_class entry will get flushed + * if/when we clean out the corresponding temp table namespace + * in preparation for using it. + */ + relation->rd_backend = + GetTempNamespaceBackendId(relation->rd_rel->relnamespace); + Assert(relation->rd_backend != InvalidBackendId); + relation->rd_islocaltemp = false; + } + break; + default: + elog(ERROR, "invalid relpersistence: %c", + relation->rd_rel->relpersistence); + break; + } + + /* + * initialize the tuple descriptor (relation->rd_att). 
+ */ + RelationBuildTupleDesc(relation); + + /* + * Fetch rules and triggers that affect this relation + */ + if (relation->rd_rel->relhasrules) + RelationBuildRuleLock(relation); + else + { + relation->rd_rules = NULL; + relation->rd_rulescxt = NULL; + } + + if (relation->rd_rel->relhastriggers) + RelationBuildTriggers(relation); + else + relation->trigdesc = NULL; + + if (relation->rd_rel->relrowsecurity) + RelationBuildRowSecurity(relation); + else + relation->rd_rsdesc = NULL; + + /* foreign key data is not loaded till asked for */ + relation->rd_fkeylist = NIL; + relation->rd_fkeyvalid = false; + + /* partitioning data is not loaded till asked for */ + relation->rd_partkey = NULL; + relation->rd_partkeycxt = NULL; + relation->rd_partdesc = NULL; + relation->rd_partdesc_nodetached = NULL; + relation->rd_partdesc_nodetached_xmin = InvalidTransactionId; + relation->rd_pdcxt = NULL; + relation->rd_pddcxt = NULL; + relation->rd_partcheck = NIL; + relation->rd_partcheckvalid = false; + relation->rd_partcheckcxt = NULL; + + /* + * initialize access method information + */ + switch (relation->rd_rel->relkind) + { + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + Assert(relation->rd_rel->relam != InvalidOid); + RelationInitIndexAccessInfo(relation); + break; + case RELKIND_RELATION: + case RELKIND_TOASTVALUE: + case RELKIND_MATVIEW: + Assert(relation->rd_rel->relam != InvalidOid); + RelationInitTableAccessMethod(relation); + break; + case RELKIND_SEQUENCE: + Assert(relation->rd_rel->relam == InvalidOid); + RelationInitTableAccessMethod(relation); + break; + case RELKIND_VIEW: + case RELKIND_COMPOSITE_TYPE: + case RELKIND_FOREIGN_TABLE: + case RELKIND_PARTITIONED_TABLE: + Assert(relation->rd_rel->relam == InvalidOid); + break; + } + + /* extract reloptions if any */ + RelationParseRelOptions(relation, pg_class_tuple); + + /* + * initialize the relation lock manager information + */ + RelationInitLockInfo(relation); /* see lmgr.c */ + + /* + * initialize physical 
addressing information for the relation + */ + RelationInitPhysicalAddr(relation); + + /* make sure relation is marked as having no open file yet */ + relation->rd_smgr = NULL; + + /* + * now we can free the memory allocated for pg_class_tuple + */ + heap_freetuple(pg_class_tuple); + + /* + * If an invalidation arrived mid-build, start over. Between here and the + * end of this function, don't add code that does or reasonably could read + * system catalogs. That range must be free from invalidation processing + * for the !insertIt case. For the insertIt case, RelationCacheInsert() + * will enroll this relation in ordinary relcache invalidation processing, + */ + if (in_progress_list[in_progress_offset].invalidated) + { + RelationDestroyRelation(relation, false); + goto retry; + } + Assert(in_progress_offset + 1 == in_progress_list_len); + in_progress_list_len--; + + /* + * Insert newly created relation into relcache hash table, if requested. + * + * There is one scenario in which we might find a hashtable entry already + * present, even though our caller failed to find it: if the relation is a + * system catalog or index that's used during relcache load, we might have + * recursively created the same relcache entry during the preceding steps. + * So allow RelationCacheInsert to delete any already-present relcache + * entry for the same OID. The already-present entry should have refcount + * zero (else somebody forgot to close it); in the event that it doesn't, + * we'll elog a WARNING and leak the already-present entry. 
+ */ + if (insertIt) + RelationCacheInsert(relation, true); + + /* It's fully valid */ + relation->rd_isvalid = true; + +#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY + if (tmpcxt) + { + /* Return to caller's context, and blow away the temporary context */ + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(tmpcxt); + } +#endif + + return relation; +} + +/* + * Initialize the physical addressing info (RelFileNode) for a relcache entry + * + * Note: at the physical level, relations in the pg_global tablespace must + * be treated as shared, even if relisshared isn't set. Hence we do not + * look at relisshared here. + */ +static void +RelationInitPhysicalAddr(Relation relation) +{ + Oid oldnode = relation->rd_node.relNode; + + /* these relations kinds never have storage */ + if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) + return; + + if (relation->rd_rel->reltablespace) + relation->rd_node.spcNode = relation->rd_rel->reltablespace; + else + relation->rd_node.spcNode = MyDatabaseTableSpace; + if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID) + relation->rd_node.dbNode = InvalidOid; + else + relation->rd_node.dbNode = MyDatabaseId; + + if (relation->rd_rel->relfilenode) + { + /* + * Even if we are using a decoding snapshot that doesn't represent the + * current state of the catalog we need to make sure the filenode + * points to the current file since the older file will be gone (or + * truncated). The new file will still contain older rows so lookups + * in them will work correctly. This wouldn't work correctly if + * rewrites were allowed to change the schema in an incompatible way, + * but those are prevented both on catalog tables and on user tables + * declared as additional catalog tables. 
+ */ + if (HistoricSnapshotActive() + && RelationIsAccessibleInLogicalDecoding(relation) + && IsTransactionState()) + { + HeapTuple phys_tuple; + Form_pg_class physrel; + + phys_tuple = ScanPgRelation(RelationGetRelid(relation), + RelationGetRelid(relation) != ClassOidIndexId, + true); + if (!HeapTupleIsValid(phys_tuple)) + elog(ERROR, "could not find pg_class entry for %u", + RelationGetRelid(relation)); + physrel = (Form_pg_class) GETSTRUCT(phys_tuple); + + relation->rd_rel->reltablespace = physrel->reltablespace; + relation->rd_rel->relfilenode = physrel->relfilenode; + heap_freetuple(phys_tuple); + } + + relation->rd_node.relNode = relation->rd_rel->relfilenode; + } + else + { + /* Consult the relation mapper */ + relation->rd_node.relNode = + RelationMapOidToFilenode(relation->rd_id, + relation->rd_rel->relisshared); + if (!OidIsValid(relation->rd_node.relNode)) + elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", + RelationGetRelationName(relation), relation->rd_id); + } + + /* + * For RelationNeedsWAL() to answer correctly on parallel workers, restore + * rd_firstRelfilenodeSubid. No subtransactions start or end while in + * parallel mode, so the specific SubTransactionId does not matter. + */ + if (IsParallelWorker() && oldnode != relation->rd_node.relNode) + { + if (RelFileNodeSkippingWAL(relation->rd_node)) + relation->rd_firstRelfilenodeSubid = TopSubTransactionId; + else + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + } +} + +/* + * Fill in the IndexAmRoutine for an index relation. + * + * relation's rd_amhandler and rd_indexcxt must be valid already. + */ +static void +InitIndexAmRoutine(Relation relation) +{ + IndexAmRoutine *cached, + *tmp; + + /* + * Call the amhandler in current, short-lived memory context, just in case + * it leaks anything (it probably won't, but let's be paranoid). + */ + tmp = GetIndexAmRoutine(relation->rd_amhandler); + + /* OK, now transfer the data into relation's rd_indexcxt. 
*/ + cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt, + sizeof(IndexAmRoutine)); + memcpy(cached, tmp, sizeof(IndexAmRoutine)); + relation->rd_indam = cached; + + pfree(tmp); +} + +/* + * Initialize index-access-method support data for an index relation + */ +void +RelationInitIndexAccessInfo(Relation relation) +{ + HeapTuple tuple; + Form_pg_am aform; + Datum indcollDatum; + Datum indclassDatum; + Datum indoptionDatum; + bool isnull; + oidvector *indcoll; + oidvector *indclass; + int2vector *indoption; + MemoryContext indexcxt; + MemoryContext oldcontext; + int indnatts; + int indnkeyatts; + uint16 amsupport; + + /* + * Make a copy of the pg_index entry for the index. Since pg_index + * contains variable-length and possibly-null fields, we have to do this + * honestly rather than just treating it as a Form_pg_index struct. + */ + tuple = SearchSysCache1(INDEXRELID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for index %u", + RelationGetRelid(relation)); + oldcontext = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_indextuple = heap_copytuple(tuple); + relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple); + MemoryContextSwitchTo(oldcontext); + ReleaseSysCache(tuple); + + /* + * Look up the index's access method, save the OID of its handler function + */ + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + relation->rd_rel->relam); + aform = (Form_pg_am) GETSTRUCT(tuple); + relation->rd_amhandler = aform->amhandler; + ReleaseSysCache(tuple); + + indnatts = RelationGetNumberOfAttributes(relation); + if (indnatts != IndexRelationGetNumberOfAttributes(relation)) + elog(ERROR, "relnatts disagrees with indnatts for index %u", + RelationGetRelid(relation)); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation); + + /* + * Make 
the private context to hold index access info. The reason we need + * a context, and not just a couple of pallocs, is so that we won't leak + * any subsidiary info attached to fmgr lookup records. + */ + indexcxt = AllocSetContextCreate(CacheMemoryContext, + "index info", + ALLOCSET_SMALL_SIZES); + relation->rd_indexcxt = indexcxt; + MemoryContextCopyAndSetIdentifier(indexcxt, + RelationGetRelationName(relation)); + + /* + * Now we can fetch the index AM's API struct + */ + InitIndexAmRoutine(relation); + + /* + * Allocate arrays to hold data. Opclasses are not used for included + * columns, so allocate them for indnkeyatts only. + */ + relation->rd_opfamily = (Oid *) + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid)); + relation->rd_opcintype = (Oid *) + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid)); + + amsupport = relation->rd_indam->amsupport; + if (amsupport > 0) + { + int nsupport = indnatts * amsupport; + + relation->rd_support = (RegProcedure *) + MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure)); + relation->rd_supportinfo = (FmgrInfo *) + MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo)); + } + else + { + relation->rd_support = NULL; + relation->rd_supportinfo = NULL; + } + + relation->rd_indcollation = (Oid *) + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid)); + + relation->rd_indoption = (int16 *) + MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16)); + + /* + * indcollation cannot be referenced directly through the C struct, + * because it comes after the variable-width indkey field. Must extract + * the datum the hard way... 
+ */ + indcollDatum = fastgetattr(relation->rd_indextuple, + Anum_pg_index_indcollation, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + indcoll = (oidvector *) DatumGetPointer(indcollDatum); + memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid)); + + /* + * indclass cannot be referenced directly through the C struct, because it + * comes after the variable-width indkey field. Must extract the datum + * the hard way... + */ + indclassDatum = fastgetattr(relation->rd_indextuple, + Anum_pg_index_indclass, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + /* + * Fill the support procedure OID array, as well as the info about + * opfamilies and opclass input types. (aminfo and supportinfo are left + * as zeroes, and are filled on-the-fly when used) + */ + IndexSupportInitialize(indclass, relation->rd_support, + relation->rd_opfamily, relation->rd_opcintype, + amsupport, indnkeyatts); + + /* + * Similarly extract indoption and copy it to the cache entry + */ + indoptionDatum = fastgetattr(relation->rd_indextuple, + Anum_pg_index_indoption, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + indoption = (int2vector *) DatumGetPointer(indoptionDatum); + memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16)); + + (void) RelationGetIndexAttOptions(relation, false); + + /* + * expressions, predicate, exclusion caches will be filled later + */ + relation->rd_indexprs = NIL; + relation->rd_indpred = NIL; + relation->rd_exclops = NULL; + relation->rd_exclprocs = NULL; + relation->rd_exclstrats = NULL; + relation->rd_amcache = NULL; +} + +/* + * IndexSupportInitialize + * Initializes an index's cached opclass information, + * given the index's pg_index.indclass entry. + * + * Data is returned into *indexSupport, *opFamily, and *opcInType, + * which are arrays allocated by the caller. 
+ * + * The caller also passes maxSupportNumber and maxAttributeNumber, since these + * indicate the size of the arrays it has allocated --- but in practice these + * numbers must always match those obtainable from the system catalog entries + * for the index and access method. + */ +static void +IndexSupportInitialize(oidvector *indclass, + RegProcedure *indexSupport, + Oid *opFamily, + Oid *opcInType, + StrategyNumber maxSupportNumber, + AttrNumber maxAttributeNumber) +{ + int attIndex; + + for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++) + { + OpClassCacheEnt *opcentry; + + if (!OidIsValid(indclass->values[attIndex])) + elog(ERROR, "bogus pg_index tuple"); + + /* look up the info for this opclass, using a cache */ + opcentry = LookupOpclassInfo(indclass->values[attIndex], + maxSupportNumber); + + /* copy cached data into relcache entry */ + opFamily[attIndex] = opcentry->opcfamily; + opcInType[attIndex] = opcentry->opcintype; + if (maxSupportNumber > 0) + memcpy(&indexSupport[attIndex * maxSupportNumber], + opcentry->supportProcs, + maxSupportNumber * sizeof(RegProcedure)); + } +} + +/* + * LookupOpclassInfo + * + * This routine maintains a per-opclass cache of the information needed + * by IndexSupportInitialize(). This is more efficient than relying on + * the catalog cache, because we can load all the info about a particular + * opclass in a single indexscan of pg_amproc. + * + * The information from pg_am about expected range of support function + * numbers is passed in, rather than being looked up, mainly because the + * caller will have it already. + * + * Note there is no provision for flushing the cache. This is OK at the + * moment because there is no way to ALTER any interesting properties of an + * existing opclass --- all you can do is drop it, which will result in + * a useless but harmless dead entry in the cache. 
To support altering + * opclass membership (not the same as opfamily membership!), we'd need to + * be able to flush this cache as well as the contents of relcache entries + * for indexes. + */ +static OpClassCacheEnt * +LookupOpclassInfo(Oid operatorClassOid, + StrategyNumber numSupport) +{ + OpClassCacheEnt *opcentry; + bool found; + Relation rel; + SysScanDesc scan; + ScanKeyData skey[3]; + HeapTuple htup; + bool indexOK; + + if (OpClassCache == NULL) + { + /* First time through: initialize the opclass cache */ + HASHCTL ctl; + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(OpClassCacheEnt); + OpClassCache = hash_create("Operator class cache", 64, + &ctl, HASH_ELEM | HASH_BLOBS); + } + + opcentry = (OpClassCacheEnt *) hash_search(OpClassCache, + (void *) &operatorClassOid, + HASH_ENTER, &found); + + if (!found) + { + /* Initialize new entry */ + opcentry->valid = false; /* until known OK */ + opcentry->numSupport = numSupport; + opcentry->supportProcs = NULL; /* filled below */ + } + else + { + Assert(numSupport == opcentry->numSupport); + } + + /* + * When aggressively testing cache-flush hazards, we disable the operator + * class cache and force reloading of the info on each call. This models + * no real-world behavior, since the cache entries are never invalidated + * otherwise. However it can be helpful for detecting bugs in the cache + * loading logic itself, such as reliance on a non-nailed index. Given + * the limited use-case and the fact that this adds a great deal of + * expense, we enable it only for high values of debug_discard_caches. + */ +#ifdef DISCARD_CACHES_ENABLED + if (debug_discard_caches > 2) + opcentry->valid = false; +#endif + + if (opcentry->valid) + return opcentry; + + /* + * Need to fill in new entry. First allocate space, unless we already did + * so in some previous attempt. 
+ */ + if (opcentry->supportProcs == NULL && numSupport > 0) + opcentry->supportProcs = (RegProcedure *) + MemoryContextAllocZero(CacheMemoryContext, + numSupport * sizeof(RegProcedure)); + + /* + * To avoid infinite recursion during startup, force heap scans if we're + * looking up info for the opclasses used by the indexes we would like to + * reference here. + */ + indexOK = criticalRelcachesBuilt || + (operatorClassOid != OID_BTREE_OPS_OID && + operatorClassOid != INT2_BTREE_OPS_OID); + + /* + * We have to fetch the pg_opclass row to determine its opfamily and + * opcintype, which are needed to look up related operators and functions. + * It'd be convenient to use the syscache here, but that probably doesn't + * work while bootstrapping. + */ + ScanKeyInit(&skey[0], + Anum_pg_opclass_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(operatorClassOid)); + rel = table_open(OperatorClassRelationId, AccessShareLock); + scan = systable_beginscan(rel, OpclassOidIndexId, indexOK, + NULL, 1, skey); + + if (HeapTupleIsValid(htup = systable_getnext(scan))) + { + Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup); + + opcentry->opcfamily = opclassform->opcfamily; + opcentry->opcintype = opclassform->opcintype; + } + else + elog(ERROR, "could not find tuple for opclass %u", operatorClassOid); + + systable_endscan(scan); + table_close(rel, AccessShareLock); + + /* + * Scan pg_amproc to obtain support procs for the opclass. We only fetch + * the default ones (those with lefttype = righttype = opcintype). 
+ */ + if (numSupport > 0) + { + ScanKeyInit(&skey[0], + Anum_pg_amproc_amprocfamily, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(opcentry->opcfamily)); + ScanKeyInit(&skey[1], + Anum_pg_amproc_amproclefttype, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(opcentry->opcintype)); + ScanKeyInit(&skey[2], + Anum_pg_amproc_amprocrighttype, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(opcentry->opcintype)); + rel = table_open(AccessMethodProcedureRelationId, AccessShareLock); + scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK, + NULL, 3, skey); + + while (HeapTupleIsValid(htup = systable_getnext(scan))) + { + Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup); + + if (amprocform->amprocnum <= 0 || + (StrategyNumber) amprocform->amprocnum > numSupport) + elog(ERROR, "invalid amproc number %d for opclass %u", + amprocform->amprocnum, operatorClassOid); + + opcentry->supportProcs[amprocform->amprocnum - 1] = + amprocform->amproc; + } + + systable_endscan(scan); + table_close(rel, AccessShareLock); + } + + opcentry->valid = true; + return opcentry; +} + +/* + * Fill in the TableAmRoutine for a relation + * + * relation's rd_amhandler must be valid already. + */ +static void +InitTableAmRoutine(Relation relation) +{ + relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler); +} + +/* + * Initialize table access method support for a table like relation + */ +void +RelationInitTableAccessMethod(Relation relation) +{ + HeapTuple tuple; + Form_pg_am aform; + + if (relation->rd_rel->relkind == RELKIND_SEQUENCE) + { + /* + * Sequences are currently accessed like heap tables, but it doesn't + * seem prudent to show that in the catalog. So just overwrite it + * here. + */ + relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER; + } + else if (IsCatalogRelation(relation)) + { + /* + * Avoid doing a syscache lookup for catalog tables. 
+ */ + Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID); + relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER; + } + else + { + /* + * Look up the table access method, save the OID of its handler + * function. + */ + Assert(relation->rd_rel->relam != InvalidOid); + tuple = SearchSysCache1(AMOID, + ObjectIdGetDatum(relation->rd_rel->relam)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + relation->rd_rel->relam); + aform = (Form_pg_am) GETSTRUCT(tuple); + relation->rd_amhandler = aform->amhandler; + ReleaseSysCache(tuple); + } + + /* + * Now we can fetch the table AM's API struct + */ + InitTableAmRoutine(relation); +} + +/* + * formrdesc + * + * This is a special cut-down version of RelationBuildDesc(), + * used while initializing the relcache. + * The relation descriptor is built just from the supplied parameters, + * without actually looking at any system table entries. We cheat + * quite a lot since we only need to work for a few basic system + * catalogs. + * + * The catalogs this is used for can't have constraints (except attnotnull), + * default values, rules, or triggers, since we don't cope with any of that. + * (Well, actually, this only matters for properties that need to be valid + * during bootstrap or before RelationCacheInitializePhase3 runs, and none of + * these properties matter then...) + * + * NOTE: we assume we are already switched into CacheMemoryContext. 
+ */ +static void +formrdesc(const char *relationName, Oid relationReltype, + bool isshared, + int natts, const FormData_pg_attribute *attrs) +{ + Relation relation; + int i; + bool has_not_null; + + /* + * allocate new relation desc, clear all fields of reldesc + */ + relation = (Relation) palloc0(sizeof(RelationData)); + + /* make sure relation is marked as having no open file yet */ + relation->rd_smgr = NULL; + + /* + * initialize reference count: 1 because it is nailed in cache + */ + relation->rd_refcnt = 1; + + /* + * all entries built with this routine are nailed-in-cache; none are for + * new or temp relations. + */ + relation->rd_isnailed = true; + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; + relation->rd_backend = InvalidBackendId; + relation->rd_islocaltemp = false; + + /* + * initialize relation tuple form + * + * The data we insert here is pretty incomplete/bogus, but it'll serve to + * get us launched. RelationCacheInitializePhase3() will read the real + * data from pg_class and replace what we've done here. Note in + * particular that relowner is left as zero; this cues + * RelationCacheInitializePhase3 that the real data isn't there yet. + */ + relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE); + + namestrcpy(&relation->rd_rel->relname, relationName); + relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE; + relation->rd_rel->reltype = relationReltype; + + /* + * It's important to distinguish between shared and non-shared relations, + * even at bootstrap time, to make sure we know where they are stored. 
+ */ + relation->rd_rel->relisshared = isshared; + if (isshared) + relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID; + + /* formrdesc is used only for permanent relations */ + relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT; + + /* ... and they're always populated, too */ + relation->rd_rel->relispopulated = true; + + relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; + relation->rd_rel->relpages = 0; + relation->rd_rel->reltuples = -1; + relation->rd_rel->relallvisible = 0; + relation->rd_rel->relkind = RELKIND_RELATION; + relation->rd_rel->relnatts = (int16) natts; + relation->rd_rel->relam = HEAP_TABLE_AM_OID; + + /* + * initialize attribute tuple form + * + * Unlike the case with the relation tuple, this data had better be right + * because it will never be replaced. The data comes from + * src/include/catalog/ headers via genbki.pl. + */ + relation->rd_att = CreateTemplateTupleDesc(natts); + relation->rd_att->tdrefcount = 1; /* mark as refcounted */ + + relation->rd_att->tdtypeid = relationReltype; + relation->rd_att->tdtypmod = -1; /* just to be sure */ + + /* + * initialize tuple desc info + */ + has_not_null = false; + for (i = 0; i < natts; i++) + { + memcpy(TupleDescAttr(relation->rd_att, i), + &attrs[i], + ATTRIBUTE_FIXED_PART_SIZE); + has_not_null |= attrs[i].attnotnull; + /* make sure attcacheoff is valid */ + TupleDescAttr(relation->rd_att, i)->attcacheoff = -1; + } + + /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */ + TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0; + + /* mark not-null status */ + if (has_not_null) + { + TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr)); + + constr->has_not_null = true; + relation->rd_att->constr = constr; + } + + /* + * initialize relation id from info in att array (my, this is ugly) + */ + RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid; + + /* + * All relations made with formrdesc are mapped. 
This is necessarily so + * because there is no other way to know what filenode they currently + * have. In bootstrap mode, add them to the initial relation mapper data, + * specifying that the initial filenode is the same as the OID. + */ + relation->rd_rel->relfilenode = InvalidOid; + if (IsBootstrapProcessingMode()) + RelationMapUpdateMap(RelationGetRelid(relation), + RelationGetRelid(relation), + isshared, true); + + /* + * initialize the relation lock manager information + */ + RelationInitLockInfo(relation); /* see lmgr.c */ + + /* + * initialize physical addressing information for the relation + */ + RelationInitPhysicalAddr(relation); + + /* + * initialize the table am handler + */ + relation->rd_rel->relam = HEAP_TABLE_AM_OID; + relation->rd_tableam = GetHeapamTableAmRoutine(); + + /* + * initialize the rel-has-index flag, using hardwired knowledge + */ + if (IsBootstrapProcessingMode()) + { + /* In bootstrap mode, we have no indexes */ + relation->rd_rel->relhasindex = false; + } + else + { + /* Otherwise, all the rels formrdesc is used for have indexes */ + relation->rd_rel->relhasindex = true; + } + + /* + * add new reldesc to relcache + */ + RelationCacheInsert(relation, false); + + /* It's fully valid */ + relation->rd_isvalid = true; +} + + +/* ---------------------------------------------------------------- + * Relation Descriptor Lookup Interface + * ---------------------------------------------------------------- + */ + +/* + * RelationIdGetRelation + * + * Lookup a reldesc by OID; make one if not already in cache. + * + * Returns NULL if no pg_class row could be found for the given relid + * (suggesting we are trying to access a just-deleted relation). + * Any other error is reported via elog. + * + * NB: caller should already have at least AccessShareLock on the + * relation ID, else there are nasty race conditions. + * + * NB: relation ref count is incremented, or set to 1 if new entry. + * Caller should eventually decrement count. 
(Usually, + * that happens by calling RelationClose().) + */ +Relation +RelationIdGetRelation(Oid relationId) +{ + Relation rd; + + /* Make sure we're in an xact, even if this ends up being a cache hit */ + Assert(IsTransactionState()); + + /* + * first try to find reldesc in the cache + */ + RelationIdCacheLookup(relationId, rd); + + if (RelationIsValid(rd)) + { + /* return NULL for dropped relations */ + if (rd->rd_droppedSubid != InvalidSubTransactionId) + { + Assert(!rd->rd_isvalid); + return NULL; + } + + RelationIncrementReferenceCount(rd); + /* revalidate cache entry if necessary */ + if (!rd->rd_isvalid) + { + /* + * Indexes only have a limited number of possible schema changes, + * and we don't want to use the full-blown procedure because it's + * a headache for indexes that reload itself depends on. + */ + if (rd->rd_rel->relkind == RELKIND_INDEX || + rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + RelationReloadIndexInfo(rd); + else + RelationClearRelation(rd, true); + + /* + * Normally entries need to be valid here, but before the relcache + * has been initialized, not enough infrastructure exists to + * perform pg_class lookups. The structure of such entries doesn't + * change, but we still want to update the rd_rel entry. So + * rd_isvalid = false is left in place for a later lookup. + */ + Assert(rd->rd_isvalid || + (rd->rd_isnailed && !criticalRelcachesBuilt)); + } + return rd; + } + + /* + * no reldesc in the cache, so have RelationBuildDesc() build one and add + * it. + */ + rd = RelationBuildDesc(relationId, true); + if (RelationIsValid(rd)) + RelationIncrementReferenceCount(rd); + return rd; +} + +/* ---------------------------------------------------------------- + * cache invalidation support routines + * ---------------------------------------------------------------- + */ + +/* + * RelationIncrementReferenceCount + * Increments relation reference count. 
+ * + * Note: bootstrap mode has its own weird ideas about relation refcount + * behavior; we ought to fix it someday, but for now, just disable + * reference count ownership tracking in bootstrap mode. + */ +void +RelationIncrementReferenceCount(Relation rel) +{ + ResourceOwnerEnlargeRelationRefs(CurrentResourceOwner); + rel->rd_refcnt += 1; + if (!IsBootstrapProcessingMode()) + ResourceOwnerRememberRelationRef(CurrentResourceOwner, rel); +} + +/* + * RelationDecrementReferenceCount + * Decrements relation reference count. + */ +void +RelationDecrementReferenceCount(Relation rel) +{ + Assert(rel->rd_refcnt > 0); + rel->rd_refcnt -= 1; + if (!IsBootstrapProcessingMode()) + ResourceOwnerForgetRelationRef(CurrentResourceOwner, rel); +} + +/* + * RelationClose - close an open relation + * + * Actually, we just decrement the refcount. + * + * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries + * will be freed as soon as their refcount goes to zero. In combination + * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test + * to catch references to already-released relcache entries. It slows + * things down quite a bit, however. + */ +void +RelationClose(Relation relation) +{ + /* Note: no locking manipulations needed */ + RelationDecrementReferenceCount(relation); + + /* + * If the relation is no longer open in this session, we can clean up any + * stale partition descriptors it has. This is unlikely, so check to see + * if there are child contexts before expending a call to mcxt.c. 
+ */ + if (RelationHasReferenceCountZero(relation)) + { + if (relation->rd_pdcxt != NULL && + relation->rd_pdcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pdcxt); + + if (relation->rd_pddcxt != NULL && + relation->rd_pddcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pddcxt); + } + +#ifdef RELCACHE_FORCE_RELEASE + if (RelationHasReferenceCountZero(relation) && + relation->rd_createSubid == InvalidSubTransactionId && + relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId) + RelationClearRelation(relation, false); +#endif +} + +/* + * RelationReloadIndexInfo - reload minimal information for an open index + * + * This function is used only for indexes. A relcache inval on an index + * can mean that its pg_class or pg_index row changed. There are only + * very limited changes that are allowed to an existing index's schema, + * so we can update the relcache entry without a complete rebuild; which + * is fortunate because we can't rebuild an index entry that is "nailed" + * and/or in active use. We support full replacement of the pg_class row, + * as well as updates of a few simple fields of the pg_index row. + * + * We can't necessarily reread the catalog rows right away; we might be + * in a failed transaction when we receive the SI notification. If so, + * RelationClearRelation just marks the entry as invalid by setting + * rd_isvalid to false. This routine is called to fix the entry when it + * is next needed. + * + * We assume that at the time we are called, we have at least AccessShareLock + * on the target index. (Note: in the calls from RelationClearRelation, + * this is legitimate because we know the rel has positive refcount.) + * + * If the target index is an index on pg_class or pg_index, we'd better have + * previously gotten at least AccessShareLock on its underlying catalog, + * else we are at risk of deadlock against someone trying to exclusive-lock + * the heap and index in that order. 
This is ensured in current usage by
 * only applying this to indexes being opened or having positive refcount.
 */
static void
RelationReloadIndexInfo(Relation relation)
{
	bool		indexOK;
	HeapTuple	pg_class_tuple;
	Form_pg_class relp;

	/* Should be called only for invalidated, live indexes */
	Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
			relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
		   !relation->rd_isvalid &&
		   relation->rd_droppedSubid == InvalidSubTransactionId);

	/* Ensure it's closed at smgr level */
	RelationCloseSmgr(relation);

	/* Must free any AM cached data upon relcache flush */
	if (relation->rd_amcache)
		pfree(relation->rd_amcache);
	relation->rd_amcache = NULL;

	/*
	 * If it's a shared index, we might be called before backend startup has
	 * finished selecting a database, in which case we have no way to read
	 * pg_class yet.  However, a shared index can never have any significant
	 * schema updates, so it's okay to ignore the invalidation signal.  Just
	 * mark it valid and return without doing anything more.
	 */
	if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
	{
		relation->rd_isvalid = true;
		return;
	}

	/*
	 * Read the pg_class row
	 *
	 * Don't try to use an indexscan of pg_class_oid_index to reload the info
	 * for pg_class_oid_index ...
	 */
	indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
	pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
	if (!HeapTupleIsValid(pg_class_tuple))
		elog(ERROR, "could not find pg_class tuple for index %u",
			 RelationGetRelid(relation));
	relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
	/* Overwrite the existing rd_rel in place; the rd_rel pointer is unchanged */
	memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
	/* Reload reloptions in case they changed */
	if (relation->rd_options)
		pfree(relation->rd_options);

	/*
	 * NOTE(review): rd_options is not reset to NULL after the pfree above;
	 * presumably RelationParseRelOptions() always reassigns it -- confirm.
	 */
	RelationParseRelOptions(relation, pg_class_tuple);
	/* done with pg_class tuple */
	heap_freetuple(pg_class_tuple);
	/* We must recalculate physical address in case it changed */
	RelationInitPhysicalAddr(relation);

	/*
	 * For a non-system index, there are fields of the pg_index row that are
	 * allowed to change, so re-read that row and update the relcache entry.
	 * Most of the info derived from pg_index (such as support function lookup
	 * info) cannot change, and indeed the whole point of this routine is to
	 * update the relcache entry without clobbering that data; so wholesale
	 * replacement is not appropriate.
	 */
	if (!IsSystemRelation(relation))
	{
		HeapTuple	tuple;
		Form_pg_index index;

		tuple = SearchSysCache1(INDEXRELID,
								ObjectIdGetDatum(RelationGetRelid(relation)));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for index %u",
				 RelationGetRelid(relation));
		index = (Form_pg_index) GETSTRUCT(tuple);

		/*
		 * Basically, let's just copy all the bool fields.  There are one or
		 * two of these that can't actually change in the current code, but
		 * it's not worth it to track exactly which ones they are.  None of
		 * the array fields are allowed to change, though.
		 */
		relation->rd_index->indisunique = index->indisunique;
		relation->rd_index->indisprimary = index->indisprimary;
		relation->rd_index->indisexclusion = index->indisexclusion;
		relation->rd_index->indimmediate = index->indimmediate;
		relation->rd_index->indisclustered = index->indisclustered;
		relation->rd_index->indisvalid = index->indisvalid;
		relation->rd_index->indcheckxmin = index->indcheckxmin;
		relation->rd_index->indisready = index->indisready;
		relation->rd_index->indislive = index->indislive;

		/* Copy xmin too, as that is needed to make sense of indcheckxmin */
		HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
							   HeapTupleHeaderGetXmin(tuple->t_data));

		ReleaseSysCache(tuple);
	}

	/* Okay, now it's valid again */
	relation->rd_isvalid = true;
}

/*
 * RelationReloadNailed - reload minimal information for nailed relations.
 *
 * The structure of a nailed relation can never change (which is good, because
 * we rely on knowing their structure to be able to read catalog content). But
 * some parts, e.g. pg_class.relfrozenxid, are still important to have
 * accurate content for. Therefore those need to be reloaded after the arrival
 * of invalidations.
 *
 * On return the entry is either revalidated (rd_isvalid = true) or left
 * marked invalid for a later reload; see the early-exit conditions below.
 */
static void
RelationReloadNailed(Relation relation)
{
	Assert(relation->rd_isnailed);

	/*
	 * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
	 * mapping changed.
	 */
	RelationInitPhysicalAddr(relation);

	/* flag as needing to be revalidated */
	relation->rd_isvalid = false;

	/*
	 * Can only reread catalog contents if in a transaction.  If the relation
	 * is currently open (not counting the nailed refcount), do so
	 * immediately.  Otherwise we've already marked the entry as possibly
	 * invalid, and it'll be fixed when next opened.
	 *
	 * (The nailed reference accounts for one count, hence the "<= 1" test.)
	 */
	if (!IsTransactionState() || relation->rd_refcnt <= 1)
		return;

	if (relation->rd_rel->relkind == RELKIND_INDEX)
	{
		/*
		 * If it's a nailed-but-not-mapped index, then we need to re-read the
		 * pg_class row to see if its relfilenode changed.
		 */
		RelationReloadIndexInfo(relation);
	}
	else
	{
		/*
		 * Reload a non-index entry.  We can't easily do so if relcaches
		 * aren't yet built, but that's fine because at that stage the
		 * attributes that need to be current (like relfrozenxid) aren't yet
		 * accessed.  To ensure the entry will later be revalidated, we leave
		 * it in invalid state, but allow use (cf. RelationIdGetRelation()).
		 */
		if (criticalRelcachesBuilt)
		{
			HeapTuple	pg_class_tuple;
			Form_pg_class relp;

			/*
			 * NB: Mark the entry as valid before starting to scan, to avoid
			 * self-recursion when re-building pg_class.
			 */
			relation->rd_isvalid = true;

			pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
											true, false);

			/*
			 * NOTE(review): unlike RelationReloadIndexInfo(), the tuple is
			 * not tested with HeapTupleIsValid() before use -- confirm that
			 * the pg_class row of a nailed relation can never be missing.
			 */
			relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
			memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
			heap_freetuple(pg_class_tuple);

			/*
			 * Again mark as valid, to protect against concurrently arriving
			 * invalidations.
			 */
			relation->rd_isvalid = true;
		}
	}
}

/*
 * RelationDestroyRelation
 *
 *	Physically delete a relation cache entry and all subsidiary data.
 *	Caller must already have unhooked the entry from the hash table.
 *
 * The entry must have a zero reference count (asserted).
 *
 * remember_tupdesc: if true and the tuple descriptor's refcount drops to
 * zero here, the descriptor is handed to RememberToFreeTupleDescAtEOX()
 * instead of being freed immediately.
 */
static void
RelationDestroyRelation(Relation relation, bool remember_tupdesc)
{
	Assert(RelationHasReferenceCountZero(relation));

	/*
	 * Make sure smgr and lower levels close the relation's files, if they
	 * weren't closed already.  (This was probably done by caller, but let's
	 * just be real sure.)
	 */
	RelationCloseSmgr(relation);

	/*
	 * Free all the subsidiary data structures of the relcache entry, then the
	 * entry itself.
	 */
	if (relation->rd_rel)
		pfree(relation->rd_rel);
	/* can't use DecrTupleDescRefCount here */
	Assert(relation->rd_att->tdrefcount > 0);
	if (--relation->rd_att->tdrefcount == 0)
	{
		/*
		 * If we rebuilt a relcache entry during a transaction then it's
		 * possible we did that because the TupDesc changed as the result of
		 * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
		 * possible someone copied that TupDesc, in which case the copy would
		 * point to free'd memory. So if we rebuild an entry we keep the
		 * TupDesc around until end of transaction, to be safe.
		 */
		if (remember_tupdesc)
			RememberToFreeTupleDescAtEOX(relation->rd_att);
		else
			FreeTupleDesc(relation->rd_att);
	}
	FreeTriggerDesc(relation->trigdesc);
	list_free_deep(relation->rd_fkeylist);
	list_free(relation->rd_indexlist);
	list_free(relation->rd_statlist);
	bms_free(relation->rd_indexattr);
	bms_free(relation->rd_keyattr);
	bms_free(relation->rd_pkattr);
	bms_free(relation->rd_idattr);
	if (relation->rd_pubactions)
		pfree(relation->rd_pubactions);
	if (relation->rd_options)
		pfree(relation->rd_options);
	if (relation->rd_indextuple)
		pfree(relation->rd_indextuple);
	if (relation->rd_amcache)
		pfree(relation->rd_amcache);
	if (relation->rd_fdwroutine)
		pfree(relation->rd_fdwroutine);
	/* Substructures kept in their own memory contexts go away wholesale */
	if (relation->rd_indexcxt)
		MemoryContextDelete(relation->rd_indexcxt);
	if (relation->rd_rulescxt)
		MemoryContextDelete(relation->rd_rulescxt);
	if (relation->rd_rsdesc)
		MemoryContextDelete(relation->rd_rsdesc->rscxt);
	if (relation->rd_partkeycxt)
		MemoryContextDelete(relation->rd_partkeycxt);
	if (relation->rd_pdcxt)
		MemoryContextDelete(relation->rd_pdcxt);
	if (relation->rd_pddcxt)
		MemoryContextDelete(relation->rd_pddcxt);
	if (relation->rd_partcheckcxt)
		MemoryContextDelete(relation->rd_partcheckcxt);
	pfree(relation);
}

/*
 * RelationClearRelation
 *
 *	 Physically blow away a relation cache entry, or reset it and rebuild
 *	 it from
scratch (that is, from catalog entries).  The latter path is
 *	 used when we are notified of a change to an open relation (one with
 *	 refcount > 0).
 *
 *	 NB: when rebuilding, we'd better hold some lock on the relation,
 *	 else the catalog data we need to read could be changing under us.
 *	 Also, a rel to be rebuilt had better have refcnt > 0.  This is because
 *	 a sinval reset could happen while we're accessing the catalogs, and
 *	 the rel would get blown away underneath us by RelationCacheInvalidate
 *	 if it has zero refcnt.
 *
 *	 The "rebuild" parameter is redundant in current usage because it has
 *	 to match the relation's refcnt status, but we keep it as a crosscheck
 *	 that we're doing what the caller expects.
 *
 *	 Summary of outcomes, in the order they are tested below:
 *	   - nailed entry: RelationReloadNailed(), never destroyed;
 *	   - entry marked dropped (rd_droppedSubid set): just left invalid;
 *	   - open index with rd_indexcxt: RelationReloadIndexInfo() if inside a
 *		 transaction, else left invalid;
 *	   - !rebuild: removed from the hash table and destroyed;
 *	   - rebuild outside a transaction: left invalid for a later rebuild;
 *	   - rebuild inside a transaction: new entry built and swapped in place.
 */
static void
RelationClearRelation(Relation relation, bool rebuild)
{
	/*
	 * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
	 * course it would be an equally bad idea to blow away one with nonzero
	 * refcnt, since that would leave someone somewhere with a dangling
	 * pointer.  All callers are expected to have verified that this holds.
	 */
	Assert(rebuild ?
		   !RelationHasReferenceCountZero(relation) :
		   RelationHasReferenceCountZero(relation));

	/*
	 * Make sure smgr and lower levels close the relation's files, if they
	 * weren't closed already.  If the relation is not getting deleted, the
	 * next smgr access should reopen the files automatically.  This ensures
	 * that the low-level file access state is updated after, say, a vacuum
	 * truncation.
	 */
	RelationCloseSmgr(relation);

	/* Free AM cached data, if any */
	if (relation->rd_amcache)
		pfree(relation->rd_amcache);
	relation->rd_amcache = NULL;

	/*
	 * Treat nailed-in system relations separately, they always need to be
	 * accessible, so we can't blow them away.
	 */
	if (relation->rd_isnailed)
	{
		RelationReloadNailed(relation);
		return;
	}

	/* Mark it invalid until we've finished rebuild */
	relation->rd_isvalid = false;

	/* See RelationForgetRelation(). */
	if (relation->rd_droppedSubid != InvalidSubTransactionId)
		return;

	/*
	 * Even non-system indexes should not be blown away if they are open and
	 * have valid index support information.  This avoids problems with active
	 * use of the index support information.  As with nailed indexes, we
	 * re-read the pg_class row to handle possible physical relocation of the
	 * index, and we check for pg_index updates too.
	 */
	if ((relation->rd_rel->relkind == RELKIND_INDEX ||
		 relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
		relation->rd_refcnt > 0 &&
		relation->rd_indexcxt != NULL)
	{
		if (IsTransactionState())
			RelationReloadIndexInfo(relation);
		return;
	}

	/*
	 * If we're really done with the relcache entry, blow it away. But if
	 * someone is still using it, reconstruct the whole deal without moving
	 * the physical RelationData record (so that the someone's pointer is
	 * still valid).
	 */
	if (!rebuild)
	{
		/* Remove it from the hash table */
		RelationCacheDelete(relation);

		/* And release storage */
		RelationDestroyRelation(relation, false);
	}
	else if (!IsTransactionState())
	{
		/*
		 * If we're not inside a valid transaction, we can't do any catalog
		 * access so it's not possible to rebuild yet.  Just exit, leaving
		 * rd_isvalid = false so that the rebuild will occur when the entry is
		 * next opened.
		 *
		 * Note: it's possible that we come here during subtransaction abort,
		 * and the reason for wanting to rebuild is that the rel is open in
		 * the outer transaction.  In that case it might seem unsafe to not
		 * rebuild immediately, since whatever code has the rel already open
		 * will keep on using the relcache entry as-is.  However, in such a
		 * case the outer transaction should be holding a lock that's
		 * sufficient to prevent any significant change in the rel's schema,
		 * so the existing entry contents should be good enough for its
		 * purposes; at worst we might be behind on statistics updates or the
		 * like.  (See also CheckTableNotInUse() and its callers.)	These same
		 * remarks also apply to the cases above where we exit without having
		 * done RelationReloadIndexInfo() yet.
		 */
		return;
	}
	else
	{
		/*
		 * Our strategy for rebuilding an open relcache entry is to build a
		 * new entry from scratch, swap its contents with the old entry, and
		 * finally delete the new entry (along with any infrastructure swapped
		 * over from the old entry).  This is to avoid trouble in case an
		 * error causes us to lose control partway through.  The old entry
		 * will still be marked !rd_isvalid, so we'll try to rebuild it again
		 * on next access.  Meanwhile it's not any less valid than it was
		 * before, so any code that might expect to continue accessing it
		 * isn't hurt by the rebuild failure.  (Consider for example a
		 * subtransaction that ALTERs a table and then gets canceled partway
		 * through the cache entry rebuild.  The outer transaction should
		 * still see the not-modified cache entry as valid.)  The worst
		 * consequence of an error is leaking the necessarily-unreferenced new
		 * entry, and this shouldn't happen often enough for that to be a big
		 * problem.
		 *
		 * When rebuilding an open relcache entry, we must preserve ref count,
		 * rd_*Subid, and rd_toastoid state.  Also attempt to preserve the
		 * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key,
		 * and partition descriptor substructures in place, because various
		 * places assume that these structures won't move while they are
		 * working with an open relcache entry.  (Note:  the refcount
		 * mechanism for tupledescs might someday allow us to remove this hack
		 * for the tupledesc.)
		 *
		 * Note that this process does not touch CurrentResourceOwner; which
		 * is good because whatever ref counts the entry may have do not
		 * necessarily belong to that resource owner.
		 */
		Relation	newrel;
		Oid			save_relid = RelationGetRelid(relation);
		bool		keep_tupdesc;
		bool		keep_rules;
		bool		keep_policies;
		bool		keep_partkey;

		/* Build temporary entry, but don't link it into hashtable */
		newrel = RelationBuildDesc(save_relid, false);

		/*
		 * Between here and the end of the swap, don't add code that does or
		 * reasonably could read system catalogs.  That range must be free
		 * from invalidation processing.  See RelationBuildDesc() manipulation
		 * of in_progress_list.
		 */

		if (newrel == NULL)
		{
			/*
			 * We can validly get here, if we're using a historic snapshot in
			 * which a relation, accessed from outside logical decoding, is
			 * still invisible. In that case it's fine to just mark the
			 * relation as invalid and return - it'll fully get reloaded by
			 * the cache reset at the end of logical decoding (or at the next
			 * access).  During normal processing we don't want to ignore this
			 * case as it shouldn't happen there, as explained below.
			 */
			if (HistoricSnapshotActive())
				return;

			/*
			 * This shouldn't happen as dropping a relation is intended to be
			 * impossible if still referenced (cf. CheckTableNotInUse()). But
			 * if we get here anyway, we can't just delete the relcache entry,
			 * as it possibly could get accessed later (as e.g. the error
			 * might get trapped and handled via a subtransaction rollback).
			 */
			elog(ERROR, "relation %u deleted while still in use", save_relid);
		}

		/* Decide which old substructures are logically unchanged */
		keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
		keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
		keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
		/* partkey is immutable once set up, so we can always keep it */
		keep_partkey = (relation->rd_partkey != NULL);

		/*
		 * Perform swapping of the relcache entry contents.  Within this
		 * process the old entry is momentarily invalid, so there *must* be no
		 * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
		 * all-in-line code for safety.
		 *
		 * Since the vast majority of fields should be swapped, our method is
		 * to swap the whole structures and then re-swap those few fields we
		 * didn't want swapped.
		 */
#define SWAPFIELD(fldtype, fldname) \
		do { \
			fldtype _tmp = newrel->fldname; \
			newrel->fldname = relation->fldname; \
			relation->fldname = _tmp; \
		} while (0)

		/* swap all Relation struct fields */
		{
			RelationData tmpstruct;

			memcpy(&tmpstruct, newrel, sizeof(RelationData));
			memcpy(newrel, relation, sizeof(RelationData));
			memcpy(relation, &tmpstruct, sizeof(RelationData));
		}

		/*
		 * From here on, each SWAPFIELD undoes the wholesale swap for one
		 * field, i.e. it restores the old entry's value into "relation".
		 */

		/* rd_smgr must not be swapped, due to back-links from smgr level */
		SWAPFIELD(SMgrRelation, rd_smgr);
		/* rd_refcnt must be preserved */
		SWAPFIELD(int, rd_refcnt);
		/* isnailed shouldn't change */
		Assert(newrel->rd_isnailed == relation->rd_isnailed);
		/* creation sub-XIDs must be preserved */
		SWAPFIELD(SubTransactionId, rd_createSubid);
		SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
		SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid);
		SWAPFIELD(SubTransactionId, rd_droppedSubid);
		/* un-swap rd_rel pointers, swap contents instead */
		SWAPFIELD(Form_pg_class, rd_rel);
		/* ... but actually, we don't have to update newrel->rd_rel */
		memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
		/* preserve old tupledesc, rules, policies if no logical change */
		if (keep_tupdesc)
			SWAPFIELD(TupleDesc, rd_att);
		if (keep_rules)
		{
			SWAPFIELD(RuleLock *, rd_rules);
			SWAPFIELD(MemoryContext, rd_rulescxt);
		}
		if (keep_policies)
			SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
		/* toast OID override must be preserved */
		SWAPFIELD(Oid, rd_toastoid);
		/* pgstat_info must be preserved */
		SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
		/* preserve old partition key if we have one */
		if (keep_partkey)
		{
			SWAPFIELD(PartitionKey, rd_partkey);
			SWAPFIELD(MemoryContext, rd_partkeycxt);
		}
		if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL)
		{
			/*
			 * We are rebuilding a partitioned relation with a non-zero
			 * reference count, so we must keep the old partition descriptor
			 * around, in case there's a PartitionDirectory with a pointer to
			 * it.  This means we can't free the old rd_pdcxt yet.  (This is
			 * necessary because RelationGetPartitionDesc hands out direct
			 * pointers to the relcache's data structure, unlike our usual
			 * practice which is to hand out copies.  We'd have the same
			 * problem with rd_partkey, except that we always preserve that
			 * once created.)
			 *
			 * To ensure that it's not leaked completely, re-attach it to the
			 * new reldesc, or make it a child of the new reldesc's rd_pdcxt
			 * in the unlikely event that there is one already.  (Compare hack
			 * in RelationBuildPartitionDesc.)  RelationClose will clean up
			 * any such contexts once the reference count reaches zero.
			 *
			 * In the case where the reference count is zero, this code is not
			 * reached, which should be OK because in that case there should
			 * be no PartitionDirectory with a pointer to the old entry.
			 *
			 * Note that newrel and relation have already been swapped, so the
			 * "old" partition descriptor is actually the one hanging off of
			 * newrel.
			 */
			relation->rd_partdesc = NULL;	/* ensure rd_partdesc is invalid */
			relation->rd_partdesc_nodetached = NULL;
			relation->rd_partdesc_nodetached_xmin = InvalidTransactionId;
			if (relation->rd_pdcxt != NULL) /* probably never happens */
				MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
			else
				relation->rd_pdcxt = newrel->rd_pdcxt;
			if (relation->rd_pddcxt != NULL)
				MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt);
			else
				relation->rd_pddcxt = newrel->rd_pddcxt;
			/* drop newrel's pointers so we don't destroy it below */
			newrel->rd_partdesc = NULL;
			newrel->rd_partdesc_nodetached = NULL;
			newrel->rd_partdesc_nodetached_xmin = InvalidTransactionId;
			newrel->rd_pdcxt = NULL;
			newrel->rd_pddcxt = NULL;
		}

#undef SWAPFIELD

		/*
		 * And now we can throw away the temporary entry.  If the tupdesc was
		 * not kept, newrel now holds the old one, so ask for it to be
		 * remembered until end of transaction rather than freed at once.
		 */
		RelationDestroyRelation(newrel, !keep_tupdesc);
	}
}

/*
 * RelationFlushRelation
 *
 *	 Rebuild the relation if it is open (refcount > 0), else blow it away.
 *	 This is used when we receive a cache invalidation event for the rel.
 */
static void
RelationFlushRelation(Relation relation)
{
	if (relation->rd_createSubid != InvalidSubTransactionId ||
		relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
	{
		/*
		 * New relcache entries are always rebuilt, not flushed; else we'd
		 * forget the "new" status of the relation.  Ditto for the
		 * new-relfilenode status.
		 *
		 * The rel could have zero refcnt here, so temporarily increment the
		 * refcnt to ensure it's safe to rebuild it.  We can assume that the
		 * current transaction has some lock on the rel already.
		 */
		RelationIncrementReferenceCount(relation);
		RelationClearRelation(relation, true);
		RelationDecrementReferenceCount(relation);
	}
	else
	{
		/*
		 * Pre-existing rels can be dropped from the relcache if not open.
		 */
		bool		rebuild = !RelationHasReferenceCountZero(relation);

		RelationClearRelation(relation, rebuild);
	}
}

/*
 * RelationForgetRelation - caller reports that it dropped the relation
 *
 * Does nothing if the relation has no relcache entry.  Raises ERROR if the
 * entry still has a nonzero reference count.
 */
void
RelationForgetRelation(Oid rid)
{
	Relation	relation;

	RelationIdCacheLookup(rid, relation);

	if (!PointerIsValid(relation))
		return;					/* not in cache, nothing to do */

	if (!RelationHasReferenceCountZero(relation))
		elog(ERROR, "relation %u is still open", rid);

	Assert(relation->rd_droppedSubid == InvalidSubTransactionId);
	if (relation->rd_createSubid != InvalidSubTransactionId ||
		relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
	{
		/*
		 * In the event of subtransaction rollback, we must not forget
		 * rd_*Subid.  Mark the entry "dropped" so RelationClearRelation()
		 * invalidates it in lieu of destroying it.  (If we're in a top
		 * transaction, we could opt to destroy the entry.)
		 */
		relation->rd_droppedSubid = GetCurrentSubTransactionId();
	}

	RelationClearRelation(relation, false);
}

/*
 *		RelationCacheInvalidateEntry
 *
 *		This routine is invoked for SI cache flush messages.
 *
 * Any relcache entry matching the relid must be flushed.  (Note: caller has
 * already determined that the relid belongs to our database or is a shared
 * relation.)
 *
 * We used to skip local relations, on the grounds that they could
 * not be targets of cross-backend SI update messages; but it seems
 * safer to process them, so that our *own* SI update messages will
 * have the same effects during CommandCounterIncrement for both
 * local and nonlocal relations.
+ */ +void +RelationCacheInvalidateEntry(Oid relationId) +{ + Relation relation; + + RelationIdCacheLookup(relationId, relation); + + if (PointerIsValid(relation)) + { + relcacheInvalsReceived++; + RelationFlushRelation(relation); + } + else + { + int i; + + for (i = 0; i < in_progress_list_len; i++) + if (in_progress_list[i].reloid == relationId) + in_progress_list[i].invalidated = true; + } +} + +/* + * RelationCacheInvalidate + * Blow away cached relation descriptors that have zero reference counts, + * and rebuild those with positive reference counts. Also reset the smgr + * relation cache and re-read relation mapping data. + * + * Apart from debug_discard_caches, this is currently used only to recover + * from SI message buffer overflow, so we do not touch relations having + * new-in-transaction relfilenodes; they cannot be targets of cross-backend + * SI updates (and our own updates now go through a separate linked list + * that isn't limited by the SI message buffer size). + * + * We do this in two phases: the first pass deletes deletable items, and + * the second one rebuilds the rebuildable items. This is essential for + * safety, because hash_seq_search only copes with concurrent deletion of + * the element it is currently visiting. If a second SI overflow were to + * occur while we are walking the table, resulting in recursive entry to + * this routine, we could crash because the inner invocation blows away + * the entry next to be visited by the outer scan. But this way is OK, + * because (a) during the first pass we won't process any more SI messages, + * so hash_seq_search will complete safely; (b) during the second pass we + * only hold onto pointers to nondeletable entries. + * + * The two-phase approach also makes it easy to update relfilenodes for + * mapped relations before we do anything else, and to ensure that the + * second pass processes nailed-in-cache items before other nondeletable + * items. 
This should ensure that system catalogs are up to date before
 *	 we attempt to use them to reload information about other open relations.
 *
 *	 After those two phases of work having immediate effects, we normally
 *	 signal any RelationBuildDesc() on the stack to start over.  However, we
 *	 don't do this if called as part of debug_discard_caches.  Otherwise,
 *	 RelationBuildDesc() would become an infinite loop.
 *
 *	 debug_discard: when true, the final step that marks every
 *	 in_progress_list entry as invalidated is skipped.
 */
void
RelationCacheInvalidate(bool debug_discard)
{
	HASH_SEQ_STATUS status;
	RelIdCacheEnt *idhentry;
	Relation	relation;
	List	   *rebuildFirstList = NIL;
	List	   *rebuildList = NIL;
	ListCell   *l;
	int			i;

	/*
	 * Reload relation mapping data before starting to reconstruct cache.
	 */
	RelationMapInvalidateAll();

	/* Phase 1 */
	hash_seq_init(&status, RelationIdCache);

	while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
	{
		relation = idhentry->reldesc;

		/* Must close all smgr references to avoid leaving dangling ptrs */
		RelationCloseSmgr(relation);

		/*
		 * Ignore new relations; no other backend will manipulate them before
		 * we commit.  Likewise, before replacing a relation's relfilenode, we
		 * shall have acquired AccessExclusiveLock and drained any applicable
		 * pending invalidations.
		 *
		 * Such entries are neither deleted nor queued for rebuild, though
		 * their smgr link has already been closed just above.
		 */
		if (relation->rd_createSubid != InvalidSubTransactionId ||
			relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
			continue;

		/* Counted once per entry that is actually processed */
		relcacheInvalsReceived++;

		if (RelationHasReferenceCountZero(relation))
		{
			/* Delete this entry immediately */
			Assert(!relation->rd_isnailed);
			RelationClearRelation(relation, false);
		}
		else
		{
			/*
			 * If it's a mapped relation, immediately update its rd_node in
			 * case its relfilenode changed.  We must do this during phase 1
			 * in case the relation is consulted during rebuild of other
			 * relcache entries in phase 2.  It's safe since consulting the
			 * map doesn't involve any access to relcache entries.
			 */
			if (RelationIsMapped(relation))
				RelationInitPhysicalAddr(relation);

			/*
			 * Add this entry to list of stuff to rebuild in second pass.
			 * pg_class goes to the front of rebuildFirstList while
			 * pg_class_oid_index goes to the back of rebuildFirstList, so
			 * they are done first and second respectively.  Other nailed
			 * relations go to the front of rebuildList, so they'll be done
			 * next in no particular order; and everything else goes to the
			 * back of rebuildList.
			 */
			if (RelationGetRelid(relation) == RelationRelationId)
				rebuildFirstList = lcons(relation, rebuildFirstList);
			else if (RelationGetRelid(relation) == ClassOidIndexId)
				rebuildFirstList = lappend(rebuildFirstList, relation);
			else if (relation->rd_isnailed)
				rebuildList = lcons(relation, rebuildList);
			else
				rebuildList = lappend(rebuildList, relation);
		}
	}

	/*
	 * Now zap any remaining smgr cache entries.  This must happen before we
	 * start to rebuild entries, since that may involve catalog fetches which
	 * will re-open catalog files.
	 */
	smgrcloseall();

	/* Phase 2: rebuild the items found to need rebuild in phase 1 */
	foreach(l, rebuildFirstList)
	{
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
	}
	list_free(rebuildFirstList);
	foreach(l, rebuildList)
	{
		relation = (Relation) lfirst(l);
		RelationClearRelation(relation, true);
	}
	list_free(rebuildList);

	if (!debug_discard)
		/* Any RelationBuildDesc() on the stack must start over. */
		for (i = 0; i < in_progress_list_len; i++)
			in_progress_list[i].invalidated = true;
}

/*
 * RelationCloseSmgrByOid - close a relcache entry's smgr link
 *
 * Needed in some cases where we are changing a relation's physical mapping.
 * The link will be automatically reopened on next use.
+ */ +void +RelationCloseSmgrByOid(Oid relationId) +{ + Relation relation; + + RelationIdCacheLookup(relationId, relation); + + if (!PointerIsValid(relation)) + return; /* not in cache, nothing to do */ + + RelationCloseSmgr(relation); +} + +static void +RememberToFreeTupleDescAtEOX(TupleDesc td) +{ + if (EOXactTupleDescArray == NULL) + { + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc)); + EOXactTupleDescArrayLen = 16; + NextEOXactTupleDescNum = 0; + MemoryContextSwitchTo(oldcxt); + } + else if (NextEOXactTupleDescNum >= EOXactTupleDescArrayLen) + { + int32 newlen = EOXactTupleDescArrayLen * 2; + + Assert(EOXactTupleDescArrayLen > 0); + + EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray, + newlen * sizeof(TupleDesc)); + EOXactTupleDescArrayLen = newlen; + } + + EOXactTupleDescArray[NextEOXactTupleDescNum++] = td; +} + +#ifdef USE_ASSERT_CHECKING +static void +AssertPendingSyncConsistency(Relation relation) +{ + bool relcache_verdict = + RelationIsPermanent(relation) && + ((relation->rd_createSubid != InvalidSubTransactionId && + RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) || + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId); + + Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node)); + + if (relation->rd_droppedSubid != InvalidSubTransactionId) + Assert(!relation->rd_isvalid && + (relation->rd_createSubid != InvalidSubTransactionId || + relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)); +} + +/* + * AssertPendingSyncs_RelationCache + * + * Assert that relcache.c and storage.c agree on whether to skip WAL. + */ +void +AssertPendingSyncs_RelationCache(void) +{ + HASH_SEQ_STATUS status; + LOCALLOCK *locallock; + Relation *rels; + int maxrels; + int nrels; + RelIdCacheEnt *idhentry; + int i; + + /* + * Open every relation that this transaction has locked. 
If, for some + * relation, storage.c is skipping WAL and relcache.c is not skipping WAL, + * a CommandCounterIncrement() typically yields a local invalidation + * message that destroys the relcache entry. By recreating such entries + * here, we detect the problem. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + maxrels = 1; + rels = palloc(maxrels * sizeof(*rels)); + nrels = 0; + hash_seq_init(&status, GetLockMethodLocalHash()); + while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL) + { + Oid relid; + Relation r; + + if (locallock->nLocks <= 0) + continue; + if ((LockTagType) locallock->tag.lock.locktag_type != + LOCKTAG_RELATION) + continue; + relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2); + r = RelationIdGetRelation(relid); + if (!RelationIsValid(r)) + continue; + if (nrels >= maxrels) + { + maxrels *= 2; + rels = repalloc(rels, maxrels * sizeof(*rels)); + } + rels[nrels++] = r; + } + + hash_seq_init(&status, RelationIdCache); + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + AssertPendingSyncConsistency(idhentry->reldesc); + + for (i = 0; i < nrels; i++) + RelationClose(rels[i]); + PopActiveSnapshot(); +} +#endif + +/* + * AtEOXact_RelationCache + * + * Clean up the relcache at main-transaction commit or abort. + * + * Note: this must be called *before* processing invalidation messages. + * In the case of abort, we don't want to try to rebuild any invalidated + * cache entries (since we can't safely do database accesses). Therefore + * we must reset refcnts before handling pending invalidations. + * + * As of PostgreSQL 8.1, relcache refcnts should get released by the + * ResourceOwner mechanism. This routine just does a debugging + * cross-check that no pins remain. However, we also need to do special + * cleanup when the current transaction created any relations or made use + * of forced index lists. 
+ */ +void +AtEOXact_RelationCache(bool isCommit) +{ + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + int i; + + /* + * Forget in_progress_list. This is relevant when we're aborting due to + * an error during RelationBuildDesc(). + */ + Assert(in_progress_list_len == 0 || !isCommit); + in_progress_list_len = 0; + + /* + * Unless the eoxact_list[] overflowed, we only need to examine the rels + * listed in it. Otherwise fall back on a hash_seq_search scan. + * + * For simplicity, eoxact_list[] entries are not deleted till end of + * top-level transaction, even though we could remove them at + * subtransaction end in some cases, or remove relations from the list if + * they are cleared for other reasons. Therefore we should expect the + * case that list entries are not found in the hashtable; if not, there's + * nothing to do for them. + */ + if (eoxact_list_overflowed) + { + hash_seq_init(&status, RelationIdCache); + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + AtEOXact_cleanup(idhentry->reldesc, isCommit); + } + } + else + { + for (i = 0; i < eoxact_list_len; i++) + { + idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, + (void *) &eoxact_list[i], + HASH_FIND, + NULL); + if (idhentry != NULL) + AtEOXact_cleanup(idhentry->reldesc, isCommit); + } + } + + if (EOXactTupleDescArrayLen > 0) + { + Assert(EOXactTupleDescArray != NULL); + for (i = 0; i < NextEOXactTupleDescNum; i++) + FreeTupleDesc(EOXactTupleDescArray[i]); + pfree(EOXactTupleDescArray); + EOXactTupleDescArray = NULL; + } + + /* Now we're out of the transaction and can clear the lists */ + eoxact_list_len = 0; + eoxact_list_overflowed = false; + NextEOXactTupleDescNum = 0; + EOXactTupleDescArrayLen = 0; +} + +/* + * AtEOXact_cleanup + * + * Clean up a single rel at main-transaction commit or abort + * + * NB: this processing must be idempotent, because EOXactListAdd() doesn't + * bother to prevent duplicate entries in eoxact_list[]. 
+ */ +static void +AtEOXact_cleanup(Relation relation, bool isCommit) +{ + bool clear_relcache = false; + + /* + * The relcache entry's ref count should be back to its normal + * not-in-a-transaction state: 0 unless it's nailed in cache, in which + * case it is 1. + * + * In bootstrap mode, this is NOT true, so don't check it --- the + * bootstrap code expects relations to stay open across start/commit + * transaction calls. (That seems bogus, but it's not worth fixing.) + * + * Note: ideally this check would be applied to every relcache entry, not + * just those that have eoxact work to do. But it's not worth forcing a + * scan of the whole relcache just for this. (Moreover, doing so would + * mean that assert-enabled testing never tests the hash_search code path + * above, which seems a bad idea.) + */ +#ifdef USE_ASSERT_CHECKING + if (!IsBootstrapProcessingMode()) + { + int expected_refcnt; + + expected_refcnt = relation->rd_isnailed ? 1 : 0; + Assert(relation->rd_refcnt == expected_refcnt); + } +#endif + + /* + * Is the relation live after this transaction ends? + * + * During commit, clear the relcache entry if it is preserved after + * relation drop, in order not to orphan the entry. During rollback, + * clear the relcache entry if the relation is created in the current + * transaction since it isn't interesting any longer once we are out of + * the transaction. + */ + clear_relcache = + (isCommit ? + relation->rd_droppedSubid != InvalidSubTransactionId : + relation->rd_createSubid != InvalidSubTransactionId); + + /* + * Since we are now out of the transaction, reset all four subids to + * InvalidSubTransactionId. That also lets RelationClearRelation() drop + * the relcache entry. 
+ */ + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; + + if (clear_relcache) + { + if (RelationHasReferenceCountZero(relation)) + { + RelationClearRelation(relation, false); + return; + } + else + { + /* + * Hmm, somewhere there's a (leaked?) reference to the relation. + * We daren't remove the entry for fear of dereferencing a + * dangling pointer later. Bleat, and mark it as not belonging to + * the current transaction. Hopefully it'll get cleaned up + * eventually. This must be just a WARNING to avoid + * error-during-error-recovery loops. + */ + elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount", + RelationGetRelationName(relation)); + } + } +} + +/* + * AtEOSubXact_RelationCache + * + * Clean up the relcache at sub-transaction commit or abort. + * + * mySubid identifies the subtransaction that is ending and parentSubid its + * parent; both are passed through to AtEOSubXact_cleanup for each rel. + * + * Note: this must be called *before* processing invalidation messages. + */ +void +AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, + SubTransactionId parentSubid) +{ + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + int i; + + /* + * Forget in_progress_list. This is relevant when we're aborting due to + * an error during RelationBuildDesc(). We don't commit subtransactions + * during RelationBuildDesc(). + */ + Assert(in_progress_list_len == 0 || !isCommit); + in_progress_list_len = 0; + + /* + * Unless the eoxact_list[] overflowed, we only need to examine the rels + * listed in it. Otherwise fall back on a hash_seq_search scan. Same + * logic as in AtEOXact_RelationCache. 
+ */ + if (eoxact_list_overflowed) + { + hash_seq_init(&status, RelationIdCache); + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + AtEOSubXact_cleanup(idhentry->reldesc, isCommit, + mySubid, parentSubid); + } + } + else + { + for (i = 0; i < eoxact_list_len; i++) + { + idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache, + (void *) &eoxact_list[i], + HASH_FIND, + NULL); + if (idhentry != NULL) + AtEOSubXact_cleanup(idhentry->reldesc, isCommit, + mySubid, parentSubid); + } + } + + /* Don't reset the list; AtEOXact_RelationCache still needs it later */ +} + +/* + * AtEOSubXact_cleanup + * + * Clean up a single rel at subtransaction commit or abort + * + * NB: this processing must be idempotent, because EOXactListAdd() doesn't + * bother to prevent duplicate entries in eoxact_list[]. + */ +static void +AtEOSubXact_cleanup(Relation relation, bool isCommit, + SubTransactionId mySubid, SubTransactionId parentSubid) +{ + /* + * Is it a relation created in the current subtransaction? + * + * During subcommit, mark it as belonging to the parent, instead, as long + * as it has not been dropped. Otherwise simply delete the relcache entry + * --- it isn't interesting any longer. + */ + if (relation->rd_createSubid == mySubid) + { + /* + * Valid rd_droppedSubid means the corresponding relation is dropped + * but the relcache entry is preserved for at-commit pending sync. We + * need to drop it explicitly here not to make the entry orphan. 
+ */ + Assert(relation->rd_droppedSubid == mySubid || + relation->rd_droppedSubid == InvalidSubTransactionId); + if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId) + relation->rd_createSubid = parentSubid; + else if (RelationHasReferenceCountZero(relation)) + { + /* allow the entry to be removed */ + relation->rd_createSubid = InvalidSubTransactionId; + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + relation->rd_droppedSubid = InvalidSubTransactionId; + RelationClearRelation(relation, false); + return; + } + else + { + /* + * Hmm, somewhere there's a (leaked?) reference to the relation. + * We daren't remove the entry for fear of dereferencing a + * dangling pointer later. Bleat, and transfer it to the parent + * subtransaction so we can try again later. This must be just a + * WARNING to avoid error-during-error-recovery loops. + */ + relation->rd_createSubid = parentSubid; + elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount", + RelationGetRelationName(relation)); + } + } + + /* + * Likewise, update or drop any new-relfilenode-in-subtransaction, + * first-relfilenode-in-subtransaction, or drop record that belongs to + * this subtransaction: hand it to the parent on subcommit, clear it on + * subabort. + */ + if (relation->rd_newRelfilenodeSubid == mySubid) + { + if (isCommit) + relation->rd_newRelfilenodeSubid = parentSubid; + else + relation->rd_newRelfilenodeSubid = InvalidSubTransactionId; + } + + if (relation->rd_firstRelfilenodeSubid == mySubid) + { + if (isCommit) + relation->rd_firstRelfilenodeSubid = parentSubid; + else + relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + } + + if (relation->rd_droppedSubid == mySubid) + { + if (isCommit) + relation->rd_droppedSubid = parentSubid; + else + relation->rd_droppedSubid = InvalidSubTransactionId; + } +} + + +/* + * RelationBuildLocalRelation + * Build a relcache entry for an about-to-be-created relation, + * and enter it into the relcache. 
+ */ +Relation +RelationBuildLocalRelation(const char *relname, + Oid relnamespace, + TupleDesc tupDesc, + Oid relid, + Oid accessmtd, + Oid relfilenode, + Oid reltablespace, + bool shared_relation, + bool mapped_relation, + char relpersistence, + char relkind) +{ + Relation rel; + MemoryContext oldcxt; + int natts = tupDesc->natts; + int i; + bool has_not_null; + bool nailit; + + AssertArg(natts >= 0); + + /* + * check for creation of a rel that must be nailed in cache. + * + * XXX this list had better match the relations specially handled in + * RelationCacheInitializePhase2/3. + */ + switch (relid) + { + case DatabaseRelationId: + case AuthIdRelationId: + case AuthMemRelationId: + case RelationRelationId: + case AttributeRelationId: + case ProcedureRelationId: + case TypeRelationId: + nailit = true; + break; + default: + nailit = false; + break; + } + + /* + * check that hardwired list of shared rels matches what's in the + * bootstrap .bki file. If you get a failure here during initdb, you + * probably need to fix IsSharedRelation() to match whatever you've done + * to the set of shared relations. + */ + if (shared_relation != IsSharedRelation(relid)) + elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)", + relname, relid); + + /* Shared relations had better be mapped, too */ + Assert(mapped_relation || !shared_relation); + + /* + * switch to the cache context to create the relcache entry. + */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * allocate a new relation descriptor and fill in basic state fields. + */ + rel = (Relation) palloc0(sizeof(RelationData)); + + /* make sure relation is marked as having no open file yet */ + rel->rd_smgr = NULL; + + /* mark it nailed if appropriate; a nailed rel starts with a reference count of 1 */ + rel->rd_isnailed = nailit; + + rel->rd_refcnt = nailit ? 
1 : 0; + + /* it's being created in this transaction */ + rel->rd_createSubid = GetCurrentSubTransactionId(); + rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_droppedSubid = InvalidSubTransactionId; + + /* + * create a new tuple descriptor from the one passed in. We do this + * partly to copy it into the cache context, and partly because the new + * relation can't have any defaults or constraints yet; they have to be + * added in later steps, because they require additions to multiple system + * catalogs. We can copy attnotnull constraints (together with the + * attidentity and attgenerated markings) here, however. + */ + rel->rd_att = CreateTupleDescCopy(tupDesc); + rel->rd_att->tdrefcount = 1; /* mark as refcounted */ + has_not_null = false; + for (i = 0; i < natts; i++) + { + Form_pg_attribute satt = TupleDescAttr(tupDesc, i); + Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i); + + datt->attidentity = satt->attidentity; + datt->attgenerated = satt->attgenerated; + datt->attnotnull = satt->attnotnull; + has_not_null |= satt->attnotnull; + } + + if (has_not_null) + { + TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr)); + + constr->has_not_null = true; + rel->rd_att->constr = constr; + } + + /* + * initialize relation tuple form (caller may add/override data later) + */ + rel->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE); + + namestrcpy(&rel->rd_rel->relname, relname); + rel->rd_rel->relnamespace = relnamespace; + + rel->rd_rel->relkind = relkind; + rel->rd_rel->relnatts = natts; + rel->rd_rel->reltype = InvalidOid; + /* needed when bootstrapping: */ + rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID; + + /* set up persistence and relcache fields dependent on it */ + rel->rd_rel->relpersistence = relpersistence; + switch (relpersistence) + { + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + rel->rd_backend = InvalidBackendId; + rel->rd_islocaltemp = false; + break; + case RELPERSISTENCE_TEMP: + 
Assert(isTempOrTempToastNamespace(relnamespace)); + rel->rd_backend = BackendIdForTempRelations(); + rel->rd_islocaltemp = true; + break; + default: + elog(ERROR, "invalid relpersistence: %c", relpersistence); + break; + } + + /* if it's a materialized view, it's not populated initially */ + if (relkind == RELKIND_MATVIEW) + rel->rd_rel->relispopulated = false; + else + rel->rd_rel->relispopulated = true; + + /* set replica identity -- system catalogs and non-tables don't have one */ + if (!IsCatalogNamespace(relnamespace) && + (relkind == RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE)) + rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT; + else + rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; + + /* + * Insert relation physical and logical identifiers (OIDs) into the right + * places. For a mapped relation, we set relfilenode to InvalidOid and + * rely on RelationInitPhysicalAddr to consult the map. + */ + rel->rd_rel->relisshared = shared_relation; + + RelationGetRelid(rel) = relid; + + for (i = 0; i < natts; i++) + TupleDescAttr(rel->rd_att, i)->attrelid = relid; + + rel->rd_rel->reltablespace = reltablespace; + + if (mapped_relation) + { + rel->rd_rel->relfilenode = InvalidOid; + /* Add it to the active mapping information */ + RelationMapUpdateMap(relid, relfilenode, shared_relation, true); + } + else + rel->rd_rel->relfilenode = relfilenode; + + RelationInitLockInfo(rel); /* see lmgr.c */ + + RelationInitPhysicalAddr(rel); + + rel->rd_rel->relam = accessmtd; + + /* + * RelationInitTableAccessMethod will do syscache lookups, so we mustn't + * run it in CacheMemoryContext. Fortunately, the remaining steps don't + * require a long-lived current context. 
+ */ + MemoryContextSwitchTo(oldcxt); + + if (relkind == RELKIND_RELATION || + relkind == RELKIND_SEQUENCE || + relkind == RELKIND_TOASTVALUE || + relkind == RELKIND_MATVIEW) + RelationInitTableAccessMethod(rel); + + /* + * Okay to insert into the relcache hash table. + * + * Ordinarily, there should certainly not be an existing hash entry for + * the same OID; but during bootstrap, when we create a "real" relcache + * entry for one of the bootstrap relations, we'll be overwriting the + * phony one created with formrdesc. So allow that to happen for nailed + * rels. + */ + RelationCacheInsert(rel, nailit); + + /* + * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We + * can't do this before storing relid in it. + */ + EOXactListAdd(rel); + + /* It's fully valid */ + rel->rd_isvalid = true; + + /* + * Caller expects us to pin the returned entry. For a nailed rel this pin + * is in addition to the reference count of 1 assigned above. + */ + RelationIncrementReferenceCount(rel); + + return rel; +} + + +/* + * RelationSetNewRelfilenode + * + * Assign a new relfilenode (physical file name), and possibly a new + * persistence setting, to the relation. + * + * This allows a full rewrite of the relation to be done with transactional + * safety (since the filenode assignment can be rolled back). Note however + * that there is no simple way to access the relation's old data for the + * remainder of the current transaction. This limits the usefulness to cases + * such as TRUNCATE or rebuilding an index from scratch. + * + * Caller must already hold exclusive lock on the relation. 
+ */ +void +RelationSetNewRelfilenode(Relation relation, char persistence) +{ + Oid newrelfilenode; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + MultiXactId minmulti = InvalidMultiXactId; + TransactionId freezeXid = InvalidTransactionId; + RelFileNode newrnode; + + /* Allocate a new relfilenode in the relation's current tablespace */ + newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, + persistence); + + /* + * Get a writable copy of the pg_class tuple for the given relation. The + * copy is released with heap_freetuple() once we are done with it. + */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + classform = (Form_pg_class) GETSTRUCT(tuple); + + /* + * Schedule unlinking of the old storage at transaction commit. + */ + RelationDropStorage(relation); + + /* + * Create storage for the main fork of the new relfilenode. If it's a + * table-like object, call into the table AM to do so, which'll also + * create the table's init fork if needed. + * + * NOTE: If relevant for the AM, any conflict in relfilenode value will be + * caught here, if GetNewRelFileNode messes up for any reason. 
+ */ + newrnode = relation->rd_node; + newrnode.relNode = newrelfilenode; + + switch (relation->rd_rel->relkind) + { + case RELKIND_INDEX: + case RELKIND_SEQUENCE: + { + /* handle these directly, at least for now */ + SMgrRelation srel; + + srel = RelationCreateStorage(newrnode, persistence); + smgrclose(srel); + } + break; + + case RELKIND_RELATION: + case RELKIND_TOASTVALUE: + case RELKIND_MATVIEW: + table_relation_set_new_filenode(relation, &newrnode, + persistence, + &freezeXid, &minmulti); + break; + + default: + /* we shouldn't be called for anything else */ + elog(ERROR, "relation \"%s\" does not have storage", + RelationGetRelationName(relation)); + break; + } + + /* + * If we're dealing with a mapped index, pg_class.relfilenode doesn't + * change; instead we have to send the update to the relation mapper. + * + * For mapped indexes, we don't actually change the pg_class entry at all; + * this is essential when reindexing pg_class itself. That leaves us with + * possibly-inaccurate values of relpages etc, but those will be fixed up + * later. + */ + if (RelationIsMapped(relation)) + { + /* This case is only supported for indexes */ + Assert(relation->rd_rel->relkind == RELKIND_INDEX); + + /* Since we're not updating pg_class, these had better not change */ + Assert(classform->relfrozenxid == freezeXid); + Assert(classform->relminmxid == minmulti); + Assert(classform->relpersistence == persistence); + + /* + * In some code paths it's possible that the tuple update we'd + * otherwise do here is the only thing that would assign an XID for + * the current transaction. However, we must have an XID to delete + * files, so make sure one is assigned. 
+ */ + (void) GetCurrentTransactionId(); + + /* Do the deed: send the new relfilenode to the relation mapper */ + RelationMapUpdateMap(RelationGetRelid(relation), + newrelfilenode, + relation->rd_rel->relisshared, + false); + + /* Since we're not updating pg_class, must trigger inval manually */ + CacheInvalidateRelcache(relation); + } + else + { + /* Normal case, update the pg_class entry */ + classform->relfilenode = newrelfilenode; + + /* relpages etc. never change for sequences */ + if (relation->rd_rel->relkind != RELKIND_SEQUENCE) + { + classform->relpages = 0; /* it's empty until further notice */ + classform->reltuples = -1; + classform->relallvisible = 0; + } + classform->relfrozenxid = freezeXid; + classform->relminmxid = minmulti; + classform->relpersistence = persistence; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + } + + heap_freetuple(tuple); + + table_close(pg_class, RowExclusiveLock); + + /* + * Make the pg_class row change or relation map change visible. This will + * cause the relcache entry to get updated, too. + */ + CommandCounterIncrement(); + + RelationAssumeNewRelfilenode(relation); +} + +/* + * RelationAssumeNewRelfilenode + * + * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call + * this. The call shall precede any code that might insert WAL records whose + * replay would modify bytes in the new RelFileNode, and the call shall follow + * any WAL modifying bytes in the prior RelFileNode. See struct RelationData. + * Ideally, call this as near as possible to the CommandCounterIncrement() + * that makes the pg_class change visible (before it or after it); that + * minimizes the chance of future development adding a forbidden WAL insertion + * between RelationAssumeNewRelfilenode() and CommandCounterIncrement(). 
+ */ +void +RelationAssumeNewRelfilenode(Relation relation) +{ + relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId(); + if (relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId) + relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid; + + /* Flag relation as needing eoxact cleanup (to clear these fields) */ + EOXactListAdd(relation); +} + + +/* + * RelationCacheInitialize + * + * This initializes the relation descriptor cache. At the time + * that this is invoked, we can't do database access yet (mainly + * because the transaction subsystem is not up); all we are doing + * is making an empty cache hashtable, allocating the initial + * in_progress_list, and initializing the relation mapper. This + * must be done before + * starting the initialization transaction, because otherwise + * AtEOXact_RelationCache would crash if that transaction aborts + * before we can get the relcache set up. + */ + +#define INITRELCACHESIZE 400 + +void +RelationCacheInitialize(void) +{ + HASHCTL ctl; + int allocsize; + + /* + * make sure cache memory context exists + */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + /* + * create hashtable that indexes the relcache by relation OID + */ + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RelIdCacheEnt); + RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE, + &ctl, HASH_ELEM | HASH_BLOBS); + + /* + * reserve enough in_progress_list slots for many cases + */ + allocsize = 4; + in_progress_list = + MemoryContextAlloc(CacheMemoryContext, + allocsize * sizeof(*in_progress_list)); + in_progress_list_maxlen = allocsize; + + /* + * relation mapper needs to be initialized too + */ + RelationMapInitialize(); +} + +/* + * RelationCacheInitializePhase2 + * + * This is called to prepare for access to shared catalogs during startup. + * We must at least set up nailed reldescs for pg_database, pg_authid, + * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like + * to have reldescs for their indexes, too. We attempt to load this + * information from the shared relcache init file. 
If that's missing or broken, just make + * phony entries for the catalogs themselves. + * RelationCacheInitializePhase3 will clean up as needed. + */ +void +RelationCacheInitializePhase2(void) +{ + MemoryContext oldcxt; + + /* + * relation mapper needs to be initialized too + */ + RelationMapInitializePhase2(); + + /* + * In bootstrap mode, the shared catalogs aren't there yet anyway, so do + * nothing. + */ + if (IsBootstrapProcessingMode()) + return; + + /* + * switch to cache memory context + */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * Try to load the shared relcache cache file. If unsuccessful, bootstrap + * the cache with pre-made descriptors for the critical shared catalogs. + */ + if (!load_relcache_init_file(true)) + { + formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true, + Natts_pg_database, Desc_pg_database); + formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true, + Natts_pg_authid, Desc_pg_authid); + formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true, + Natts_pg_auth_members, Desc_pg_auth_members); + formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true, + Natts_pg_shseclabel, Desc_pg_shseclabel); + formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true, + Natts_pg_subscription, Desc_pg_subscription); + +#define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */ + } + + MemoryContextSwitchTo(oldcxt); +} + +/* + * RelationCacheInitializePhase3 + * + * This is called as soon as the catcache and transaction system + * are functional and we have determined MyDatabaseId. At this point + * we can actually read data from the database's system catalogs. + * We first try to read pre-computed relcache entries from the local + * relcache init file. If that's missing or broken, make phony entries + * for the minimum set of nailed-in-cache relations. Then (unless + * bootstrapping) make sure we have entries for the critical system + * indexes. 
Once we've done all this, we have enough infrastructure to + * open any system catalog or use any catcache. The last step is to + * rewrite the cache files if needed. + */ +void +RelationCacheInitializePhase3(void) +{ + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + MemoryContext oldcxt; + bool needNewCacheFile = !criticalSharedRelcachesBuilt; + + /* + * relation mapper needs to be initialized too + */ + RelationMapInitializePhase3(); + + /* + * switch to cache memory context + */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* + * Try to load the local relcache cache file. If unsuccessful, bootstrap + * the cache with pre-made descriptors for the critical "nailed-in" system + * catalogs. + */ + if (IsBootstrapProcessingMode() || + !load_relcache_init_file(false)) + { + needNewCacheFile = true; + + formrdesc("pg_class", RelationRelation_Rowtype_Id, false, + Natts_pg_class, Desc_pg_class); + formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false, + Natts_pg_attribute, Desc_pg_attribute); + formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false, + Natts_pg_proc, Desc_pg_proc); + formrdesc("pg_type", TypeRelation_Rowtype_Id, false, + Natts_pg_type, Desc_pg_type); + +#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */ + } + + MemoryContextSwitchTo(oldcxt); + + /* In bootstrap mode, the faked-up formrdesc info is all we'll have */ + if (IsBootstrapProcessingMode()) + return; + + /* + * If we didn't get the critical system indexes loaded into relcache, do + * so now. These are critical because the catcache and/or opclass cache + * depend on them for fetches done during relcache load. Thus, we have an + * infinite-recursion problem. We can break the recursion by doing + * heapscans instead of indexscans at certain key spots. To avoid hobbling + * performance, we only want to do that until we have the critical indexes + * loaded into relcache. 
Thus, the flag criticalRelcachesBuilt is used to + * decide whether to do heapscan or indexscan at the key spots, and we set + * it true after we've loaded the critical indexes. + * + * The critical indexes are marked as "nailed in cache", partly to make it + * easy for load_relcache_init_file to count them, but mainly because we + * cannot flush and rebuild them once we've set criticalRelcachesBuilt to + * true. (NOTE: perhaps it would be possible to reload them by + * temporarily setting criticalRelcachesBuilt to false again. For now, + * though, we just nail 'em in.) + * + * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical + * in the same way as the others, because the critical catalogs don't + * (currently) have any rules or triggers, and so these indexes can be + * rebuilt without inducing recursion. However they are used during + * relcache load when a rel does have rules or triggers, so we choose to + * nail them for performance reasons. + */ + if (!criticalRelcachesBuilt) + { + load_critical_index(ClassOidIndexId, + RelationRelationId); + load_critical_index(AttributeRelidNumIndexId, + AttributeRelationId); + load_critical_index(IndexRelidIndexId, + IndexRelationId); + load_critical_index(OpclassOidIndexId, + OperatorClassRelationId); + load_critical_index(AccessMethodProcedureIndexId, + AccessMethodProcedureRelationId); + load_critical_index(RewriteRelRulenameIndexId, + RewriteRelationId); + load_critical_index(TriggerRelidNameIndexId, + TriggerRelationId); + +#define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */ + + criticalRelcachesBuilt = true; + } + + /* + * Process critical shared indexes too. + * + * DatabaseNameIndexId isn't critical for relcache loading, but rather for + * initial lookup of MyDatabaseId, without which we'll never find any + * non-shared catalogs at all. Autovacuum calls InitPostgres with a + * database OID, so it instead depends on DatabaseOidIndexId. 
We also + * need to nail up some indexes on pg_authid and pg_auth_members for use + * during client authentication. SharedSecLabelObjectIndexId isn't + * critical for the core system, but authentication hooks might be + * interested in it. + */ + if (!criticalSharedRelcachesBuilt) + { + load_critical_index(DatabaseNameIndexId, + DatabaseRelationId); + load_critical_index(DatabaseOidIndexId, + DatabaseRelationId); + load_critical_index(AuthIdRolnameIndexId, + AuthIdRelationId); + load_critical_index(AuthIdOidIndexId, + AuthIdRelationId); + load_critical_index(AuthMemMemRoleIndexId, + AuthMemRelationId); + load_critical_index(SharedSecLabelObjectIndexId, + SharedSecLabelRelationId); + +#define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */ + + criticalSharedRelcachesBuilt = true; + } + + /* + * Now, scan all the relcache entries and update anything that might be + * wrong in the results from formrdesc or the relcache cache file. If we + * faked up relcache entries using formrdesc, then read the real pg_class + * rows and replace the fake entries with them. Also, if any of the + * relcache entries have rules, triggers, or security policies, load that + * info the hard way since it isn't recorded in the cache file. + * + * Whenever we access the catalogs to read data, there is a possibility of + * a shared-inval cache flush causing relcache entries to be removed. + * Since hash_seq_search only guarantees to still work after the *current* + * entry is removed, it's unsafe to continue the hashtable scan afterward. + * We handle this by restarting the scan from scratch after each access. + * This is theoretically O(N^2), but the number of entries that actually + * need to be fixed is small enough that it doesn't matter. 
+ */ + hash_seq_init(&status, RelationIdCache); + + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + Relation relation = idhentry->reldesc; + bool restart = false; + + /* + * Make sure *this* entry doesn't get flushed while we work with it. + */ + RelationIncrementReferenceCount(relation); + + /* + * If it's a faked-up entry (recognizable by its invalid relowner), + * read the real pg_class tuple. + */ + if (relation->rd_rel->relowner == InvalidOid) + { + HeapTuple htup; + Form_pg_class relp; + + htup = SearchSysCache1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(htup)) + elog(FATAL, "cache lookup failed for relation %u", + RelationGetRelid(relation)); + relp = (Form_pg_class) GETSTRUCT(htup); + + /* + * Copy tuple to relation->rd_rel. (See notes in + * AllocateRelationDesc()) + */ + memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE); + + /* Update rd_options while we have the tuple */ + if (relation->rd_options) + pfree(relation->rd_options); + RelationParseRelOptions(relation, htup); + + /* + * Check the values in rd_att were set up correctly. (We cannot + * just copy them over now: formrdesc must have set up the rd_att + * data correctly to start with, because it may already have been + * copied into one or more catcache entries.) + */ + Assert(relation->rd_att->tdtypeid == relp->reltype); + Assert(relation->rd_att->tdtypmod == -1); + + ReleaseSysCache(htup); + + /* relowner had better be OK now, else we'll loop forever */ + if (relation->rd_rel->relowner == InvalidOid) + elog(ERROR, "invalid relowner in pg_class entry for \"%s\"", + RelationGetRelationName(relation)); + + restart = true; + } + + /* + * Fix data that isn't saved in relcache cache file. + * + * relhasrules or relhastriggers could possibly be wrong or out of + * date. If we don't actually find any rules or triggers, clear the + * local copy of the flag so that we don't get into an infinite loop + * here. 
We don't make any attempt to fix the pg_class entry, though. + */ + if (relation->rd_rel->relhasrules && relation->rd_rules == NULL) + { + RelationBuildRuleLock(relation); + if (relation->rd_rules == NULL) + relation->rd_rel->relhasrules = false; + restart = true; + } + if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL) + { + RelationBuildTriggers(relation); + if (relation->trigdesc == NULL) + relation->rd_rel->relhastriggers = false; + restart = true; + } + + /* + * Re-load the row security policies if the relation has them, since + * they are not preserved in the cache. Note that we can never NOT + * have a policy while relrowsecurity is true: + * RelationBuildRowSecurity will create a single default-deny policy + * if there is no policy defined in pg_policy. + */ + if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL) + { + RelationBuildRowSecurity(relation); + + Assert(relation->rd_rsdesc != NULL); + restart = true; + } + + /* Reload tableam data if needed */ + if (relation->rd_tableam == NULL && + (relation->rd_rel->relkind == RELKIND_RELATION || + relation->rd_rel->relkind == RELKIND_SEQUENCE || + relation->rd_rel->relkind == RELKIND_TOASTVALUE || + relation->rd_rel->relkind == RELKIND_MATVIEW)) + { + RelationInitTableAccessMethod(relation); + Assert(relation->rd_tableam != NULL); + + restart = true; + } + + /* Release hold on the relation */ + RelationDecrementReferenceCount(relation); + + /* Now, restart the hashtable scan if needed */ + if (restart) + { + hash_seq_term(&status); + hash_seq_init(&status, RelationIdCache); + } + } + + /* + * Lastly, write out new relcache cache files if needed. We don't bother + * to distinguish cases where only one of the two needs an update. + */ + if (needNewCacheFile) + { + /* + * Force all the catcaches to finish initializing and thereby open the + * catalogs and indexes they use. 
This will preload the relcache with + * entries for all the most important system catalogs and indexes, so + * that the init files will be most useful for future backends. + */ + InitCatalogCachePhase2(); + + /* now write the files */ + write_relcache_init_file(true); + write_relcache_init_file(false); + } +} + +/* + * Load one critical system index into the relcache + * + * indexoid is the OID of the target index, heapoid is the OID of the catalog + * it belongs to. + * + * The resulting relcache entry is marked nailed and given a reference count + * of 1; failing to build it is a PANIC. + */ +static void +load_critical_index(Oid indexoid, Oid heapoid) +{ + Relation ird; + + /* + * We must lock the underlying catalog before locking the index to avoid + * deadlock, since RelationBuildDesc might well need to read the catalog, + * and if anyone else is exclusive-locking this catalog and index they'll + * be doing it in that order. + */ + LockRelationOid(heapoid, AccessShareLock); + LockRelationOid(indexoid, AccessShareLock); + ird = RelationBuildDesc(indexoid, true); + if (ird == NULL) + elog(PANIC, "could not open critical system index %u", indexoid); + ird->rd_isnailed = true; + ird->rd_refcnt = 1; + UnlockRelationOid(indexoid, AccessShareLock); + UnlockRelationOid(heapoid, AccessShareLock); + + (void) RelationGetIndexAttOptions(ird, false); +} + +/* + * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class + * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index + * + * We need this kluge because we have to be able to access non-fixed-width + * fields of pg_class and pg_index before we have the standard catalog caches + * available. We use predefined data that's set up in just the same way as + * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is + * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor + * does it have a TupleConstr field. But it's good enough for the purpose of + * extracting fields. 
+ */ +static TupleDesc +BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs) +{ + TupleDesc result; + MemoryContext oldcxt; + int i; + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + result = CreateTemplateTupleDesc(natts); + result->tdtypeid = RECORDOID; /* not right, but we don't care */ + result->tdtypmod = -1; + + for (i = 0; i < natts; i++) + { + memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE); + /* make sure attcacheoff is valid */ + TupleDescAttr(result, i)->attcacheoff = -1; + } + + /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */ + TupleDescAttr(result, 0)->attcacheoff = 0; + + /* Note: we don't bother to set up a TupleConstr entry */ + + MemoryContextSwitchTo(oldcxt); + + return result; +} + +static TupleDesc +GetPgClassDescriptor(void) +{ + static TupleDesc pgclassdesc = NULL; + + /* Already done? */ + if (pgclassdesc == NULL) + pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class, + Desc_pg_class); + + return pgclassdesc; +} + +static TupleDesc +GetPgIndexDescriptor(void) +{ + static TupleDesc pgindexdesc = NULL; + + /* Already done? */ + if (pgindexdesc == NULL) + pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index, + Desc_pg_index); + + return pgindexdesc; +} + +/* + * Load any default attribute value definitions for the relation. + * + * ndef is the number of attributes that were marked atthasdef. + * + * Note: we don't make it a hard error to be missing some pg_attrdef records. + * We can limp along as long as nothing needs to use the default value. Code + * that fails to find an expected AttrDefault record should throw an error. 
+ */ +static void +AttrDefaultFetch(Relation relation, int ndef) +{ + AttrDefault *attrdef; + Relation adrel; + SysScanDesc adscan; + ScanKeyData skey; + HeapTuple htup; + int found = 0; + + /* Allocate array with room for as many entries as expected */ + attrdef = (AttrDefault *) + MemoryContextAllocZero(CacheMemoryContext, + ndef * sizeof(AttrDefault)); + + /* Search pg_attrdef for relevant entries */ + ScanKeyInit(&skey, + Anum_pg_attrdef_adrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + adrel = table_open(AttrDefaultRelationId, AccessShareLock); + adscan = systable_beginscan(adrel, AttrDefaultIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(adscan))) + { + Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup); + Datum val; + bool isnull; + + /* protect limited size of array */ + if (found >= ndef) + { + elog(WARNING, "unexpected pg_attrdef record found for attribute %d of relation \"%s\"", + adform->adnum, RelationGetRelationName(relation)); + break; + } + + val = fastgetattr(htup, + Anum_pg_attrdef_adbin, + adrel->rd_att, &isnull); + if (isnull) + elog(WARNING, "null adbin for attribute %d of relation \"%s\"", + adform->adnum, RelationGetRelationName(relation)); + else + { + /* detoast and convert to cstring in caller's context */ + char *s = TextDatumGetCString(val); + + attrdef[found].adnum = adform->adnum; + attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s); + pfree(s); + found++; + } + } + + systable_endscan(adscan); + table_close(adrel, AccessShareLock); + + if (found != ndef) + elog(WARNING, "%d pg_attrdef record(s) missing for relation \"%s\"", + ndef - found, RelationGetRelationName(relation)); + + /* + * Sort the AttrDefault entries by adnum, for the convenience of + * equalTupleDescs(). (Usually, they already will be in order, but this + * might not be so if systable_getnext isn't using an index.) 
+ */ + if (found > 1) + qsort(attrdef, found, sizeof(AttrDefault), AttrDefaultCmp); + + /* Install array only after it's fully valid */ + relation->rd_att->constr->defval = attrdef; + relation->rd_att->constr->num_defval = found; +} + +/* + * qsort comparator to sort AttrDefault entries by adnum + */ +static int +AttrDefaultCmp(const void *a, const void *b) +{ + const AttrDefault *ada = (const AttrDefault *) a; + const AttrDefault *adb = (const AttrDefault *) b; + + return ada->adnum - adb->adnum; +} + +/* + * Load any check constraints for the relation. + * + * As with defaults, if we don't find the expected number of them, just warn + * here. The executor should throw an error if an INSERT/UPDATE is attempted. + */ +static void +CheckConstraintFetch(Relation relation) +{ + ConstrCheck *check; + int ncheck = relation->rd_rel->relchecks; + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey[1]; + HeapTuple htup; + int found = 0; + + /* Allocate array with room for as many entries as expected */ + check = (ConstrCheck *) + MemoryContextAllocZero(CacheMemoryContext, + ncheck * sizeof(ConstrCheck)); + + /* Search pg_constraint for relevant entries */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + conrel = table_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup); + Datum val; + bool isnull; + + /* We want check constraints only */ + if (conform->contype != CONSTRAINT_CHECK) + continue; + + /* protect limited size of array */ + if (found >= ncheck) + { + elog(WARNING, "unexpected pg_constraint record found for relation \"%s\"", + RelationGetRelationName(relation)); + break; + } + + check[found].ccvalid = conform->convalidated; + 
check[found].ccnoinherit = conform->connoinherit; + check[found].ccname = MemoryContextStrdup(CacheMemoryContext, + NameStr(conform->conname)); + + /* Grab and test conbin is actually set */ + val = fastgetattr(htup, + Anum_pg_constraint_conbin, + conrel->rd_att, &isnull); + if (isnull) + elog(WARNING, "null conbin for relation \"%s\"", + RelationGetRelationName(relation)); + else + { + /* detoast and convert to cstring in caller's context */ + char *s = TextDatumGetCString(val); + + check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s); + pfree(s); + found++; + } + } + + systable_endscan(conscan); + table_close(conrel, AccessShareLock); + + if (found != ncheck) + elog(WARNING, "%d pg_constraint record(s) missing for relation \"%s\"", + ncheck - found, RelationGetRelationName(relation)); + + /* + * Sort the records by name. This ensures that CHECKs are applied in a + * deterministic order, and it also makes equalTupleDescs() faster. + */ + if (found > 1) + qsort(check, found, sizeof(ConstrCheck), CheckConstraintCmp); + + /* Install array only after it's fully valid */ + relation->rd_att->constr->check = check; + relation->rd_att->constr->num_check = found; +} + +/* + * qsort comparator to sort ConstrCheck entries by name + */ +static int +CheckConstraintCmp(const void *a, const void *b) +{ + const ConstrCheck *ca = (const ConstrCheck *) a; + const ConstrCheck *cb = (const ConstrCheck *) b; + + return strcmp(ca->ccname, cb->ccname); +} + +/* + * RelationGetFKeyList -- get a list of foreign key info for the relation + * + * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining + * the given relation. This data is a direct copy of relevant fields from + * pg_constraint. The list items are in no particular order. + * + * CAUTION: the returned list is part of the relcache's data, and could + * vanish in a relcache entry reset. 
Callers must inspect or copy it + * before doing anything that might trigger a cache flush, such as + * system catalog accesses. copyObject() can be used if desired. + * (We define it this way because current callers want to filter and + * modify the list entries anyway, so copying would be a waste of time.) + */ +List * +RelationGetFKeyList(Relation relation) +{ + List *result; + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey; + HeapTuple htup; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_fkeyvalid) + return relation->rd_fkeylist; + + /* Fast path: non-partitioned tables without triggers can't have FKs */ + if (!relation->rd_rel->relhastriggers && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + return NIL; + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_constraint for entries having conrelid = this rel. 
*/ + ScanKeyInit(&skey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + conrel = table_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup); + ForeignKeyCacheInfo *info; + + /* consider only foreign keys */ + if (constraint->contype != CONSTRAINT_FOREIGN) + continue; + + info = makeNode(ForeignKeyCacheInfo); + info->conoid = constraint->oid; + info->conrelid = constraint->conrelid; + info->confrelid = constraint->confrelid; + + DeconstructFkConstraintRow(htup, &info->nkeys, + info->conkey, + info->confkey, + info->conpfeqop, + NULL, NULL); + + /* Add FK's node to the result list */ + result = lappend(result, info); + } + + systable_endscan(conscan); + table_close(conrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_fkeylist; + relation->rd_fkeylist = copyObject(result); + relation->rd_fkeyvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free_deep(oldlist); + + return result; +} + +/* + * RelationGetIndexList -- get a list of OIDs of indexes on this relation + * + * The index list is created only if someone requests it. We scan pg_index + * to find relevant indexes, and add the list to the relcache entry so that + * we won't have to compute it again. Note that shared cache inval of a + * relcache entry will delete the old list and set rd_indexvalid to false, + * so that we must recompute the index list on next request. This handles + * creation or deletion of an index. + * + * Indexes that are marked not indislive are omitted from the returned list. 
+ * Such indexes are expected to be dropped momentarily, and should not be + * touched at all by any caller of this function. + * + * The returned list is guaranteed to be sorted in order by OID. This is + * needed by the executor, since for index types that we obtain exclusive + * locks on when updating the index, all backends must lock the indexes in + * the same order or we will get deadlocks (see ExecOpenIndices()). Any + * consistent ordering would do, but ordering by OID is easy. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * indexes, and syscache lookup could cause SI messages to be processed! + * + * In exactly the same way, we update rd_pkindex, which is the OID of the + * relation's primary key index if any, else InvalidOid; and rd_replidindex, + * which is the pg_class OID of an index to be used as the relation's + * replication identity index, or InvalidOid if there is no such index. + */ +List * +RelationGetIndexList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + char replident = relation->rd_rel->relreplident; + Oid pkeyIndex = InvalidOid; + Oid candidateIndex = InvalidOid; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_indexvalid) + return list_copy(relation->rd_indexlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_index for entries having indrelid = this rel. 
*/ + ScanKeyInit(&skey, + Anum_pg_index_indrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = table_open(IndexRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + { + Form_pg_index index = (Form_pg_index) GETSTRUCT(htup); + + /* + * Ignore any indexes that are currently being dropped. This will + * prevent them from being searched, inserted into, or considered in + * HOT-safety decisions. It's unsafe to touch such an index at all + * since its catalog entries could disappear at any instant. + */ + if (!index->indislive) + continue; + + /* add index's OID to result list */ + result = lappend_oid(result, index->indexrelid); + + /* + * Invalid, non-unique, non-immediate or predicate indexes aren't + * interesting for either oid indexes or replication identity indexes, + * so don't check them. + */ + if (!index->indisvalid || !index->indisunique || + !index->indimmediate || + !heap_attisnull(htup, Anum_pg_index_indpred, NULL)) + continue; + + /* remember primary key index if any */ + if (index->indisprimary) + pkeyIndex = index->indexrelid; + + /* remember explicitly chosen replica index */ + if (index->indisreplident) + candidateIndex = index->indexrelid; + } + + systable_endscan(indscan); + + table_close(indrel, AccessShareLock); + + /* Sort the result list into OID order, per API spec. */ + list_sort(result, list_oid_cmp); + + /* Now save a copy of the completed list in the relcache entry. 
*/ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_indexlist; + relation->rd_indexlist = list_copy(result); + relation->rd_pkindex = pkeyIndex; + if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex)) + relation->rd_replidindex = pkeyIndex; + else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex)) + relation->rd_replidindex = candidateIndex; + else + relation->rd_replidindex = InvalidOid; + relation->rd_indexvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* + * RelationGetStatExtList + * get a list of OIDs of statistics objects on this relation + * + * The statistics list is created only if someone requests it, in a way + * similar to RelationGetIndexList(). We scan pg_statistic_ext to find + * relevant statistics, and add the list to the relcache entry so that we + * won't have to compute it again. Note that shared cache inval of a + * relcache entry will delete the old list and set rd_statvalid to 0, + * so that we must recompute the statistics list on next request. This + * handles creation or deletion of a statistics object. + * + * The returned list is guaranteed to be sorted in order by OID, although + * this is not currently needed. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * statistics, and syscache lookup could cause SI messages to be processed! + */ +List * +RelationGetStatExtList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. 
*/ + if (relation->rd_statvalid != 0) + return list_copy(relation->rd_statlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* + * Prepare to scan pg_statistic_ext for entries having stxrelid = this + * rel. + */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_stxrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = table_open(StatisticExtRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + { + Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid; + + result = lappend_oid(result, oid); + } + + systable_endscan(indscan); + + table_close(indrel, AccessShareLock); + + /* Sort the result list into OID order, per API spec. */ + list_sort(result, list_oid_cmp); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_statlist; + relation->rd_statlist = list_copy(result); + + relation->rd_statvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* + * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index + * + * Returns InvalidOid if there is no such index. + */ +Oid +RelationGetPrimaryKeyIndex(Relation relation) +{ + List *ilist; + + if (!relation->rd_indexvalid) + { + /* RelationGetIndexList does the heavy lifting. 
*/ + ilist = RelationGetIndexList(relation); + list_free(ilist); + Assert(relation->rd_indexvalid); + } + + return relation->rd_pkindex; +} + +/* + * RelationGetReplicaIndex -- get OID of the relation's replica identity index + * + * Returns InvalidOid if there is no such index. + */ +Oid +RelationGetReplicaIndex(Relation relation) +{ + List *ilist; + + if (!relation->rd_indexvalid) + { + /* RelationGetIndexList does the heavy lifting. */ + ilist = RelationGetIndexList(relation); + list_free(ilist); + Assert(relation->rd_indexvalid); + } + + return relation->rd_replidindex; +} + +/* + * RelationGetIndexExpressions -- get the index expressions for an index + * + * We cache the result of transforming pg_index.indexprs into a node tree. + * If the rel is not an index or has no expressional columns, we return NIL. + * Otherwise, the returned tree is copied into the caller's memory context. + * (We don't want to return a pointer to the relcache copy, since it could + * disappear due to relcache invalidation.) + */ +List * +RelationGetIndexExpressions(Relation relation) +{ + List *result; + Datum exprsDatum; + bool isnull; + char *exprsString; + MemoryContext oldcxt; + + /* Quick exit if we already computed the result. */ + if (relation->rd_indexprs) + return copyObject(relation->rd_indexprs); + + /* Quick exit if there is nothing to do. */ + if (relation->rd_indextuple == NULL || + heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL)) + return NIL; + + /* + * We build the tree we intend to return in the caller's context. After + * successfully completing the work, we copy it into the relcache entry. + * This avoids problems if we get some sort of error partway through. 
+ */ + exprsDatum = heap_getattr(relation->rd_indextuple, + Anum_pg_index_indexprs, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + exprsString = TextDatumGetCString(exprsDatum); + result = (List *) stringToNode(exprsString); + pfree(exprsString); + + /* + * Run the expressions through eval_const_expressions. This is not just an + * optimization, but is necessary, because the planner will be comparing + * them to similarly-processed qual clauses, and may fail to detect valid + * matches without this. We must not use canonicalize_qual, however, + * since these aren't qual expressions. + */ + result = (List *) eval_const_expressions(NULL, (Node *) result); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) result); + + /* Now save a copy of the completed tree in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt); + relation->rd_indexprs = copyObject(result); + MemoryContextSwitchTo(oldcxt); + + return result; +} + +/* + * RelationGetDummyIndexExpressions -- get dummy expressions for an index + * + * Return a list of dummy expressions (just Const nodes) with the same + * types/typmods/collations as the index's real expressions. This is + * useful in situations where we don't want to run any user-defined code. + */ +List * +RelationGetDummyIndexExpressions(Relation relation) +{ + List *result; + Datum exprsDatum; + bool isnull; + char *exprsString; + List *rawExprs; + ListCell *lc; + + /* Quick exit if there is nothing to do. */ + if (relation->rd_indextuple == NULL || + heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL)) + return NIL; + + /* Extract raw node tree(s) from index tuple. 
*/ + exprsDatum = heap_getattr(relation->rd_indextuple, + Anum_pg_index_indexprs, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + exprsString = TextDatumGetCString(exprsDatum); + rawExprs = (List *) stringToNode(exprsString); + pfree(exprsString); + + /* Construct null Consts; the typlen and typbyval are arbitrary. */ + result = NIL; + foreach(lc, rawExprs) + { + Node *rawExpr = (Node *) lfirst(lc); + + result = lappend(result, + makeConst(exprType(rawExpr), + exprTypmod(rawExpr), + exprCollation(rawExpr), + 1, + (Datum) 0, + true, + true)); + } + + return result; +} + +/* + * RelationGetIndexPredicate -- get the index predicate for an index + * + * We cache the result of transforming pg_index.indpred into an implicit-AND + * node tree (suitable for use in planning). + * If the rel is not an index or has no predicate, we return NIL. + * Otherwise, the returned tree is copied into the caller's memory context. + * (We don't want to return a pointer to the relcache copy, since it could + * disappear due to relcache invalidation.) + */ +List * +RelationGetIndexPredicate(Relation relation) +{ + List *result; + Datum predDatum; + bool isnull; + char *predString; + MemoryContext oldcxt; + + /* Quick exit if we already computed the result. */ + if (relation->rd_indpred) + return copyObject(relation->rd_indpred); + + /* Quick exit if there is nothing to do. */ + if (relation->rd_indextuple == NULL || + heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL)) + return NIL; + + /* + * We build the tree we intend to return in the caller's context. After + * successfully completing the work, we copy it into the relcache entry. + * This avoids problems if we get some sort of error partway through. 
+ */ + predDatum = heap_getattr(relation->rd_indextuple, + Anum_pg_index_indpred, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + predString = TextDatumGetCString(predDatum); + result = (List *) stringToNode(predString); + pfree(predString); + + /* + * Run the expression through const-simplification and canonicalization. + * This is not just an optimization, but is necessary, because the planner + * will be comparing it to similarly-processed qual clauses, and may fail + * to detect valid matches without this. This must match the processing + * done to qual clauses in preprocess_expression()! (We can skip the + * stuff involving subqueries, however, since we don't allow any in index + * predicates.) + */ + result = (List *) eval_const_expressions(NULL, (Node *) result); + + result = (List *) canonicalize_qual((Expr *) result, false); + + /* Also convert to implicit-AND format */ + result = make_ands_implicit((Expr *) result); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) result); + + /* Now save a copy of the completed tree in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt); + relation->rd_indpred = copyObject(result); + MemoryContextSwitchTo(oldcxt); + + return result; +} + +/* + * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers + * + * The result has a bit set for each attribute used anywhere in the index + * definitions of all the indexes on this relation. (This includes not only + * simple index keys, but attributes used in expressions and partial-index + * predicates.) + * + * Depending on attrKind, a bitmap covering the attnums for all index columns, + * for all potential foreign key columns, or for all columns in the configured + * replica identity index is returned. + * + * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that + * we can include system attributes (e.g., OID) in the bitmap representation. 
+ * + * Caller had better hold at least RowExclusiveLock on the target relation + * to ensure it is safe (deadlock-free) for us to take locks on the relation's + * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY, + * that lock level doesn't guarantee a stable set of indexes, so we have to + * be prepared to retry here in case of a change in the set of indexes. + * + * The returned result is palloc'd in the caller's memory context and should + * be bms_free'd when not needed anymore. + */ +Bitmapset * +RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) +{ + Bitmapset *indexattrs; /* indexed columns */ + Bitmapset *uindexattrs; /* columns in unique indexes */ + Bitmapset *pkindexattrs; /* columns in the primary index */ + Bitmapset *idindexattrs; /* columns in the replica identity */ + List *indexoidlist; + List *newindexoidlist; + Oid relpkindex; + Oid relreplindex; + ListCell *l; + MemoryContext oldcxt; + + /* Quick exit if we already computed the result. */ + if (relation->rd_indexattr != NULL) + { + switch (attrKind) + { + case INDEX_ATTR_BITMAP_ALL: + return bms_copy(relation->rd_indexattr); + case INDEX_ATTR_BITMAP_KEY: + return bms_copy(relation->rd_keyattr); + case INDEX_ATTR_BITMAP_PRIMARY_KEY: + return bms_copy(relation->rd_pkattr); + case INDEX_ATTR_BITMAP_IDENTITY_KEY: + return bms_copy(relation->rd_idattr); + default: + elog(ERROR, "unknown attrKind %u", attrKind); + } + } + + /* Fast path if definitely no indexes */ + if (!RelationGetForm(relation)->relhasindex) + return NULL; + + /* + * Get cached list of index OIDs. If we have to start over, we do so here. + */ +restart: + indexoidlist = RelationGetIndexList(relation); + + /* Fall out if no indexes (but relhasindex was set) */ + if (indexoidlist == NIL) + return NULL; + + /* + * Copy the rd_pkindex and rd_replidindex values computed by + * RelationGetIndexList before proceeding. 
This is needed because a + * relcache flush could occur inside index_open below, resetting the + * fields managed by RelationGetIndexList. We need to do the work with + * stable values of these fields. + */ + relpkindex = relation->rd_pkindex; + relreplindex = relation->rd_replidindex; + + /* + * For each index, add referenced attributes to indexattrs. + * + * Note: we consider all indexes returned by RelationGetIndexList, even if + * they are not indisready or indisvalid. This is important because an + * index for which CREATE INDEX CONCURRENTLY has just started must be + * included in HOT-safety decisions (see README.HOT). If a DROP INDEX + * CONCURRENTLY is far enough along that we should ignore the index, it + * won't be returned at all by RelationGetIndexList. + */ + indexattrs = NULL; + uindexattrs = NULL; + pkindexattrs = NULL; + idindexattrs = NULL; + foreach(l, indexoidlist) + { + Oid indexOid = lfirst_oid(l); + Relation indexDesc; + Datum datum; + bool isnull; + Node *indexExpressions; + Node *indexPredicate; + int i; + bool isKey; /* candidate key */ + bool isPK; /* primary key */ + bool isIDKey; /* replica identity index */ + + indexDesc = index_open(indexOid, AccessShareLock); + + /* + * Extract index expressions and index predicate. Note: Don't use + * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because + * those might run constant expressions evaluation, which needs a + * snapshot, which we might not have here. (Also, it's probably more + * sound to collect the bitmaps before any transformations that might + * eliminate columns, but the practical impact of this is limited.) 
+ */ + + datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs, + GetPgIndexDescriptor(), &isnull); + if (!isnull) + indexExpressions = stringToNode(TextDatumGetCString(datum)); + else + indexExpressions = NULL; + + datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred, + GetPgIndexDescriptor(), &isnull); + if (!isnull) + indexPredicate = stringToNode(TextDatumGetCString(datum)); + else + indexPredicate = NULL; + + /* Can this index be referenced by a foreign key? */ + isKey = indexDesc->rd_index->indisunique && + indexExpressions == NULL && + indexPredicate == NULL; + + /* Is this a primary key? */ + isPK = (indexOid == relpkindex); + + /* Is this index the configured (or default) replica identity? */ + isIDKey = (indexOid == relreplindex); + + /* Collect simple attribute references */ + for (i = 0; i < indexDesc->rd_index->indnatts; i++) + { + int attrnum = indexDesc->rd_index->indkey.values[i]; + + /* + * Since we have covering indexes with non-key columns, we must + * handle them accurately here. non-key columns must be added into + * indexattrs, since they are in index, and HOT-update shouldn't + * miss them. Obviously, non-key columns couldn't be referenced by + * foreign key or identity key. Hence we do not include them into + * uindexattrs, pkindexattrs and idindexattrs bitmaps. 
+ */ + if (attrnum != 0) + { + indexattrs = bms_add_member(indexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isKey && i < indexDesc->rd_index->indnkeyatts) + uindexattrs = bms_add_member(uindexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isPK && i < indexDesc->rd_index->indnkeyatts) + pkindexattrs = bms_add_member(pkindexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isIDKey && i < indexDesc->rd_index->indnkeyatts) + idindexattrs = bms_add_member(idindexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + } + } + + /* Collect all attributes used in expressions, too */ + pull_varattnos(indexExpressions, 1, &indexattrs); + + /* Collect all attributes in the index predicate, too */ + pull_varattnos(indexPredicate, 1, &indexattrs); + + index_close(indexDesc, AccessShareLock); + } + + /* + * During one of the index_opens in the above loop, we might have received + * a relcache flush event on this relcache entry, which might have been + * signaling a change in the rel's index list. If so, we'd better start + * over to ensure we deliver up-to-date attribute bitmaps. + */ + newindexoidlist = RelationGetIndexList(relation); + if (equal(indexoidlist, newindexoidlist) && + relpkindex == relation->rd_pkindex && + relreplindex == relation->rd_replidindex) + { + /* Still the same index set, so proceed */ + list_free(newindexoidlist); + list_free(indexoidlist); + } + else + { + /* Gotta do it over ... 
might as well not leak memory */ + list_free(newindexoidlist); + list_free(indexoidlist); + bms_free(uindexattrs); + bms_free(pkindexattrs); + bms_free(idindexattrs); + bms_free(indexattrs); + + goto restart; + } + + /* Don't leak the old values of these bitmaps, if any */ + bms_free(relation->rd_indexattr); + relation->rd_indexattr = NULL; + bms_free(relation->rd_keyattr); + relation->rd_keyattr = NULL; + bms_free(relation->rd_pkattr); + relation->rd_pkattr = NULL; + bms_free(relation->rd_idattr); + relation->rd_idattr = NULL; + + /* + * Now save copies of the bitmaps in the relcache entry. We intentionally + * set rd_indexattr last, because that's the one that signals validity of + * the values; if we run out of memory before making that copy, we won't + * leave the relcache entry looking like the other ones are valid but + * empty. + */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_keyattr = bms_copy(uindexattrs); + relation->rd_pkattr = bms_copy(pkindexattrs); + relation->rd_idattr = bms_copy(idindexattrs); + relation->rd_indexattr = bms_copy(indexattrs); + MemoryContextSwitchTo(oldcxt); + + /* We return our original working copy for caller to play with */ + switch (attrKind) + { + case INDEX_ATTR_BITMAP_ALL: + return indexattrs; + case INDEX_ATTR_BITMAP_KEY: + return uindexattrs; + case INDEX_ATTR_BITMAP_PRIMARY_KEY: + return pkindexattrs; + case INDEX_ATTR_BITMAP_IDENTITY_KEY: + return idindexattrs; + default: + elog(ERROR, "unknown attrKind %u", attrKind); + return NULL; + } +} + +/* + * RelationGetIdentityKeyBitmap -- get a bitmap of replica identity attribute + * numbers + * + * A bitmap of index attribute numbers for the configured replica identity + * index is returned. + * + * See also comments of RelationGetIndexAttrBitmap(). + * + * This is a special purpose function used during logical replication. 
Here, + * unlike RelationGetIndexAttrBitmap(), we don't acquire a lock on the required + * index as we build the cache entry using a historic snapshot and all the + * later changes are absorbed while decoding WAL. Due to this reason, we don't + * need to retry here in case of a change in the set of indexes. + */ +Bitmapset * +RelationGetIdentityKeyBitmap(Relation relation) +{ + Bitmapset *idindexattrs = NULL; /* columns in the replica identity */ + Relation indexDesc; + int i; + Oid replidindex; + MemoryContext oldcxt; + + /* Quick exit if we already computed the result */ + if (relation->rd_idattr != NULL) + return bms_copy(relation->rd_idattr); + + /* Fast path if definitely no indexes */ + if (!RelationGetForm(relation)->relhasindex) + return NULL; + + /* Historic snapshot must be set. */ + Assert(HistoricSnapshotActive()); + + replidindex = RelationGetReplicaIndex(relation); + + /* Fall out if there is no replica identity index */ + if (!OidIsValid(replidindex)) + return NULL; + + /* Look up the description for the replica identity index */ + indexDesc = RelationIdGetRelation(replidindex); + + if (!RelationIsValid(indexDesc)) + elog(ERROR, "could not open relation with OID %u", + relation->rd_replidindex); + + /* Add referenced attributes to idindexattrs */ + for (i = 0; i < indexDesc->rd_index->indnatts; i++) + { + int attrnum = indexDesc->rd_index->indkey.values[i]; + + /* + * We don't include non-key columns into idindexattrs bitmaps. See + * RelationGetIndexAttrBitmap. 
+ */ + if (attrnum != 0) + { + if (i < indexDesc->rd_index->indnkeyatts) + idindexattrs = bms_add_member(idindexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + } + } + + RelationClose(indexDesc); + + /* Don't leak the old values of these bitmaps, if any */ + bms_free(relation->rd_idattr); + relation->rd_idattr = NULL; + + /* Now save copy of the bitmap in the relcache entry */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_idattr = bms_copy(idindexattrs); + MemoryContextSwitchTo(oldcxt); + + /* We return our original working copy for caller to play with */ + return idindexattrs; +} + +/* + * RelationGetExclusionInfo -- get info about index's exclusion constraint + * + * This should be called only for an index that is known to have an + * associated exclusion constraint. It returns arrays (palloc'd in caller's + * context) of the exclusion operator OIDs, their underlying functions' + * OIDs, and their strategy numbers in the index's opclasses. We cache + * all this information since it requires a fair amount of work to get. 
+ */ +void +RelationGetExclusionInfo(Relation indexRelation, + Oid **operators, + Oid **procs, + uint16 **strategies) +{ + int indnkeyatts; + Oid *ops; + Oid *funcs; + uint16 *strats; + Relation conrel; + SysScanDesc conscan; + ScanKeyData skey[1]; + HeapTuple htup; + bool found; + MemoryContext oldcxt; + int i; + + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); + + /* Allocate result space in caller context */ + *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); + + /* Quick exit if we have the data cached already */ + if (indexRelation->rd_exclstrats != NULL) + { + memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts); + memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts); + memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts); + return; + } + + /* + * Search pg_constraint for the constraint associated with the index. To + * make this not too painfully slow, we use the index on conrelid; that + * will hold the parent relation's OID not the index's own OID. + * + * Note: if we wanted to rely on the constraint name matching the index's + * name, we could just do a direct lookup using pg_constraint's unique + * index. For the moment it doesn't seem worth requiring that. 
+ */ + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(indexRelation->rd_index->indrelid)); + + conrel = table_open(ConstraintRelationId, AccessShareLock); + conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true, + NULL, 1, skey); + found = false; + + while (HeapTupleIsValid(htup = systable_getnext(conscan))) + { + Form_pg_constraint conform = (Form_pg_constraint) GETSTRUCT(htup); + Datum val; + bool isnull; + ArrayType *arr; + int nelem; + + /* We want the exclusion constraint owning the index */ + if (conform->contype != CONSTRAINT_EXCLUSION || + conform->conindid != RelationGetRelid(indexRelation)) + continue; + + /* There should be only one */ + if (found) + elog(ERROR, "unexpected exclusion constraint record found for rel %s", + RelationGetRelationName(indexRelation)); + found = true; + + /* Extract the operator OIDS from conexclop */ + val = fastgetattr(htup, + Anum_pg_constraint_conexclop, + conrel->rd_att, &isnull); + if (isnull) + elog(ERROR, "null conexclop for rel %s", + RelationGetRelationName(indexRelation)); + + arr = DatumGetArrayTypeP(val); /* ensure not toasted */ + nelem = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + nelem != indnkeyatts || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) + elog(ERROR, "conexclop is not a 1-D Oid array"); + + memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts); + } + + systable_endscan(conscan); + table_close(conrel, AccessShareLock); + + if (!found) + elog(ERROR, "exclusion constraint record missing for rel %s", + RelationGetRelationName(indexRelation)); + + /* We need the func OIDs and strategy numbers too */ + for (i = 0; i < indnkeyatts; i++) + { + funcs[i] = get_opcode(ops[i]); + strats[i] = get_op_opfamily_strategy(ops[i], + indexRelation->rd_opfamily[i]); + /* shouldn't fail, since it was checked at index creation */ + if (strats[i] == InvalidStrategy) + elog(ERROR, "could not find strategy for operator %u in family 
%u", + ops[i], indexRelation->rd_opfamily[i]); + } + + /* Save a copy of the results in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt); + indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts); + indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts); + memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts); + memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts); + memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts); + MemoryContextSwitchTo(oldcxt); +} + +/* + * Get publication actions for the given relation. + */ +struct PublicationActions * +GetRelationPublicationActions(Relation relation) +{ + List *puboids; + ListCell *lc; + MemoryContext oldcxt; + PublicationActions *pubactions = palloc0(sizeof(PublicationActions)); + + /* + * If not publishable, it publishes no actions. (pgoutput_change() will + * ignore it.) + */ + if (!is_publishable_relation(relation)) + return pubactions; + + if (relation->rd_pubactions) + return memcpy(pubactions, relation->rd_pubactions, + sizeof(PublicationActions)); + + /* Fetch the publication membership info. */ + puboids = GetRelationPublications(RelationGetRelid(relation)); + if (relation->rd_rel->relispartition) + { + /* Add publications that the ancestors are in too. 
*/ + List *ancestors = get_partition_ancestors(RelationGetRelid(relation)); + ListCell *lc; + + foreach(lc, ancestors) + { + Oid ancestor = lfirst_oid(lc); + + puboids = list_concat_unique_oid(puboids, + GetRelationPublications(ancestor)); + } + } + puboids = list_concat_unique_oid(puboids, GetAllTablesPublications()); + + foreach(lc, puboids) + { + Oid pubid = lfirst_oid(lc); + HeapTuple tup; + Form_pg_publication pubform; + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + pubactions->pubinsert |= pubform->pubinsert; + pubactions->pubupdate |= pubform->pubupdate; + pubactions->pubdelete |= pubform->pubdelete; + pubactions->pubtruncate |= pubform->pubtruncate; + + ReleaseSysCache(tup); + + /* + * If we know everything is replicated, there is no point to check for + * other publications. + */ + if (pubactions->pubinsert && pubactions->pubupdate && + pubactions->pubdelete && pubactions->pubtruncate) + break; + } + + if (relation->rd_pubactions) + { + pfree(relation->rd_pubactions); + relation->rd_pubactions = NULL; + } + + /* Now save copy of the actions in the relcache entry. 
*/ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_pubactions = palloc(sizeof(PublicationActions)); + memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions)); + MemoryContextSwitchTo(oldcxt); + + return pubactions; +} + +/* + * RelationGetIndexRawAttOptions -- get AM/opclass-specific options for the index + */ +Datum * +RelationGetIndexRawAttOptions(Relation indexrel) +{ + Oid indexrelid = RelationGetRelid(indexrel); + int16 natts = RelationGetNumberOfAttributes(indexrel); + Datum *options = NULL; + int16 attnum; + + for (attnum = 1; attnum <= natts; attnum++) + { + if (indexrel->rd_indam->amoptsprocnum == 0) + continue; + + if (!OidIsValid(index_getprocid(indexrel, attnum, + indexrel->rd_indam->amoptsprocnum))) + continue; + + if (!options) + options = palloc0(sizeof(Datum) * natts); + + options[attnum - 1] = get_attoptions(indexrelid, attnum); + } + + return options; +} + +static bytea ** +CopyIndexAttOptions(bytea **srcopts, int natts) +{ + bytea **opts = palloc(sizeof(*opts) * natts); + + for (int i = 0; i < natts; i++) + { + bytea *opt = srcopts[i]; + + opts[i] = !opt ? NULL : (bytea *) + DatumGetPointer(datumCopy(PointerGetDatum(opt), false, -1)); + } + + return opts; +} + +/* + * RelationGetIndexAttOptions + * get AM/opclass-specific options for an index parsed into a binary form + */ +bytea ** +RelationGetIndexAttOptions(Relation relation, bool copy) +{ + MemoryContext oldcxt; + bytea **opts = relation->rd_opcoptions; + Oid relid = RelationGetRelid(relation); + int natts = RelationGetNumberOfAttributes(relation); /* XXX + * IndexRelationGetNumberOfKeyAttributes */ + int i; + + /* Try to copy cached options. */ + if (opts) + return copy ? CopyIndexAttOptions(opts, natts) : opts; + + /* Get and parse opclass options. 
*/ + opts = palloc0(sizeof(*opts) * natts); + + for (i = 0; i < natts; i++) + { + if (criticalRelcachesBuilt && relid != AttributeRelidNumIndexId) + { + Datum attoptions = get_attoptions(relid, i + 1); + + opts[i] = index_opclass_options(relation, i + 1, attoptions, false); + + if (attoptions != (Datum) 0) + pfree(DatumGetPointer(attoptions)); + } + } + + /* Copy parsed options to the cache. */ + oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt); + relation->rd_opcoptions = CopyIndexAttOptions(opts, natts); + MemoryContextSwitchTo(oldcxt); + + if (copy) + return opts; + + for (i = 0; i < natts; i++) + { + if (opts[i]) + pfree(opts[i]); + } + + pfree(opts); + + return relation->rd_opcoptions; +} + +/* + * Routines to support ereport() reports of relation-related errors + * + * These could have been put into elog.c, but it seems like a module layering + * violation to have elog.c calling relcache or syscache stuff --- and we + * definitely don't want elog.h including rel.h. So we put them here. + */ + +/* + * errtable --- stores schema_name and table_name of a table + * within the current errordata. + */ +int +errtable(Relation rel) +{ + err_generic_string(PG_DIAG_SCHEMA_NAME, + get_namespace_name(RelationGetNamespace(rel))); + err_generic_string(PG_DIAG_TABLE_NAME, RelationGetRelationName(rel)); + + return 0; /* return value does not matter */ +} + +/* + * errtablecol --- stores schema_name, table_name and column_name + * of a table column within the current errordata. + * + * The column is specified by attribute number --- for most callers, this is + * easier and less error-prone than getting the column name for themselves. 
+ */ +int +errtablecol(Relation rel, int attnum) +{ + TupleDesc reldesc = RelationGetDescr(rel); + const char *colname; + + /* Use reldesc if it's a user attribute, else consult the catalogs */ + if (attnum > 0 && attnum <= reldesc->natts) + colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname); + else + colname = get_attname(RelationGetRelid(rel), attnum, false); + + return errtablecolname(rel, colname); +} + +/* + * errtablecolname --- stores schema_name, table_name and column_name + * of a table column within the current errordata, where the column name is + * given directly rather than extracted from the relation's catalog data. + * + * Don't use this directly unless errtablecol() is inconvenient for some + * reason. This might possibly be needed during intermediate states in ALTER + * TABLE, for instance. + */ +int +errtablecolname(Relation rel, const char *colname) +{ + errtable(rel); + err_generic_string(PG_DIAG_COLUMN_NAME, colname); + + return 0; /* return value does not matter */ +} + +/* + * errtableconstraint --- stores schema_name, table_name and constraint_name + * of a table-related constraint within the current errordata. + */ +int +errtableconstraint(Relation rel, const char *conname) +{ + errtable(rel); + err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname); + + return 0; /* return value does not matter */ +} + + +/* + * load_relcache_init_file, write_relcache_init_file + * + * In late 1992, we started regularly having databases with more than + * a thousand classes in them. With this number of classes, it became + * critical to do indexed lookups on the system catalogs. + * + * Bootstrapping these lookups is very hard. We want to be able to + * use an index on pg_attribute, for example, but in order to do so, + * we must have read pg_attribute for the attributes in the index, + * which implies that we need to use the index. 
+ * + * In order to get around the problem, we do the following: + * + * + When the database system is initialized (at initdb time), we + * don't use indexes. We do sequential scans. + * + * + When the backend is started up in normal mode, we load an image + * of the appropriate relation descriptors, in internal format, + * from an initialization file in the data/base/... directory. + * + * + If the initialization file isn't there, then we create the + * relation descriptors using sequential scans and write 'em to + * the initialization file for use by subsequent backends. + * + * As of Postgres 9.0, there is one local initialization file in each + * database, plus one shared initialization file for shared catalogs. + * + * We could dispense with the initialization files and just build the + * critical reldescs the hard way on every backend startup, but that + * slows down backend startup noticeably. + * + * We can in fact go further, and save more relcache entries than + * just the ones that are absolutely critical; this allows us to speed + * up backend startup by not having to build such entries the hard way. + * Presently, all the catalog and index entries that are referred to + * by catcaches are stored in the initialization files. + * + * The same mechanism that detects when catcache and relcache entries + * need to be invalidated (due to catalog updates) also arranges to + * unlink the initialization files when the contents may be out of date. + * The files will then be rebuilt during the next backend startup. + */ + +/* + * load_relcache_init_file -- attempt to load cache from the shared + * or local cache init file + * + * If successful, return true and set criticalRelcachesBuilt or + * criticalSharedRelcachesBuilt to true. + * If not successful, return false. + * + * NOTE: we assume we are already switched into CacheMemoryContext. 
+ */ +static bool +load_relcache_init_file(bool shared) +{ + FILE *fp; + char initfilename[MAXPGPATH]; + Relation *rels; + int relno, + num_rels, + max_rels, + nailed_rels, + nailed_indexes, + magic; + int i; + + if (shared) + snprintf(initfilename, sizeof(initfilename), "global/%s", + RELCACHE_INIT_FILENAME); + else + snprintf(initfilename, sizeof(initfilename), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); + + fp = AllocateFile(initfilename, PG_BINARY_R); + if (fp == NULL) + return false; + + /* + * Read the index relcache entries from the file. Note we will not enter + * any of them into the cache if the read fails partway through; this + * helps to guard against broken init files. + */ + max_rels = 100; + rels = (Relation *) palloc(max_rels * sizeof(Relation)); + num_rels = 0; + nailed_rels = nailed_indexes = 0; + + /* check for correct magic number (compatible version) */ + if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic)) + goto read_failed; + if (magic != RELCACHE_INIT_FILEMAGIC) + goto read_failed; + + for (relno = 0;; relno++) + { + Size len; + size_t nread; + Relation rel; + Form_pg_class relform; + bool has_not_null; + + /* first read the relation descriptor length */ + nread = fread(&len, 1, sizeof(len), fp); + if (nread != sizeof(len)) + { + if (nread == 0) + break; /* end of file */ + goto read_failed; + } + + /* safety check for incompatible relcache layout */ + if (len != sizeof(RelationData)) + goto read_failed; + + /* allocate another relcache header */ + if (num_rels >= max_rels) + { + max_rels *= 2; + rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation)); + } + + rel = rels[num_rels++] = (Relation) palloc(len); + + /* then, read the Relation structure */ + if (fread(rel, 1, len, fp) != len) + goto read_failed; + + /* next read the relation tuple form */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + relform = (Form_pg_class) palloc(len); + if (fread(relform, 1, len, fp) != len) + goto 
read_failed; + + rel->rd_rel = relform; + + /* initialize attribute tuple forms */ + rel->rd_att = CreateTemplateTupleDesc(relform->relnatts); + rel->rd_att->tdrefcount = 1; /* mark as refcounted */ + + rel->rd_att->tdtypeid = relform->reltype ? relform->reltype : RECORDOID; + rel->rd_att->tdtypmod = -1; /* just to be sure */ + + /* next read all the attribute tuple form data entries */ + has_not_null = false; + for (i = 0; i < relform->relnatts; i++) + { + Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i); + + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + if (len != ATTRIBUTE_FIXED_PART_SIZE) + goto read_failed; + if (fread(attr, 1, len, fp) != len) + goto read_failed; + + has_not_null |= attr->attnotnull; + } + + /* next read the access method specific field */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + if (len > 0) + { + rel->rd_options = palloc(len); + if (fread(rel->rd_options, 1, len, fp) != len) + goto read_failed; + if (len != VARSIZE(rel->rd_options)) + goto read_failed; /* sanity check */ + } + else + { + rel->rd_options = NULL; + } + + /* mark not-null status */ + if (has_not_null) + { + TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr)); + + constr->has_not_null = true; + rel->rd_att->constr = constr; + } + + /* + * If it's an index, there's more to do. Note we explicitly ignore + * partitioned indexes here. 
+ */ + if (rel->rd_rel->relkind == RELKIND_INDEX) + { + MemoryContext indexcxt; + Oid *opfamily; + Oid *opcintype; + RegProcedure *support; + int nsupport; + int16 *indoption; + Oid *indcollation; + + /* Count nailed indexes to ensure we have 'em all */ + if (rel->rd_isnailed) + nailed_indexes++; + + /* next, read the pg_index tuple */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + rel->rd_indextuple = (HeapTuple) palloc(len); + if (fread(rel->rd_indextuple, 1, len, fp) != len) + goto read_failed; + + /* Fix up internal pointers in the tuple -- see heap_copytuple */ + rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE); + rel->rd_index = (Form_pg_index) GETSTRUCT(rel->rd_indextuple); + + /* + * prepare index info context --- parameters should match + * RelationInitIndexAccessInfo + */ + indexcxt = AllocSetContextCreate(CacheMemoryContext, + "index info", + ALLOCSET_SMALL_SIZES); + rel->rd_indexcxt = indexcxt; + MemoryContextCopyAndSetIdentifier(indexcxt, + RelationGetRelationName(rel)); + + /* + * Now we can fetch the index AM's API struct. (We can't store + * that in the init file, since it contains function pointers that + * might vary across server executions. Fortunately, it should be + * safe to call the amhandler even while bootstrapping indexes.) 
+ */ + InitIndexAmRoutine(rel); + + /* next, read the vector of opfamily OIDs */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + opfamily = (Oid *) MemoryContextAlloc(indexcxt, len); + if (fread(opfamily, 1, len, fp) != len) + goto read_failed; + + rel->rd_opfamily = opfamily; + + /* next, read the vector of opcintype OIDs */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + opcintype = (Oid *) MemoryContextAlloc(indexcxt, len); + if (fread(opcintype, 1, len, fp) != len) + goto read_failed; + + rel->rd_opcintype = opcintype; + + /* next, read the vector of support procedure OIDs */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + support = (RegProcedure *) MemoryContextAlloc(indexcxt, len); + if (fread(support, 1, len, fp) != len) + goto read_failed; + + rel->rd_support = support; + + /* next, read the vector of collation OIDs */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + indcollation = (Oid *) MemoryContextAlloc(indexcxt, len); + if (fread(indcollation, 1, len, fp) != len) + goto read_failed; + + rel->rd_indcollation = indcollation; + + /* finally, read the vector of indoption values */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + indoption = (int16 *) MemoryContextAlloc(indexcxt, len); + if (fread(indoption, 1, len, fp) != len) + goto read_failed; + + rel->rd_indoption = indoption; + + /* finally, read the vector of opcoptions values */ + rel->rd_opcoptions = (bytea **) + MemoryContextAllocZero(indexcxt, sizeof(*rel->rd_opcoptions) * relform->relnatts); + + for (i = 0; i < relform->relnatts; i++) + { + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + if (len > 0) + { + rel->rd_opcoptions[i] = (bytea *) MemoryContextAlloc(indexcxt, len); + if (fread(rel->rd_opcoptions[i], 1, len, fp) != len) + goto read_failed; + } + } + + /* set up zeroed fmgr-info vector */ + nsupport = 
relform->relnatts * rel->rd_indam->amsupport; + rel->rd_supportinfo = (FmgrInfo *) + MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo)); + } + else + { + /* Count nailed rels to ensure we have 'em all */ + if (rel->rd_isnailed) + nailed_rels++; + + /* Load table AM data */ + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_SEQUENCE || + rel->rd_rel->relkind == RELKIND_TOASTVALUE || + rel->rd_rel->relkind == RELKIND_MATVIEW) + RelationInitTableAccessMethod(rel); + + Assert(rel->rd_index == NULL); + Assert(rel->rd_indextuple == NULL); + Assert(rel->rd_indexcxt == NULL); + Assert(rel->rd_indam == NULL); + Assert(rel->rd_opfamily == NULL); + Assert(rel->rd_opcintype == NULL); + Assert(rel->rd_support == NULL); + Assert(rel->rd_supportinfo == NULL); + Assert(rel->rd_indoption == NULL); + Assert(rel->rd_indcollation == NULL); + Assert(rel->rd_opcoptions == NULL); + } + + /* + * Rules and triggers are not saved (mainly because the internal + * format is complex and subject to change). They must be rebuilt if + * needed by RelationCacheInitializePhase3. This is not expected to + * be a big performance hit since few system catalogs have such. Ditto + * for RLS policy data, partition info, index expressions, predicates, + * exclusion info, and FDW info. 
+ */ + rel->rd_rules = NULL; + rel->rd_rulescxt = NULL; + rel->trigdesc = NULL; + rel->rd_rsdesc = NULL; + rel->rd_partkey = NULL; + rel->rd_partkeycxt = NULL; + rel->rd_partdesc = NULL; + rel->rd_partdesc_nodetached = NULL; + rel->rd_partdesc_nodetached_xmin = InvalidTransactionId; + rel->rd_pdcxt = NULL; + rel->rd_pddcxt = NULL; + rel->rd_partcheck = NIL; + rel->rd_partcheckvalid = false; + rel->rd_partcheckcxt = NULL; + rel->rd_indexprs = NIL; + rel->rd_indpred = NIL; + rel->rd_exclops = NULL; + rel->rd_exclprocs = NULL; + rel->rd_exclstrats = NULL; + rel->rd_fdwroutine = NULL; + + /* + * Reset transient-state fields in the relcache entry + */ + rel->rd_smgr = NULL; + if (rel->rd_isnailed) + rel->rd_refcnt = 1; + else + rel->rd_refcnt = 0; + rel->rd_indexvalid = false; + rel->rd_indexlist = NIL; + rel->rd_pkindex = InvalidOid; + rel->rd_replidindex = InvalidOid; + rel->rd_indexattr = NULL; + rel->rd_keyattr = NULL; + rel->rd_pkattr = NULL; + rel->rd_idattr = NULL; + rel->rd_pubactions = NULL; + rel->rd_statvalid = false; + rel->rd_statlist = NIL; + rel->rd_fkeyvalid = false; + rel->rd_fkeylist = NIL; + rel->rd_createSubid = InvalidSubTransactionId; + rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_firstRelfilenodeSubid = InvalidSubTransactionId; + rel->rd_droppedSubid = InvalidSubTransactionId; + rel->rd_amcache = NULL; + MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info)); + + /* + * Recompute lock and physical addressing info. This is needed in + * case the pg_internal.init file was copied from some other database + * by CREATE DATABASE. + */ + RelationInitLockInfo(rel); + RelationInitPhysicalAddr(rel); + } + + /* + * We reached the end of the init file without apparent problem. Did we + * get the right number of nailed items? This is a useful crosscheck in + * case the set of critical rels or indexes changes. However, that should + * not happen in a normally-running system, so let's bleat if it does. 
+ * + * For the shared init file, we're called before client authentication is + * done, which means that elog(WARNING) will go only to the postmaster + * log, where it's easily missed. To ensure that developers notice bad + * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put + * an Assert(false) there. + */ + if (shared) + { + if (nailed_rels != NUM_CRITICAL_SHARED_RELS || + nailed_indexes != NUM_CRITICAL_SHARED_INDEXES) + { + elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively", + nailed_rels, nailed_indexes, + NUM_CRITICAL_SHARED_RELS, NUM_CRITICAL_SHARED_INDEXES); + /* Make sure we get developers' attention about this */ + Assert(false); + /* In production builds, recover by bootstrapping the relcache */ + goto read_failed; + } + } + else + { + if (nailed_rels != NUM_CRITICAL_LOCAL_RELS || + nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES) + { + elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively", + nailed_rels, nailed_indexes, + NUM_CRITICAL_LOCAL_RELS, NUM_CRITICAL_LOCAL_INDEXES); + /* We don't need an Assert() in this case */ + goto read_failed; + } + } + + /* + * OK, all appears well. + * + * Now insert all the new relcache entries into the cache. + */ + for (relno = 0; relno < num_rels; relno++) + { + RelationCacheInsert(rels[relno], false); + } + + pfree(rels); + FreeFile(fp); + + if (shared) + criticalSharedRelcachesBuilt = true; + else + criticalRelcachesBuilt = true; + return true; + + /* + * init file is broken, so do it the hard way. We don't bother trying to + * free the clutter we just allocated; it's not in the relcache so it + * won't hurt. + */ +read_failed: + pfree(rels); + FreeFile(fp); + + return false; +} + +/* + * Write out a new initialization file with the current contents + * of the relcache (either shared rels or local rels, as indicated). 
+ */ +static void +write_relcache_init_file(bool shared) +{ + FILE *fp; + char tempfilename[MAXPGPATH]; + char finalfilename[MAXPGPATH]; + int magic; + HASH_SEQ_STATUS status; + RelIdCacheEnt *idhentry; + int i; + + /* + * If we have already received any relcache inval events, there's no + * chance of succeeding so we may as well skip the whole thing. + */ + if (relcacheInvalsReceived != 0L) + return; + + /* + * We must write a temporary file and rename it into place. Otherwise, + * another backend starting at about the same time might crash trying to + * read the partially-complete file. + */ + if (shared) + { + snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d", + RELCACHE_INIT_FILENAME, MyProcPid); + snprintf(finalfilename, sizeof(finalfilename), "global/%s", + RELCACHE_INIT_FILENAME); + } + else + { + snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d", + DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid); + snprintf(finalfilename, sizeof(finalfilename), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); + } + + unlink(tempfilename); /* in case it exists w/wrong permissions */ + + fp = AllocateFile(tempfilename, PG_BINARY_W); + if (fp == NULL) + { + /* + * We used to consider this a fatal error, but we might as well + * continue with backend startup ... + */ + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not create relation-cache initialization file \"%s\": %m", + tempfilename), + errdetail("Continuing anyway, but there's something wrong."))); + return; + } + + /* + * Write a magic number to serve as a file version identifier. We can + * change the magic number whenever the relcache layout changes. + */ + magic = RELCACHE_INIT_FILEMAGIC; + if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic)) + elog(FATAL, "could not write init file"); + + /* + * Write all the appropriate reldescs (in no particular order). 
+ */ + hash_seq_init(&status, RelationIdCache); + + while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL) + { + Relation rel = idhentry->reldesc; + Form_pg_class relform = rel->rd_rel; + + /* ignore if not correct group */ + if (relform->relisshared != shared) + continue; + + /* + * Ignore if not supposed to be in init file. We can allow any shared + * relation that's been loaded so far to be in the shared init file, + * but unshared relations must be ones that should be in the local + * file per RelationIdIsInInitFile. (Note: if you want to change the + * criterion for rels to be kept in the init file, see also inval.c. + * The reason for filtering here is to be sure that we don't put + * anything into the local init file for which a relcache inval would + * not cause invalidation of that init file.) + */ + if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel))) + { + /* Nailed rels had better get stored. */ + Assert(!rel->rd_isnailed); + continue; + } + + /* first write the relcache entry proper */ + write_item(rel, sizeof(RelationData), fp); + + /* next write the relation tuple form */ + write_item(relform, CLASS_TUPLE_SIZE, fp); + + /* next, do all the attribute tuple form data entries */ + for (i = 0; i < relform->relnatts; i++) + { + write_item(TupleDescAttr(rel->rd_att, i), + ATTRIBUTE_FIXED_PART_SIZE, fp); + } + + /* next, do the access method specific field */ + write_item(rel->rd_options, + (rel->rd_options ? VARSIZE(rel->rd_options) : 0), + fp); + + /* + * If it's an index, there's more to do. Note we explicitly ignore + * partitioned indexes here. + */ + if (rel->rd_rel->relkind == RELKIND_INDEX) + { + /* write the pg_index tuple */ + /* we assume this was created by heap_copytuple! 
*/ + write_item(rel->rd_indextuple, + HEAPTUPLESIZE + rel->rd_indextuple->t_len, + fp); + + /* next, write the vector of opfamily OIDs */ + write_item(rel->rd_opfamily, + relform->relnatts * sizeof(Oid), + fp); + + /* next, write the vector of opcintype OIDs */ + write_item(rel->rd_opcintype, + relform->relnatts * sizeof(Oid), + fp); + + /* next, write the vector of support procedure OIDs */ + write_item(rel->rd_support, + relform->relnatts * (rel->rd_indam->amsupport * sizeof(RegProcedure)), + fp); + + /* next, write the vector of collation OIDs */ + write_item(rel->rd_indcollation, + relform->relnatts * sizeof(Oid), + fp); + + /* finally, write the vector of indoption values */ + write_item(rel->rd_indoption, + relform->relnatts * sizeof(int16), + fp); + + Assert(rel->rd_opcoptions); + + /* finally, write the vector of opcoptions values */ + for (i = 0; i < relform->relnatts; i++) + { + bytea *opt = rel->rd_opcoptions[i]; + + write_item(opt, opt ? VARSIZE(opt) : 0, fp); + } + } + } + + if (FreeFile(fp)) + elog(FATAL, "could not write init file"); + + /* + * Now we have to check whether the data we've so painstakingly + * accumulated is already obsolete due to someone else's just-committed + * catalog changes. If so, we just delete the temp file and leave it to + * the next backend to try again. (Our own relcache entries will be + * updated by SI message processing, but we can't be sure whether what we + * wrote out was up-to-date.) + * + * This mustn't run concurrently with the code that unlinks an init file + * and sends SI messages, so grab a serialization lock for the duration. + */ + LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); + + /* Make sure we have seen all incoming SI messages */ + AcceptInvalidationMessages(); + + /* + * If we have received any SI relcache invals since backend start, assume + * we may have written out-of-date data. 
+ */ + if (relcacheInvalsReceived == 0L) + { + /* + * OK, rename the temp file to its final name, deleting any + * previously-existing init file. + * + * Note: a failure here is possible under Cygwin, if some other + * backend is holding open an unlinked-but-not-yet-gone init file. So + * treat this as a noncritical failure; just remove the useless temp + * file on failure. + */ + if (rename(tempfilename, finalfilename) < 0) + unlink(tempfilename); + } + else + { + /* Delete the already-obsolete temp file */ + unlink(tempfilename); + } + + LWLockRelease(RelCacheInitLock); +} + +/* write a chunk of data preceded by its length */ +static void +write_item(const void *data, Size len, FILE *fp) +{ + if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len)) + elog(FATAL, "could not write init file"); + if (len > 0 && fwrite(data, 1, len, fp) != len) + elog(FATAL, "could not write init file"); +} + +/* + * Determine whether a given relation (identified by OID) is one of the ones + * we should store in a relcache init file. + * + * We must cache all nailed rels, and for efficiency we should cache every rel + * that supports a syscache. The former set is almost but not quite a subset + * of the latter. The special cases are relations where + * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but + * which do not support any syscache. + */ +bool +RelationIdIsInInitFile(Oid relationId) +{ + if (relationId == SharedSecLabelRelationId || + relationId == TriggerRelidNameIndexId || + relationId == DatabaseNameIndexId || + relationId == SharedSecLabelObjectIndexId) + { + /* + * If this Assert fails, we don't need the applicable special case + * anymore. + */ + Assert(!RelationSupportsSysCache(relationId)); + return true; + } + return RelationSupportsSysCache(relationId); +} + +/* + * Invalidate (remove) the init file during commit of a transaction that + * changed one or more of the relation cache entries that are kept in the + * local init file. 
+ * + * To be safe against concurrent inspection or rewriting of the init file, + * we must take RelCacheInitLock, then remove the old init file, then send + * the SI messages that include relcache inval for such relations, and then + * release RelCacheInitLock. This serializes the whole affair against + * write_relcache_init_file, so that we can be sure that any other process + * that's concurrently trying to create a new init file won't move an + * already-stale version into place after we unlink. Also, because we unlink + * before sending the SI messages, a backend that's currently starting cannot + * read the now-obsolete init file and then miss the SI messages that will + * force it to update its relcache entries. (This works because the backend + * startup sequence gets into the sinval array before trying to load the init + * file.) + * + * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate, + * then release the lock in RelationCacheInitFilePostInvalidate. Caller must + * send any pending SI messages between those calls. + */ +void +RelationCacheInitFilePreInvalidate(void) +{ + char localinitfname[MAXPGPATH]; + char sharedinitfname[MAXPGPATH]; + + if (DatabasePath) + snprintf(localinitfname, sizeof(localinitfname), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); + snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s", + RELCACHE_INIT_FILENAME); + + LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); + + /* + * The files might not be there if no backend has been started since the + * last removal. But complain about failures other than ENOENT with + * ERROR. Fortunately, it's not too late to abort the transaction if we + * can't get rid of the would-be-obsolete init file. + */ + if (DatabasePath) + unlink_initfile(localinitfname, ERROR); + unlink_initfile(sharedinitfname, ERROR); +} + +void +RelationCacheInitFilePostInvalidate(void) +{ + LWLockRelease(RelCacheInitLock); +} + +/* + * Remove the init files during postmaster startup. 
+ * + * We used to keep the init files across restarts, but that is unsafe in PITR + * scenarios, and even in simple crash-recovery cases there are windows for + * the init files to become out-of-sync with the database. So now we just + * remove them during startup and expect the first backend launch to rebuild + * them. Of course, this has to happen in each database of the cluster. + */ +void +RelationCacheInitFileRemove(void) +{ + const char *tblspcdir = "pg_tblspc"; + DIR *dir; + struct dirent *de; + char path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)]; + + snprintf(path, sizeof(path), "global/%s", + RELCACHE_INIT_FILENAME); + unlink_initfile(path, LOG); + + /* Scan everything in the default tablespace */ + RelationCacheInitFileRemoveInDir("base"); + + /* Scan the tablespace link directory to find non-default tablespaces */ + dir = AllocateDir(tblspcdir); + + while ((de = ReadDirExtended(dir, tblspcdir, LOG)) != NULL) + { + if (strspn(de->d_name, "0123456789") == strlen(de->d_name)) + { + /* Scan the tablespace dir for per-database dirs */ + snprintf(path, sizeof(path), "%s/%s/%s", + tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY); + RelationCacheInitFileRemoveInDir(path); + } + } + + FreeDir(dir); +} + +/* Process one per-tablespace directory for RelationCacheInitFileRemove */ +static void +RelationCacheInitFileRemoveInDir(const char *tblspcpath) +{ + DIR *dir; + struct dirent *de; + char initfilename[MAXPGPATH * 2]; + + /* Scan the tablespace directory to find per-database directories */ + dir = AllocateDir(tblspcpath); + + while ((de = ReadDirExtended(dir, tblspcpath, LOG)) != NULL) + { + if (strspn(de->d_name, "0123456789") == strlen(de->d_name)) + { + /* Try to remove the init file in each database */ + snprintf(initfilename, sizeof(initfilename), "%s/%s/%s", + tblspcpath, de->d_name, RELCACHE_INIT_FILENAME); + unlink_initfile(initfilename, LOG); + } + } + + FreeDir(dir); +} + +static void +unlink_initfile(const char *initfilename, int 
elevel) +{ + if (unlink(initfilename) < 0) + { + /* It might not be there, but log any error other than ENOENT */ + if (errno != ENOENT) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not remove cache file \"%s\": %m", + initfilename))); + } +} diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c new file mode 100644 index 0000000..56d7c73 --- /dev/null +++ b/src/backend/utils/cache/relfilenodemap.c @@ -0,0 +1,244 @@ +/*------------------------------------------------------------------------- + * + * relfilenodemap.c + * relfilenode to oid mapping cache. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/relfilenodemap.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/pg_class.h" +#include "catalog/pg_tablespace.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/rel.h" +#include "utils/relfilenodemap.h" +#include "utils/relmapper.h" + +/* Hash table for information about each relfilenode <-> oid pair */ +static HTAB *RelfilenodeMapHash = NULL; + +/* built first time through in InitializeRelfilenodeMap */ +static ScanKeyData relfilenode_skey[2]; + +typedef struct +{ + Oid reltablespace; + Oid relfilenode; +} RelfilenodeMapKey; + +typedef struct +{ + RelfilenodeMapKey key; /* lookup key - must be first */ + Oid relid; /* pg_class.oid */ +} RelfilenodeMapEntry; + +/* + * RelfilenodeMapInvalidateCallback + * Flush mapping entries when pg_class is updated in a relevant fashion. 
+ */ +static void +RelfilenodeMapInvalidateCallback(Datum arg, Oid relid) +{ + HASH_SEQ_STATUS status; + RelfilenodeMapEntry *entry; + + /* callback only gets registered after creating the hash */ + Assert(RelfilenodeMapHash != NULL); + + hash_seq_init(&status, RelfilenodeMapHash); + while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL) + { + /* + * If relid is InvalidOid, signaling a complete reset, we must remove + * all entries, otherwise just remove the specific relation's entry. + * Always remove negative cache entries. + */ + if (relid == InvalidOid || /* complete reset */ + entry->relid == InvalidOid || /* negative cache entry */ + entry->relid == relid) /* individual flushed relation */ + { + if (hash_search(RelfilenodeMapHash, + (void *) &entry->key, + HASH_REMOVE, + NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } +} + +/* + * InitializeRelfilenodeMap + * Initialize cache, either on first use or after a reset. + */ +static void +InitializeRelfilenodeMap(void) +{ + HASHCTL ctl; + int i; + + /* Make sure we've initialized CacheMemoryContext. */ + if (CacheMemoryContext == NULL) + CreateCacheMemoryContext(); + + /* build skey */ + MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey)); + + for (i = 0; i < 2; i++) + { + fmgr_info_cxt(F_OIDEQ, + &relfilenode_skey[i].sk_func, + CacheMemoryContext); + relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber; + relfilenode_skey[i].sk_subtype = InvalidOid; + relfilenode_skey[i].sk_collation = InvalidOid; + } + + relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace; + relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode; + + /* + * Only create the RelfilenodeMapHash now, so we don't end up partially + * initialized when fmgr_info_cxt() above ERRORs out with an out of memory + * error. 
+ */ + ctl.keysize = sizeof(RelfilenodeMapKey); + ctl.entrysize = sizeof(RelfilenodeMapEntry); + ctl.hcxt = CacheMemoryContext; + + RelfilenodeMapHash = + hash_create("RelfilenodeMap cache", 64, &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* Watch for invalidation events. */ + CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback, + (Datum) 0); +} + +/* + * Map a relation's (tablespace, filenode) to a relation's oid and cache the + * result. + * + * Returns InvalidOid if no relation matching the criteria could be found. + */ +Oid +RelidByRelfilenode(Oid reltablespace, Oid relfilenode) +{ + RelfilenodeMapKey key; + RelfilenodeMapEntry *entry; + bool found; + SysScanDesc scandesc; + Relation relation; + HeapTuple ntp; + ScanKeyData skey[2]; + Oid relid; + + if (RelfilenodeMapHash == NULL) + InitializeRelfilenodeMap(); + + /* pg_class will show 0 when the value is actually MyDatabaseTableSpace */ + if (reltablespace == MyDatabaseTableSpace) + reltablespace = 0; + + MemSet(&key, 0, sizeof(key)); + key.reltablespace = reltablespace; + key.relfilenode = relfilenode; + + /* + * Check cache and return entry if one is found. Even if no target + * relation can be found later on we store the negative match and return a + * InvalidOid from cache. That's not really necessary for performance + * since querying invalid values isn't supposed to be a frequent thing, + * but it's basically free. + */ + entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_FIND, &found); + + if (found) + return entry->relid; + + /* ok, no previous cache entry, do it the hard way */ + + /* initialize empty/negative cache entry before doing the actual lookups */ + relid = InvalidOid; + + if (reltablespace == GLOBALTABLESPACE_OID) + { + /* + * Ok, shared table, check relmapper. + */ + relid = RelationMapFilenodeToOid(relfilenode, true); + } + else + { + /* + * Not a shared table, could either be a plain relation or a + * non-shared, nailed one, like e.g. pg_class. 
+ */ + + /* check for plain relations by looking in pg_class */ + relation = table_open(RelationRelationId, AccessShareLock); + + /* copy scankey to local copy, it will be modified during the scan */ + memcpy(skey, relfilenode_skey, sizeof(skey)); + + /* set scan arguments */ + skey[0].sk_argument = ObjectIdGetDatum(reltablespace); + skey[1].sk_argument = ObjectIdGetDatum(relfilenode); + + scandesc = systable_beginscan(relation, + ClassTblspcRelfilenodeIndexId, + true, + NULL, + 2, + skey); + + found = false; + + while (HeapTupleIsValid(ntp = systable_getnext(scandesc))) + { + Form_pg_class classform = (Form_pg_class) GETSTRUCT(ntp); + + if (found) + elog(ERROR, + "unexpected duplicate for tablespace %u, relfilenode %u", + reltablespace, relfilenode); + found = true; + + Assert(classform->reltablespace == reltablespace); + Assert(classform->relfilenode == relfilenode); + relid = classform->oid; + } + + systable_endscan(scandesc); + table_close(relation, AccessShareLock); + + /* check for tables that are mapped but not shared */ + if (!found) + relid = RelationMapFilenodeToOid(relfilenode, false); + } + + /* + * Only enter entry into cache now, our opening of pg_class could have + * caused cache invalidations to be executed which would have deleted a + * new entry if we had entered it above. + */ + entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found); + if (found) + elog(ERROR, "corrupted hashtable"); + entry->relid = relid; + + return relid; +} diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c new file mode 100644 index 0000000..a6e38ad --- /dev/null +++ b/src/backend/utils/cache/relmapper.c @@ -0,0 +1,1045 @@ +/*------------------------------------------------------------------------- + * + * relmapper.c + * Catalog-to-filenode mapping + * + * For most tables, the physical file underlying the table is specified by + * pg_class.relfilenode. 
However, that obviously won't work for pg_class + * itself, nor for the other "nailed" catalogs for which we have to be able + * to set up working Relation entries without access to pg_class. It also + * does not work for shared catalogs, since there is no practical way to + * update other databases' pg_class entries when relocating a shared catalog. + * Therefore, for these special catalogs (henceforth referred to as "mapped + * catalogs") we rely on a separately maintained file that shows the mapping + * from catalog OIDs to filenode numbers. Each database has a map file for + * its local mapped catalogs, and there is a separate map file for shared + * catalogs. Mapped catalogs have zero in their pg_class.relfilenode entries. + * + * Relocation of a normal table is committed (ie, the new physical file becomes + * authoritative) when the pg_class row update commits. For mapped catalogs, + * the act of updating the map file is effectively commit of the relocation. + * We postpone the file update till just before commit of the transaction + * doing the rewrite, but there is necessarily a window between. Therefore + * mapped catalogs can only be relocated by operations such as VACUUM FULL + * and CLUSTER, which make no transactionally-significant changes: it must be + * safe for the new file to replace the old, even if the transaction itself + * aborts. An important factor here is that the indexes and toast table of + * a mapped catalog must also be mapped, so that the rewrites/relocations of + * all these files commit in a single map file update rather than being tied + * to transaction commit. 
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/cache/relmapper.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "catalog/catalog.h" +#include "catalog/pg_tablespace.h" +#include "catalog/storage.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/fd.h" +#include "storage/lwlock.h" +#include "utils/inval.h" +#include "utils/relmapper.h" + + +/* + * The map file is critical data: we have no automatic method for recovering + * from loss or corruption of it. We use a CRC so that we can detect + * corruption. To minimize the risk of failed updates, the map file should + * be kept to no more than one standard-size disk sector (ie 512 bytes), + * and we use overwrite-in-place rather than playing renaming games. + * The struct layout below is designed to occupy exactly 512 bytes, which + * might make filesystem updates a bit more efficient. + * + * Entries in the mappings[] array are in no particular order. We could + * speed searching by insisting on OID order, but it really shouldn't be + * worth the trouble given the intended size of the mapping sets. 
+ */ +#define RELMAPPER_FILENAME "pg_filenode.map" + +#define RELMAPPER_FILEMAGIC 0x592717 /* version ID value */ + +#define MAX_MAPPINGS 62 /* 62 * 8 + 16 = 512 */ + +typedef struct RelMapping +{ + Oid mapoid; /* OID of a catalog */ + Oid mapfilenode; /* its filenode number */ +} RelMapping; + +typedef struct RelMapFile +{ + int32 magic; /* always RELMAPPER_FILEMAGIC */ + int32 num_mappings; /* number of valid RelMapping entries */ + RelMapping mappings[MAX_MAPPINGS]; + pg_crc32c crc; /* CRC of all above */ + int32 pad; /* to make the struct size be 512 exactly */ +} RelMapFile; + +/* + * State for serializing local and shared relmappings for parallel workers + * (active states only). See notes on active_* and pending_* updates state. + */ +typedef struct SerializedActiveRelMaps +{ + RelMapFile active_shared_updates; + RelMapFile active_local_updates; +} SerializedActiveRelMaps; + +/* + * The currently known contents of the shared map file and our database's + * local map file are stored here. These can be reloaded from disk + * immediately whenever we receive an update sinval message. + */ +static RelMapFile shared_map; +static RelMapFile local_map; + +/* + * We use the same RelMapFile data structure to track uncommitted local + * changes in the mappings (but note the magic and crc fields are not made + * valid in these variables). Currently, map updates are not allowed within + * subtransactions, so one set of transaction-level changes is sufficient. + * + * The active_xxx variables contain updates that are valid in our transaction + * and should be honored by RelationMapOidToFilenode. The pending_xxx + * variables contain updates we have been told about that aren't active yet; + * they will become active at the next CommandCounterIncrement. This setup + * lets map updates act similarly to updates of pg_class rows, ie, they + * become visible only at the next CommandCounterIncrement boundary. 
+ * + * Active shared and active local updates are serialized by the parallel + * infrastructure, and deserialized within parallel workers. + */ +static RelMapFile active_shared_updates; +static RelMapFile active_local_updates; +static RelMapFile pending_shared_updates; +static RelMapFile pending_local_updates; + + +/* non-export function prototypes */ +static void apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, + bool add_okay); +static void merge_map_updates(RelMapFile *map, const RelMapFile *updates, + bool add_okay); +static void load_relmap_file(bool shared, bool lock_held); +static void write_relmap_file(bool shared, RelMapFile *newmap, + bool write_wal, bool send_sinval, bool preserve_files, + Oid dbid, Oid tsid, const char *dbpath); +static void perform_relmap_update(bool shared, const RelMapFile *updates); + + +/* + * RelationMapOidToFilenode + * + * The raison d' etre ... given a relation OID, look up its filenode. + * + * Although shared and local relation OIDs should never overlap, the caller + * always knows which we need --- so pass that information to avoid useless + * searching. + * + * Returns InvalidOid if the OID is not known (which should never happen, + * but the caller is in a better position to report a meaningful error). 
+ */ +Oid +RelationMapOidToFilenode(Oid relationId, bool shared) +{ + const RelMapFile *map; + int32 i; + + /* If there are active updates, believe those over the main maps */ + if (shared) + { + map = &active_shared_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + return map->mappings[i].mapfilenode; + } + map = &shared_map; + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + return map->mappings[i].mapfilenode; + } + } + else + { + map = &active_local_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + return map->mappings[i].mapfilenode; + } + map = &local_map; + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + return map->mappings[i].mapfilenode; + } + } + + return InvalidOid; +} + +/* + * RelationMapFilenodeToOid + * + * Do the reverse of the normal direction of mapping done in + * RelationMapOidToFilenode. + * + * This is not supposed to be used during normal running but rather for + * information purposes when looking at the filesystem or xlog. + * + * Returns InvalidOid if the OID is not known; this can easily happen if the + * relfilenode doesn't pertain to a mapped relation. 
+ */ +Oid +RelationMapFilenodeToOid(Oid filenode, bool shared) +{ + const RelMapFile *map; + int32 i; + + /* If there are active updates, believe those over the main maps */ + if (shared) + { + map = &active_shared_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &shared_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + else + { + map = &active_local_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &local_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + + return InvalidOid; +} + +/* + * RelationMapUpdateMap + * + * Install a new relfilenode mapping for the specified relation. + * + * If immediate is true (or we're bootstrapping), the mapping is activated + * immediately. Otherwise it is made pending until CommandCounterIncrement. + */ +void +RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, + bool immediate) +{ + RelMapFile *map; + + if (IsBootstrapProcessingMode()) + { + /* + * In bootstrap mode, the mapping gets installed in permanent map. + */ + if (shared) + map = &shared_map; + else + map = &local_map; + } + else + { + /* + * We don't currently support map changes within subtransactions, or + * when in parallel mode. This could be done with more bookkeeping + * infrastructure, but it doesn't presently seem worth it. 
+ */ + if (GetCurrentTransactionNestLevel() > 1) + elog(ERROR, "cannot change relation mapping within subtransaction"); + + if (IsInParallelMode()) + elog(ERROR, "cannot change relation mapping in parallel mode"); + + if (immediate) + { + /* Make it active, but only locally */ + if (shared) + map = &active_shared_updates; + else + map = &active_local_updates; + } + else + { + /* Make it pending */ + if (shared) + map = &pending_shared_updates; + else + map = &pending_local_updates; + } + } + apply_map_update(map, relationId, fileNode, true); +} + +/* + * apply_map_update + * + * Insert a new mapping into the given map variable, replacing any existing + * mapping for the same relation. + * + * In some cases the caller knows there must be an existing mapping; pass + * add_okay = false to draw an error if not. + */ +static void +apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, bool add_okay) +{ + int32 i; + + /* Replace any existing mapping */ + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + { + map->mappings[i].mapfilenode = fileNode; + return; + } + } + + /* Nope, need to add a new mapping */ + if (!add_okay) + elog(ERROR, "attempt to apply a mapping to unmapped relation %u", + relationId); + if (map->num_mappings >= MAX_MAPPINGS) + elog(ERROR, "ran out of space in relation map"); + map->mappings[map->num_mappings].mapoid = relationId; + map->mappings[map->num_mappings].mapfilenode = fileNode; + map->num_mappings++; +} + +/* + * merge_map_updates + * + * Merge all the updates in the given pending-update map into the target map. + * This is just a bulk form of apply_map_update. 
+ */ +static void +merge_map_updates(RelMapFile *map, const RelMapFile *updates, bool add_okay) +{ + int32 i; + + for (i = 0; i < updates->num_mappings; i++) + { + apply_map_update(map, + updates->mappings[i].mapoid, + updates->mappings[i].mapfilenode, + add_okay); + } +} + +/* + * RelationMapRemoveMapping + * + * Remove a relation's entry in the map. This is only allowed for "active" + * (but not committed) local mappings. We need it so we can back out the + * entry for the transient target file when doing VACUUM FULL/CLUSTER on + * a mapped relation. + */ +void +RelationMapRemoveMapping(Oid relationId) +{ + RelMapFile *map = &active_local_updates; + int32 i; + + for (i = 0; i < map->num_mappings; i++) + { + if (relationId == map->mappings[i].mapoid) + { + /* Found it, collapse it out */ + map->mappings[i] = map->mappings[map->num_mappings - 1]; + map->num_mappings--; + return; + } + } + elog(ERROR, "could not find temporary mapping for relation %u", + relationId); +} + +/* + * RelationMapInvalidate + * + * This routine is invoked for SI cache flush messages. We must re-read + * the indicated map file. However, we might receive a SI message in a + * process that hasn't yet, and might never, load the mapping files; + * for example the autovacuum launcher, which *must not* try to read + * a local map since it is attached to no particular database. + * So, re-read only if the map is valid now. + */ +void +RelationMapInvalidate(bool shared) +{ + if (shared) + { + if (shared_map.magic == RELMAPPER_FILEMAGIC) + load_relmap_file(true, false); + } + else + { + if (local_map.magic == RELMAPPER_FILEMAGIC) + load_relmap_file(false, false); + } +} + +/* + * RelationMapInvalidateAll + * + * Reload all map files. This is used to recover from SI message buffer + * overflow: we can't be sure if we missed an inval message. + * Again, reload only currently-valid maps. 
+ */ +void +RelationMapInvalidateAll(void) +{ + if (shared_map.magic == RELMAPPER_FILEMAGIC) + load_relmap_file(true, false); + if (local_map.magic == RELMAPPER_FILEMAGIC) + load_relmap_file(false, false); +} + +/* + * AtCCI_RelationMap + * + * Activate any "pending" relation map updates at CommandCounterIncrement time. + */ +void +AtCCI_RelationMap(void) +{ + if (pending_shared_updates.num_mappings != 0) + { + merge_map_updates(&active_shared_updates, + &pending_shared_updates, + true); + pending_shared_updates.num_mappings = 0; + } + if (pending_local_updates.num_mappings != 0) + { + merge_map_updates(&active_local_updates, + &pending_local_updates, + true); + pending_local_updates.num_mappings = 0; + } +} + +/* + * AtEOXact_RelationMap + * + * Handle relation mapping at main-transaction commit or abort. + * + * During commit, this must be called as late as possible before the actual + * transaction commit, so as to minimize the window where the transaction + * could still roll back after committing map changes. Although nothing + * critically bad happens in such a case, we still would prefer that it + * not happen, since we'd possibly be losing useful updates to the relations' + * pg_class row(s). + * + * During abort, we just have to throw away any pending map changes. + * Normal post-abort cleanup will take care of fixing relcache entries. + * Parallel worker commit/abort is handled by resetting active mappings + * that may have been received from the leader process. (There should be + * no pending updates in parallel workers.) + */ +void +AtEOXact_RelationMap(bool isCommit, bool isParallelWorker) +{ + if (isCommit && !isParallelWorker) + { + /* + * We should not get here with any "pending" updates. (We could + * logically choose to treat such as committed, but in the current + * code this should never happen.) 
+ */ + Assert(pending_shared_updates.num_mappings == 0); + Assert(pending_local_updates.num_mappings == 0); + + /* + * Write any active updates to the actual map files, then reset them. + */ + if (active_shared_updates.num_mappings != 0) + { + perform_relmap_update(true, &active_shared_updates); + active_shared_updates.num_mappings = 0; + } + if (active_local_updates.num_mappings != 0) + { + perform_relmap_update(false, &active_local_updates); + active_local_updates.num_mappings = 0; + } + } + else + { + /* Abort or parallel worker --- drop all local and pending updates */ + Assert(!isParallelWorker || pending_shared_updates.num_mappings == 0); + Assert(!isParallelWorker || pending_local_updates.num_mappings == 0); + + active_shared_updates.num_mappings = 0; + active_local_updates.num_mappings = 0; + pending_shared_updates.num_mappings = 0; + pending_local_updates.num_mappings = 0; + } +} + +/* + * AtPrepare_RelationMap + * + * Handle relation mapping at PREPARE. + * + * Currently, we don't support preparing any transaction that changes the map. + */ +void +AtPrepare_RelationMap(void) +{ + if (active_shared_updates.num_mappings != 0 || + active_local_updates.num_mappings != 0 || + pending_shared_updates.num_mappings != 0 || + pending_local_updates.num_mappings != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot PREPARE a transaction that modified relation mapping"))); +} + +/* + * CheckPointRelationMap + * + * This is called during a checkpoint. It must ensure that any relation map + * updates that were WAL-logged before the start of the checkpoint are + * securely flushed to disk and will not need to be replayed later. This + * seems unlikely to be a performance-critical issue, so we use a simple + * method: we just take and release the RelationMappingLock. This ensures + * that any already-logged map update is complete, because write_relmap_file + * will fsync the map file before the lock is released. 
+ */ +void +CheckPointRelationMap(void) +{ + LWLockAcquire(RelationMappingLock, LW_SHARED); + LWLockRelease(RelationMappingLock); +} + +/* + * RelationMapFinishBootstrap + * + * Write out the initial relation mapping files at the completion of + * bootstrap. All the mapped files should have been made known to us + * via RelationMapUpdateMap calls. + */ +void +RelationMapFinishBootstrap(void) +{ + Assert(IsBootstrapProcessingMode()); + + /* Shouldn't be anything "pending" ... */ + Assert(active_shared_updates.num_mappings == 0); + Assert(active_local_updates.num_mappings == 0); + Assert(pending_shared_updates.num_mappings == 0); + Assert(pending_local_updates.num_mappings == 0); + + /* Write the files; no WAL or sinval needed */ + write_relmap_file(true, &shared_map, false, false, false, + InvalidOid, GLOBALTABLESPACE_OID, NULL); + write_relmap_file(false, &local_map, false, false, false, + MyDatabaseId, MyDatabaseTableSpace, DatabasePath); +} + +/* + * RelationMapInitialize + * + * This initializes the mapper module at process startup. We can't access the + * database yet, so just make sure the maps are empty. + */ +void +RelationMapInitialize(void) +{ + /* The static variables should initialize to zeroes, but let's be sure */ + shared_map.magic = 0; /* mark it not loaded */ + local_map.magic = 0; + shared_map.num_mappings = 0; + local_map.num_mappings = 0; + active_shared_updates.num_mappings = 0; + active_local_updates.num_mappings = 0; + pending_shared_updates.num_mappings = 0; + pending_local_updates.num_mappings = 0; +} + +/* + * RelationMapInitializePhase2 + * + * This is called to prepare for access to pg_database during startup. + * We should be able to read the shared map file now. + */ +void +RelationMapInitializePhase2(void) +{ + /* + * In bootstrap mode, the map file isn't there yet, so do nothing. + */ + if (IsBootstrapProcessingMode()) + return; + + /* + * Load the shared map file, die on error. 
+ */ + load_relmap_file(true, false); +} + +/* + * RelationMapInitializePhase3 + * + * This is called as soon as we have determined MyDatabaseId and set up + * DatabasePath. At this point we should be able to read the local map file. + */ +void +RelationMapInitializePhase3(void) +{ + /* + * In bootstrap mode, the map file isn't there yet, so do nothing. + */ + if (IsBootstrapProcessingMode()) + return; + + /* + * Load the local map file, die on error. + */ + load_relmap_file(false, false); +} + +/* + * EstimateRelationMapSpace + * + * Estimate space needed to pass active shared and local relmaps to parallel + * workers. + */ +Size +EstimateRelationMapSpace(void) +{ + return sizeof(SerializedActiveRelMaps); +} + +/* + * SerializeRelationMap + * + * Serialize active shared and local relmap state for parallel workers. + */ +void +SerializeRelationMap(Size maxSize, char *startAddress) +{ + SerializedActiveRelMaps *relmaps; + + Assert(maxSize >= EstimateRelationMapSpace()); + + relmaps = (SerializedActiveRelMaps *) startAddress; + relmaps->active_shared_updates = active_shared_updates; + relmaps->active_local_updates = active_local_updates; +} + +/* + * RestoreRelationMap + * + * Restore active shared and local relmap state within a parallel worker. + */ +void +RestoreRelationMap(char *startAddress) +{ + SerializedActiveRelMaps *relmaps; + + if (active_shared_updates.num_mappings != 0 || + active_local_updates.num_mappings != 0 || + pending_shared_updates.num_mappings != 0 || + pending_local_updates.num_mappings != 0) + elog(ERROR, "parallel worker has existing mappings"); + + relmaps = (SerializedActiveRelMaps *) startAddress; + active_shared_updates = relmaps->active_shared_updates; + active_local_updates = relmaps->active_local_updates; +} + +/* + * load_relmap_file -- load data from the shared or local map file + * + * Because the map file is essential for access to core system catalogs, + * failure to read it is a fatal error. 
+ * + * Note that the local case requires DatabasePath to be set up. + */ +static void +load_relmap_file(bool shared, bool lock_held) +{ + RelMapFile *map; + char mapfilename[MAXPGPATH]; + pg_crc32c crc; + int fd; + int r; + + if (shared) + { + snprintf(mapfilename, sizeof(mapfilename), "global/%s", + RELMAPPER_FILENAME); + map = &shared_map; + } + else + { + snprintf(mapfilename, sizeof(mapfilename), "%s/%s", + DatabasePath, RELMAPPER_FILENAME); + map = &local_map; + } + + /* Read data ... */ + fd = OpenTransientFile(mapfilename, O_RDONLY | PG_BINARY); + if (fd < 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + mapfilename))); + + /* + * Grab the lock to prevent the file from being updated while we read it, + * unless the caller is already holding the lock. If the file is updated + * shortly after we look, the sinval signaling mechanism will make us + * re-read it before we are able to access any relation that's affected by + * the change. + */ + if (!lock_held) + LWLockAcquire(RelationMappingLock, LW_SHARED); + + pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ); + r = read(fd, map, sizeof(RelMapFile)); + if (r != sizeof(RelMapFile)) + { + if (r < 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", mapfilename))); + else + ereport(FATAL, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read file \"%s\": read %d of %zu", + mapfilename, r, sizeof(RelMapFile)))); + } + pgstat_report_wait_end(); + + if (!lock_held) + LWLockRelease(RelationMappingLock); + + if (CloseTransientFile(fd) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + mapfilename))); + + /* check for correct magic number, etc */ + if (map->magic != RELMAPPER_FILEMAGIC || + map->num_mappings < 0 || + map->num_mappings > MAX_MAPPINGS) + ereport(FATAL, + (errmsg("relation mapping file \"%s\" contains invalid data", + mapfilename))); + + /* verify the CRC */ + 
INIT_CRC32C(crc); + COMP_CRC32C(crc, (char *) map, offsetof(RelMapFile, crc)); + FIN_CRC32C(crc); + + if (!EQ_CRC32C(crc, map->crc)) + ereport(FATAL, + (errmsg("relation mapping file \"%s\" contains incorrect checksum", + mapfilename))); +} + +/* + * Write out a new shared or local map file with the given contents. + * + * The magic number and CRC are automatically updated in *newmap. On + * success, we copy the data to the appropriate permanent static variable. + * + * If write_wal is true then an appropriate WAL message is emitted. + * (It will be false for bootstrap and WAL replay cases.) + * + * If send_sinval is true then a SI invalidation message is sent. + * (This should be true except in bootstrap case.) + * + * If preserve_files is true then the storage manager is warned not to + * delete the files listed in the map. + * + * Because this may be called during WAL replay when MyDatabaseId, + * DatabasePath, etc aren't valid, we require the caller to pass in suitable + * values. The caller is also responsible for being sure no concurrent + * map update could be happening. + */ +static void +write_relmap_file(bool shared, RelMapFile *newmap, + bool write_wal, bool send_sinval, bool preserve_files, + Oid dbid, Oid tsid, const char *dbpath) +{ + int fd; + RelMapFile *realmap; + char mapfilename[MAXPGPATH]; + + /* + * Fill in the overhead fields and update CRC. + */ + newmap->magic = RELMAPPER_FILEMAGIC; + if (newmap->num_mappings < 0 || newmap->num_mappings > MAX_MAPPINGS) + elog(ERROR, "attempt to write bogus relation mapping"); + + INIT_CRC32C(newmap->crc); + COMP_CRC32C(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc)); + FIN_CRC32C(newmap->crc); + + /* + * Open the target file. We prefer to do this before entering the + * critical section, so that an open() failure need not force PANIC. 
+ */ + if (shared) + { + snprintf(mapfilename, sizeof(mapfilename), "global/%s", + RELMAPPER_FILENAME); + realmap = &shared_map; + } + else + { + snprintf(mapfilename, sizeof(mapfilename), "%s/%s", + dbpath, RELMAPPER_FILENAME); + realmap = &local_map; + } + + fd = OpenTransientFile(mapfilename, O_WRONLY | O_CREAT | PG_BINARY); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + mapfilename))); + + if (write_wal) + { + xl_relmap_update xlrec; + XLogRecPtr lsn; + + /* now errors are fatal ... */ + START_CRIT_SECTION(); + + xlrec.dbid = dbid; + xlrec.tsid = tsid; + xlrec.nbytes = sizeof(RelMapFile); + + XLogBeginInsert(); + XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate); + XLogRegisterData((char *) newmap, sizeof(RelMapFile)); + + lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE); + + /* As always, WAL must hit the disk before the data update does */ + XLogFlush(lsn); + } + + errno = 0; + pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE); + if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile)) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + mapfilename))); + } + pgstat_report_wait_end(); + + /* + * We choose to fsync the data to disk before considering the task done. + * It would be possible to relax this if it turns out to be a performance + * issue, but it would complicate checkpointing --- see notes for + * CheckPointRelationMap. 
+ */ + pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC); + if (pg_fsync(fd) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + mapfilename))); + pgstat_report_wait_end(); + + if (CloseTransientFile(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + mapfilename))); + + /* + * Now that the file is safely on disk, send sinval message to let other + * backends know to re-read it. We must do this inside the critical + * section: if for some reason we fail to send the message, we have to + * force a database-wide PANIC. Otherwise other backends might continue + * execution with stale mapping information, which would be catastrophic + * as soon as others began to use the now-committed data. + */ + if (send_sinval) + CacheInvalidateRelmap(dbid); + + /* + * Make sure that the files listed in the map are not deleted if the outer + * transaction aborts. This had better be within the critical section + * too: it's not likely to fail, but if it did, we'd arrive at transaction + * abort with the files still vulnerable. PANICing will leave things in a + * good state on-disk. + * + * Note: we're cheating a little bit here by assuming that mapped files + * are either in pg_global or the database's default tablespace. + */ + if (preserve_files) + { + int32 i; + + for (i = 0; i < newmap->num_mappings; i++) + { + RelFileNode rnode; + + rnode.spcNode = tsid; + rnode.dbNode = dbid; + rnode.relNode = newmap->mappings[i].mapfilenode; + RelationPreserveStorage(rnode, false); + } + } + + /* + * Success, update permanent copy. During bootstrap, we might be working + * on the permanent copy itself, in which case skip the memcpy() to avoid + * invoking nominally-undefined behavior. 
+ */ + if (realmap != newmap) + memcpy(realmap, newmap, sizeof(RelMapFile)); + else + Assert(!send_sinval); /* must be bootstrapping */ + + /* Critical section done */ + if (write_wal) + END_CRIT_SECTION(); +} + +/* + * Merge the specified updates into the appropriate "real" map, + * and write out the changes. This function must be used for committing + * updates during normal multiuser operation. + */ +static void +perform_relmap_update(bool shared, const RelMapFile *updates) +{ + RelMapFile newmap; + + /* + * Anyone updating a relation's mapping info should take exclusive lock on + * that rel and hold it until commit. This ensures that there will not be + * concurrent updates on the same mapping value; but there could easily be + * concurrent updates on different values in the same file. We cover that + * by acquiring the RelationMappingLock, re-reading the target file to + * ensure it's up to date, applying the updates, and writing the data + * before releasing RelationMappingLock. + * + * There is only one RelationMappingLock. In principle we could try to + * have one per mapping file, but it seems unlikely to be worth the + * trouble. + */ + LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); + + /* Be certain we see any other updates just made */ + load_relmap_file(shared, true); + + /* Prepare updated data in a local variable */ + if (shared) + memcpy(&newmap, &shared_map, sizeof(RelMapFile)); + else + memcpy(&newmap, &local_map, sizeof(RelMapFile)); + + /* + * Apply the updates to newmap. No new mappings should appear, unless + * somebody is adding indexes to system catalogs. + */ + merge_map_updates(&newmap, updates, allowSystemTableMods); + + /* Write out the updated map and do other necessary tasks */ + write_relmap_file(shared, &newmap, true, true, true, + (shared ? InvalidOid : MyDatabaseId), + (shared ? 
GLOBALTABLESPACE_OID : MyDatabaseTableSpace), + DatabasePath); + + /* Now we can release the lock */ + LWLockRelease(RelationMappingLock); +} + +/* + * RELMAP resource manager's routines + */ +void +relmap_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + /* Backup blocks are not used in relmap records */ + Assert(!XLogRecHasAnyBlockRefs(record)); + + if (info == XLOG_RELMAP_UPDATE) + { + xl_relmap_update *xlrec = (xl_relmap_update *) XLogRecGetData(record); + RelMapFile newmap; + char *dbpath; + + if (xlrec->nbytes != sizeof(RelMapFile)) + elog(PANIC, "relmap_redo: wrong size %u in relmap update record", + xlrec->nbytes); + memcpy(&newmap, xlrec->data, sizeof(newmap)); + + /* We need to construct the pathname for this database */ + dbpath = GetDatabasePath(xlrec->dbid, xlrec->tsid); + + /* + * Write out the new map and send sinval, but of course don't write a + * new WAL entry. There's no surrounding transaction to tell to + * preserve files, either. + * + * There shouldn't be anyone else updating relmaps during WAL replay, + * but grab the lock to interlock against load_relmap_file(). + */ + LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE); + write_relmap_file((xlrec->dbid == InvalidOid), &newmap, + false, true, false, + xlrec->dbid, xlrec->tsid, dbpath); + LWLockRelease(RelationMappingLock); + + pfree(dbpath); + } + else + elog(PANIC, "relmap_redo: unknown op code %u", info); +} diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c new file mode 100644 index 0000000..5870f43 --- /dev/null +++ b/src/backend/utils/cache/spccache.c @@ -0,0 +1,236 @@ +/*------------------------------------------------------------------------- + * + * spccache.c + * Tablespace cache management. + * + * We cache the parsed version of spcoptions for each tablespace to avoid + * needing to reparse on every lookup. 
Right now, there doesn't appear to + * be a measurable performance gain from doing this, but that might change + * in the future as we add more options. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/spccache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/reloptions.h" +#include "catalog/pg_tablespace.h" +#include "commands/tablespace.h" +#include "miscadmin.h" +#include "optimizer/optimizer.h" +#include "storage/bufmgr.h" +#include "utils/catcache.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/spccache.h" +#include "utils/syscache.h" + + +/* Hash table for information about each tablespace */ +static HTAB *TableSpaceCacheHash = NULL; + +typedef struct +{ + Oid oid; /* lookup key - must be first */ + TableSpaceOpts *opts; /* options, or NULL if none */ +} TableSpaceCacheEntry; + + +/* + * InvalidateTableSpaceCacheCallback + * Flush all cache entries when pg_tablespace is updated. + * + * When pg_tablespace is updated, we must flush the cache entry at least + * for that tablespace. Currently, we just flush them all. This is quick + * and easy and doesn't cost much, since there shouldn't be terribly many + * tablespaces, nor do we expect them to be frequently modified. + */ +static void +InvalidateTableSpaceCacheCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + TableSpaceCacheEntry *spc; + + hash_seq_init(&status, TableSpaceCacheHash); + while ((spc = (TableSpaceCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (spc->opts) + pfree(spc->opts); + if (hash_search(TableSpaceCacheHash, + (void *) &spc->oid, + HASH_REMOVE, + NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } +} + +/* + * InitializeTableSpaceCache + * Initialize the tablespace cache. 
+ */ +static void +InitializeTableSpaceCache(void) +{ + HASHCTL ctl; + + /* Initialize the hash table. */ + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TableSpaceCacheEntry); + TableSpaceCacheHash = + hash_create("TableSpace cache", 16, &ctl, + HASH_ELEM | HASH_BLOBS); + + /* Make sure we've initialized CacheMemoryContext. */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + /* Watch for invalidation events. */ + CacheRegisterSyscacheCallback(TABLESPACEOID, + InvalidateTableSpaceCacheCallback, + (Datum) 0); +} + +/* + * get_tablespace + * Fetch TableSpaceCacheEntry structure for a specified table OID. + * + * Pointers returned by this function should not be stored, since a cache + * flush will invalidate them. + */ +static TableSpaceCacheEntry * +get_tablespace(Oid spcid) +{ + TableSpaceCacheEntry *spc; + HeapTuple tp; + TableSpaceOpts *opts; + + /* + * Since spcid is always from a pg_class tuple, InvalidOid implies the + * default. + */ + if (spcid == InvalidOid) + spcid = MyDatabaseTableSpace; + + /* Find existing cache entry, if any. */ + if (!TableSpaceCacheHash) + InitializeTableSpaceCache(); + spc = (TableSpaceCacheEntry *) hash_search(TableSpaceCacheHash, + (void *) &spcid, + HASH_FIND, + NULL); + if (spc) + return spc; + + /* + * Not found in TableSpace cache. Check catcache. If we don't find a + * valid HeapTuple, it must mean someone has managed to request tablespace + * details for a non-existent tablespace. We'll just treat that case as + * if no options were specified. 
+ */ + tp = SearchSysCache1(TABLESPACEOID, ObjectIdGetDatum(spcid)); + if (!HeapTupleIsValid(tp)) + opts = NULL; + else + { + Datum datum; + bool isNull; + + datum = SysCacheGetAttr(TABLESPACEOID, + tp, + Anum_pg_tablespace_spcoptions, + &isNull); + if (isNull) + opts = NULL; + else + { + bytea *bytea_opts = tablespace_reloptions(datum, false); + + opts = MemoryContextAlloc(CacheMemoryContext, VARSIZE(bytea_opts)); + memcpy(opts, bytea_opts, VARSIZE(bytea_opts)); + } + ReleaseSysCache(tp); + } + + /* + * Now create the cache entry. It's important to do this only after + * reading the pg_tablespace entry, since doing so could cause a cache + * flush. + */ + spc = (TableSpaceCacheEntry *) hash_search(TableSpaceCacheHash, + (void *) &spcid, + HASH_ENTER, + NULL); + spc->opts = opts; + return spc; +} + +/* + * get_tablespace_page_costs + * Return random and/or sequential page costs for a given tablespace. + * + * This value is not locked by the transaction, so this value may + * be changed while a SELECT that has used these values for planning + * is still executing. + */ +void +get_tablespace_page_costs(Oid spcid, + double *spc_random_page_cost, + double *spc_seq_page_cost) +{ + TableSpaceCacheEntry *spc = get_tablespace(spcid); + + Assert(spc != NULL); + + if (spc_random_page_cost) + { + if (!spc->opts || spc->opts->random_page_cost < 0) + *spc_random_page_cost = random_page_cost; + else + *spc_random_page_cost = spc->opts->random_page_cost; + } + + if (spc_seq_page_cost) + { + if (!spc->opts || spc->opts->seq_page_cost < 0) + *spc_seq_page_cost = seq_page_cost; + else + *spc_seq_page_cost = spc->opts->seq_page_cost; + } +} + +/* + * get_tablespace_io_concurrency + * + * This value is not locked by the transaction, so this value may + * be changed while a SELECT that has used these values for planning + * is still executing. 
+ */ +int +get_tablespace_io_concurrency(Oid spcid) +{ + TableSpaceCacheEntry *spc = get_tablespace(spcid); + + if (!spc->opts || spc->opts->effective_io_concurrency < 0) + return effective_io_concurrency; + else + return spc->opts->effective_io_concurrency; +} + +/* + * get_tablespace_maintenance_io_concurrency + */ +int +get_tablespace_maintenance_io_concurrency(Oid spcid) +{ + TableSpaceCacheEntry *spc = get_tablespace(spcid); + + if (!spc->opts || spc->opts->maintenance_io_concurrency < 0) + return maintenance_io_concurrency; + else + return spc->opts->maintenance_io_concurrency; +} diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c new file mode 100644 index 0000000..e4dc4ee --- /dev/null +++ b/src/backend/utils/cache/syscache.c @@ -0,0 +1,1565 @@ +/*------------------------------------------------------------------------- + * + * syscache.c + * System cache management routines + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/cache/syscache.c + * + * NOTES + * These routines allow the parser/planner/executor to perform + * rapid lookups on the contents of the system catalogs. 
+ * + * see utils/syscache.h for a list of the cache IDs + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "catalog/pg_aggregate.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_auth_members.h" +#include "catalog/pg_authid.h" +#include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_conversion.h" +#include "catalog/pg_database.h" +#include "catalog/pg_db_role_setting.h" +#include "catalog/pg_default_acl.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_description.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_event_trigger.h" +#include "catalog/pg_foreign_data_wrapper.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_language.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_partitioned_table.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" +#include "catalog/pg_range.h" +#include "catalog/pg_replication_origin.h" +#include "catalog/pg_rewrite.h" +#include "catalog/pg_seclabel.h" +#include "catalog/pg_sequence.h" +#include "catalog/pg_shdepend.h" +#include "catalog/pg_shdescription.h" +#include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" +#include "catalog/pg_statistic_ext_data.h" +#include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" +#include "catalog/pg_tablespace.h" +#include "catalog/pg_transform.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" 
+#include "catalog/pg_type.h" +#include "catalog/pg_user_mapping.h" +#include "lib/qunique.h" +#include "utils/catcache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +/*--------------------------------------------------------------------------- + + Adding system caches: + + Add your new cache to the list in include/utils/syscache.h. + Keep the list sorted alphabetically. + + Add your entry to the cacheinfo[] array below. All cache lists are + alphabetical, so add it in the proper place. Specify the relation OID, + index OID, number of keys, key attribute numbers, and initial number of + hash buckets. + + The number of hash buckets must be a power of 2. It's reasonable to + set this to the number of entries that might be in the particular cache + in a medium-size database. + + There must be a unique index underlying each syscache (ie, an index + whose key is the same as that of the cache). If there is not one + already, add definitions for it to include/catalog/pg_*.h: you need + to add a DECLARE_UNIQUE_INDEX macro and a #define for the index OID. + (Adding an index requires a catversion.h update, while simply + adding/deleting caches only requires a recompile.) + + Finally, any place your relation gets heap_insert() or + heap_update() calls, use CatalogTupleInsert() or CatalogTupleUpdate() + instead, which also update indexes. The heap_* calls do not do that. 
+ +*--------------------------------------------------------------------------- +*/ + +/* + * struct cachedesc: information defining a single syscache + */ +struct cachedesc +{ + Oid reloid; /* OID of the relation being cached */ + Oid indoid; /* OID of index relation for this cache */ + int nkeys; /* # of keys needed for cache lookup */ + int key[4]; /* attribute numbers of key attrs */ + int nbuckets; /* number of hash buckets for this cache */ +}; + +static const struct cachedesc cacheinfo[] = { + {AggregateRelationId, /* AGGFNOID */ + AggregateFnoidIndexId, + 1, + { + Anum_pg_aggregate_aggfnoid, + 0, + 0, + 0 + }, + 16 + }, + {AccessMethodRelationId, /* AMNAME */ + AmNameIndexId, + 1, + { + Anum_pg_am_amname, + 0, + 0, + 0 + }, + 4 + }, + {AccessMethodRelationId, /* AMOID */ + AmOidIndexId, + 1, + { + Anum_pg_am_oid, + 0, + 0, + 0 + }, + 4 + }, + {AccessMethodOperatorRelationId, /* AMOPOPID */ + AccessMethodOperatorIndexId, + 3, + { + Anum_pg_amop_amopopr, + Anum_pg_amop_amoppurpose, + Anum_pg_amop_amopfamily, + 0 + }, + 64 + }, + {AccessMethodOperatorRelationId, /* AMOPSTRATEGY */ + AccessMethodStrategyIndexId, + 4, + { + Anum_pg_amop_amopfamily, + Anum_pg_amop_amoplefttype, + Anum_pg_amop_amoprighttype, + Anum_pg_amop_amopstrategy + }, + 64 + }, + {AccessMethodProcedureRelationId, /* AMPROCNUM */ + AccessMethodProcedureIndexId, + 4, + { + Anum_pg_amproc_amprocfamily, + Anum_pg_amproc_amproclefttype, + Anum_pg_amproc_amprocrighttype, + Anum_pg_amproc_amprocnum + }, + 16 + }, + {AttributeRelationId, /* ATTNAME */ + AttributeRelidNameIndexId, + 2, + { + Anum_pg_attribute_attrelid, + Anum_pg_attribute_attname, + 0, + 0 + }, + 32 + }, + {AttributeRelationId, /* ATTNUM */ + AttributeRelidNumIndexId, + 2, + { + Anum_pg_attribute_attrelid, + Anum_pg_attribute_attnum, + 0, + 0 + }, + 128 + }, + {AuthMemRelationId, /* AUTHMEMMEMROLE */ + AuthMemMemRoleIndexId, + 2, + { + Anum_pg_auth_members_member, + Anum_pg_auth_members_roleid, + 0, + 0 + }, + 8 + }, + 
{AuthMemRelationId, /* AUTHMEMROLEMEM */ + AuthMemRoleMemIndexId, + 2, + { + Anum_pg_auth_members_roleid, + Anum_pg_auth_members_member, + 0, + 0 + }, + 8 + }, + {AuthIdRelationId, /* AUTHNAME */ + AuthIdRolnameIndexId, + 1, + { + Anum_pg_authid_rolname, + 0, + 0, + 0 + }, + 8 + }, + {AuthIdRelationId, /* AUTHOID */ + AuthIdOidIndexId, + 1, + { + Anum_pg_authid_oid, + 0, + 0, + 0 + }, + 8 + }, + { + CastRelationId, /* CASTSOURCETARGET */ + CastSourceTargetIndexId, + 2, + { + Anum_pg_cast_castsource, + Anum_pg_cast_casttarget, + 0, + 0 + }, + 256 + }, + {OperatorClassRelationId, /* CLAAMNAMENSP */ + OpclassAmNameNspIndexId, + 3, + { + Anum_pg_opclass_opcmethod, + Anum_pg_opclass_opcname, + Anum_pg_opclass_opcnamespace, + 0 + }, + 8 + }, + {OperatorClassRelationId, /* CLAOID */ + OpclassOidIndexId, + 1, + { + Anum_pg_opclass_oid, + 0, + 0, + 0 + }, + 8 + }, + {CollationRelationId, /* COLLNAMEENCNSP */ + CollationNameEncNspIndexId, + 3, + { + Anum_pg_collation_collname, + Anum_pg_collation_collencoding, + Anum_pg_collation_collnamespace, + 0 + }, + 8 + }, + {CollationRelationId, /* COLLOID */ + CollationOidIndexId, + 1, + { + Anum_pg_collation_oid, + 0, + 0, + 0 + }, + 8 + }, + {ConversionRelationId, /* CONDEFAULT */ + ConversionDefaultIndexId, + 4, + { + Anum_pg_conversion_connamespace, + Anum_pg_conversion_conforencoding, + Anum_pg_conversion_contoencoding, + Anum_pg_conversion_oid + }, + 8 + }, + {ConversionRelationId, /* CONNAMENSP */ + ConversionNameNspIndexId, + 2, + { + Anum_pg_conversion_conname, + Anum_pg_conversion_connamespace, + 0, + 0 + }, + 8 + }, + {ConstraintRelationId, /* CONSTROID */ + ConstraintOidIndexId, + 1, + { + Anum_pg_constraint_oid, + 0, + 0, + 0 + }, + 16 + }, + {ConversionRelationId, /* CONVOID */ + ConversionOidIndexId, + 1, + { + Anum_pg_conversion_oid, + 0, + 0, + 0 + }, + 8 + }, + {DatabaseRelationId, /* DATABASEOID */ + DatabaseOidIndexId, + 1, + { + Anum_pg_database_oid, + 0, + 0, + 0 + }, + 4 + }, + {DefaultAclRelationId, /* 
DEFACLROLENSPOBJ */ + DefaultAclRoleNspObjIndexId, + 3, + { + Anum_pg_default_acl_defaclrole, + Anum_pg_default_acl_defaclnamespace, + Anum_pg_default_acl_defaclobjtype, + 0 + }, + 8 + }, + {EnumRelationId, /* ENUMOID */ + EnumOidIndexId, + 1, + { + Anum_pg_enum_oid, + 0, + 0, + 0 + }, + 8 + }, + {EnumRelationId, /* ENUMTYPOIDNAME */ + EnumTypIdLabelIndexId, + 2, + { + Anum_pg_enum_enumtypid, + Anum_pg_enum_enumlabel, + 0, + 0 + }, + 8 + }, + {EventTriggerRelationId, /* EVENTTRIGGERNAME */ + EventTriggerNameIndexId, + 1, + { + Anum_pg_event_trigger_evtname, + 0, + 0, + 0 + }, + 8 + }, + {EventTriggerRelationId, /* EVENTTRIGGEROID */ + EventTriggerOidIndexId, + 1, + { + Anum_pg_event_trigger_oid, + 0, + 0, + 0 + }, + 8 + }, + {ForeignDataWrapperRelationId, /* FOREIGNDATAWRAPPERNAME */ + ForeignDataWrapperNameIndexId, + 1, + { + Anum_pg_foreign_data_wrapper_fdwname, + 0, + 0, + 0 + }, + 2 + }, + {ForeignDataWrapperRelationId, /* FOREIGNDATAWRAPPEROID */ + ForeignDataWrapperOidIndexId, + 1, + { + Anum_pg_foreign_data_wrapper_oid, + 0, + 0, + 0 + }, + 2 + }, + {ForeignServerRelationId, /* FOREIGNSERVERNAME */ + ForeignServerNameIndexId, + 1, + { + Anum_pg_foreign_server_srvname, + 0, + 0, + 0 + }, + 2 + }, + {ForeignServerRelationId, /* FOREIGNSERVEROID */ + ForeignServerOidIndexId, + 1, + { + Anum_pg_foreign_server_oid, + 0, + 0, + 0 + }, + 2 + }, + {ForeignTableRelationId, /* FOREIGNTABLEREL */ + ForeignTableRelidIndexId, + 1, + { + Anum_pg_foreign_table_ftrelid, + 0, + 0, + 0 + }, + 4 + }, + {IndexRelationId, /* INDEXRELID */ + IndexRelidIndexId, + 1, + { + Anum_pg_index_indexrelid, + 0, + 0, + 0 + }, + 64 + }, + {LanguageRelationId, /* LANGNAME */ + LanguageNameIndexId, + 1, + { + Anum_pg_language_lanname, + 0, + 0, + 0 + }, + 4 + }, + {LanguageRelationId, /* LANGOID */ + LanguageOidIndexId, + 1, + { + Anum_pg_language_oid, + 0, + 0, + 0 + }, + 4 + }, + {NamespaceRelationId, /* NAMESPACENAME */ + NamespaceNameIndexId, + 1, + { + Anum_pg_namespace_nspname, + 0, + 0, 
+ 0 + }, + 4 + }, + {NamespaceRelationId, /* NAMESPACEOID */ + NamespaceOidIndexId, + 1, + { + Anum_pg_namespace_oid, + 0, + 0, + 0 + }, + 16 + }, + {OperatorRelationId, /* OPERNAMENSP */ + OperatorNameNspIndexId, + 4, + { + Anum_pg_operator_oprname, + Anum_pg_operator_oprleft, + Anum_pg_operator_oprright, + Anum_pg_operator_oprnamespace + }, + 256 + }, + {OperatorRelationId, /* OPEROID */ + OperatorOidIndexId, + 1, + { + Anum_pg_operator_oid, + 0, + 0, + 0 + }, + 32 + }, + {OperatorFamilyRelationId, /* OPFAMILYAMNAMENSP */ + OpfamilyAmNameNspIndexId, + 3, + { + Anum_pg_opfamily_opfmethod, + Anum_pg_opfamily_opfname, + Anum_pg_opfamily_opfnamespace, + 0 + }, + 8 + }, + {OperatorFamilyRelationId, /* OPFAMILYOID */ + OpfamilyOidIndexId, + 1, + { + Anum_pg_opfamily_oid, + 0, + 0, + 0 + }, + 8 + }, + {PartitionedRelationId, /* PARTRELID */ + PartitionedRelidIndexId, + 1, + { + Anum_pg_partitioned_table_partrelid, + 0, + 0, + 0 + }, + 32 + }, + {ProcedureRelationId, /* PROCNAMEARGSNSP */ + ProcedureNameArgsNspIndexId, + 3, + { + Anum_pg_proc_proname, + Anum_pg_proc_proargtypes, + Anum_pg_proc_pronamespace, + 0 + }, + 128 + }, + {ProcedureRelationId, /* PROCOID */ + ProcedureOidIndexId, + 1, + { + Anum_pg_proc_oid, + 0, + 0, + 0 + }, + 128 + }, + {PublicationRelationId, /* PUBLICATIONNAME */ + PublicationNameIndexId, + 1, + { + Anum_pg_publication_pubname, + 0, + 0, + 0 + }, + 8 + }, + {PublicationRelationId, /* PUBLICATIONOID */ + PublicationObjectIndexId, + 1, + { + Anum_pg_publication_oid, + 0, + 0, + 0 + }, + 8 + }, + {PublicationRelRelationId, /* PUBLICATIONREL */ + PublicationRelObjectIndexId, + 1, + { + Anum_pg_publication_rel_oid, + 0, + 0, + 0 + }, + 64 + }, + {PublicationRelRelationId, /* PUBLICATIONRELMAP */ + PublicationRelPrrelidPrpubidIndexId, + 2, + { + Anum_pg_publication_rel_prrelid, + Anum_pg_publication_rel_prpubid, + 0, + 0 + }, + 64 + }, + {RangeRelationId, /* RANGEMULTIRANGE */ + RangeMultirangeTypidIndexId, + 1, + { + Anum_pg_range_rngmultitypid, + 
0, + 0, + 0 + }, + 4 + }, + + {RangeRelationId, /* RANGETYPE */ + RangeTypidIndexId, + 1, + { + Anum_pg_range_rngtypid, + 0, + 0, + 0 + }, + 4 + }, + {RelationRelationId, /* RELNAMENSP */ + ClassNameNspIndexId, + 2, + { + Anum_pg_class_relname, + Anum_pg_class_relnamespace, + 0, + 0 + }, + 128 + }, + {RelationRelationId, /* RELOID */ + ClassOidIndexId, + 1, + { + Anum_pg_class_oid, + 0, + 0, + 0 + }, + 128 + }, + {ReplicationOriginRelationId, /* REPLORIGIDENT */ + ReplicationOriginIdentIndex, + 1, + { + Anum_pg_replication_origin_roident, + 0, + 0, + 0 + }, + 16 + }, + {ReplicationOriginRelationId, /* REPLORIGNAME */ + ReplicationOriginNameIndex, + 1, + { + Anum_pg_replication_origin_roname, + 0, + 0, + 0 + }, + 16 + }, + {RewriteRelationId, /* RULERELNAME */ + RewriteRelRulenameIndexId, + 2, + { + Anum_pg_rewrite_ev_class, + Anum_pg_rewrite_rulename, + 0, + 0 + }, + 8 + }, + {SequenceRelationId, /* SEQRELID */ + SequenceRelidIndexId, + 1, + { + Anum_pg_sequence_seqrelid, + 0, + 0, + 0 + }, + 32 + }, + {StatisticExtDataRelationId, /* STATEXTDATASTXOID */ + StatisticExtDataStxoidIndexId, + 1, + { + Anum_pg_statistic_ext_data_stxoid, + 0, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTNAMENSP */ + StatisticExtNameIndexId, + 2, + { + Anum_pg_statistic_ext_stxname, + Anum_pg_statistic_ext_stxnamespace, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTOID */ + StatisticExtOidIndexId, + 1, + { + Anum_pg_statistic_ext_oid, + 0, + 0, + 0 + }, + 4 + }, + {StatisticRelationId, /* STATRELATTINH */ + StatisticRelidAttnumInhIndexId, + 3, + { + Anum_pg_statistic_starelid, + Anum_pg_statistic_staattnum, + Anum_pg_statistic_stainherit, + 0 + }, + 128 + }, + {SubscriptionRelationId, /* SUBSCRIPTIONNAME */ + SubscriptionNameIndexId, + 2, + { + Anum_pg_subscription_subdbid, + Anum_pg_subscription_subname, + 0, + 0 + }, + 4 + }, + {SubscriptionRelationId, /* SUBSCRIPTIONOID */ + SubscriptionObjectIndexId, + 1, + { + Anum_pg_subscription_oid, + 0, + 0, + 0 + }, 
+ 4 + }, + {SubscriptionRelRelationId, /* SUBSCRIPTIONRELMAP */ + SubscriptionRelSrrelidSrsubidIndexId, + 2, + { + Anum_pg_subscription_rel_srrelid, + Anum_pg_subscription_rel_srsubid, + 0, + 0 + }, + 64 + }, + {TableSpaceRelationId, /* TABLESPACEOID */ + TablespaceOidIndexId, + 1, + { + Anum_pg_tablespace_oid, + 0, + 0, + 0, + }, + 4 + }, + {TransformRelationId, /* TRFOID */ + TransformOidIndexId, + 1, + { + Anum_pg_transform_oid, + 0, + 0, + 0, + }, + 16 + }, + {TransformRelationId, /* TRFTYPELANG */ + TransformTypeLangIndexId, + 2, + { + Anum_pg_transform_trftype, + Anum_pg_transform_trflang, + 0, + 0, + }, + 16 + }, + {TSConfigMapRelationId, /* TSCONFIGMAP */ + TSConfigMapIndexId, + 3, + { + Anum_pg_ts_config_map_mapcfg, + Anum_pg_ts_config_map_maptokentype, + Anum_pg_ts_config_map_mapseqno, + 0 + }, + 2 + }, + {TSConfigRelationId, /* TSCONFIGNAMENSP */ + TSConfigNameNspIndexId, + 2, + { + Anum_pg_ts_config_cfgname, + Anum_pg_ts_config_cfgnamespace, + 0, + 0 + }, + 2 + }, + {TSConfigRelationId, /* TSCONFIGOID */ + TSConfigOidIndexId, + 1, + { + Anum_pg_ts_config_oid, + 0, + 0, + 0 + }, + 2 + }, + {TSDictionaryRelationId, /* TSDICTNAMENSP */ + TSDictionaryNameNspIndexId, + 2, + { + Anum_pg_ts_dict_dictname, + Anum_pg_ts_dict_dictnamespace, + 0, + 0 + }, + 2 + }, + {TSDictionaryRelationId, /* TSDICTOID */ + TSDictionaryOidIndexId, + 1, + { + Anum_pg_ts_dict_oid, + 0, + 0, + 0 + }, + 2 + }, + {TSParserRelationId, /* TSPARSERNAMENSP */ + TSParserNameNspIndexId, + 2, + { + Anum_pg_ts_parser_prsname, + Anum_pg_ts_parser_prsnamespace, + 0, + 0 + }, + 2 + }, + {TSParserRelationId, /* TSPARSEROID */ + TSParserOidIndexId, + 1, + { + Anum_pg_ts_parser_oid, + 0, + 0, + 0 + }, + 2 + }, + {TSTemplateRelationId, /* TSTEMPLATENAMENSP */ + TSTemplateNameNspIndexId, + 2, + { + Anum_pg_ts_template_tmplname, + Anum_pg_ts_template_tmplnamespace, + 0, + 0 + }, + 2 + }, + {TSTemplateRelationId, /* TSTEMPLATEOID */ + TSTemplateOidIndexId, + 1, + { + Anum_pg_ts_template_oid, + 0, + 0, 
+ 0 + }, + 2 + }, + {TypeRelationId, /* TYPENAMENSP */ + TypeNameNspIndexId, + 2, + { + Anum_pg_type_typname, + Anum_pg_type_typnamespace, + 0, + 0 + }, + 64 + }, + {TypeRelationId, /* TYPEOID */ + TypeOidIndexId, + 1, + { + Anum_pg_type_oid, + 0, + 0, + 0 + }, + 64 + }, + {UserMappingRelationId, /* USERMAPPINGOID */ + UserMappingOidIndexId, + 1, + { + Anum_pg_user_mapping_oid, + 0, + 0, + 0 + }, + 2 + }, + {UserMappingRelationId, /* USERMAPPINGUSERSERVER */ + UserMappingUserServerIndexId, + 2, + { + Anum_pg_user_mapping_umuser, + Anum_pg_user_mapping_umserver, + 0, + 0 + }, + 2 + } +}; + +static CatCache *SysCache[SysCacheSize]; + +static bool CacheInitialized = false; + +/* Sorted array of OIDs of tables that have caches on them */ +static Oid SysCacheRelationOid[SysCacheSize]; +static int SysCacheRelationOidSize; + +/* Sorted array of OIDs of tables and indexes used by caches */ +static Oid SysCacheSupportingRelOid[SysCacheSize * 2]; +static int SysCacheSupportingRelOidSize; + +static int oid_compare(const void *a, const void *b); + + +/* + * InitCatalogCache - initialize the caches + * + * Note that no database access is done here; we only allocate memory + * and initialize the cache structure. Interrogation of the database + * to complete initialization of a cache happens upon first use + * of that cache. 
+ */ +void +InitCatalogCache(void) +{ + int cacheId; + + StaticAssertStmt(SysCacheSize == (int) lengthof(cacheinfo), + "SysCacheSize does not match syscache.c's array"); + + Assert(!CacheInitialized); + + SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0; + + for (cacheId = 0; cacheId < SysCacheSize; cacheId++) + { + SysCache[cacheId] = InitCatCache(cacheId, + cacheinfo[cacheId].reloid, + cacheinfo[cacheId].indoid, + cacheinfo[cacheId].nkeys, + cacheinfo[cacheId].key, + cacheinfo[cacheId].nbuckets); + if (!PointerIsValid(SysCache[cacheId])) + elog(ERROR, "could not initialize cache %u (%d)", + cacheinfo[cacheId].reloid, cacheId); + /* Accumulate data for OID lists, too */ + SysCacheRelationOid[SysCacheRelationOidSize++] = + cacheinfo[cacheId].reloid; + SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = + cacheinfo[cacheId].reloid; + SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] = + cacheinfo[cacheId].indoid; + /* see comments for RelationInvalidatesSnapshotsOnly */ + Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid)); + } + + Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid)); + Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid)); + + /* Sort and de-dup OID arrays, so we can use binary search. */ + pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize, + sizeof(Oid), oid_compare); + SysCacheRelationOidSize = + qunique(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid), + oid_compare); + + pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize, + sizeof(Oid), oid_compare); + SysCacheSupportingRelOidSize = + qunique(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize, + sizeof(Oid), oid_compare); + + CacheInitialized = true; +} + +/* + * InitCatalogCachePhase2 - finish initializing the caches + * + * Finish initializing all the caches, including necessary database + * access. 
+ * + * This is *not* essential; normally we allow syscaches to be initialized + * on first use. However, it is useful as a mechanism to preload the + * relcache with entries for the most-commonly-used system catalogs. + * Therefore, we invoke this routine when we need to write a new relcache + * init file. + */ +void +InitCatalogCachePhase2(void) +{ + int cacheId; + + Assert(CacheInitialized); + + for (cacheId = 0; cacheId < SysCacheSize; cacheId++) + InitCatCachePhase2(SysCache[cacheId], true); +} + + +/* + * SearchSysCache + * + * A layer on top of SearchCatCache that does the initialization and + * key-setting for you. + * + * Returns the cache copy of the tuple if one is found, NULL if not. + * The tuple is the 'cache' copy and must NOT be modified! + * + * When the caller is done using the tuple, call ReleaseSysCache() + * to release the reference count grabbed by SearchSysCache(). If this + * is not done, the tuple will remain locked in cache until end of + * transaction, which is tolerable but not desirable. + * + * CAUTION: The tuple that is returned must NOT be freed by the caller! 
+ */ +HeapTuple +SearchSysCache(int cacheId, + Datum key1, + Datum key2, + Datum key3, + Datum key4) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + + return SearchCatCache(SysCache[cacheId], key1, key2, key3, key4); +} + +HeapTuple +SearchSysCache1(int cacheId, + Datum key1) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 1); + + return SearchCatCache1(SysCache[cacheId], key1); +} + +HeapTuple +SearchSysCache2(int cacheId, + Datum key1, Datum key2) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 2); + + return SearchCatCache2(SysCache[cacheId], key1, key2); +} + +HeapTuple +SearchSysCache3(int cacheId, + Datum key1, Datum key2, Datum key3) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 3); + + return SearchCatCache3(SysCache[cacheId], key1, key2, key3); +} + +HeapTuple +SearchSysCache4(int cacheId, + Datum key1, Datum key2, Datum key3, Datum key4) +{ + Assert(cacheId >= 0 && cacheId < SysCacheSize && + PointerIsValid(SysCache[cacheId])); + Assert(SysCache[cacheId]->cc_nkeys == 4); + + return SearchCatCache4(SysCache[cacheId], key1, key2, key3, key4); +} + +/* + * ReleaseSysCache + * Release previously grabbed reference count on a tuple + */ +void +ReleaseSysCache(HeapTuple tuple) +{ + ReleaseCatCache(tuple); +} + +/* + * SearchSysCacheCopy + * + * A convenience routine that does SearchSysCache and (if successful) + * returns a modifiable copy of the syscache entry. The original + * syscache entry is released before returning. The caller should + * heap_freetuple() the result when done with it. 
+ */ +HeapTuple +SearchSysCacheCopy(int cacheId, + Datum key1, + Datum key2, + Datum key3, + Datum key4) +{ + HeapTuple tuple, + newtuple; + + tuple = SearchSysCache(cacheId, key1, key2, key3, key4); + if (!HeapTupleIsValid(tuple)) + return tuple; + newtuple = heap_copytuple(tuple); + ReleaseSysCache(tuple); + return newtuple; +} + +/* + * SearchSysCacheExists + * + * A convenience routine that just probes to see if a tuple can be found. + * No lock is retained on the syscache entry. + */ +bool +SearchSysCacheExists(int cacheId, + Datum key1, + Datum key2, + Datum key3, + Datum key4) +{ + HeapTuple tuple; + + tuple = SearchSysCache(cacheId, key1, key2, key3, key4); + if (!HeapTupleIsValid(tuple)) + return false; + ReleaseSysCache(tuple); + return true; +} + +/* + * GetSysCacheOid + * + * A convenience routine that does SearchSysCache and returns the OID in the + * oidcol column of the found tuple, or InvalidOid if no tuple could be found. + * No lock is retained on the syscache entry. + */ +Oid +GetSysCacheOid(int cacheId, + AttrNumber oidcol, + Datum key1, + Datum key2, + Datum key3, + Datum key4) +{ + HeapTuple tuple; + bool isNull; + Oid result; + + tuple = SearchSysCache(cacheId, key1, key2, key3, key4); + if (!HeapTupleIsValid(tuple)) + return InvalidOid; + result = heap_getattr(tuple, oidcol, + SysCache[cacheId]->cc_tupdesc, + &isNull); + Assert(!isNull); /* columns used as oids should never be NULL */ + ReleaseSysCache(tuple); + return result; +} + + +/* + * SearchSysCacheAttName + * + * This routine is equivalent to SearchSysCache on the ATTNAME cache, + * except that it will return NULL if the found attribute is marked + * attisdropped. This is convenient for callers that want to act as + * though dropped attributes don't exist. 
+ */ +HeapTuple +SearchSysCacheAttName(Oid relid, const char *attname) +{ + HeapTuple tuple; + + tuple = SearchSysCache2(ATTNAME, + ObjectIdGetDatum(relid), + CStringGetDatum(attname)); + if (!HeapTupleIsValid(tuple)) + return NULL; + if (((Form_pg_attribute) GETSTRUCT(tuple))->attisdropped) + { + ReleaseSysCache(tuple); + return NULL; + } + return tuple; +} + +/* + * SearchSysCacheCopyAttName + * + * As above, an attisdropped-aware version of SearchSysCacheCopy. + */ +HeapTuple +SearchSysCacheCopyAttName(Oid relid, const char *attname) +{ + HeapTuple tuple, + newtuple; + + tuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(tuple)) + return tuple; + newtuple = heap_copytuple(tuple); + ReleaseSysCache(tuple); + return newtuple; +} + +/* + * SearchSysCacheExistsAttName + * + * As above, an attisdropped-aware version of SearchSysCacheExists. + */ +bool +SearchSysCacheExistsAttName(Oid relid, const char *attname) +{ + HeapTuple tuple; + + tuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(tuple)) + return false; + ReleaseSysCache(tuple); + return true; +} + + +/* + * SearchSysCacheAttNum + * + * This routine is equivalent to SearchSysCache on the ATTNUM cache, + * except that it will return NULL if the found attribute is marked + * attisdropped. This is convenient for callers that want to act as + * though dropped attributes don't exist. + */ +HeapTuple +SearchSysCacheAttNum(Oid relid, int16 attnum) +{ + HeapTuple tuple; + + tuple = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (!HeapTupleIsValid(tuple)) + return NULL; + if (((Form_pg_attribute) GETSTRUCT(tuple))->attisdropped) + { + ReleaseSysCache(tuple); + return NULL; + } + return tuple; +} + +/* + * SearchSysCacheCopyAttNum + * + * As above, an attisdropped-aware version of SearchSysCacheCopy. 
+ */ +HeapTuple +SearchSysCacheCopyAttNum(Oid relid, int16 attnum) +{ + HeapTuple tuple, + newtuple; + + tuple = SearchSysCacheAttNum(relid, attnum); + if (!HeapTupleIsValid(tuple)) + return NULL; + newtuple = heap_copytuple(tuple); + ReleaseSysCache(tuple); + return newtuple; +} + + +/* + * SysCacheGetAttr + * + * Given a tuple previously fetched by SearchSysCache(), + * extract a specific attribute. + * + * This is equivalent to using heap_getattr() on a tuple fetched + * from a non-cached relation. Usually, this is only used for attributes + * that could be NULL or variable length; the fixed-size attributes in + * a system table are accessed just by mapping the tuple onto the C struct + * declarations from include/catalog/. + * + * As with heap_getattr(), if the attribute is of a pass-by-reference type + * then a pointer into the tuple data area is returned --- the caller must + * not modify or pfree the datum! + * + * Note: it is legal to use SysCacheGetAttr() with a cacheId referencing + * a different cache for the same catalog the tuple was fetched from. + */ +Datum +SysCacheGetAttr(int cacheId, HeapTuple tup, + AttrNumber attributeNumber, + bool *isNull) +{ + /* + * We just need to get the TupleDesc out of the cache entry, and then we + * can apply heap_getattr(). Normally the cache control data is already + * valid (because the caller recently fetched the tuple via this same + * cache), but there are cases where we have to initialize the cache here. 
+ */ + if (cacheId < 0 || cacheId >= SysCacheSize || + !PointerIsValid(SysCache[cacheId])) + elog(ERROR, "invalid cache ID: %d", cacheId); + if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc)) + { + InitCatCachePhase2(SysCache[cacheId], false); + Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc)); + } + + return heap_getattr(tup, attributeNumber, + SysCache[cacheId]->cc_tupdesc, + isNull); +} + +/* + * GetSysCacheHashValue + * + * Get the hash value that would be used for a tuple in the specified cache + * with the given search keys. + * + * The reason for exposing this as part of the API is that the hash value is + * exposed in cache invalidation operations, so there are places outside the + * catcache code that need to be able to compute the hash values. + */ +uint32 +GetSysCacheHashValue(int cacheId, + Datum key1, + Datum key2, + Datum key3, + Datum key4) +{ + if (cacheId < 0 || cacheId >= SysCacheSize || + !PointerIsValid(SysCache[cacheId])) + elog(ERROR, "invalid cache ID: %d", cacheId); + + return GetCatCacheHashValue(SysCache[cacheId], key1, key2, key3, key4); +} + +/* + * List-search interface + */ +struct catclist * +SearchSysCacheList(int cacheId, int nkeys, + Datum key1, Datum key2, Datum key3) +{ + if (cacheId < 0 || cacheId >= SysCacheSize || + !PointerIsValid(SysCache[cacheId])) + elog(ERROR, "invalid cache ID: %d", cacheId); + + return SearchCatCacheList(SysCache[cacheId], nkeys, + key1, key2, key3); +} + +/* + * SysCacheInvalidate + * + * Invalidate entries in the specified cache, given a hash value. + * See CatCacheInvalidate() for more info. + * + * This routine is only quasi-public: it should only be used by inval.c. 
+ */ +void +SysCacheInvalidate(int cacheId, uint32 hashValue) +{ + if (cacheId < 0 || cacheId >= SysCacheSize) + elog(ERROR, "invalid cache ID: %d", cacheId); + + /* if this cache isn't initialized yet, no need to do anything */ + if (!PointerIsValid(SysCache[cacheId])) + return; + + CatCacheInvalidate(SysCache[cacheId], hashValue); +} + +/* + * Certain relations that do not have system caches send snapshot invalidation + * messages in lieu of catcache messages. This is for the benefit of + * GetCatalogSnapshot(), which can then reuse its existing MVCC snapshot + * for scanning one of those catalogs, rather than taking a new one, if no + * invalidation has been received. + * + * Relations that have syscaches need not (and must not) be listed here. The + * catcache invalidation messages will also flush the snapshot. If you add a + * syscache for one of these relations, remove it from this list. + */ +bool +RelationInvalidatesSnapshotsOnly(Oid relid) +{ + switch (relid) + { + case DbRoleSettingRelationId: + case DependRelationId: + case SharedDependRelationId: + case DescriptionRelationId: + case SharedDescriptionRelationId: + case SecLabelRelationId: + case SharedSecLabelRelationId: + return true; + default: + break; + } + + return false; +} + +/* + * Test whether a relation has a system cache. + */ +bool +RelationHasSysCache(Oid relid) +{ + int low = 0, + high = SysCacheRelationOidSize - 1; + + while (low <= high) + { + int middle = low + (high - low) / 2; + + if (SysCacheRelationOid[middle] == relid) + return true; + if (SysCacheRelationOid[middle] < relid) + low = middle + 1; + else + high = middle - 1; + } + + return false; +} + +/* + * Test whether a relation supports a system cache, ie it is either a + * cached table or the index used for a cache. 
+ */ +bool +RelationSupportsSysCache(Oid relid) +{ + int low = 0, + high = SysCacheSupportingRelOidSize - 1; + + while (low <= high) + { + int middle = low + (high - low) / 2; + + if (SysCacheSupportingRelOid[middle] == relid) + return true; + if (SysCacheSupportingRelOid[middle] < relid) + low = middle + 1; + else + high = middle - 1; + } + + return false; +} + + +/* + * OID comparator for pg_qsort + */ +static int +oid_compare(const void *a, const void *b) +{ + Oid oa = *((const Oid *) a); + Oid ob = *((const Oid *) b); + + if (oa == ob) + return 0; + return (oa > ob) ? 1 : -1; +} diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c new file mode 100644 index 0000000..384107b --- /dev/null +++ b/src/backend/utils/cache/ts_cache.c @@ -0,0 +1,652 @@ +/*------------------------------------------------------------------------- + * + * ts_cache.c + * Tsearch related object caches. + * + * Tsearch performance is very sensitive to performance of parsers, + * dictionaries and mapping, so lookups should be cached as much + * as possible. + * + * Once a backend has created a cache entry for a particular TS object OID, + * the cache entry will exist for the life of the backend; hence it is + * safe to hold onto a pointer to the cache entry while doing things that + * might result in recognizing a cache invalidation. Beware however that + * subsidiary information might be deleted and reallocated somewhere else + * if a cache inval and reval happens! This does not look like it will be + * a big problem as long as parser and dictionary methods do not attempt + * any database access. 
+ * + * + * Copyright (c) 2006-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/cache/ts_cache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "tsearch/ts_cache.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/regproc.h" +#include "utils/syscache.h" + + +/* + * MAXTOKENTYPE/MAXDICTSPERTT are arbitrary limits on the workspace size + * used in lookup_ts_config_cache(). We could avoid hardwiring a limit + * by making the workspace dynamically enlargeable, but it seems unlikely + * to be worth the trouble. + */ +#define MAXTOKENTYPE 256 +#define MAXDICTSPERTT 100 + + +static HTAB *TSParserCacheHash = NULL; +static TSParserCacheEntry *lastUsedParser = NULL; + +static HTAB *TSDictionaryCacheHash = NULL; +static TSDictionaryCacheEntry *lastUsedDictionary = NULL; + +static HTAB *TSConfigCacheHash = NULL; +static TSConfigCacheEntry *lastUsedConfig = NULL; + +/* + * GUC default_text_search_config, and a cache of the current config's OID + */ +char *TSCurrentConfig = NULL; + +static Oid TSCurrentConfigCache = InvalidOid; + + +/* + * We use this syscache callback to detect when a visible change to a TS + * catalog entry has been made, by either our own backend or another one. 
+ * + * In principle we could just flush the specific cache entry that changed, + * but given that TS configuration changes are probably infrequent, it + * doesn't seem worth the trouble to determine that; we just flush all the + * entries of the related hash table. + * + * We can use the same function for all TS caches by passing the hash + * table address as the "arg". + */ +static void +InvalidateTSCacheCallBack(Datum arg, int cacheid, uint32 hashvalue) +{ + HTAB *hash = (HTAB *) DatumGetPointer(arg); + HASH_SEQ_STATUS status; + TSAnyCacheEntry *entry; + + hash_seq_init(&status, hash); + while ((entry = (TSAnyCacheEntry *) hash_seq_search(&status)) != NULL) + entry->isvalid = false; + + /* Also invalidate the current-config cache if it's pg_ts_config */ + if (hash == TSConfigCacheHash) + TSCurrentConfigCache = InvalidOid; +} + +/* + * Fetch parser cache entry + */ +TSParserCacheEntry * +lookup_ts_parser_cache(Oid prsId) +{ + TSParserCacheEntry *entry; + + if (TSParserCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSParserCacheEntry); + TSParserCacheHash = hash_create("Tsearch parser cache", 4, + &ctl, HASH_ELEM | HASH_BLOBS); + /* Flush cache on pg_ts_parser changes */ + CacheRegisterSyscacheCallback(TSPARSEROID, InvalidateTSCacheCallBack, + PointerGetDatum(TSParserCacheHash)); + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + } + + /* Check single-entry cache */ + if (lastUsedParser && lastUsedParser->prsId == prsId && + lastUsedParser->isvalid) + return lastUsedParser; + + /* Try to look up an existing entry */ + entry = (TSParserCacheEntry *) hash_search(TSParserCacheHash, + (void *) &prsId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. But first look up the + * object to be sure the OID is real. 
+ */ + HeapTuple tp; + Form_pg_ts_parser prs; + + tp = SearchSysCache1(TSPARSEROID, ObjectIdGetDatum(prsId)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for text search parser %u", + prsId); + prs = (Form_pg_ts_parser) GETSTRUCT(tp); + + /* + * Sanity checks + */ + if (!OidIsValid(prs->prsstart)) + elog(ERROR, "text search parser %u has no prsstart method", prsId); + if (!OidIsValid(prs->prstoken)) + elog(ERROR, "text search parser %u has no prstoken method", prsId); + if (!OidIsValid(prs->prsend)) + elog(ERROR, "text search parser %u has no prsend method", prsId); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSParserCacheEntry *) + hash_search(TSParserCacheHash, + (void *) &prsId, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + } + + MemSet(entry, 0, sizeof(TSParserCacheEntry)); + entry->prsId = prsId; + entry->startOid = prs->prsstart; + entry->tokenOid = prs->prstoken; + entry->endOid = prs->prsend; + entry->headlineOid = prs->prsheadline; + entry->lextypeOid = prs->prslextype; + + ReleaseSysCache(tp); + + fmgr_info_cxt(entry->startOid, &entry->prsstart, CacheMemoryContext); + fmgr_info_cxt(entry->tokenOid, &entry->prstoken, CacheMemoryContext); + fmgr_info_cxt(entry->endOid, &entry->prsend, CacheMemoryContext); + if (OidIsValid(entry->headlineOid)) + fmgr_info_cxt(entry->headlineOid, &entry->prsheadline, + CacheMemoryContext); + + entry->isvalid = true; + } + + lastUsedParser = entry; + + return entry; +} + +/* + * Fetch dictionary cache entry + */ +TSDictionaryCacheEntry * +lookup_ts_dictionary_cache(Oid dictId) +{ + TSDictionaryCacheEntry *entry; + + if (TSDictionaryCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSDictionaryCacheEntry); + TSDictionaryCacheHash = hash_create("Tsearch dictionary cache", 8, + &ctl, HASH_ELEM | HASH_BLOBS); + /* Flush cache on pg_ts_dict 
and pg_ts_template changes */ + CacheRegisterSyscacheCallback(TSDICTOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSDictionaryCacheHash)); + CacheRegisterSyscacheCallback(TSTEMPLATEOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSDictionaryCacheHash)); + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + } + + /* Check single-entry cache */ + if (lastUsedDictionary && lastUsedDictionary->dictId == dictId && + lastUsedDictionary->isvalid) + return lastUsedDictionary; + + /* Try to look up an existing entry */ + entry = (TSDictionaryCacheEntry *) hash_search(TSDictionaryCacheHash, + (void *) &dictId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. But first look up the + * object to be sure the OID is real. + */ + HeapTuple tpdict, + tptmpl; + Form_pg_ts_dict dict; + Form_pg_ts_template template; + MemoryContext saveCtx; + + tpdict = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictId)); + if (!HeapTupleIsValid(tpdict)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + dict = (Form_pg_ts_dict) GETSTRUCT(tpdict); + + /* + * Sanity checks + */ + if (!OidIsValid(dict->dicttemplate)) + elog(ERROR, "text search dictionary %u has no template", dictId); + + /* + * Retrieve dictionary's template + */ + tptmpl = SearchSysCache1(TSTEMPLATEOID, + ObjectIdGetDatum(dict->dicttemplate)); + if (!HeapTupleIsValid(tptmpl)) + elog(ERROR, "cache lookup failed for text search template %u", + dict->dicttemplate); + template = (Form_pg_ts_template) GETSTRUCT(tptmpl); + + /* + * Sanity checks + */ + if (!OidIsValid(template->tmpllexize)) + elog(ERROR, "text search template %u has no lexize method", + template->tmpllexize); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSDictionaryCacheEntry *) + hash_search(TSDictionaryCacheHash, + (void *) &dictId, + HASH_ENTER, &found); + Assert(!found); 
/* it wasn't there a moment ago */ + + /* Create private memory context the first time through */ + saveCtx = AllocSetContextCreate(CacheMemoryContext, + "TS dictionary", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(saveCtx, NameStr(dict->dictname)); + } + else + { + /* Clear the existing entry's private context */ + saveCtx = entry->dictCtx; + /* Don't let context's ident pointer dangle while we reset it */ + MemoryContextSetIdentifier(saveCtx, NULL); + MemoryContextReset(saveCtx); + MemoryContextCopyAndSetIdentifier(saveCtx, NameStr(dict->dictname)); + } + + MemSet(entry, 0, sizeof(TSDictionaryCacheEntry)); + entry->dictId = dictId; + entry->dictCtx = saveCtx; + + entry->lexizeOid = template->tmpllexize; + + if (OidIsValid(template->tmplinit)) + { + List *dictoptions; + Datum opt; + bool isnull; + MemoryContext oldcontext; + + /* + * Init method runs in dictionary's private memory context, and we + * make sure the options are stored there too + */ + oldcontext = MemoryContextSwitchTo(entry->dictCtx); + + opt = SysCacheGetAttr(TSDICTOID, tpdict, + Anum_pg_ts_dict_dictinitoption, + &isnull); + if (isnull) + dictoptions = NIL; + else + dictoptions = deserialize_deflist(opt); + + entry->dictData = + DatumGetPointer(OidFunctionCall1(template->tmplinit, + PointerGetDatum(dictoptions))); + + MemoryContextSwitchTo(oldcontext); + } + + ReleaseSysCache(tptmpl); + ReleaseSysCache(tpdict); + + fmgr_info_cxt(entry->lexizeOid, &entry->lexize, entry->dictCtx); + + entry->isvalid = true; + } + + lastUsedDictionary = entry; + + return entry; +} + +/* + * Initialize config cache and prepare callbacks. This is split out of + * lookup_ts_config_cache because we need to activate the callback before + * caching TSCurrentConfigCache, too. 
+ */ +static void +init_ts_config_cache(void) +{ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSConfigCacheEntry); + TSConfigCacheHash = hash_create("Tsearch configuration cache", 16, + &ctl, HASH_ELEM | HASH_BLOBS); + /* Flush cache on pg_ts_config and pg_ts_config_map changes */ + CacheRegisterSyscacheCallback(TSCONFIGOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSConfigCacheHash)); + CacheRegisterSyscacheCallback(TSCONFIGMAP, InvalidateTSCacheCallBack, + PointerGetDatum(TSConfigCacheHash)); + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); +} + +/* + * Fetch configuration cache entry + */ +TSConfigCacheEntry * +lookup_ts_config_cache(Oid cfgId) +{ + TSConfigCacheEntry *entry; + + if (TSConfigCacheHash == NULL) + { + /* First time through: initialize the hash table */ + init_ts_config_cache(); + } + + /* Check single-entry cache */ + if (lastUsedConfig && lastUsedConfig->cfgId == cfgId && + lastUsedConfig->isvalid) + return lastUsedConfig; + + /* Try to look up an existing entry */ + entry = (TSConfigCacheEntry *) hash_search(TSConfigCacheHash, + (void *) &cfgId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. But first look up the + * object to be sure the OID is real. 
+ */ + HeapTuple tp; + Form_pg_ts_config cfg; + Relation maprel; + Relation mapidx; + ScanKeyData mapskey; + SysScanDesc mapscan; + HeapTuple maptup; + ListDictionary maplists[MAXTOKENTYPE + 1]; + Oid mapdicts[MAXDICTSPERTT]; + int maxtokentype; + int ndicts; + int i; + + tp = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + cfg = (Form_pg_ts_config) GETSTRUCT(tp); + + /* + * Sanity checks + */ + if (!OidIsValid(cfg->cfgparser)) + elog(ERROR, "text search configuration %u has no parser", cfgId); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSConfigCacheEntry *) + hash_search(TSConfigCacheHash, + (void *) &cfgId, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + } + else + { + /* Cleanup old contents */ + if (entry->map) + { + for (i = 0; i < entry->lenmap; i++) + if (entry->map[i].dictIds) + pfree(entry->map[i].dictIds); + pfree(entry->map); + } + } + + MemSet(entry, 0, sizeof(TSConfigCacheEntry)); + entry->cfgId = cfgId; + entry->prsId = cfg->cfgparser; + + ReleaseSysCache(tp); + + /* + * Scan pg_ts_config_map to gather dictionary list for each token type + * + * Because the index is on (mapcfg, maptokentype, mapseqno), we will + * see the entries in maptokentype order, and in mapseqno order for + * each token type, even though we didn't explicitly ask for that. 
+ */ + MemSet(maplists, 0, sizeof(maplists)); + maxtokentype = 0; + ndicts = 0; + + ScanKeyInit(&mapskey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + maprel = table_open(TSConfigMapRelationId, AccessShareLock); + mapidx = index_open(TSConfigMapIndexId, AccessShareLock); + mapscan = systable_beginscan_ordered(maprel, mapidx, + NULL, 1, &mapskey); + + while ((maptup = systable_getnext_ordered(mapscan, ForwardScanDirection)) != NULL) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + int toktype = cfgmap->maptokentype; + + if (toktype <= 0 || toktype > MAXTOKENTYPE) + elog(ERROR, "maptokentype value %d is out of range", toktype); + if (toktype < maxtokentype) + elog(ERROR, "maptokentype entries are out of order"); + if (toktype > maxtokentype) + { + /* starting a new token type, but first save the prior data */ + if (ndicts > 0) + { + maplists[maxtokentype].len = ndicts; + maplists[maxtokentype].dictIds = (Oid *) + MemoryContextAlloc(CacheMemoryContext, + sizeof(Oid) * ndicts); + memcpy(maplists[maxtokentype].dictIds, mapdicts, + sizeof(Oid) * ndicts); + } + maxtokentype = toktype; + mapdicts[0] = cfgmap->mapdict; + ndicts = 1; + } + else + { + /* continuing data for current token type */ + if (ndicts >= MAXDICTSPERTT) + elog(ERROR, "too many pg_ts_config_map entries for one token type"); + mapdicts[ndicts++] = cfgmap->mapdict; + } + } + + systable_endscan_ordered(mapscan); + index_close(mapidx, AccessShareLock); + table_close(maprel, AccessShareLock); + + if (ndicts > 0) + { + /* save the last token type's dictionaries */ + maplists[maxtokentype].len = ndicts; + maplists[maxtokentype].dictIds = (Oid *) + MemoryContextAlloc(CacheMemoryContext, + sizeof(Oid) * ndicts); + memcpy(maplists[maxtokentype].dictIds, mapdicts, + sizeof(Oid) * ndicts); + /* and save the overall map */ + entry->lenmap = maxtokentype + 1; + entry->map = (ListDictionary *) + MemoryContextAlloc(CacheMemoryContext, 
+ sizeof(ListDictionary) * entry->lenmap); + memcpy(entry->map, maplists, + sizeof(ListDictionary) * entry->lenmap); + } + + entry->isvalid = true; + } + + lastUsedConfig = entry; + + return entry; +} + + +/*--------------------------------------------------- + * GUC variable "default_text_search_config" + *--------------------------------------------------- + */ + +Oid +getTSCurrentConfig(bool emitError) +{ + /* if we have a cached value, return it */ + if (OidIsValid(TSCurrentConfigCache)) + return TSCurrentConfigCache; + + /* fail if GUC hasn't been set up yet */ + if (TSCurrentConfig == NULL || *TSCurrentConfig == '\0') + { + if (emitError) + elog(ERROR, "text search configuration isn't set"); + else + return InvalidOid; + } + + if (TSConfigCacheHash == NULL) + { + /* First time through: initialize the tsconfig inval callback */ + init_ts_config_cache(); + } + + /* Look up the config */ + TSCurrentConfigCache = + get_ts_config_oid(stringToQualifiedNameList(TSCurrentConfig), + !emitError); + + return TSCurrentConfigCache; +} + +/* GUC check_hook for default_text_search_config */ +bool +check_TSCurrentConfig(char **newval, void **extra, GucSource source) +{ + /* + * If we aren't inside a transaction, or connected to a database, we + * cannot do the catalog accesses necessary to verify the config name. + * Must accept it on faith. + */ + if (IsTransactionState() && MyDatabaseId != InvalidOid) + { + Oid cfgId; + HeapTuple tuple; + Form_pg_ts_config cfg; + char *buf; + + cfgId = get_ts_config_oid(stringToQualifiedNameList(*newval), true); + + /* + * When source == PGC_S_TEST, don't throw a hard error for a + * nonexistent configuration, only a NOTICE. See comments in guc.h. 
+ */ + if (!OidIsValid(cfgId)) + { + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", *newval))); + return true; + } + else + return false; + } + + /* + * Modify the actually stored value to be fully qualified, to ensure + * later changes of search_path don't affect it. + */ + tuple = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + cfg = (Form_pg_ts_config) GETSTRUCT(tuple); + + buf = quote_qualified_identifier(get_namespace_name(cfg->cfgnamespace), + NameStr(cfg->cfgname)); + + ReleaseSysCache(tuple); + + /* GUC wants it malloc'd not palloc'd */ + free(*newval); + *newval = strdup(buf); + pfree(buf); + if (!*newval) + return false; + } + + return true; +} + +/* GUC assign_hook for default_text_search_config */ +void +assign_TSCurrentConfig(const char *newval, void *extra) +{ + /* Just reset the cache to force a lookup on first use */ + TSCurrentConfigCache = InvalidOid; +} diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c new file mode 100644 index 0000000..c1ca0b2 --- /dev/null +++ b/src/backend/utils/cache/typcache.c @@ -0,0 +1,2883 @@ +/*------------------------------------------------------------------------- + * + * typcache.c + * POSTGRES type cache code + * + * The type cache exists to speed lookup of certain information about data + * types that is not directly available from a type's pg_type row. For + * example, we use a type's default btree opclass, or the default hash + * opclass if no btree opclass exists, to determine which operators should + * be used for grouping and sorting the type (GROUP BY, ORDER BY ASC/DESC). 
+ * + * Several seemingly-odd choices have been made to support use of the type + * cache by generic array and record handling routines, such as array_eq(), + * record_cmp(), and hash_array(). Because those routines are used as index + * support operations, they cannot leak memory. To allow them to execute + * efficiently, all information that they would like to re-use across calls + * is kept in the type cache. + * + * Once created, a type cache entry lives as long as the backend does, so + * there is no need for a call to release a cache entry. If the type is + * dropped, the cache entry simply becomes wasted storage. This is not + * expected to happen often, and assuming that typcache entries are good + * permanently allows caching pointers to them in long-lived places. + * + * We have some provisions for updating cache entries if the stored data + * becomes obsolete. Core data extracted from the pg_type row is updated + * when we detect updates to pg_type. Information dependent on opclasses is + * cleared if we detect updates to pg_opclass. We also support clearing the + * tuple descriptor and operator/function parts of a rowtype's cache entry, + * since those may need to change as a consequence of ALTER TABLE. Domain + * constraint changes are also tracked properly. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/typcache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/parallel.h" +#include "access/relation.h" +#include "access/session.h" +#include "access/table.h" +#include "catalog/pg_am.h" +#include "catalog/pg_constraint.h" +#include "catalog/pg_enum.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_range.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "executor/executor.h" +#include "lib/dshash.h" +#include "optimizer/optimizer.h" +#include "storage/lwlock.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +/* The main type cache hashtable searched by lookup_type_cache */ +static HTAB *TypeCacheHash = NULL; + +/* List of type cache entries for domain types */ +static TypeCacheEntry *firstDomainTypeEntry = NULL; + +/* Private flag bits in the TypeCacheEntry.flags field */ +#define TCFLAGS_HAVE_PG_TYPE_DATA 0x000001 +#define TCFLAGS_CHECKED_BTREE_OPCLASS 0x000002 +#define TCFLAGS_CHECKED_HASH_OPCLASS 0x000004 +#define TCFLAGS_CHECKED_EQ_OPR 0x000008 +#define TCFLAGS_CHECKED_LT_OPR 0x000010 +#define TCFLAGS_CHECKED_GT_OPR 0x000020 +#define TCFLAGS_CHECKED_CMP_PROC 0x000040 +#define TCFLAGS_CHECKED_HASH_PROC 0x000080 +#define TCFLAGS_CHECKED_HASH_EXTENDED_PROC 0x000100 +#define TCFLAGS_CHECKED_ELEM_PROPERTIES 0x000200 +#define TCFLAGS_HAVE_ELEM_EQUALITY 0x000400 +#define TCFLAGS_HAVE_ELEM_COMPARE 0x000800 +#define 
TCFLAGS_HAVE_ELEM_HASHING 0x001000 +#define TCFLAGS_HAVE_ELEM_EXTENDED_HASHING 0x002000 +#define TCFLAGS_CHECKED_FIELD_PROPERTIES 0x004000 +#define TCFLAGS_HAVE_FIELD_EQUALITY 0x008000 +#define TCFLAGS_HAVE_FIELD_COMPARE 0x010000 +#define TCFLAGS_HAVE_FIELD_HASHING 0x020000 +#define TCFLAGS_HAVE_FIELD_EXTENDED_HASHING 0x040000 +#define TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS 0x080000 +#define TCFLAGS_DOMAIN_BASE_IS_COMPOSITE 0x100000 + +/* The flags associated with equality/comparison/hashing are all but these: */ +#define TCFLAGS_OPERATOR_FLAGS \ + (~(TCFLAGS_HAVE_PG_TYPE_DATA | \ + TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS | \ + TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)) + +/* + * Data stored about a domain type's constraints. Note that we do not create + * this struct for the common case of a constraint-less domain; we just set + * domainData to NULL to indicate that. + * + * Within a DomainConstraintCache, we store expression plan trees, but the + * check_exprstate fields of the DomainConstraintState nodes are just NULL. + * When needed, expression evaluation nodes are built by flat-copying the + * DomainConstraintState nodes and applying ExecInitExpr to check_expr. + * Such a node tree is not part of the DomainConstraintCache, but is + * considered to belong to a DomainConstraintRef. 
+ */ +struct DomainConstraintCache +{ + List *constraints; /* list of DomainConstraintState nodes */ + MemoryContext dccContext; /* memory context holding all associated data */ + long dccRefCount; /* number of references to this struct */ +}; + +/* Private information to support comparisons of enum values */ +typedef struct +{ + Oid enum_oid; /* OID of one enum value */ + float4 sort_order; /* its sort position */ +} EnumItem; + +typedef struct TypeCacheEnumData +{ + Oid bitmap_base; /* OID corresponding to bit 0 of bitmapset */ + Bitmapset *sorted_values; /* Set of OIDs known to be in order */ + int num_values; /* total number of values in enum */ + EnumItem enum_values[FLEXIBLE_ARRAY_MEMBER]; +} TypeCacheEnumData; + +/* + * We use a separate table for storing the definitions of non-anonymous + * record types. Once defined, a record type will be remembered for the + * life of the backend. Subsequent uses of the "same" record type (where + * sameness means equalTupleDescs) will refer to the existing table entry. + * + * Stored record types are remembered in a linear array of TupleDescs, + * which can be indexed quickly with the assigned typmod. There is also + * a hash table to speed searches for matching TupleDescs. + */ + +typedef struct RecordCacheEntry +{ + TupleDesc tupdesc; +} RecordCacheEntry; + +/* + * To deal with non-anonymous record types that are exchanged by backends + * involved in a parallel query, we also need a shared version of the above. + */ +struct SharedRecordTypmodRegistry +{ + /* A hash table for finding a matching TupleDesc. */ + dshash_table_handle record_table_handle; + /* A hash table for finding a TupleDesc by typmod. */ + dshash_table_handle typmod_table_handle; + /* A source of new record typmod numbers. */ + pg_atomic_uint32 next_typmod; +}; + +/* + * When using shared tuple descriptors as hash table keys we need a way to be + * able to search for an equal shared TupleDesc using a backend-local + * TupleDesc. 
So we use this type which can hold either, and hash and compare + * functions that know how to handle both. + */ +typedef struct SharedRecordTableKey +{ + union + { + TupleDesc local_tupdesc; + dsa_pointer shared_tupdesc; + } u; + bool shared; +} SharedRecordTableKey; + +/* + * The shared version of RecordCacheEntry. This lets us look up a typmod + * using a TupleDesc which may be in local or shared memory. + */ +typedef struct SharedRecordTableEntry +{ + SharedRecordTableKey key; +} SharedRecordTableEntry; + +/* + * An entry in SharedRecordTypmodRegistry's typmod table. This lets us look + * up a TupleDesc in shared memory using a typmod. + */ +typedef struct SharedTypmodTableEntry +{ + uint32 typmod; + dsa_pointer shared_tupdesc; +} SharedTypmodTableEntry; + +/* + * A comparator function for SharedRecordTableKey. + */ +static int +shared_record_table_compare(const void *a, const void *b, size_t size, + void *arg) +{ + dsa_area *area = (dsa_area *) arg; + SharedRecordTableKey *k1 = (SharedRecordTableKey *) a; + SharedRecordTableKey *k2 = (SharedRecordTableKey *) b; + TupleDesc t1; + TupleDesc t2; + + if (k1->shared) + t1 = (TupleDesc) dsa_get_address(area, k1->u.shared_tupdesc); + else + t1 = k1->u.local_tupdesc; + + if (k2->shared) + t2 = (TupleDesc) dsa_get_address(area, k2->u.shared_tupdesc); + else + t2 = k2->u.local_tupdesc; + + return equalTupleDescs(t1, t2) ? 0 : 1; +} + +/* + * A hash function for SharedRecordTableKey. + */ +static uint32 +shared_record_table_hash(const void *a, size_t size, void *arg) +{ + dsa_area *area = (dsa_area *) arg; + SharedRecordTableKey *k = (SharedRecordTableKey *) a; + TupleDesc t; + + if (k->shared) + t = (TupleDesc) dsa_get_address(area, k->u.shared_tupdesc); + else + t = k->u.local_tupdesc; + + return hashTupleDesc(t); +} + +/* Parameters for SharedRecordTypmodRegistry's TupleDesc table. 
*/ +static const dshash_parameters srtr_record_table_params = { + sizeof(SharedRecordTableKey), /* unused */ + sizeof(SharedRecordTableEntry), + shared_record_table_compare, + shared_record_table_hash, + LWTRANCHE_PER_SESSION_RECORD_TYPE +}; + +/* Parameters for SharedRecordTypmodRegistry's typmod hash table. */ +static const dshash_parameters srtr_typmod_table_params = { + sizeof(uint32), + sizeof(SharedTypmodTableEntry), + dshash_memcmp, + dshash_memhash, + LWTRANCHE_PER_SESSION_RECORD_TYPMOD +}; + +/* hashtable for recognizing registered record types */ +static HTAB *RecordCacheHash = NULL; + +/* arrays of info about registered record types, indexed by assigned typmod */ +static TupleDesc *RecordCacheArray = NULL; +static uint64 *RecordIdentifierArray = NULL; +static int32 RecordCacheArrayLen = 0; /* allocated length of above arrays */ +static int32 NextRecordTypmod = 0; /* number of entries used */ + +/* + * Process-wide counter for generating unique tupledesc identifiers. + * Zero and one (INVALID_TUPLEDESC_IDENTIFIER) aren't allowed to be chosen + * as identifiers, so we start the counter at INVALID_TUPLEDESC_IDENTIFIER. 
+ */ +static uint64 tupledesc_id_counter = INVALID_TUPLEDESC_IDENTIFIER; + +static void load_typcache_tupdesc(TypeCacheEntry *typentry); +static void load_rangetype_info(TypeCacheEntry *typentry); +static void load_multirangetype_info(TypeCacheEntry *typentry); +static void load_domaintype_info(TypeCacheEntry *typentry); +static int dcs_cmp(const void *a, const void *b); +static void decr_dcc_refcount(DomainConstraintCache *dcc); +static void dccref_deletion_callback(void *arg); +static List *prep_domain_constraints(List *constraints, MemoryContext execctx); +static bool array_element_has_equality(TypeCacheEntry *typentry); +static bool array_element_has_compare(TypeCacheEntry *typentry); +static bool array_element_has_hashing(TypeCacheEntry *typentry); +static bool array_element_has_extended_hashing(TypeCacheEntry *typentry); +static void cache_array_element_properties(TypeCacheEntry *typentry); +static bool record_fields_have_equality(TypeCacheEntry *typentry); +static bool record_fields_have_compare(TypeCacheEntry *typentry); +static bool record_fields_have_hashing(TypeCacheEntry *typentry); +static bool record_fields_have_extended_hashing(TypeCacheEntry *typentry); +static void cache_record_field_properties(TypeCacheEntry *typentry); +static bool range_element_has_hashing(TypeCacheEntry *typentry); +static bool range_element_has_extended_hashing(TypeCacheEntry *typentry); +static void cache_range_element_properties(TypeCacheEntry *typentry); +static bool multirange_element_has_hashing(TypeCacheEntry *typentry); +static bool multirange_element_has_extended_hashing(TypeCacheEntry *typentry); +static void cache_multirange_element_properties(TypeCacheEntry *typentry); +static void TypeCacheRelCallback(Datum arg, Oid relid); +static void TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue); +static void TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue); +static void TypeCacheConstrCallback(Datum arg, int cacheid, uint32 hashvalue); +static 
void load_enum_cache_data(TypeCacheEntry *tcache); +static EnumItem *find_enumitem(TypeCacheEnumData *enumdata, Oid arg); +static int enum_oid_cmp(const void *left, const void *right); +static void shared_record_typmod_registry_detach(dsm_segment *segment, + Datum datum); +static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc); +static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc, + uint32 typmod); + + +/* + * lookup_type_cache + * + * Fetch the type cache entry for the specified datatype, and make sure that + * all the fields requested by bits in 'flags' are valid. + * + * The result is never NULL --- we will ereport() if the passed type OID is + * invalid. Note however that we may fail to find one or more of the + * values requested by 'flags'; the caller needs to check whether the fields + * are InvalidOid or not. + */ +TypeCacheEntry * +lookup_type_cache(Oid type_id, int flags) +{ + TypeCacheEntry *typentry; + bool found; + + if (TypeCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TypeCacheEntry); + TypeCacheHash = hash_create("Type information cache", 64, + &ctl, HASH_ELEM | HASH_BLOBS); + + /* Also set up callbacks for SI invalidations */ + CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0); + CacheRegisterSyscacheCallback(TYPEOID, TypeCacheTypCallback, (Datum) 0); + CacheRegisterSyscacheCallback(CLAOID, TypeCacheOpcCallback, (Datum) 0); + CacheRegisterSyscacheCallback(CONSTROID, TypeCacheConstrCallback, (Datum) 0); + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + } + + /* Try to look up an existing entry */ + typentry = (TypeCacheEntry *) hash_search(TypeCacheHash, + (void *) &type_id, + HASH_FIND, NULL); + if (typentry == NULL) + { + /* + * If we didn't find one, we want to make one. 
But first look up the + * pg_type row, just to make sure we don't make a cache entry for an + * invalid type OID. If the type OID is not valid, present a + * user-facing error, since some code paths such as domain_in() allow + * this function to be reached with a user-supplied OID. + */ + HeapTuple tp; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_id)); + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type with OID %u does not exist", type_id))); + typtup = (Form_pg_type) GETSTRUCT(tp); + if (!typtup->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" is only a shell", + NameStr(typtup->typname)))); + + /* Now make the typcache entry */ + typentry = (TypeCacheEntry *) hash_search(TypeCacheHash, + (void *) &type_id, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + + MemSet(typentry, 0, sizeof(TypeCacheEntry)); + + /* These fields can never change, by definition */ + typentry->type_id = type_id; + typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID, + ObjectIdGetDatum(type_id)); + + /* Keep this part in sync with the code below */ + typentry->typlen = typtup->typlen; + typentry->typbyval = typtup->typbyval; + typentry->typalign = typtup->typalign; + typentry->typstorage = typtup->typstorage; + typentry->typtype = typtup->typtype; + typentry->typrelid = typtup->typrelid; + typentry->typsubscript = typtup->typsubscript; + typentry->typelem = typtup->typelem; + typentry->typcollation = typtup->typcollation; + typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA; + + /* If it's a domain, immediately thread it into the domain cache list */ + if (typentry->typtype == TYPTYPE_DOMAIN) + { + typentry->nextDomain = firstDomainTypeEntry; + firstDomainTypeEntry = typentry; + } + + ReleaseSysCache(tp); + } + else if (!(typentry->flags & TCFLAGS_HAVE_PG_TYPE_DATA)) + { + /* + * We have an entry, but its pg_type row got changed, so reload the 
+ * data obtained directly from pg_type. + */ + HeapTuple tp; + Form_pg_type typtup; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_id)); + if (!HeapTupleIsValid(tp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type with OID %u does not exist", type_id))); + typtup = (Form_pg_type) GETSTRUCT(tp); + if (!typtup->typisdefined) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("type \"%s\" is only a shell", + NameStr(typtup->typname)))); + + /* + * Keep this part in sync with the code above. Many of these fields + * shouldn't ever change, particularly typtype, but copy 'em anyway. + */ + typentry->typlen = typtup->typlen; + typentry->typbyval = typtup->typbyval; + typentry->typalign = typtup->typalign; + typentry->typstorage = typtup->typstorage; + typentry->typtype = typtup->typtype; + typentry->typrelid = typtup->typrelid; + typentry->typsubscript = typtup->typsubscript; + typentry->typelem = typtup->typelem; + typentry->typcollation = typtup->typcollation; + typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA; + + ReleaseSysCache(tp); + } + + /* + * Look up opclasses if we haven't already and any dependent info is + * requested. + */ + if ((flags & (TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR | + TYPECACHE_CMP_PROC | + TYPECACHE_EQ_OPR_FINFO | TYPECACHE_CMP_PROC_FINFO | + TYPECACHE_BTREE_OPFAMILY)) && + !(typentry->flags & TCFLAGS_CHECKED_BTREE_OPCLASS)) + { + Oid opclass; + + opclass = GetDefaultOpClass(type_id, BTREE_AM_OID); + if (OidIsValid(opclass)) + { + typentry->btree_opf = get_opclass_family(opclass); + typentry->btree_opintype = get_opclass_input_type(opclass); + } + else + { + typentry->btree_opf = typentry->btree_opintype = InvalidOid; + } + + /* + * Reset information derived from btree opclass. Note in particular + * that we'll redetermine the eq_opr even if we previously found one; + * this matters in case a btree opclass has been added to a type that + * previously had only a hash opclass. 
+ */ + typentry->flags &= ~(TCFLAGS_CHECKED_EQ_OPR | + TCFLAGS_CHECKED_LT_OPR | + TCFLAGS_CHECKED_GT_OPR | + TCFLAGS_CHECKED_CMP_PROC); + typentry->flags |= TCFLAGS_CHECKED_BTREE_OPCLASS; + } + + /* + * If we need to look up equality operator, and there's no btree opclass, + * force lookup of hash opclass. + */ + if ((flags & (TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO)) && + !(typentry->flags & TCFLAGS_CHECKED_EQ_OPR) && + typentry->btree_opf == InvalidOid) + flags |= TYPECACHE_HASH_OPFAMILY; + + if ((flags & (TYPECACHE_HASH_PROC | TYPECACHE_HASH_PROC_FINFO | + TYPECACHE_HASH_EXTENDED_PROC | + TYPECACHE_HASH_EXTENDED_PROC_FINFO | + TYPECACHE_HASH_OPFAMILY)) && + !(typentry->flags & TCFLAGS_CHECKED_HASH_OPCLASS)) + { + Oid opclass; + + opclass = GetDefaultOpClass(type_id, HASH_AM_OID); + if (OidIsValid(opclass)) + { + typentry->hash_opf = get_opclass_family(opclass); + typentry->hash_opintype = get_opclass_input_type(opclass); + } + else + { + typentry->hash_opf = typentry->hash_opintype = InvalidOid; + } + + /* + * Reset information derived from hash opclass. We do *not* reset the + * eq_opr; if we already found one from the btree opclass, that + * decision is still good. + */ + typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC | + TCFLAGS_CHECKED_HASH_EXTENDED_PROC); + typentry->flags |= TCFLAGS_CHECKED_HASH_OPCLASS; + } + + /* + * Look for requested operators and functions, if we haven't already. 
+ */ + if ((flags & (TYPECACHE_EQ_OPR | TYPECACHE_EQ_OPR_FINFO)) && + !(typentry->flags & TCFLAGS_CHECKED_EQ_OPR)) + { + Oid eq_opr = InvalidOid; + + if (typentry->btree_opf != InvalidOid) + eq_opr = get_opfamily_member(typentry->btree_opf, + typentry->btree_opintype, + typentry->btree_opintype, + BTEqualStrategyNumber); + if (eq_opr == InvalidOid && + typentry->hash_opf != InvalidOid) + eq_opr = get_opfamily_member(typentry->hash_opf, + typentry->hash_opintype, + typentry->hash_opintype, + HTEqualStrategyNumber); + + /* + * If the proposed equality operator is array_eq or record_eq, check + * to see if the element type or column types support equality. If + * not, array_eq or record_eq would fail at runtime, so we don't want + * to report that the type has equality. (We can omit similar + * checking for ranges and multiranges because ranges can't be created + * in the first place unless their subtypes support equality.) + */ + if (eq_opr == ARRAY_EQ_OP && + !array_element_has_equality(typentry)) + eq_opr = InvalidOid; + else if (eq_opr == RECORD_EQ_OP && + !record_fields_have_equality(typentry)) + eq_opr = InvalidOid; + + /* Force update of eq_opr_finfo only if we're changing state */ + if (typentry->eq_opr != eq_opr) + typentry->eq_opr_finfo.fn_oid = InvalidOid; + + typentry->eq_opr = eq_opr; + + /* + * Reset info about hash functions whenever we pick up new info about + * equality operator. This is so we can ensure that the hash + * functions match the operator. 
+ */ + typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC | + TCFLAGS_CHECKED_HASH_EXTENDED_PROC); + typentry->flags |= TCFLAGS_CHECKED_EQ_OPR; + } + if ((flags & TYPECACHE_LT_OPR) && + !(typentry->flags & TCFLAGS_CHECKED_LT_OPR)) + { + Oid lt_opr = InvalidOid; + + if (typentry->btree_opf != InvalidOid) + lt_opr = get_opfamily_member(typentry->btree_opf, + typentry->btree_opintype, + typentry->btree_opintype, + BTLessStrategyNumber); + + /* + * As above, make sure array_cmp or record_cmp will succeed; but again + * we need no special check for ranges or multiranges. + */ + if (lt_opr == ARRAY_LT_OP && + !array_element_has_compare(typentry)) + lt_opr = InvalidOid; + else if (lt_opr == RECORD_LT_OP && + !record_fields_have_compare(typentry)) + lt_opr = InvalidOid; + + typentry->lt_opr = lt_opr; + typentry->flags |= TCFLAGS_CHECKED_LT_OPR; + } + if ((flags & TYPECACHE_GT_OPR) && + !(typentry->flags & TCFLAGS_CHECKED_GT_OPR)) + { + Oid gt_opr = InvalidOid; + + if (typentry->btree_opf != InvalidOid) + gt_opr = get_opfamily_member(typentry->btree_opf, + typentry->btree_opintype, + typentry->btree_opintype, + BTGreaterStrategyNumber); + + /* + * As above, make sure array_cmp or record_cmp will succeed; but again + * we need no special check for ranges or multiranges. 
+ */ + if (gt_opr == ARRAY_GT_OP && + !array_element_has_compare(typentry)) + gt_opr = InvalidOid; + else if (gt_opr == RECORD_GT_OP && + !record_fields_have_compare(typentry)) + gt_opr = InvalidOid; + + typentry->gt_opr = gt_opr; + typentry->flags |= TCFLAGS_CHECKED_GT_OPR; + } + if ((flags & (TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO)) && + !(typentry->flags & TCFLAGS_CHECKED_CMP_PROC)) + { + Oid cmp_proc = InvalidOid; + + if (typentry->btree_opf != InvalidOid) + cmp_proc = get_opfamily_proc(typentry->btree_opf, + typentry->btree_opintype, + typentry->btree_opintype, + BTORDER_PROC); + + /* + * As above, make sure array_cmp or record_cmp will succeed; but again + * we need no special check for ranges or multiranges. + */ + if (cmp_proc == F_BTARRAYCMP && + !array_element_has_compare(typentry)) + cmp_proc = InvalidOid; + else if (cmp_proc == F_BTRECORDCMP && + !record_fields_have_compare(typentry)) + cmp_proc = InvalidOid; + + /* Force update of cmp_proc_finfo only if we're changing state */ + if (typentry->cmp_proc != cmp_proc) + typentry->cmp_proc_finfo.fn_oid = InvalidOid; + + typentry->cmp_proc = cmp_proc; + typentry->flags |= TCFLAGS_CHECKED_CMP_PROC; + } + if ((flags & (TYPECACHE_HASH_PROC | TYPECACHE_HASH_PROC_FINFO)) && + !(typentry->flags & TCFLAGS_CHECKED_HASH_PROC)) + { + Oid hash_proc = InvalidOid; + + /* + * We insist that the eq_opr, if one has been determined, match the + * hash opclass; else report there is no hash function. + */ + if (typentry->hash_opf != InvalidOid && + (!OidIsValid(typentry->eq_opr) || + typentry->eq_opr == get_opfamily_member(typentry->hash_opf, + typentry->hash_opintype, + typentry->hash_opintype, + HTEqualStrategyNumber))) + hash_proc = get_opfamily_proc(typentry->hash_opf, + typentry->hash_opintype, + typentry->hash_opintype, + HASHSTANDARD_PROC); + + /* + * As above, make sure hash_array, hash_record, or hash_range will + * succeed. 
+ */ + if (hash_proc == F_HASH_ARRAY && + !array_element_has_hashing(typentry)) + hash_proc = InvalidOid; + else if (hash_proc == F_HASH_RECORD && + !record_fields_have_hashing(typentry)) + hash_proc = InvalidOid; + else if (hash_proc == F_HASH_RANGE && + !range_element_has_hashing(typentry)) + hash_proc = InvalidOid; + + /* + * Likewise for hash_multirange. + */ + if (hash_proc == F_HASH_MULTIRANGE && + !multirange_element_has_hashing(typentry)) + hash_proc = InvalidOid; + + /* Force update of hash_proc_finfo only if we're changing state */ + if (typentry->hash_proc != hash_proc) + typentry->hash_proc_finfo.fn_oid = InvalidOid; + + typentry->hash_proc = hash_proc; + typentry->flags |= TCFLAGS_CHECKED_HASH_PROC; + } + if ((flags & (TYPECACHE_HASH_EXTENDED_PROC | + TYPECACHE_HASH_EXTENDED_PROC_FINFO)) && + !(typentry->flags & TCFLAGS_CHECKED_HASH_EXTENDED_PROC)) + { + Oid hash_extended_proc = InvalidOid; + + /* + * We insist that the eq_opr, if one has been determined, match the + * hash opclass; else report there is no hash function. + */ + if (typentry->hash_opf != InvalidOid && + (!OidIsValid(typentry->eq_opr) || + typentry->eq_opr == get_opfamily_member(typentry->hash_opf, + typentry->hash_opintype, + typentry->hash_opintype, + HTEqualStrategyNumber))) + hash_extended_proc = get_opfamily_proc(typentry->hash_opf, + typentry->hash_opintype, + typentry->hash_opintype, + HASHEXTENDED_PROC); + + /* + * As above, make sure hash_array_extended, hash_record_extended, or + * hash_range_extended will succeed. 
+ */ + if (hash_extended_proc == F_HASH_ARRAY_EXTENDED && + !array_element_has_extended_hashing(typentry)) + hash_extended_proc = InvalidOid; + else if (hash_extended_proc == F_HASH_RECORD_EXTENDED && + !record_fields_have_extended_hashing(typentry)) + hash_extended_proc = InvalidOid; + else if (hash_extended_proc == F_HASH_RANGE_EXTENDED && + !range_element_has_extended_hashing(typentry)) + hash_extended_proc = InvalidOid; + + /* + * Likewise for hash_multirange_extended. + */ + if (hash_extended_proc == F_HASH_MULTIRANGE_EXTENDED && + !multirange_element_has_extended_hashing(typentry)) + hash_extended_proc = InvalidOid; + + /* Force update of proc finfo only if we're changing state */ + if (typentry->hash_extended_proc != hash_extended_proc) + typentry->hash_extended_proc_finfo.fn_oid = InvalidOid; + + typentry->hash_extended_proc = hash_extended_proc; + typentry->flags |= TCFLAGS_CHECKED_HASH_EXTENDED_PROC; + } + + /* + * Set up fmgr lookup info as requested + * + * Note: we tell fmgr the finfo structures live in CacheMemoryContext, + * which is not quite right (they're really in the hash table's private + * memory context) but this will do for our purposes. + * + * Note: the code above avoids invalidating the finfo structs unless the + * referenced operator/function OID actually changes. This is to prevent + * unnecessary leakage of any subsidiary data attached to an finfo, since + * that would cause session-lifespan memory leaks. 
+ */ + if ((flags & TYPECACHE_EQ_OPR_FINFO) && + typentry->eq_opr_finfo.fn_oid == InvalidOid && + typentry->eq_opr != InvalidOid) + { + Oid eq_opr_func; + + eq_opr_func = get_opcode(typentry->eq_opr); + if (eq_opr_func != InvalidOid) + fmgr_info_cxt(eq_opr_func, &typentry->eq_opr_finfo, + CacheMemoryContext); + } + if ((flags & TYPECACHE_CMP_PROC_FINFO) && + typentry->cmp_proc_finfo.fn_oid == InvalidOid && + typentry->cmp_proc != InvalidOid) + { + fmgr_info_cxt(typentry->cmp_proc, &typentry->cmp_proc_finfo, + CacheMemoryContext); + } + if ((flags & TYPECACHE_HASH_PROC_FINFO) && + typentry->hash_proc_finfo.fn_oid == InvalidOid && + typentry->hash_proc != InvalidOid) + { + fmgr_info_cxt(typentry->hash_proc, &typentry->hash_proc_finfo, + CacheMemoryContext); + } + if ((flags & TYPECACHE_HASH_EXTENDED_PROC_FINFO) && + typentry->hash_extended_proc_finfo.fn_oid == InvalidOid && + typentry->hash_extended_proc != InvalidOid) + { + fmgr_info_cxt(typentry->hash_extended_proc, + &typentry->hash_extended_proc_finfo, + CacheMemoryContext); + } + + /* + * If it's a composite type (row type), get tupdesc if requested + */ + if ((flags & TYPECACHE_TUPDESC) && + typentry->tupDesc == NULL && + typentry->typtype == TYPTYPE_COMPOSITE) + { + load_typcache_tupdesc(typentry); + } + + /* + * If requested, get information about a range type + * + * This includes making sure that the basic info about the range element + * type is up-to-date. 
+ */ + if ((flags & TYPECACHE_RANGE_INFO) && + typentry->typtype == TYPTYPE_RANGE) + { + if (typentry->rngelemtype == NULL) + load_rangetype_info(typentry); + else if (!(typentry->rngelemtype->flags & TCFLAGS_HAVE_PG_TYPE_DATA)) + (void) lookup_type_cache(typentry->rngelemtype->type_id, 0); + } + + /* + * If requested, get information about a multirange type + */ + if ((flags & TYPECACHE_MULTIRANGE_INFO) && + typentry->rngtype == NULL && + typentry->typtype == TYPTYPE_MULTIRANGE) + { + load_multirangetype_info(typentry); + } + + /* + * If requested, get information about a domain type + */ + if ((flags & TYPECACHE_DOMAIN_BASE_INFO) && + typentry->domainBaseType == InvalidOid && + typentry->typtype == TYPTYPE_DOMAIN) + { + typentry->domainBaseTypmod = -1; + typentry->domainBaseType = + getBaseTypeAndTypmod(type_id, &typentry->domainBaseTypmod); + } + if ((flags & TYPECACHE_DOMAIN_CONSTR_INFO) && + (typentry->flags & TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS) == 0 && + typentry->typtype == TYPTYPE_DOMAIN) + { + load_domaintype_info(typentry); + } + + return typentry; +} + +/* + * load_typcache_tupdesc --- helper routine to set up composite type's tupDesc + */ +static void +load_typcache_tupdesc(TypeCacheEntry *typentry) +{ + Relation rel; + + if (!OidIsValid(typentry->typrelid)) /* should not happen */ + elog(ERROR, "invalid typrelid for composite type %u", + typentry->type_id); + rel = relation_open(typentry->typrelid, AccessShareLock); + Assert(rel->rd_rel->reltype == typentry->type_id); + + /* + * Link to the tupdesc and increment its refcount (we assert it's a + * refcounted descriptor). We don't use IncrTupleDescRefCount() for this, + * because the reference mustn't be entered in the current resource owner; + * it can outlive the current query. 
+ */ + typentry->tupDesc = RelationGetDescr(rel); + + Assert(typentry->tupDesc->tdrefcount > 0); + typentry->tupDesc->tdrefcount++; + + /* + * In future, we could take some pains to not change tupDesc_identifier if + * the tupdesc didn't really change; but for now it's not worth it. + */ + typentry->tupDesc_identifier = ++tupledesc_id_counter; + + relation_close(rel, AccessShareLock); +} + +/* + * load_rangetype_info --- helper routine to set up range type information + */ +static void +load_rangetype_info(TypeCacheEntry *typentry) +{ + Form_pg_range pg_range; + HeapTuple tup; + Oid subtypeOid; + Oid opclassOid; + Oid canonicalOid; + Oid subdiffOid; + Oid opfamilyOid; + Oid opcintype; + Oid cmpFnOid; + + /* get information from pg_range */ + tup = SearchSysCache1(RANGETYPE, ObjectIdGetDatum(typentry->type_id)); + /* should not fail, since we already checked typtype ... */ + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for range type %u", + typentry->type_id); + pg_range = (Form_pg_range) GETSTRUCT(tup); + + subtypeOid = pg_range->rngsubtype; + typentry->rng_collation = pg_range->rngcollation; + opclassOid = pg_range->rngsubopc; + canonicalOid = pg_range->rngcanonical; + subdiffOid = pg_range->rngsubdiff; + + ReleaseSysCache(tup); + + /* get opclass properties and look up the comparison function */ + opfamilyOid = get_opclass_family(opclassOid); + opcintype = get_opclass_input_type(opclassOid); + + cmpFnOid = get_opfamily_proc(opfamilyOid, opcintype, opcintype, + BTORDER_PROC); + if (!RegProcedureIsValid(cmpFnOid)) + elog(ERROR, "missing support function %d(%u,%u) in opfamily %u", + BTORDER_PROC, opcintype, opcintype, opfamilyOid); + + /* set up cached fmgrinfo structs */ + fmgr_info_cxt(cmpFnOid, &typentry->rng_cmp_proc_finfo, + CacheMemoryContext); + if (OidIsValid(canonicalOid)) + fmgr_info_cxt(canonicalOid, &typentry->rng_canonical_finfo, + CacheMemoryContext); + if (OidIsValid(subdiffOid)) + fmgr_info_cxt(subdiffOid, 
&typentry->rng_subdiff_finfo, + CacheMemoryContext); + + /* Lastly, set up link to the element type --- this marks data valid */ + typentry->rngelemtype = lookup_type_cache(subtypeOid, 0); +} + +/* + * load_multirangetype_info --- helper routine to set up multirange type + * information + */ +static void +load_multirangetype_info(TypeCacheEntry *typentry) +{ + Oid rangetypeOid; + + rangetypeOid = get_multirange_range(typentry->type_id); + if (!OidIsValid(rangetypeOid)) + elog(ERROR, "cache lookup failed for multirange type %u", + typentry->type_id); + + typentry->rngtype = lookup_type_cache(rangetypeOid, TYPECACHE_RANGE_INFO); +} + +/* + * load_domaintype_info --- helper routine to set up domain constraint info + * + * Note: we assume we're called in a relatively short-lived context, so it's + * okay to leak data into the current context while scanning pg_constraint. + * We build the new DomainConstraintCache data in a context underneath + * CurrentMemoryContext, and reparent it under CacheMemoryContext when + * complete. + */ +static void +load_domaintype_info(TypeCacheEntry *typentry) +{ + Oid typeOid = typentry->type_id; + DomainConstraintCache *dcc; + bool notNull = false; + DomainConstraintState **ccons; + int cconslen; + Relation conRel; + MemoryContext oldcxt; + + /* + * If we're here, any existing constraint info is stale, so release it. + * For safety, be sure to null the link before trying to delete the data. + */ + if (typentry->domainData) + { + dcc = typentry->domainData; + typentry->domainData = NULL; + decr_dcc_refcount(dcc); + } + + /* + * We try to optimize the common case of no domain constraints, so don't + * create the dcc object and context until we find a constraint. Likewise + * for the temp sorting array. + */ + dcc = NULL; + ccons = NULL; + cconslen = 0; + + /* + * Scan pg_constraint for relevant constraints. We want to find + * constraints for not just this domain, but any ancestor domains, so the + * outer loop crawls up the domain stack. 
+ */ + conRel = table_open(ConstraintRelationId, AccessShareLock); + + for (;;) + { + HeapTuple tup; + HeapTuple conTup; + Form_pg_type typTup; + int nccons = 0; + ScanKeyData key[1]; + SysScanDesc scan; + + tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for type %u", typeOid); + typTup = (Form_pg_type) GETSTRUCT(tup); + + if (typTup->typtype != TYPTYPE_DOMAIN) + { + /* Not a domain, so done */ + ReleaseSysCache(tup); + break; + } + + /* Test for NOT NULL Constraint */ + if (typTup->typnotnull) + notNull = true; + + /* Look for CHECK Constraints on this domain */ + ScanKeyInit(&key[0], + Anum_pg_constraint_contypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(typeOid)); + + scan = systable_beginscan(conRel, ConstraintTypidIndexId, true, + NULL, 1, key); + + while (HeapTupleIsValid(conTup = systable_getnext(scan))) + { + Form_pg_constraint c = (Form_pg_constraint) GETSTRUCT(conTup); + Datum val; + bool isNull; + char *constring; + Expr *check_expr; + DomainConstraintState *r; + + /* Ignore non-CHECK constraints (presently, shouldn't be any) */ + if (c->contype != CONSTRAINT_CHECK) + continue; + + /* Not expecting conbin to be NULL, but we'll test for it anyway */ + val = fastgetattr(conTup, Anum_pg_constraint_conbin, + conRel->rd_att, &isNull); + if (isNull) + elog(ERROR, "domain \"%s\" constraint \"%s\" has NULL conbin", + NameStr(typTup->typname), NameStr(c->conname)); + + /* Convert conbin to C string in caller context */ + constring = TextDatumGetCString(val); + + /* Create the DomainConstraintCache object and context if needed */ + if (dcc == NULL) + { + MemoryContext cxt; + + cxt = AllocSetContextCreate(CurrentMemoryContext, + "Domain constraints", + ALLOCSET_SMALL_SIZES); + dcc = (DomainConstraintCache *) + MemoryContextAlloc(cxt, sizeof(DomainConstraintCache)); + dcc->constraints = NIL; + dcc->dccContext = cxt; + dcc->dccRefCount = 0; + } + + /* Create node trees in 
DomainConstraintCache's context */ + oldcxt = MemoryContextSwitchTo(dcc->dccContext); + + check_expr = (Expr *) stringToNode(constring); + + /* + * Plan the expression, since ExecInitExpr will expect that. + * + * Note: caching the result of expression_planner() is not very + * good practice. Ideally we'd use a CachedExpression here so + * that we would react promptly to, eg, changes in inlined + * functions. However, because we don't support mutable domain + * CHECK constraints, it's not really clear that it's worth the + * extra overhead to do that. + */ + check_expr = expression_planner(check_expr); + + r = makeNode(DomainConstraintState); + r->constrainttype = DOM_CONSTRAINT_CHECK; + r->name = pstrdup(NameStr(c->conname)); + r->check_expr = check_expr; + r->check_exprstate = NULL; + + MemoryContextSwitchTo(oldcxt); + + /* Accumulate constraints in an array, for sorting below */ + if (ccons == NULL) + { + cconslen = 8; + ccons = (DomainConstraintState **) + palloc(cconslen * sizeof(DomainConstraintState *)); + } + else if (nccons >= cconslen) + { + cconslen *= 2; + ccons = (DomainConstraintState **) + repalloc(ccons, cconslen * sizeof(DomainConstraintState *)); + } + ccons[nccons++] = r; + } + + systable_endscan(scan); + + if (nccons > 0) + { + /* + * Sort the items for this domain, so that CHECKs are applied in a + * deterministic order. + */ + if (nccons > 1) + qsort(ccons, nccons, sizeof(DomainConstraintState *), dcs_cmp); + + /* + * Now attach them to the overall list. Use lcons() here because + * constraints of parent domains should be applied earlier. 
+ */ + oldcxt = MemoryContextSwitchTo(dcc->dccContext); + while (nccons > 0) + dcc->constraints = lcons(ccons[--nccons], dcc->constraints); + MemoryContextSwitchTo(oldcxt); + } + + /* loop to next domain in stack */ + typeOid = typTup->typbasetype; + ReleaseSysCache(tup); + } + + table_close(conRel, AccessShareLock); + + /* + * Only need to add one NOT NULL check regardless of how many domains in + * the stack request it. + */ + if (notNull) + { + DomainConstraintState *r; + + /* Create the DomainConstraintCache object and context if needed */ + if (dcc == NULL) + { + MemoryContext cxt; + + cxt = AllocSetContextCreate(CurrentMemoryContext, + "Domain constraints", + ALLOCSET_SMALL_SIZES); + dcc = (DomainConstraintCache *) + MemoryContextAlloc(cxt, sizeof(DomainConstraintCache)); + dcc->constraints = NIL; + dcc->dccContext = cxt; + dcc->dccRefCount = 0; + } + + /* Create node trees in DomainConstraintCache's context */ + oldcxt = MemoryContextSwitchTo(dcc->dccContext); + + r = makeNode(DomainConstraintState); + + r->constrainttype = DOM_CONSTRAINT_NOTNULL; + r->name = pstrdup("NOT NULL"); + r->check_expr = NULL; + r->check_exprstate = NULL; + + /* lcons to apply the nullness check FIRST */ + dcc->constraints = lcons(r, dcc->constraints); + + MemoryContextSwitchTo(oldcxt); + } + + /* + * If we made a constraint object, move it into CacheMemoryContext and + * attach it to the typcache entry. + */ + if (dcc) + { + MemoryContextSetParent(dcc->dccContext, CacheMemoryContext); + typentry->domainData = dcc; + dcc->dccRefCount++; /* count the typcache's reference */ + } + + /* Either way, the typcache entry's domain data is now valid. 
*/ + typentry->flags |= TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS; +} + +/* + * qsort comparator to sort DomainConstraintState pointers by name + */ +static int +dcs_cmp(const void *a, const void *b) +{ + const DomainConstraintState *const *ca = (const DomainConstraintState *const *) a; + const DomainConstraintState *const *cb = (const DomainConstraintState *const *) b; + + return strcmp((*ca)->name, (*cb)->name); +} + +/* + * decr_dcc_refcount --- decrement a DomainConstraintCache's refcount, + * and free it if no references remain + */ +static void +decr_dcc_refcount(DomainConstraintCache *dcc) +{ + Assert(dcc->dccRefCount > 0); + if (--(dcc->dccRefCount) <= 0) + MemoryContextDelete(dcc->dccContext); +} + +/* + * Context reset/delete callback for a DomainConstraintRef + */ +static void +dccref_deletion_callback(void *arg) +{ + DomainConstraintRef *ref = (DomainConstraintRef *) arg; + DomainConstraintCache *dcc = ref->dcc; + + /* Paranoia --- be sure link is nulled before trying to release */ + if (dcc) + { + ref->constraints = NIL; + ref->dcc = NULL; + decr_dcc_refcount(dcc); + } +} + +/* + * prep_domain_constraints --- prepare domain constraints for execution + * + * The expression trees stored in the DomainConstraintCache's list are + * converted to executable expression state trees stored in execctx. 
+ */
+static List *
+prep_domain_constraints(List *constraints, MemoryContext execctx)
+{
+	MemoryContext callercxt;
+	List	   *execlist = NIL;
+	ListCell   *cell;
+
+	/* every node and list cell built below is allocated in execctx */
+	callercxt = MemoryContextSwitchTo(execctx);
+
+	foreach(cell, constraints)
+	{
+		DomainConstraintState *cached = (DomainConstraintState *) lfirst(cell);
+		DomainConstraintState *runnable = makeNode(DomainConstraintState);
+
+		/* name and check_expr are shared with the cached node, not copied */
+		runnable->constrainttype = cached->constrainttype;
+		runnable->name = cached->name;
+		runnable->check_expr = cached->check_expr;
+		runnable->check_exprstate = ExecInitExpr(cached->check_expr, NULL);
+
+		execlist = lappend(execlist, runnable);
+	}
+
+	MemoryContextSwitchTo(callercxt);
+
+	return execlist;
+}
+
+/*
+ * InitDomainConstraintRef --- initialize a DomainConstraintRef struct
+ *
+ * Caller must tell us the MemoryContext in which the DomainConstraintRef
+ * lives.  The ref will be cleaned up when that context is reset/deleted.
+ *
+ * Caller must also tell us whether it wants check_exprstate fields to be
+ * computed in the DomainConstraintState nodes attached to this ref.
+ * If it doesn't, we need not make a copy of the DomainConstraintState list.
+ */ +void +InitDomainConstraintRef(Oid type_id, DomainConstraintRef *ref, + MemoryContext refctx, bool need_exprstate) +{ + /* Look up the typcache entry --- we assume it survives indefinitely */ + ref->tcache = lookup_type_cache(type_id, TYPECACHE_DOMAIN_CONSTR_INFO); + ref->need_exprstate = need_exprstate; + /* For safety, establish the callback before acquiring a refcount */ + ref->refctx = refctx; + ref->dcc = NULL; + ref->callback.func = dccref_deletion_callback; + ref->callback.arg = (void *) ref; + MemoryContextRegisterResetCallback(refctx, &ref->callback); + /* Acquire refcount if there are constraints, and set up exported list */ + if (ref->tcache->domainData) + { + ref->dcc = ref->tcache->domainData; + ref->dcc->dccRefCount++; + if (ref->need_exprstate) + ref->constraints = prep_domain_constraints(ref->dcc->constraints, + ref->refctx); + else + ref->constraints = ref->dcc->constraints; + } + else + ref->constraints = NIL; +} + +/* + * UpdateDomainConstraintRef --- recheck validity of domain constraint info + * + * If the domain's constraint set changed, ref->constraints is updated to + * point at a new list of cached constraints. + * + * In the normal case where nothing happened to the domain, this is cheap + * enough that it's reasonable (and expected) to check before *each* use + * of the constraint info. + */ +void +UpdateDomainConstraintRef(DomainConstraintRef *ref) +{ + TypeCacheEntry *typentry = ref->tcache; + + /* Make sure typcache entry's data is up to date */ + if ((typentry->flags & TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS) == 0 && + typentry->typtype == TYPTYPE_DOMAIN) + load_domaintype_info(typentry); + + /* Transfer to ref object if there's new info, adjusting refcounts */ + if (ref->dcc != typentry->domainData) + { + /* Paranoia --- be sure link is nulled before trying to release */ + DomainConstraintCache *dcc = ref->dcc; + + if (dcc) + { + /* + * Note: we just leak the previous list of executable domain + * constraints. 
Alternatively, we could keep those in a child + * context of ref->refctx and free that context at this point. + * However, in practice this code path will be taken so seldom + * that the extra bookkeeping for a child context doesn't seem + * worthwhile; we'll just allow a leak for the lifespan of refctx. + */ + ref->constraints = NIL; + ref->dcc = NULL; + decr_dcc_refcount(dcc); + } + dcc = typentry->domainData; + if (dcc) + { + ref->dcc = dcc; + dcc->dccRefCount++; + if (ref->need_exprstate) + ref->constraints = prep_domain_constraints(dcc->constraints, + ref->refctx); + else + ref->constraints = dcc->constraints; + } + } +} + +/* + * DomainHasConstraints --- utility routine to check if a domain has constraints + * + * This is defined to return false, not fail, if type is not a domain. + */ +bool +DomainHasConstraints(Oid type_id) +{ + TypeCacheEntry *typentry; + + /* + * Note: a side effect is to cause the typcache's domain data to become + * valid. This is fine since we'll likely need it soon if there is any. + */ + typentry = lookup_type_cache(type_id, TYPECACHE_DOMAIN_CONSTR_INFO); + + return (typentry->domainData != NULL); +} + + +/* + * array_element_has_equality and friends are helper routines to check + * whether we should believe that array_eq and related functions will work + * on the given array type or composite type. + * + * The logic above may call these repeatedly on the same type entry, so we + * make use of the typentry->flags field to cache the results once known. + * Also, we assume that we'll probably want all these facts about the type + * if we want any, so we cache them all using only one lookup of the + * component datatype(s). 
+ */ + +static bool +array_element_has_equality(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_array_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_EQUALITY) != 0; +} + +static bool +array_element_has_compare(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_array_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_COMPARE) != 0; +} + +static bool +array_element_has_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_array_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_HASHING) != 0; +} + +static bool +array_element_has_extended_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_array_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_EXTENDED_HASHING) != 0; +} + +static void +cache_array_element_properties(TypeCacheEntry *typentry) +{ + Oid elem_type = get_base_element_type(typentry->type_id); + + if (OidIsValid(elem_type)) + { + TypeCacheEntry *elementry; + + elementry = lookup_type_cache(elem_type, + TYPECACHE_EQ_OPR | + TYPECACHE_CMP_PROC | + TYPECACHE_HASH_PROC | + TYPECACHE_HASH_EXTENDED_PROC); + if (OidIsValid(elementry->eq_opr)) + typentry->flags |= TCFLAGS_HAVE_ELEM_EQUALITY; + if (OidIsValid(elementry->cmp_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_COMPARE; + if (OidIsValid(elementry->hash_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_HASHING; + if (OidIsValid(elementry->hash_extended_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_EXTENDED_HASHING; + } + typentry->flags |= TCFLAGS_CHECKED_ELEM_PROPERTIES; +} + +/* + * Likewise, some helper functions for composite types. 
+ */ + +static bool +record_fields_have_equality(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES)) + cache_record_field_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_FIELD_EQUALITY) != 0; +} + +static bool +record_fields_have_compare(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES)) + cache_record_field_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_FIELD_COMPARE) != 0; +} + +static bool +record_fields_have_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES)) + cache_record_field_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_FIELD_HASHING) != 0; +} + +static bool +record_fields_have_extended_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES)) + cache_record_field_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_FIELD_EXTENDED_HASHING) != 0; +} + +static void +cache_record_field_properties(TypeCacheEntry *typentry) +{ + /* + * For type RECORD, we can't really tell what will work, since we don't + * have access here to the specific anonymous type. Just assume that + * equality and comparison will (we may get a failure at runtime). We + * could also claim that hashing works, but then if code that has the + * option between a comparison-based (sort-based) and a hash-based plan + * chooses hashing, stuff could fail that would otherwise work if it chose + * a comparison-based plan. In practice more types support comparison + * than hashing. 
+ */ + if (typentry->type_id == RECORDOID) + { + typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY | + TCFLAGS_HAVE_FIELD_COMPARE); + } + else if (typentry->typtype == TYPTYPE_COMPOSITE) + { + TupleDesc tupdesc; + int newflags; + int i; + + /* Fetch composite type's tupdesc if we don't have it already */ + if (typentry->tupDesc == NULL) + load_typcache_tupdesc(typentry); + tupdesc = typentry->tupDesc; + + /* Must bump the refcount while we do additional catalog lookups */ + IncrTupleDescRefCount(tupdesc); + + /* Have each property if all non-dropped fields have the property */ + newflags = (TCFLAGS_HAVE_FIELD_EQUALITY | + TCFLAGS_HAVE_FIELD_COMPARE | + TCFLAGS_HAVE_FIELD_HASHING | + TCFLAGS_HAVE_FIELD_EXTENDED_HASHING); + for (i = 0; i < tupdesc->natts; i++) + { + TypeCacheEntry *fieldentry; + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + if (attr->attisdropped) + continue; + + fieldentry = lookup_type_cache(attr->atttypid, + TYPECACHE_EQ_OPR | + TYPECACHE_CMP_PROC | + TYPECACHE_HASH_PROC | + TYPECACHE_HASH_EXTENDED_PROC); + if (!OidIsValid(fieldentry->eq_opr)) + newflags &= ~TCFLAGS_HAVE_FIELD_EQUALITY; + if (!OidIsValid(fieldentry->cmp_proc)) + newflags &= ~TCFLAGS_HAVE_FIELD_COMPARE; + if (!OidIsValid(fieldentry->hash_proc)) + newflags &= ~TCFLAGS_HAVE_FIELD_HASHING; + if (!OidIsValid(fieldentry->hash_extended_proc)) + newflags &= ~TCFLAGS_HAVE_FIELD_EXTENDED_HASHING; + + /* We can drop out of the loop once we disprove all bits */ + if (newflags == 0) + break; + } + typentry->flags |= newflags; + + DecrTupleDescRefCount(tupdesc); + } + else if (typentry->typtype == TYPTYPE_DOMAIN) + { + /* If it's domain over composite, copy base type's properties */ + TypeCacheEntry *baseentry; + + /* load up basetype info if we didn't already */ + if (typentry->domainBaseType == InvalidOid) + { + typentry->domainBaseTypmod = -1; + typentry->domainBaseType = + getBaseTypeAndTypmod(typentry->type_id, + &typentry->domainBaseTypmod); + } + baseentry = 
lookup_type_cache(typentry->domainBaseType, + TYPECACHE_EQ_OPR | + TYPECACHE_CMP_PROC | + TYPECACHE_HASH_PROC | + TYPECACHE_HASH_EXTENDED_PROC); + if (baseentry->typtype == TYPTYPE_COMPOSITE) + { + typentry->flags |= TCFLAGS_DOMAIN_BASE_IS_COMPOSITE; + typentry->flags |= baseentry->flags & (TCFLAGS_HAVE_FIELD_EQUALITY | + TCFLAGS_HAVE_FIELD_COMPARE | + TCFLAGS_HAVE_FIELD_HASHING | + TCFLAGS_HAVE_FIELD_EXTENDED_HASHING); + } + } + typentry->flags |= TCFLAGS_CHECKED_FIELD_PROPERTIES; +} + +/* + * Likewise, some helper functions for range and multirange types. + * + * We can borrow the flag bits for array element properties to use for range + * element properties, since those flag bits otherwise have no use in a + * range or multirange type's typcache entry. + */ + +static bool +range_element_has_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_range_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_HASHING) != 0; +} + +static bool +range_element_has_extended_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_range_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_EXTENDED_HASHING) != 0; +} + +static void +cache_range_element_properties(TypeCacheEntry *typentry) +{ + /* load up subtype link if we didn't already */ + if (typentry->rngelemtype == NULL && + typentry->typtype == TYPTYPE_RANGE) + load_rangetype_info(typentry); + + if (typentry->rngelemtype != NULL) + { + TypeCacheEntry *elementry; + + /* might need to calculate subtype's hash function properties */ + elementry = lookup_type_cache(typentry->rngelemtype->type_id, + TYPECACHE_HASH_PROC | + TYPECACHE_HASH_EXTENDED_PROC); + if (OidIsValid(elementry->hash_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_HASHING; + if (OidIsValid(elementry->hash_extended_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_EXTENDED_HASHING; + } + typentry->flags |= 
TCFLAGS_CHECKED_ELEM_PROPERTIES; +} + +static bool +multirange_element_has_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_multirange_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_HASHING) != 0; +} + +static bool +multirange_element_has_extended_hashing(TypeCacheEntry *typentry) +{ + if (!(typentry->flags & TCFLAGS_CHECKED_ELEM_PROPERTIES)) + cache_multirange_element_properties(typentry); + return (typentry->flags & TCFLAGS_HAVE_ELEM_EXTENDED_HASHING) != 0; +} + +static void +cache_multirange_element_properties(TypeCacheEntry *typentry) +{ + /* load up range link if we didn't already */ + if (typentry->rngtype == NULL && + typentry->typtype == TYPTYPE_MULTIRANGE) + load_multirangetype_info(typentry); + + if (typentry->rngtype != NULL && typentry->rngtype->rngelemtype != NULL) + { + TypeCacheEntry *elementry; + + /* might need to calculate subtype's hash function properties */ + elementry = lookup_type_cache(typentry->rngtype->rngelemtype->type_id, + TYPECACHE_HASH_PROC | + TYPECACHE_HASH_EXTENDED_PROC); + if (OidIsValid(elementry->hash_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_HASHING; + if (OidIsValid(elementry->hash_extended_proc)) + typentry->flags |= TCFLAGS_HAVE_ELEM_EXTENDED_HASHING; + } + typentry->flags |= TCFLAGS_CHECKED_ELEM_PROPERTIES; +} + +/* + * Make sure that RecordCacheArray and RecordIdentifierArray are large enough + * to store 'typmod'. 
+ */
+static void
+ensure_record_cache_typmod_slot_exists(int32 typmod)
+{
+	int32		oldlen;
+	int32		newlen;
+
+	/* First use: create both arrays, zero-filled, with 64 slots each */
+	if (RecordCacheArray == NULL)
+	{
+		RecordCacheArray = (TupleDesc *)
+			MemoryContextAllocZero(CacheMemoryContext, 64 * sizeof(TupleDesc));
+		RecordIdentifierArray = (uint64 *)
+			MemoryContextAllocZero(CacheMemoryContext, 64 * sizeof(uint64));
+		RecordCacheArrayLen = 64;
+	}
+
+	/* Already big enough? */
+	if (typmod < RecordCacheArrayLen)
+		return;
+
+	/* Keep doubling the length until 'typmod' is a valid index */
+	oldlen = RecordCacheArrayLen;
+	newlen = oldlen * 2;
+	while (typmod >= newlen)
+		newlen *= 2;
+
+	/* Enlarge both arrays in step, zeroing the newly added tail of each */
+	RecordCacheArray = (TupleDesc *) repalloc(RecordCacheArray,
+											  newlen * sizeof(TupleDesc));
+	memset(RecordCacheArray + oldlen, 0,
+		   (newlen - oldlen) * sizeof(TupleDesc));
+	RecordIdentifierArray = (uint64 *) repalloc(RecordIdentifierArray,
+												newlen * sizeof(uint64));
+	memset(RecordIdentifierArray + oldlen, 0,
+		   (newlen - oldlen) * sizeof(uint64));
+	RecordCacheArrayLen = newlen;
+}
+
+/*
+ * lookup_rowtype_tupdesc_internal --- internal routine to lookup a rowtype
+ *
+ * Same API as lookup_rowtype_tupdesc_noerror, but the returned tupdesc
+ * hasn't had its refcount bumped.
+ */
+static TupleDesc
+lookup_rowtype_tupdesc_internal(Oid type_id, int32 typmod, bool noError)
+{
+	if (type_id != RECORDOID)
+	{
+		/*
+		 * It's a named composite type, so use the regular typcache.
+		 */
+		TypeCacheEntry *typentry = lookup_type_cache(type_id,
+													 TYPECACHE_TUPDESC);
+
+		if (typentry->tupDesc == NULL && !noError)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("type %s is not composite",
+							format_type_be(type_id))));
+		return typentry->tupDesc;
+	}
+
+	/*
+	 * It's a transient record type, so look in our record-type table.
+	 */
+	if (typmod >= 0)
+	{
+		/* It is already in our local cache? */
+		if (typmod < RecordCacheArrayLen &&
+			RecordCacheArray[typmod] != NULL)
+			return RecordCacheArray[typmod];
+
+		/* Are we attached to a shared record typmod registry? */
+		if (CurrentSession->shared_typmod_registry != NULL)
+		{
+			SharedTypmodTableEntry *shared;
+
+			/* Try to find it in the shared typmod index. */
+			shared = dshash_find(CurrentSession->shared_typmod_table,
+								 &typmod, false);
+			if (shared != NULL)
+			{
+				TupleDesc	shdesc;
+
+				shdesc = (TupleDesc)
+					dsa_get_address(CurrentSession->area,
+									shared->shared_tupdesc);
+				Assert(typmod == shdesc->tdtypmod);
+
+				/* We may need to extend the local RecordCacheArray. */
+				ensure_record_cache_typmod_slot_exists(typmod);
+
+				/*
+				 * Our local array can now point directly to the TupleDesc
+				 * in shared memory, which is non-reference-counted.
+				 */
+				RecordCacheArray[typmod] = shdesc;
+				Assert(shdesc->tdrefcount == -1);
+
+				/*
+				 * We don't share tupdesc identifiers across processes, so
+				 * assign one locally.
+				 */
+				RecordIdentifierArray[typmod] = ++tupledesc_id_counter;
+
+				dshash_release_lock(CurrentSession->shared_typmod_table,
+									shared);
+
+				return RecordCacheArray[typmod];
+			}
+		}
+	}
+
+	/* Not registered anywhere we can see */
+	if (!noError)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("record type has not been registered")));
+	return NULL;
+}
+
+/*
+ * lookup_rowtype_tupdesc
+ *
+ * Given a typeid/typmod that should describe a known composite type,
+ * return the tuple descriptor for the type.  Will ereport on failure.
+ * (Use ereport because this is reachable with user-specified OIDs,
+ * for example from record_in().)
+ *
+ * Note: on success, we increment the refcount of the returned TupleDesc,
+ * and log the reference in CurrentResourceOwner.  Caller should call
+ * ReleaseTupleDesc or DecrTupleDescRefCount when done using the tupdesc.
+ */ +TupleDesc +lookup_rowtype_tupdesc(Oid type_id, int32 typmod) +{ + TupleDesc tupDesc; + + tupDesc = lookup_rowtype_tupdesc_internal(type_id, typmod, false); + PinTupleDesc(tupDesc); + return tupDesc; +} + +/* + * lookup_rowtype_tupdesc_noerror + * + * As above, but if the type is not a known composite type and noError + * is true, returns NULL instead of ereport'ing. (Note that if a bogus + * type_id is passed, you'll get an ereport anyway.) + */ +TupleDesc +lookup_rowtype_tupdesc_noerror(Oid type_id, int32 typmod, bool noError) +{ + TupleDesc tupDesc; + + tupDesc = lookup_rowtype_tupdesc_internal(type_id, typmod, noError); + if (tupDesc != NULL) + PinTupleDesc(tupDesc); + return tupDesc; +} + +/* + * lookup_rowtype_tupdesc_copy + * + * Like lookup_rowtype_tupdesc(), but the returned TupleDesc has been + * copied into the CurrentMemoryContext and is not reference-counted. + */ +TupleDesc +lookup_rowtype_tupdesc_copy(Oid type_id, int32 typmod) +{ + TupleDesc tmp; + + tmp = lookup_rowtype_tupdesc_internal(type_id, typmod, false); + return CreateTupleDescCopyConstr(tmp); +} + +/* + * lookup_rowtype_tupdesc_domain + * + * Same as lookup_rowtype_tupdesc_noerror(), except that the type can also be + * a domain over a named composite type; so this is effectively equivalent to + * lookup_rowtype_tupdesc_noerror(getBaseType(type_id), typmod, noError) + * except for being a tad faster. + * + * Note: the reason we don't fold the look-through-domain behavior into plain + * lookup_rowtype_tupdesc() is that we want callers to know they might be + * dealing with a domain. Otherwise they might construct a tuple that should + * be of the domain type, but not apply domain constraints. + */ +TupleDesc +lookup_rowtype_tupdesc_domain(Oid type_id, int32 typmod, bool noError) +{ + TupleDesc tupDesc; + + if (type_id != RECORDOID) + { + /* + * Check for domain or named composite type. We might as well load + * whichever data is needed. 
+ */ + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(type_id, + TYPECACHE_TUPDESC | + TYPECACHE_DOMAIN_BASE_INFO); + if (typentry->typtype == TYPTYPE_DOMAIN) + return lookup_rowtype_tupdesc_noerror(typentry->domainBaseType, + typentry->domainBaseTypmod, + noError); + if (typentry->tupDesc == NULL && !noError) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("type %s is not composite", + format_type_be(type_id)))); + tupDesc = typentry->tupDesc; + } + else + tupDesc = lookup_rowtype_tupdesc_internal(type_id, typmod, noError); + if (tupDesc != NULL) + PinTupleDesc(tupDesc); + return tupDesc; +} + +/* + * Hash function for the hash table of RecordCacheEntry. + */ +static uint32 +record_type_typmod_hash(const void *data, size_t size) +{ + RecordCacheEntry *entry = (RecordCacheEntry *) data; + + return hashTupleDesc(entry->tupdesc); +} + +/* + * Match function for the hash table of RecordCacheEntry. + */ +static int +record_type_typmod_compare(const void *a, const void *b, size_t size) +{ + RecordCacheEntry *left = (RecordCacheEntry *) a; + RecordCacheEntry *right = (RecordCacheEntry *) b; + + return equalTupleDescs(left->tupdesc, right->tupdesc) ? 0 : 1; +} + +/* + * assign_record_type_typmod + * + * Given a tuple descriptor for a RECORD type, find or create a cache entry + * for the type, and set the tupdesc's tdtypmod field to a value that will + * identify this cache entry to lookup_rowtype_tupdesc. 
+ */ +void +assign_record_type_typmod(TupleDesc tupDesc) +{ + RecordCacheEntry *recentry; + TupleDesc entDesc; + bool found; + MemoryContext oldcxt; + + Assert(tupDesc->tdtypeid == RECORDOID); + + if (RecordCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + ctl.keysize = sizeof(TupleDesc); /* just the pointer */ + ctl.entrysize = sizeof(RecordCacheEntry); + ctl.hash = record_type_typmod_hash; + ctl.match = record_type_typmod_compare; + RecordCacheHash = hash_create("Record information cache", 64, + &ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); + + /* Also make sure CacheMemoryContext exists */ + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + } + + /* + * Find a hashtable entry for this tuple descriptor. We don't use + * HASH_ENTER yet, because if it's missing, we need to make sure that all + * the allocations succeed before we create the new entry. + */ + recentry = (RecordCacheEntry *) hash_search(RecordCacheHash, + (void *) &tupDesc, + HASH_FIND, &found); + if (found && recentry->tupdesc != NULL) + { + tupDesc->tdtypmod = recentry->tupdesc->tdtypmod; + return; + } + + /* Not present, so need to manufacture an entry */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + /* Look in the SharedRecordTypmodRegistry, if attached */ + entDesc = find_or_make_matching_shared_tupledesc(tupDesc); + if (entDesc == NULL) + { + /* + * Make sure we have room before we CreateTupleDescCopy() or advance + * NextRecordTypmod. + */ + ensure_record_cache_typmod_slot_exists(NextRecordTypmod); + + /* Reference-counted local cache only. */ + entDesc = CreateTupleDescCopy(tupDesc); + entDesc->tdrefcount = 1; + entDesc->tdtypmod = NextRecordTypmod++; + } + else + { + ensure_record_cache_typmod_slot_exists(entDesc->tdtypmod); + } + + RecordCacheArray[entDesc->tdtypmod] = entDesc; + + /* Assign a unique tupdesc identifier, too. 
*/ + RecordIdentifierArray[entDesc->tdtypmod] = ++tupledesc_id_counter; + + /* Fully initialized; create the hash table entry */ + recentry = (RecordCacheEntry *) hash_search(RecordCacheHash, + (void *) &tupDesc, + HASH_ENTER, NULL); + recentry->tupdesc = entDesc; + + /* Update the caller's tuple descriptor. */ + tupDesc->tdtypmod = entDesc->tdtypmod; + + MemoryContextSwitchTo(oldcxt); +} + +/* + * assign_record_type_identifier + * + * Get an identifier, which will be unique over the lifespan of this backend + * process, for the current tuple descriptor of the specified composite type. + * For named composite types, the value is guaranteed to change if the type's + * definition does. For registered RECORD types, the value will not change + * once assigned, since the registered type won't either. If an anonymous + * RECORD type is specified, we return a new identifier on each call. + */ +uint64 +assign_record_type_identifier(Oid type_id, int32 typmod) +{ + if (type_id != RECORDOID) + { + /* + * It's a named composite type, so use the regular typcache. + */ + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(type_id, TYPECACHE_TUPDESC); + if (typentry->tupDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("type %s is not composite", + format_type_be(type_id)))); + Assert(typentry->tupDesc_identifier != 0); + return typentry->tupDesc_identifier; + } + else + { + /* + * It's a transient record type, so look in our record-type table. + */ + if (typmod >= 0 && typmod < RecordCacheArrayLen && + RecordCacheArray[typmod] != NULL) + { + Assert(RecordIdentifierArray[typmod] != 0); + return RecordIdentifierArray[typmod]; + } + + /* For anonymous or unrecognized record type, generate a new ID */ + return ++tupledesc_id_counter; + } +} + +/* + * Return the amount of shmem required to hold a SharedRecordTypmodRegistry. + * This exists only to avoid exposing private innards of + * SharedRecordTypmodRegistry in a header. 
+ */ +size_t +SharedRecordTypmodRegistryEstimate(void) +{ + return sizeof(SharedRecordTypmodRegistry); +} + +/* + * Initialize 'registry' in a pre-existing shared memory region, which must be + * maximally aligned and have space for SharedRecordTypmodRegistryEstimate() + * bytes. + * + * 'area' will be used to allocate shared memory space as required for the + * typemod registration. The current process, expected to be a leader process + * in a parallel query, will be attached automatically and its current record + * types will be loaded into *registry. While attached, all calls to + * assign_record_type_typmod will use the shared registry. Worker backends + * will need to attach explicitly. + * + * Note that this function takes 'area' and 'segment' as arguments rather than + * accessing them via CurrentSession, because they aren't installed there + * until after this function runs. + */ +void +SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *registry, + dsm_segment *segment, + dsa_area *area) +{ + MemoryContext old_context; + dshash_table *record_table; + dshash_table *typmod_table; + int32 typmod; + + Assert(!IsParallelWorker()); + + /* We can't already be attached to a shared registry. */ + Assert(CurrentSession->shared_typmod_registry == NULL); + Assert(CurrentSession->shared_record_table == NULL); + Assert(CurrentSession->shared_typmod_table == NULL); + + old_context = MemoryContextSwitchTo(TopMemoryContext); + + /* Create the hash table of tuple descriptors indexed by themselves. */ + record_table = dshash_create(area, &srtr_record_table_params, area); + + /* Create the hash table of tuple descriptors indexed by typmod. */ + typmod_table = dshash_create(area, &srtr_typmod_table_params, NULL); + + MemoryContextSwitchTo(old_context); + + /* Initialize the SharedRecordTypmodRegistry. 
*/ + registry->record_table_handle = dshash_get_hash_table_handle(record_table); + registry->typmod_table_handle = dshash_get_hash_table_handle(typmod_table); + pg_atomic_init_u32(®istry->next_typmod, NextRecordTypmod); + + /* + * Copy all entries from this backend's private registry into the shared + * registry. + */ + for (typmod = 0; typmod < NextRecordTypmod; ++typmod) + { + SharedTypmodTableEntry *typmod_table_entry; + SharedRecordTableEntry *record_table_entry; + SharedRecordTableKey record_table_key; + dsa_pointer shared_dp; + TupleDesc tupdesc; + bool found; + + tupdesc = RecordCacheArray[typmod]; + if (tupdesc == NULL) + continue; + + /* Copy the TupleDesc into shared memory. */ + shared_dp = share_tupledesc(area, tupdesc, typmod); + + /* Insert into the typmod table. */ + typmod_table_entry = dshash_find_or_insert(typmod_table, + &tupdesc->tdtypmod, + &found); + if (found) + elog(ERROR, "cannot create duplicate shared record typmod"); + typmod_table_entry->typmod = tupdesc->tdtypmod; + typmod_table_entry->shared_tupdesc = shared_dp; + dshash_release_lock(typmod_table, typmod_table_entry); + + /* Insert into the record table. */ + record_table_key.shared = false; + record_table_key.u.local_tupdesc = tupdesc; + record_table_entry = dshash_find_or_insert(record_table, + &record_table_key, + &found); + if (!found) + { + record_table_entry->key.shared = true; + record_table_entry->key.u.shared_tupdesc = shared_dp; + } + dshash_release_lock(record_table, record_table_entry); + } + + /* + * Set up the global state that will tell assign_record_type_typmod and + * lookup_rowtype_tupdesc_internal about the shared registry. + */ + CurrentSession->shared_record_table = record_table; + CurrentSession->shared_typmod_table = typmod_table; + CurrentSession->shared_typmod_registry = registry; + + /* + * We install a detach hook in the leader, but only to handle cleanup on + * failure during GetSessionDsmHandle(). 
Once GetSessionDsmHandle() pins + * the memory, the leader process will use a shared registry until it + * exits. + */ + on_dsm_detach(segment, shared_record_typmod_registry_detach, (Datum) 0); +} + +/* + * Attach to 'registry', which must have been initialized already by another + * backend. Future calls to assign_record_type_typmod and + * lookup_rowtype_tupdesc_internal will use the shared registry until the + * current session is detached. + */ +void +SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry) +{ + MemoryContext old_context; + dshash_table *record_table; + dshash_table *typmod_table; + + Assert(IsParallelWorker()); + + /* We can't already be attached to a shared registry. */ + Assert(CurrentSession != NULL); + Assert(CurrentSession->segment != NULL); + Assert(CurrentSession->area != NULL); + Assert(CurrentSession->shared_typmod_registry == NULL); + Assert(CurrentSession->shared_record_table == NULL); + Assert(CurrentSession->shared_typmod_table == NULL); + + /* + * We can't already have typmods in our local cache, because they'd clash + * with those imported by SharedRecordTypmodRegistryInit. This should be + * a freshly started parallel worker. If we ever support worker + * recycling, a worker would need to zap its local cache in between + * servicing different queries, in order to be able to call this and + * synchronize typmods with a new leader; but that's problematic because + * we can't be very sure that record-typmod-related state hasn't escaped + * to anywhere else in the process. + */ + Assert(NextRecordTypmod == 0); + + old_context = MemoryContextSwitchTo(TopMemoryContext); + + /* Attach to the two hash tables. 
*/ + record_table = dshash_attach(CurrentSession->area, + &srtr_record_table_params, + registry->record_table_handle, + CurrentSession->area); + typmod_table = dshash_attach(CurrentSession->area, + &srtr_typmod_table_params, + registry->typmod_table_handle, + NULL); + + MemoryContextSwitchTo(old_context); + + /* + * Set up detach hook to run at worker exit. Currently this is the same + * as the leader's detach hook, but in future they might need to be + * different. + */ + on_dsm_detach(CurrentSession->segment, + shared_record_typmod_registry_detach, + PointerGetDatum(registry)); + + /* + * Set up the session state that will tell assign_record_type_typmod and + * lookup_rowtype_tupdesc_internal about the shared registry. + */ + CurrentSession->shared_typmod_registry = registry; + CurrentSession->shared_record_table = record_table; + CurrentSession->shared_typmod_table = typmod_table; +} + +/* + * TypeCacheRelCallback + * Relcache inval callback function + * + * Delete the cached tuple descriptor (if any) for the given rel's composite + * type, or for all composite types if relid == InvalidOid. Also reset + * whatever info we have cached about the composite type's comparability. + * + * This is called when a relcache invalidation event occurs for the given + * relid. We must scan the whole typcache hash since we don't know the + * type OID corresponding to the relid. We could do a direct search if this + * were a syscache-flush callback on pg_type, but then we would need all + * ALTER-TABLE-like commands that could modify a rowtype to issue syscache + * invals against the rel's pg_type OID. The extra SI signaling could very + * well cost more than we'd save, since in most usages there are not very + * many entries in a backend's typcache. The risk of bugs-of-omission seems + * high, too. + * + * Another possibility, with only localized impact, is to maintain a second + * hashtable that indexes composite-type typcache entries by their typrelid. 
+ * But it's still not clear it's worth the trouble. + */ +static void +TypeCacheRelCallback(Datum arg, Oid relid) +{ + HASH_SEQ_STATUS status; + TypeCacheEntry *typentry; + + /* TypeCacheHash must exist, else this callback wouldn't be registered */ + hash_seq_init(&status, TypeCacheHash); + while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (typentry->typtype == TYPTYPE_COMPOSITE) + { + /* Skip if no match, unless we're zapping all composite types */ + if (relid != typentry->typrelid && relid != InvalidOid) + continue; + + /* Delete tupdesc if we have it */ + if (typentry->tupDesc != NULL) + { + /* + * Release our refcount, and free the tupdesc if none remain. + * (Can't use DecrTupleDescRefCount because this reference is + * not logged in current resource owner.) + */ + Assert(typentry->tupDesc->tdrefcount > 0); + if (--typentry->tupDesc->tdrefcount == 0) + FreeTupleDesc(typentry->tupDesc); + typentry->tupDesc = NULL; + + /* + * Also clear tupDesc_identifier, so that anything watching + * that will realize that the tupdesc has possibly changed. + * (Alternatively, we could specify that to detect possible + * tupdesc change, one must check for tupDesc != NULL as well + * as tupDesc_identifier being the same as what was previously + * seen. That seems error-prone.) + */ + typentry->tupDesc_identifier = 0; + } + + /* Reset equality/comparison/hashing validity information */ + typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; + } + else if (typentry->typtype == TYPTYPE_DOMAIN) + { + /* + * If it's domain over composite, reset flags. (We don't bother + * trying to determine whether the specific base type needs a + * reset.) Note that if we haven't determined whether the base + * type is composite, we don't need to reset anything. 
+ */ + if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE) + typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; + } + } +} + +/* + * TypeCacheTypCallback + * Syscache inval callback function + * + * This is called when a syscache invalidation event occurs for any + * pg_type row. If we have information cached about that type, mark + * it as needing to be reloaded. + */ +static void +TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + TypeCacheEntry *typentry; + + /* TypeCacheHash must exist, else this callback wouldn't be registered */ + hash_seq_init(&status, TypeCacheHash); + while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) + { + /* Is this the targeted type row (or it's a total cache flush)? */ + if (hashvalue == 0 || typentry->type_id_hash == hashvalue) + { + /* + * Mark the data obtained directly from pg_type as invalid. Also, + * if it's a domain, typnotnull might've changed, so we'll need to + * recalculate its constraints. + */ + typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA | + TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS); + } + } +} + +/* + * TypeCacheOpcCallback + * Syscache inval callback function + * + * This is called when a syscache invalidation event occurs for any pg_opclass + * row. In principle we could probably just invalidate data dependent on the + * particular opclass, but since updates on pg_opclass are rare in production + * it doesn't seem worth a lot of complication: we just mark all cached data + * invalid. + * + * Note that we don't bother watching for updates on pg_amop or pg_amproc. + * This should be safe because ALTER OPERATOR FAMILY ADD/DROP OPERATOR/FUNCTION + * is not allowed to be used to add/drop the primary operators and functions + * of an opclass, only cross-type members of a family; and the latter sorts + * of members are not going to get cached here. 
+ */ +static void +TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + TypeCacheEntry *typentry; + + /* TypeCacheHash must exist, else this callback wouldn't be registered */ + hash_seq_init(&status, TypeCacheHash); + while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL) + { + /* Reset equality/comparison/hashing validity information */ + typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS; + } +} + +/* + * TypeCacheConstrCallback + * Syscache inval callback function + * + * This is called when a syscache invalidation event occurs for any + * pg_constraint row. We flush information about domain constraints + * when this happens. + * + * It's slightly annoying that we can't tell whether the inval event was for + * a domain constraint record or not; there's usually more update traffic + * for table constraints than domain constraints, so we'll do a lot of + * useless flushes. Still, this is better than the old no-caching-at-all + * approach to domain constraints. + */ +static void +TypeCacheConstrCallback(Datum arg, int cacheid, uint32 hashvalue) +{ + TypeCacheEntry *typentry; + + /* + * Because this is called very frequently, and typically very few of the + * typcache entries are for domains, we don't use hash_seq_search here. + * Instead we thread all the domain-type entries together so that we can + * visit them cheaply. 
+ */ + for (typentry = firstDomainTypeEntry; + typentry != NULL; + typentry = typentry->nextDomain) + { + /* Reset domain constraint validity information */ + typentry->flags &= ~TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS; + } +} + + +/* + * Check if given OID is part of the subset that's sortable by comparisons + */ +static inline bool +enum_known_sorted(TypeCacheEnumData *enumdata, Oid arg) +{ + Oid offset; + + if (arg < enumdata->bitmap_base) + return false; + offset = arg - enumdata->bitmap_base; + if (offset > (Oid) INT_MAX) + return false; + return bms_is_member((int) offset, enumdata->sorted_values); +} + + +/* + * compare_values_of_enum + * Compare two members of an enum type. + * Return <0, 0, or >0 according as arg1 <, =, or > arg2. + * + * Note: currently, the enumData cache is refreshed only if we are asked + * to compare an enum value that is not already in the cache. This is okay + * because there is no support for re-ordering existing values, so comparisons + * of previously cached values will return the right answer even if other + * values have been added since we last loaded the cache. + * + * Note: the enum logic has a special-case rule about even-numbered versus + * odd-numbered OIDs, but we take no account of that rule here; this + * routine shouldn't even get called when that rule applies. + */ +int +compare_values_of_enum(TypeCacheEntry *tcache, Oid arg1, Oid arg2) +{ + TypeCacheEnumData *enumdata; + EnumItem *item1; + EnumItem *item2; + + /* + * Equal OIDs are certainly equal --- this case was probably handled by + * our caller, but we may as well check. + */ + if (arg1 == arg2) + return 0; + + /* Load up the cache if first time through */ + if (tcache->enumData == NULL) + load_enum_cache_data(tcache); + enumdata = tcache->enumData; + + /* + * If both OIDs are known-sorted, we can just compare them directly. 
+ */ + if (enum_known_sorted(enumdata, arg1) && + enum_known_sorted(enumdata, arg2)) + { + if (arg1 < arg2) + return -1; + else + return 1; + } + + /* + * Slow path: we have to identify their actual sort-order positions. + */ + item1 = find_enumitem(enumdata, arg1); + item2 = find_enumitem(enumdata, arg2); + + if (item1 == NULL || item2 == NULL) + { + /* + * We couldn't find one or both values. That means the enum has + * changed under us, so re-initialize the cache and try again. We + * don't bother retrying the known-sorted case in this path. + */ + load_enum_cache_data(tcache); + enumdata = tcache->enumData; + + item1 = find_enumitem(enumdata, arg1); + item2 = find_enumitem(enumdata, arg2); + + /* + * If we still can't find the values, complain: we must have corrupt + * data. + */ + if (item1 == NULL) + elog(ERROR, "enum value %u not found in cache for enum %s", + arg1, format_type_be(tcache->type_id)); + if (item2 == NULL) + elog(ERROR, "enum value %u not found in cache for enum %s", + arg2, format_type_be(tcache->type_id)); + } + + if (item1->sort_order < item2->sort_order) + return -1; + else if (item1->sort_order > item2->sort_order) + return 1; + else + return 0; +} + +/* + * Load (or re-load) the enumData member of the typcache entry. + */ +static void +load_enum_cache_data(TypeCacheEntry *tcache) +{ + TypeCacheEnumData *enumdata; + Relation enum_rel; + SysScanDesc enum_scan; + HeapTuple enum_tuple; + ScanKeyData skey; + EnumItem *items; + int numitems; + int maxitems; + Oid bitmap_base; + Bitmapset *bitmap; + MemoryContext oldcxt; + int bm_size, + start_pos; + + /* Check that this is actually an enum */ + if (tcache->typtype != TYPTYPE_ENUM) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not an enum", + format_type_be(tcache->type_id)))); + + /* + * Read all the information for members of the enum type. 
We collect the + * info in working memory in the caller's context, and then transfer it to + * permanent memory in CacheMemoryContext. This minimizes the risk of + * leaking memory from CacheMemoryContext in the event of an error partway + * through. + */ + maxitems = 64; + items = (EnumItem *) palloc(sizeof(EnumItem) * maxitems); + numitems = 0; + + /* Scan pg_enum for the members of the target enum type. */ + ScanKeyInit(&skey, + Anum_pg_enum_enumtypid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(tcache->type_id)); + + enum_rel = table_open(EnumRelationId, AccessShareLock); + enum_scan = systable_beginscan(enum_rel, + EnumTypIdLabelIndexId, + true, NULL, + 1, &skey); + + while (HeapTupleIsValid(enum_tuple = systable_getnext(enum_scan))) + { + Form_pg_enum en = (Form_pg_enum) GETSTRUCT(enum_tuple); + + if (numitems >= maxitems) + { + maxitems *= 2; + items = (EnumItem *) repalloc(items, sizeof(EnumItem) * maxitems); + } + items[numitems].enum_oid = en->oid; + items[numitems].sort_order = en->enumsortorder; + numitems++; + } + + systable_endscan(enum_scan); + table_close(enum_rel, AccessShareLock); + + /* Sort the items into OID order */ + qsort(items, numitems, sizeof(EnumItem), enum_oid_cmp); + + /* + * Here, we create a bitmap listing a subset of the enum's OIDs that are + * known to be in order and can thus be compared with just OID comparison. + * + * The point of this is that the enum's initial OIDs were certainly in + * order, so there is some subset that can be compared via OID comparison; + * and we'd rather not do binary searches unnecessarily. + * + * This is somewhat heuristic, and might identify a subset of OIDs that + * isn't exactly what the type started with. That's okay as long as the + * subset is correctly sorted. 
+ */ + bitmap_base = InvalidOid; + bitmap = NULL; + bm_size = 1; /* only save sets of at least 2 OIDs */ + + for (start_pos = 0; start_pos < numitems - 1; start_pos++) + { + /* + * Identify longest sorted subsequence starting at start_pos + */ + Bitmapset *this_bitmap = bms_make_singleton(0); + int this_bm_size = 1; + Oid start_oid = items[start_pos].enum_oid; + float4 prev_order = items[start_pos].sort_order; + int i; + + for (i = start_pos + 1; i < numitems; i++) + { + Oid offset; + + offset = items[i].enum_oid - start_oid; + /* quit if bitmap would be too large; cutoff is arbitrary */ + if (offset >= 8192) + break; + /* include the item if it's in-order */ + if (items[i].sort_order > prev_order) + { + prev_order = items[i].sort_order; + this_bitmap = bms_add_member(this_bitmap, (int) offset); + this_bm_size++; + } + } + + /* Remember it if larger than previous best */ + if (this_bm_size > bm_size) + { + bms_free(bitmap); + bitmap_base = start_oid; + bitmap = this_bitmap; + bm_size = this_bm_size; + } + else + bms_free(this_bitmap); + + /* + * Done if it's not possible to find a longer sequence in the rest of + * the list. In typical cases this will happen on the first + * iteration, which is why we create the bitmaps on the fly instead of + * doing a second pass over the list. 
+ */ + if (bm_size >= (numitems - start_pos - 1)) + break; + } + + /* OK, copy the data into CacheMemoryContext */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + enumdata = (TypeCacheEnumData *) + palloc(offsetof(TypeCacheEnumData, enum_values) + + numitems * sizeof(EnumItem)); + enumdata->bitmap_base = bitmap_base; + enumdata->sorted_values = bms_copy(bitmap); + enumdata->num_values = numitems; + memcpy(enumdata->enum_values, items, numitems * sizeof(EnumItem)); + MemoryContextSwitchTo(oldcxt); + + pfree(items); + bms_free(bitmap); + + /* And link the finished cache struct into the typcache */ + if (tcache->enumData != NULL) + pfree(tcache->enumData); + tcache->enumData = enumdata; +} + +/* + * Locate the EnumItem with the given OID, if present + */ +static EnumItem * +find_enumitem(TypeCacheEnumData *enumdata, Oid arg) +{ + EnumItem srch; + + /* On some versions of Solaris, bsearch of zero items dumps core */ + if (enumdata->num_values <= 0) + return NULL; + + srch.enum_oid = arg; + return bsearch(&srch, enumdata->enum_values, enumdata->num_values, + sizeof(EnumItem), enum_oid_cmp); +} + +/* + * qsort comparison function for OID-ordered EnumItems + */ +static int +enum_oid_cmp(const void *left, const void *right) +{ + const EnumItem *l = (const EnumItem *) left; + const EnumItem *r = (const EnumItem *) right; + + if (l->enum_oid < r->enum_oid) + return -1; + else if (l->enum_oid > r->enum_oid) + return 1; + else + return 0; +} + +/* + * Copy 'tupdesc' into newly allocated shared memory in 'area', set its typmod + * to the given value and return a dsa_pointer. 
+ */ +static dsa_pointer +share_tupledesc(dsa_area *area, TupleDesc tupdesc, uint32 typmod) +{ + dsa_pointer shared_dp; + TupleDesc shared; + + shared_dp = dsa_allocate(area, TupleDescSize(tupdesc)); + shared = (TupleDesc) dsa_get_address(area, shared_dp); + TupleDescCopy(shared, tupdesc); + shared->tdtypmod = typmod; + + return shared_dp; +} + +/* + * If we are attached to a SharedRecordTypmodRegistry, use it to find or + * create a shared TupleDesc that matches 'tupdesc'. Otherwise return NULL. + * Tuple descriptors returned by this function are not reference counted, and + * will exist at least as long as the current backend remained attached to the + * current session. + */ +static TupleDesc +find_or_make_matching_shared_tupledesc(TupleDesc tupdesc) +{ + TupleDesc result; + SharedRecordTableKey key; + SharedRecordTableEntry *record_table_entry; + SharedTypmodTableEntry *typmod_table_entry; + dsa_pointer shared_dp; + bool found; + uint32 typmod; + + /* If not even attached, nothing to do. */ + if (CurrentSession->shared_typmod_registry == NULL) + return NULL; + + /* Try to find a matching tuple descriptor in the record table. */ + key.shared = false; + key.u.local_tupdesc = tupdesc; + record_table_entry = (SharedRecordTableEntry *) + dshash_find(CurrentSession->shared_record_table, &key, false); + if (record_table_entry) + { + Assert(record_table_entry->key.shared); + dshash_release_lock(CurrentSession->shared_record_table, + record_table_entry); + result = (TupleDesc) + dsa_get_address(CurrentSession->area, + record_table_entry->key.u.shared_tupdesc); + Assert(result->tdrefcount == -1); + + return result; + } + + /* Allocate a new typmod number. This will be wasted if we error out. */ + typmod = (int) + pg_atomic_fetch_add_u32(&CurrentSession->shared_typmod_registry->next_typmod, + 1); + + /* Copy the TupleDesc into shared memory. 
*/ + shared_dp = share_tupledesc(CurrentSession->area, tupdesc, typmod); + + /* + * Create an entry in the typmod table so that others will understand this + * typmod number. + */ + PG_TRY(); + { + typmod_table_entry = (SharedTypmodTableEntry *) + dshash_find_or_insert(CurrentSession->shared_typmod_table, + &typmod, &found); + if (found) + elog(ERROR, "cannot create duplicate shared record typmod"); + } + PG_CATCH(); + { + dsa_free(CurrentSession->area, shared_dp); + PG_RE_THROW(); + } + PG_END_TRY(); + typmod_table_entry->typmod = typmod; + typmod_table_entry->shared_tupdesc = shared_dp; + dshash_release_lock(CurrentSession->shared_typmod_table, + typmod_table_entry); + + /* + * Finally create an entry in the record table so others with matching + * tuple descriptors can reuse the typmod. + */ + record_table_entry = (SharedRecordTableEntry *) + dshash_find_or_insert(CurrentSession->shared_record_table, &key, + &found); + if (found) + { + /* + * Someone concurrently inserted a matching tuple descriptor since the + * first time we checked. Use that one instead. + */ + dshash_release_lock(CurrentSession->shared_record_table, + record_table_entry); + + /* Might as well free up the space used by the one we created. */ + found = dshash_delete_key(CurrentSession->shared_typmod_table, + &typmod); + Assert(found); + dsa_free(CurrentSession->area, shared_dp); + + /* Return the one we found. */ + Assert(record_table_entry->key.shared); + result = (TupleDesc) + dsa_get_address(CurrentSession->area, + record_table_entry->key.u.shared_tupdesc); + Assert(result->tdrefcount == -1); + + return result; + } + + /* Store it and return it. 
*/ + record_table_entry->key.shared = true; + record_table_entry->key.u.shared_tupdesc = shared_dp; + dshash_release_lock(CurrentSession->shared_record_table, + record_table_entry); + result = (TupleDesc) + dsa_get_address(CurrentSession->area, shared_dp); + Assert(result->tdrefcount == -1); + + return result; +} + +/* + * On-DSM-detach hook to forget about the current shared record typmod + * infrastructure. This is currently used by both leader and workers. + */ +static void +shared_record_typmod_registry_detach(dsm_segment *segment, Datum datum) +{ + /* Be cautious here: maybe we didn't finish initializing. */ + if (CurrentSession->shared_record_table != NULL) + { + dshash_detach(CurrentSession->shared_record_table); + CurrentSession->shared_record_table = NULL; + } + if (CurrentSession->shared_typmod_table != NULL) + { + dshash_detach(CurrentSession->shared_typmod_table); + CurrentSession->shared_typmod_table = NULL; + } + CurrentSession->shared_typmod_registry = NULL; +} |