/*------------------------------------------------------------------------- * * shmem.c * create shared memory and initialize shared memory data structures. * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/storage/ipc/shmem.c * *------------------------------------------------------------------------- */ /* * POSTGRES processes share one or more regions of shared memory. * The shared memory is created by a postmaster and is inherited * by each backend via fork() (or, in some ports, via other OS-specific * methods). The routines in this file are used for allocating and * binding to shared memory data structures. * * NOTES: * (a) There are three kinds of shared memory data structures * available to POSTGRES: fixed-size structures, queues and hash * tables. Fixed-size structures contain things like global variables * for a module and should never be allocated after the shared memory * initialization phase. Hash tables have a fixed maximum size, but * their actual size can vary dynamically. When entries are added * to the table, more space is allocated. Queues link data structures * that have been allocated either within fixed-size structures or as hash * buckets. Each shared data structure has a string name to identify * it (assigned in the module that declares it). * * (b) During initialization, each module looks for its * shared data structures in a hash table called the "Shmem Index". * If the data structure is not present, the caller can allocate * a new one and initialize it. If the data structure is present, * the caller "attaches" to the structure by initializing a pointer * in the local address space. * The shmem index has two purposes: first, it gives us * a simple model of how the world looks when a backend process * initializes. If something is present in the shmem index, * it is initialized. If it is not, it is uninitialized. Second, * the shmem index allows us to allocate shared memory on demand * instead of trying to preallocate structures and hard-wire the * sizes and locations in header files. If you are using a lot * of shared memory in a lot of different places (and changing * things during development), this is important. * * (c) In standard Unix-ish environments, individual backends do not * need to re-establish their local pointers into shared memory, because * they inherit correct values of those variables via fork() from the * postmaster. However, this does not work in the EXEC_BACKEND case. * In ports using EXEC_BACKEND, new backends have to set up their local * pointers using the method described in (b) above. * * (d) memory allocation model: shared memory can never be * freed, once allocated. Each hash table has its own free list, * so hash buckets can be reused when an item is deleted. However, * if one hash table grows very large and then shrinks, its space * cannot be redistributed to other tables. We could build a simple * hash bucket garbage collector if need be. Right now, it seems * unnecessary. */ #include "postgres.h" #include "access/transam.h" #include "fmgr.h" #include "funcapi.h" #include "miscadmin.h" #include "storage/lwlock.h" #include "storage/pg_shmem.h" #include "storage/shmem.h" #include "storage/spin.h" #include "utils/builtins.h" static void *ShmemAllocRaw(Size size, Size *allocated_size); /* shared memory global variables */ static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ static void *ShmemBase; /* start address of shared memory */ static void *ShmemEnd; /* end+1 address of shared memory */ slock_t *ShmemLock; /* spinlock for shared memory and LWLock * allocation */ static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ /* * InitShmemAccess() --- set up basic pointers to shared memory. * * Note: the argument should be declared "PGShmemHeader *seghdr", * but we use void to avoid having to include ipc.h in shmem.h. */ void InitShmemAccess(void *seghdr) { PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr; ShmemSegHdr = shmhdr; ShmemBase = (void *) shmhdr; ShmemEnd = (char *) ShmemBase + shmhdr->totalsize; } /* * InitShmemAllocation() --- set up shared-memory space allocation. * * This should be called only in the postmaster or a standalone backend. */ void InitShmemAllocation(void) { PGShmemHeader *shmhdr = ShmemSegHdr; char *aligned; Assert(shmhdr != NULL); /* * Initialize the spinlock used by ShmemAlloc. We must use * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet. */ ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t)); SpinLockInit(ShmemLock); /* * Allocations after this point should go through ShmemAlloc, which * expects to allocate everything on cache line boundaries. Make sure the * first allocation begins on a cache line boundary. */ aligned = (char *) (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset))); shmhdr->freeoffset = aligned - (char *) shmhdr; /* ShmemIndex can't be set up yet (need LWLocks first) */ shmhdr->index = NULL; ShmemIndex = (HTAB *) NULL; /* * Initialize ShmemVariableCache for transaction manager. (This doesn't * really belong here, but not worth moving.) */ ShmemVariableCache = (VariableCache) ShmemAlloc(sizeof(*ShmemVariableCache)); memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache)); } /* * ShmemAlloc -- allocate max-aligned chunk from shared memory * * Throws error if request cannot be satisfied. * * Assumes ShmemLock and ShmemSegHdr are initialized. */ void * ShmemAlloc(Size size) { void *newSpace; Size allocated_size; newSpace = ShmemAllocRaw(size, &allocated_size); if (!newSpace) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory (%zu bytes requested)", size))); return newSpace; } /* * ShmemAllocNoError -- allocate max-aligned chunk from shared memory * * As ShmemAlloc, but returns NULL if out of space, rather than erroring. */ void * ShmemAllocNoError(Size size) { Size allocated_size; return ShmemAllocRaw(size, &allocated_size); } /* * ShmemAllocRaw -- allocate align chunk and return allocated size * * Also sets *allocated_size to the number of bytes allocated, which will * be equal to the number requested plus any padding we choose to add. */ static void * ShmemAllocRaw(Size size, Size *allocated_size) { Size newStart; Size newFree; void *newSpace; /* * Ensure all space is adequately aligned. We used to only MAXALIGN this * space but experience has proved that on modern systems that is not good * enough. Many parts of the system are very sensitive to critical data * structures getting split across cache line boundaries. To avoid that, * attempt to align the beginning of the allocation to a cache line * boundary. The calling code will still need to be careful about how it * uses the allocated space - e.g. by padding each element in an array of * structures out to a power-of-two size - but without this, even that * won't be sufficient. */ size = CACHELINEALIGN(size); *allocated_size = size; Assert(ShmemSegHdr != NULL); SpinLockAcquire(ShmemLock); newStart = ShmemSegHdr->freeoffset; newFree = newStart + size; if (newFree <= ShmemSegHdr->totalsize) { newSpace = (void *) ((char *) ShmemBase + newStart); ShmemSegHdr->freeoffset = newFree; } else newSpace = NULL; SpinLockRelease(ShmemLock); /* note this assert is okay with newSpace == NULL */ Assert(newSpace == (void *) CACHELINEALIGN(newSpace)); return newSpace; } /* * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory * * Allocate space without locking ShmemLock. This should be used for, * and only for, allocations that must happen before ShmemLock is ready. * * We consider maxalign, rather than cachealign, sufficient here. */ void * ShmemAllocUnlocked(Size size) { Size newStart; Size newFree; void *newSpace; /* * Ensure allocated space is adequately aligned. */ size = MAXALIGN(size); Assert(ShmemSegHdr != NULL); newStart = ShmemSegHdr->freeoffset; newFree = newStart + size; if (newFree > ShmemSegHdr->totalsize) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory (%zu bytes requested)", size))); ShmemSegHdr->freeoffset = newFree; newSpace = (void *) ((char *) ShmemBase + newStart); Assert(newSpace == (void *) MAXALIGN(newSpace)); return newSpace; } /* * ShmemAddrIsValid -- test if an address refers to shared memory * * Returns true if the pointer points within the shared memory segment. */ bool ShmemAddrIsValid(const void *addr) { return (addr >= ShmemBase) && (addr < ShmemEnd); } /* * InitShmemIndex() --- set up or attach to shmem index table. */ void InitShmemIndex(void) { HASHCTL info; /* * Create the shared memory shmem index. * * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex * hashtable to exist already, we have a bit of a circularity problem in * initializing the ShmemIndex itself. The special "ShmemIndex" hash * table name will tell ShmemInitStruct to fake it. */ info.keysize = SHMEM_INDEX_KEYSIZE; info.entrysize = sizeof(ShmemIndexEnt); ShmemIndex = ShmemInitHash("ShmemIndex", SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, &info, HASH_ELEM | HASH_STRINGS); } /* * ShmemInitHash -- Create and initialize, or attach to, a * shared memory hash table. * * We assume caller is doing some kind of synchronization * so that two processes don't try to create/initialize the same * table at once. (In practice, all creations are done in the postmaster * process; child processes should always be attaching to existing tables.) * * max_size is the estimated maximum number of hashtable entries. This is * not a hard limit, but the access efficiency will degrade if it is * exceeded substantially (since it's used to compute directory size and * the hash table buckets will get overfull). * * init_size is the number of hashtable entries to preallocate. For a table * whose maximum size is certain, this should be equal to max_size; that * ensures that no run-time out-of-shared-memory failures can occur. * * *infoP and hash_flags must specify at least the entry sizes and key * comparison semantics (see hash_create()). Flag bits and values specific * to shared-memory hash tables are added here, except that callers may * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE. * * Note: before Postgres 9.0, this function returned NULL for some failure * cases. Now, it always throws error instead, so callers need not check * for NULL. */ HTAB * ShmemInitHash(const char *name, /* table string name for shmem index */ long init_size, /* initial table size */ long max_size, /* max size of the table */ HASHCTL *infoP, /* info about key and bucket size */ int hash_flags) /* info about infoP */ { bool found; void *location; /* * Hash tables allocated in shared memory have a fixed directory; it can't * grow or other backends wouldn't be able to find it. So, make sure we * make it big enough to start with. * * The shared memory allocator must be specified too. */ infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size); infoP->alloc = ShmemAllocNoError; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; /* look it up in the shmem index */ location = ShmemInitStruct(name, hash_get_shared_size(infoP, hash_flags), &found); /* * if it already exists, attach to it rather than allocate and initialize * new space */ if (found) hash_flags |= HASH_ATTACH; /* Pass location of hashtable header to hash_create */ infoP->hctl = (HASHHDR *) location; return hash_create(name, init_size, infoP, hash_flags); } /* * ShmemInitStruct -- Create/attach to a structure in shared memory. * * This is called during initialization to find or allocate * a data structure in shared memory. If no other process * has created the structure, this routine allocates space * for it. If it exists already, a pointer to the existing * structure is returned. * * Returns: pointer to the object. *foundPtr is set true if the object was * already in the shmem index (hence, already initialized). * * Note: before Postgres 9.0, this function returned NULL for some failure * cases. Now, it always throws error instead, so callers need not check * for NULL. */ void * ShmemInitStruct(const char *name, Size size, bool *foundPtr) { ShmemIndexEnt *result; void *structPtr; LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); if (!ShmemIndex) { PGShmemHeader *shmemseghdr = ShmemSegHdr; /* Must be trying to create/attach to ShmemIndex itself */ Assert(strcmp(name, "ShmemIndex") == 0); if (IsUnderPostmaster) { /* Must be initializing a (non-standalone) backend */ Assert(shmemseghdr->index != NULL); structPtr = shmemseghdr->index; *foundPtr = true; } else { /* * If the shmem index doesn't exist, we are bootstrapping: we must * be trying to init the shmem index itself. * * Notice that the ShmemIndexLock is released before the shmem * index has been initialized. This should be OK because no other * process can be accessing shared memory yet. */ Assert(shmemseghdr->index == NULL); structPtr = ShmemAlloc(size); shmemseghdr->index = structPtr; *foundPtr = false; } LWLockRelease(ShmemIndexLock); return structPtr; } /* look it up in the shmem index */ result = (ShmemIndexEnt *) hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr); if (!result) { LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("could not create ShmemIndex entry for data structure \"%s\"", name))); } if (*foundPtr) { /* * Structure is in the shmem index so someone else has allocated it * already. The size better be the same as the size we are trying to * initialize to, or there is a name conflict (or worse). */ if (result->size != size) { LWLockRelease(ShmemIndexLock); ereport(ERROR, (errmsg("ShmemIndex entry size is wrong for data structure" " \"%s\": expected %zu, actual %zu", name, size, result->size))); } structPtr = result->location; } else { Size allocated_size; /* It isn't in the table yet. allocate and initialize it */ structPtr = ShmemAllocRaw(size, &allocated_size); if (structPtr == NULL) { /* out of memory; remove the failed ShmemIndex entry */ hash_search(ShmemIndex, name, HASH_REMOVE, NULL); LWLockRelease(ShmemIndexLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("not enough shared memory for data structure" " \"%s\" (%zu bytes requested)", name, size))); } result->size = size; result->allocated_size = allocated_size; result->location = structPtr; } LWLockRelease(ShmemIndexLock); Assert(ShmemAddrIsValid(structPtr)); Assert(structPtr == (void *) CACHELINEALIGN(structPtr)); return structPtr; } /* * Add two Size values, checking for overflow */ Size add_size(Size s1, Size s2) { Size result; result = s1 + s2; /* We are assuming Size is an unsigned type here... */ if (result < s1 || result < s2) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested shared memory size overflows size_t"))); return result; } /* * Multiply two Size values, checking for overflow */ Size mul_size(Size s1, Size s2) { Size result; if (s1 == 0 || s2 == 0) return 0; result = s1 * s2; /* We are assuming Size is an unsigned type here... */ if (result / s2 != s1) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested shared memory size overflows size_t"))); return result; } /* SQL SRF showing allocated shared memory */ Datum pg_get_shmem_allocations(PG_FUNCTION_ARGS) { #define PG_GET_SHMEM_SIZES_COLS 4 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupdesc; Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; HASH_SEQ_STATUS hstat; ShmemIndexEnt *ent; Size named_allocated = 0; Datum values[PG_GET_SHMEM_SIZES_COLS]; bool nulls[PG_GET_SHMEM_SIZES_COLS]; /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not allowed in this context"))); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); LWLockAcquire(ShmemIndexLock, LW_SHARED); hash_seq_init(&hstat, ShmemIndex); /* output all allocated entries */ memset(nulls, 0, sizeof(nulls)); while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL) { values[0] = CStringGetTextDatum(ent->key); values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr); values[2] = Int64GetDatum(ent->size); values[3] = Int64GetDatum(ent->allocated_size); named_allocated += ent->allocated_size; tuplestore_putvalues(tupstore, tupdesc, values, nulls); } /* output shared memory allocated but not counted via the shmem index */ values[0] = CStringGetTextDatum(""); nulls[1] = true; values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated); values[3] = values[2]; tuplestore_putvalues(tupstore, tupdesc, values, nulls); /* output as-of-yet unused shared memory */ nulls[0] = true; values[1] = Int64GetDatum(ShmemSegHdr->freeoffset); nulls[1] = false; values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset); values[3] = values[2]; tuplestore_putvalues(tupstore, tupdesc, values, nulls); LWLockRelease(ShmemIndexLock); tuplestore_donestoring(tupstore); return (Datum) 0; }