Diffstat (limited to 'src/backend/storage/ipc/shm_toc.c')
-rw-r--r-- | src/backend/storage/ipc/shm_toc.c | 272
1 file changed, 272 insertions, 0 deletions
diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c
new file mode 100644
index 0000000..0cd8244
--- /dev/null
+++ b/src/backend/storage/ipc/shm_toc.c
@@ -0,0 +1,272 @@
+/*-------------------------------------------------------------------------
+ *
+ * shm_toc.c
+ *    shared memory segment table of contents
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/storage/ipc/shm_toc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "port/atomics.h"
+#include "storage/shm_toc.h"
+#include "storage/spin.h"
+
+typedef struct shm_toc_entry
+{
+    uint64      key;            /* Arbitrary identifier */
+    Size        offset;         /* Offset, in bytes, from TOC start */
+} shm_toc_entry;
+
+struct shm_toc
+{
+    uint64      toc_magic;      /* Magic number identifying this TOC */
+    slock_t     toc_mutex;      /* Spinlock for mutual exclusion */
+    Size        toc_total_bytes;    /* Bytes managed by this TOC */
+    Size        toc_allocated_bytes;    /* Bytes allocated of those managed */
+    uint32      toc_nentry;     /* Number of entries in TOC */
+    shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
+};
+
+/*
+ * Initialize a region of shared memory with a table of contents.
+ */
+shm_toc *
+shm_toc_create(uint64 magic, void *address, Size nbytes)
+{
+    shm_toc    *toc = (shm_toc *) address;
+
+    Assert(nbytes > offsetof(shm_toc, toc_entry));
+    toc->toc_magic = magic;
+    SpinLockInit(&toc->toc_mutex);
+
+    /*
+     * The alignment code in shm_toc_allocate() assumes that the starting
+     * value is buffer-aligned.
+     */
+    toc->toc_total_bytes = BUFFERALIGN_DOWN(nbytes);
+    toc->toc_allocated_bytes = 0;
+    toc->toc_nentry = 0;
+
+    return toc;
+}
+
+/*
+ * Attach to an existing table of contents.  If the magic number found at
+ * the target address doesn't match our expectations, return NULL.
+ */
+shm_toc *
+shm_toc_attach(uint64 magic, void *address)
+{
+    shm_toc    *toc = (shm_toc *) address;
+
+    if (toc->toc_magic != magic)
+        return NULL;
+
+    Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes);
+    Assert(toc->toc_total_bytes > offsetof(shm_toc, toc_entry));
+
+    return toc;
+}
+
+/*
+ * Allocate shared memory from a segment managed by a table of contents.
+ *
+ * This is not a full-blown allocator; there's no way to free memory.  It's
+ * just a way of dividing a single physical shared memory segment into logical
+ * chunks that may be used for different purposes.
+ *
+ * We allocate backwards from the end of the segment, so that the TOC entries
+ * can grow forward from the start of the segment.
+ */
+void *
+shm_toc_allocate(shm_toc *toc, Size nbytes)
+{
+    volatile shm_toc *vtoc = toc;
+    Size        total_bytes;
+    Size        allocated_bytes;
+    Size        nentry;
+    Size        toc_bytes;
+
+    /*
+     * Make sure request is well-aligned.  XXX: MAXALIGN is not enough,
+     * because atomic ops might need a wider alignment.  We don't have a
+     * proper definition for the minimum to make atomic ops safe, but
+     * BUFFERALIGN ought to be enough.
+     */
+    nbytes = BUFFERALIGN(nbytes);
+
+    SpinLockAcquire(&toc->toc_mutex);
+
+    total_bytes = vtoc->toc_total_bytes;
+    allocated_bytes = vtoc->toc_allocated_bytes;
+    nentry = vtoc->toc_nentry;
+    toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
+        + allocated_bytes;
+
+    /* Check for memory exhaustion and overflow. */
+    if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes)
+    {
+        SpinLockRelease(&toc->toc_mutex);
+        ereport(ERROR,
+                (errcode(ERRCODE_OUT_OF_MEMORY),
+                 errmsg("out of shared memory")));
+    }
+    vtoc->toc_allocated_bytes += nbytes;
+
+    SpinLockRelease(&toc->toc_mutex);
+
+    return ((char *) toc) + (total_bytes - allocated_bytes - nbytes);
+}
+
+/*
+ * Return the number of bytes that can still be allocated.
+ */
+Size
+shm_toc_freespace(shm_toc *toc)
+{
+    volatile shm_toc *vtoc = toc;
+    Size        total_bytes;
+    Size        allocated_bytes;
+    Size        nentry;
+    Size        toc_bytes;
+
+    SpinLockAcquire(&toc->toc_mutex);
+    total_bytes = vtoc->toc_total_bytes;
+    allocated_bytes = vtoc->toc_allocated_bytes;
+    nentry = vtoc->toc_nentry;
+    SpinLockRelease(&toc->toc_mutex);
+
+    toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
+    Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
+    return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
+}
+
+/*
+ * Insert a TOC entry.
+ *
+ * The idea here is that the process setting up the shared memory segment will
+ * register the addresses of data structures within the segment using this
+ * function.  Each data structure will be identified using a 64-bit key, which
+ * is assumed to be a well-known or discoverable integer.  Other processes
+ * accessing the shared memory segment can pass the same key to
+ * shm_toc_lookup() to discover the addresses of those data structures.
+ *
+ * Since the shared memory segment may be mapped at different addresses within
+ * different backends, we store relative rather than absolute pointers.
+ *
+ * This won't scale well to a large number of keys.  Hopefully, that isn't
+ * necessary; if it proves to be, we might need to provide a more sophisticated
+ * data structure here.  But the real idea here is just to give someone mapping
+ * a dynamic shared memory the ability to find the bare minimum number of
+ * pointers that they need to bootstrap.  If you're storing a lot of stuff in
+ * the TOC, you're doing it wrong.
+ */
+void
+shm_toc_insert(shm_toc *toc, uint64 key, void *address)
+{
+    volatile shm_toc *vtoc = toc;
+    Size        total_bytes;
+    Size        allocated_bytes;
+    Size        nentry;
+    Size        toc_bytes;
+    Size        offset;
+
+    /* Relativize pointer. */
+    Assert(address > (void *) toc);
+    offset = ((char *) address) - (char *) toc;
+
+    SpinLockAcquire(&toc->toc_mutex);
+
+    total_bytes = vtoc->toc_total_bytes;
+    allocated_bytes = vtoc->toc_allocated_bytes;
+    nentry = vtoc->toc_nentry;
+    toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
+        + allocated_bytes;
+
+    /* Check for memory exhaustion and overflow. */
+    if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
+        toc_bytes + sizeof(shm_toc_entry) < toc_bytes ||
+        nentry >= PG_UINT32_MAX)
+    {
+        SpinLockRelease(&toc->toc_mutex);
+        ereport(ERROR,
+                (errcode(ERRCODE_OUT_OF_MEMORY),
+                 errmsg("out of shared memory")));
+    }
+
+    Assert(offset < total_bytes);
+    vtoc->toc_entry[nentry].key = key;
+    vtoc->toc_entry[nentry].offset = offset;
+
+    /*
+     * By placing a write barrier after filling in the entry and before
+     * updating the number of entries, we make it safe to read the TOC
+     * unlocked.
+     */
+    pg_write_barrier();
+
+    vtoc->toc_nentry++;
+
+    SpinLockRelease(&toc->toc_mutex);
+}
+
+/*
+ * Look up a TOC entry.
+ *
+ * If the key is not found, returns NULL if noError is true, otherwise
+ * throws elog(ERROR).
+ *
+ * Unlike the other functions in this file, this operation acquires no lock;
+ * it uses only barriers.  It probably wouldn't hurt concurrency very much even
+ * if it did get a lock, but since it's reasonably likely that a group of
+ * worker processes could each read a series of entries from the same TOC
+ * right around the same time, there seems to be some value in avoiding it.
+ */
+void *
+shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
+{
+    uint32      nentry;
+    uint32      i;
+
+    /*
+     * Read the number of entries before we examine any entry.  We assume
+     * that reading a uint32 is atomic.
+     */
+    nentry = toc->toc_nentry;
+    pg_read_barrier();
+
+    /* Now search for a matching entry. */
+    for (i = 0; i < nentry; ++i)
+    {
+        if (toc->toc_entry[i].key == key)
+            return ((char *) toc) + toc->toc_entry[i].offset;
+    }
+
+    /* No matching entry was found. */
+    if (!noError)
+        elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p",
+             key, toc);
+    return NULL;
+}
+
+/*
+ * Estimate how much shared memory will be required to store a TOC and its
+ * dependent data structures.
+ */
+Size
+shm_toc_estimate(shm_toc_estimator *e)
+{
+    Size        sz;
+
+    sz = offsetof(shm_toc, toc_entry);
+    sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry)));
+    sz = add_size(sz, e->space_for_chunks);
+
+    return BUFFERALIGN(sz);
+}
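
The diff above is easier to follow with a concrete caller in mind. The sketch below shows the intended life cycle: the process that sets up the segment estimates its size, creates the TOC, allocates a chunk, and publishes it under a key; a cooperating process attaches by magic number and looks the key up. The magic number, the key constant, the 1kB chunk size, and the helper function names are all hypothetical, and the segment address is taken as given (in PostgreSQL it would typically come from the dynamic shared memory facility). The estimator fields are filled in directly here, mirroring what shm_toc_estimate() reads; note that space_for_chunks should account for the BUFFERALIGN rounding that shm_toc_allocate() applies to each request.

#include "postgres.h"

#include "storage/shm_toc.h"

/* Hypothetical magic number and key; any well-known 64-bit values work. */
#define MY_SHM_MAGIC    UINT64CONST(0x53484D544F430001)
#define MY_KEY_STATE    0

/* How large a segment do we need for one 1kB chunk under one key? */
static Size
my_segment_size(void)
{
    shm_toc_estimator e;

    e.number_of_keys = 1;
    /* Match the per-chunk rounding done by shm_toc_allocate(). */
    e.space_for_chunks = BUFFERALIGN(1024);
    return shm_toc_estimate(&e);
}

/* Creator: build the TOC, carve out a chunk, publish it under a key. */
static void
my_setup(void *addr, Size nbytes)
{
    shm_toc    *toc = shm_toc_create(MY_SHM_MAGIC, addr, nbytes);
    void       *chunk = shm_toc_allocate(toc, 1024);

    shm_toc_insert(toc, MY_KEY_STATE, chunk);
}

/* Worker: attach at a possibly different address and find the chunk. */
static void *
my_find_state(void *addr)
{
    shm_toc    *toc = shm_toc_attach(MY_SHM_MAGIC, addr);

    if (toc == NULL)
        elog(ERROR, "unexpected magic number in shared memory segment");
    /* noError = false: raise an error if the key is missing. */
    return shm_toc_lookup(toc, MY_KEY_STATE, false);
}

Because shm_toc_insert() issues a write barrier before bumping toc_nentry, and shm_toc_lookup() reads toc_nentry before a read barrier and only then scans the entries, the worker can run my_find_state() without taking the spinlock, exactly as the comments in the diff describe.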