/*-------------------------------------------------------------------------
 *
 * shm_toc.c
 *	  shared memory segment table of contents
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/storage/ipc/shm_toc.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "port/atomics.h"
#include "storage/shm_toc.h"
#include "storage/spin.h"

/* One directory slot: associates a caller-chosen key with a location. */
typedef struct shm_toc_entry
{
	uint64		key;			/* Arbitrary identifier */
	Size		offset;			/* Offset, in bytes, from TOC start */
} shm_toc_entry;

/*
 * Table-of-contents header.  It sits at the start of the managed region
 * (offsets are measured from it) and is followed directly by the entry
 * array.
 */
struct shm_toc
{
	uint64		toc_magic;		/* Magic number identifying this TOC */
	slock_t		toc_mutex;		/* Spinlock for mutual exclusion */
	Size		toc_total_bytes;	/* Bytes managed by this TOC */
	Size		toc_allocated_bytes;	/* Bytes allocated of those managed */
	uint32		toc_nentry;		/* Number of entries in TOC */
	shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
};

/*
 * Set up a table of contents at the start of a shared memory region.
 *
 * 'magic' is stored so that shm_toc_attach() can verify it later.
 * 'address' is the start of the region and 'nbytes' its size, which must
 * exceed the size of the fixed TOC header.  Returns the new TOC, which lives
 * at 'address'.
 */
shm_toc *
shm_toc_create(uint64 magic, void *address, Size nbytes)
{
	shm_toc    *new_toc;

	Assert(nbytes > offsetof(shm_toc, toc_entry));

	new_toc = (shm_toc *) address;
	new_toc->toc_magic = magic;
	SpinLockInit(&new_toc->toc_mutex);

	/* A fresh TOC has no entries and nothing handed out yet. */
	new_toc->toc_nentry = 0;
	new_toc->toc_allocated_bytes = 0;

	/*
	 * shm_toc_allocate()'s alignment logic assumes it starts from a
	 * buffer-aligned value, so round the managed size down.
	 */
	new_toc->toc_total_bytes = BUFFERALIGN_DOWN(nbytes);

	return new_toc;
}

/*
 * Attach to a table of contents that already exists at 'address'.
 *
 * Returns the TOC if the magic number stored there equals 'magic', and NULL
 * otherwise.
 */
shm_toc *
shm_toc_attach(uint64 magic, void *address)
{
	shm_toc    *existing = (shm_toc *) address;

	if (existing->toc_magic == magic)
	{
		/* Basic sanity checks on the header we just recognized. */
		Assert(existing->toc_total_bytes >= existing->toc_allocated_bytes);
		Assert(existing->toc_total_bytes > offsetof(shm_toc, toc_entry));

		return existing;
	}

	return NULL;
}

/*
 * Allocate shared memory from a segment managed by a table of contents.
* * This is not a full-blown allocator; there's no way to free memory. It's * just a way of dividing a single physical shared memory segment into logical * chunks that may be used for different purposes. * * We allocate backwards from the end of the segment, so that the TOC entries * can grow forward from the start of the segment. */ void * shm_toc_allocate(shm_toc *toc, Size nbytes) { volatile shm_toc *vtoc = toc; Size total_bytes; Size allocated_bytes; Size nentry; Size toc_bytes; /* * Make sure request is well-aligned. XXX: MAXALIGN is not enough, * because atomic ops might need a wider alignment. We don't have a * proper definition for the minimum to make atomic ops safe, but * BUFFERALIGN ought to be enough. */ nbytes = BUFFERALIGN(nbytes); SpinLockAcquire(&toc->toc_mutex); total_bytes = vtoc->toc_total_bytes; allocated_bytes = vtoc->toc_allocated_bytes; nentry = vtoc->toc_nentry; toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry) + allocated_bytes; /* Check for memory exhaustion and overflow. */ if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes) { SpinLockRelease(&toc->toc_mutex); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); } vtoc->toc_allocated_bytes += nbytes; SpinLockRelease(&toc->toc_mutex); return ((char *) toc) + (total_bytes - allocated_bytes - nbytes); } /* * Return the number of bytes that can still be allocated. */ Size shm_toc_freespace(shm_toc *toc) { volatile shm_toc *vtoc = toc; Size total_bytes; Size allocated_bytes; Size nentry; Size toc_bytes; SpinLockAcquire(&toc->toc_mutex); total_bytes = vtoc->toc_total_bytes; allocated_bytes = vtoc->toc_allocated_bytes; nentry = vtoc->toc_nentry; SpinLockRelease(&toc->toc_mutex); toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry); Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes); return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes)); } /* * Insert a TOC entry. 
* * The idea here is that the process setting up the shared memory segment will * register the addresses of data structures within the segment using this * function. Each data structure will be identified using a 64-bit key, which * is assumed to be a well-known or discoverable integer. Other processes * accessing the shared memory segment can pass the same key to * shm_toc_lookup() to discover the addresses of those data structures. * * Since the shared memory segment may be mapped at different addresses within * different backends, we store relative rather than absolute pointers. * * This won't scale well to a large number of keys. Hopefully, that isn't * necessary; if it proves to be, we might need to provide a more sophisticated * data structure here. But the real idea here is just to give someone mapping * a dynamic shared memory the ability to find the bare minimum number of * pointers that they need to bootstrap. If you're storing a lot of stuff in * the TOC, you're doing it wrong. */ void shm_toc_insert(shm_toc *toc, uint64 key, void *address) { volatile shm_toc *vtoc = toc; Size total_bytes; Size allocated_bytes; Size nentry; Size toc_bytes; Size offset; /* Relativize pointer. */ Assert(address > (void *) toc); offset = ((char *) address) - (char *) toc; SpinLockAcquire(&toc->toc_mutex); total_bytes = vtoc->toc_total_bytes; allocated_bytes = vtoc->toc_allocated_bytes; nentry = vtoc->toc_nentry; toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry) + allocated_bytes; /* Check for memory exhaustion and overflow. 
*/ if (toc_bytes + sizeof(shm_toc_entry) > total_bytes || toc_bytes + sizeof(shm_toc_entry) < toc_bytes || nentry >= PG_UINT32_MAX) { SpinLockRelease(&toc->toc_mutex); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); } Assert(offset < total_bytes); vtoc->toc_entry[nentry].key = key; vtoc->toc_entry[nentry].offset = offset; /* * By placing a write barrier after filling in the entry and before * updating the number of entries, we make it safe to read the TOC * unlocked. */ pg_write_barrier(); vtoc->toc_nentry++; SpinLockRelease(&toc->toc_mutex); } /* * Look up a TOC entry. * * If the key is not found, returns NULL if noError is true, otherwise * throws elog(ERROR). * * Unlike the other functions in this file, this operation acquires no lock; * it uses only barriers. It probably wouldn't hurt concurrency very much even * if it did get a lock, but since it's reasonably likely that a group of * worker processes could each read a series of entries from the same TOC * right around the same time, there seems to be some value in avoiding it. */ void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError) { uint32 nentry; uint32 i; /* * Read the number of entries before we examine any entry. We assume that * reading a uint32 is atomic. */ nentry = toc->toc_nentry; pg_read_barrier(); /* Now search for a matching entry. */ for (i = 0; i < nentry; ++i) { if (toc->toc_entry[i].key == key) return ((char *) toc) + toc->toc_entry[i].offset; } /* No matching entry was found. */ if (!noError) elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p", key, toc); return NULL; } /* * Estimate how much shared memory will be required to store a TOC and its * dependent data structures. */ Size shm_toc_estimate(shm_toc_estimator *e) { Size sz; sz = offsetof(shm_toc, toc_entry); sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry))); sz = add_size(sz, e->space_for_chunks); return BUFFERALIGN(sz); }