diff options
Diffstat (limited to 'src/backend/storage/ipc/dsm.c')
-rw-r--r-- | src/backend/storage/ipc/dsm.c | 1248 |
1 files changed, 1248 insertions, 0 deletions
diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c new file mode 100644 index 0000000..b461a5f --- /dev/null +++ b/src/backend/storage/ipc/dsm.c @@ -0,0 +1,1248 @@ +/*------------------------------------------------------------------------- + * + * dsm.c + * manage dynamic shared memory segments + * + * This file provides a set of services to make programming with dynamic + * shared memory segments more convenient. Unlike the low-level + * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments + * created using this module will be cleaned up automatically. Mappings + * will be removed when the resource owner under which they were created + * is cleaned up, unless dsm_pin_mapping() is used, in which case they + * have session lifespan. Segments will be removed when there are no + * remaining mappings, or at postmaster shutdown in any case. After a + * hard postmaster crash, remaining segments will be removed, if they + * still exist, at the next postmaster startup. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/storage/ipc/dsm.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <fcntl.h> +#include <unistd.h> +#ifndef WIN32 +#include <sys/mman.h> +#endif +#include <sys/stat.h> + +#include "lib/ilist.h" +#include "miscadmin.h" +#include "port/pg_bitutils.h" +#include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pg_shmem.h" +#include "utils/freepage.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/resowner_private.h" + +#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32 + +#define PG_DYNSHMEM_FIXED_SLOTS 64 +#define PG_DYNSHMEM_SLOTS_PER_BACKEND 5 + +#define INVALID_CONTROL_SLOT ((uint32) -1) + +/* Backend-local tracking for on-detach callbacks. */ +typedef struct dsm_segment_detach_callback +{ + on_dsm_detach_callback function; + Datum arg; + slist_node node; +} dsm_segment_detach_callback; + +/* Backend-local state for a dynamic shared memory segment. */ +struct dsm_segment +{ + dlist_node node; /* List link in dsm_segment_list. */ + ResourceOwner resowner; /* Resource owner. */ + dsm_handle handle; /* Segment name. */ + uint32 control_slot; /* Slot in control segment. */ + void *impl_private; /* Implementation-specific private data. */ + void *mapped_address; /* Mapping address, or NULL if unmapped. */ + Size mapped_size; /* Size of our mapping. */ + slist_head on_detach; /* On-detach callbacks. */ +}; + +/* Shared-memory state for a dynamic shared memory segment. */ +typedef struct dsm_control_item +{ + dsm_handle handle; + uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */ + size_t first_page; + size_t npages; + void *impl_private_pm_handle; /* only needed on Windows */ + bool pinned; +} dsm_control_item; + +/* Layout of the dynamic shared memory control segment. */ +typedef struct dsm_control_header +{ + uint32 magic; + uint32 nitems; + uint32 maxitems; + dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]; +} dsm_control_header; + +static void dsm_cleanup_for_mmap(void); +static void dsm_postmaster_shutdown(int code, Datum arg); +static dsm_segment *dsm_create_descriptor(void); +static bool dsm_control_segment_sane(dsm_control_header *control, + Size mapped_size); +static uint64 dsm_control_bytes_needed(uint32 nitems); +static inline dsm_handle make_main_region_dsm_handle(int slot); +static inline bool is_main_region_dsm_handle(dsm_handle handle); + +/* Has this backend initialized the dynamic shared memory system yet? */ +static bool dsm_init_done = false; + +/* Preallocated DSM space in the main shared memory region. */ +static void *dsm_main_space_begin = NULL; + +/* + * List of dynamic shared memory segments used by this backend. + * + * At process exit time, we must decrement the reference count of each + * segment we have attached; this list makes it possible to find all such + * segments. + * + * This list should always be empty in the postmaster. We could probably + * allow the postmaster to map dynamic shared memory segments before it + * begins to start child processes, provided that each process adjusted + * the reference counts for those segments in the control segment at + * startup time, but there's no obvious need for such a facility, which + * would also be complex to handle in the EXEC_BACKEND case. Once the + * postmaster has begun spawning children, there's an additional problem: + * each new mapping would require an update to the control segment, + * which requires locking, in which the postmaster must not be involved. + */ +static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list); + +/* + * Control segment information. + * + * Unlike ordinary shared memory segments, the control segment is not + * reference counted; instead, it lasts for the postmaster's entire + * life cycle. For simplicity, it doesn't have a dsm_segment object either. + */ +static dsm_handle dsm_control_handle; +static dsm_control_header *dsm_control; +static Size dsm_control_mapped_size = 0; +static void *dsm_control_impl_private = NULL; + +/* + * Start up the dynamic shared memory system. + * + * This is called just once during each cluster lifetime, at postmaster + * startup time. + */ +void +dsm_postmaster_startup(PGShmemHeader *shim) +{ + void *dsm_control_address = NULL; + uint32 maxitems; + Size segsize; + + Assert(!IsUnderPostmaster); + + /* + * If we're using the mmap implementations, clean up any leftovers. + * Cleanup isn't needed on Windows, and happens earlier in startup for + * POSIX and System V shared memory, via a direct call to + * dsm_cleanup_using_control_segment. + */ + if (dynamic_shared_memory_type == DSM_IMPL_MMAP) + dsm_cleanup_for_mmap(); + + /* Determine size for new control segment. */ + maxitems = PG_DYNSHMEM_FIXED_SLOTS + + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends; + elog(DEBUG2, "dynamic shared memory system will support %u segments", + maxitems); + segsize = dsm_control_bytes_needed(maxitems); + + /* + * Loop until we find an unused identifier for the new control segment. We + * sometimes use 0 as a sentinel value indicating that no control segment + * is known to exist, so avoid using that value for a real control + * segment. + */ + for (;;) + { + Assert(dsm_control_address == NULL); + Assert(dsm_control_mapped_size == 0); + dsm_control_handle = random() << 1; /* Even numbers only */ + if (dsm_control_handle == DSM_HANDLE_INVALID) + continue; + if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize, + &dsm_control_impl_private, &dsm_control_address, + &dsm_control_mapped_size, ERROR)) + break; + } + dsm_control = dsm_control_address; + on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim)); + elog(DEBUG2, + "created dynamic shared memory control segment %u (%zu bytes)", + dsm_control_handle, segsize); + shim->dsm_control = dsm_control_handle; + + /* Initialize control segment. */ + dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC; + dsm_control->nitems = 0; + dsm_control->maxitems = maxitems; +} + +/* + * Determine whether the control segment from the previous postmaster + * invocation still exists. If so, remove the dynamic shared memory + * segments to which it refers, and then the control segment itself. + */ +void +dsm_cleanup_using_control_segment(dsm_handle old_control_handle) +{ + void *mapped_address = NULL; + void *junk_mapped_address = NULL; + void *impl_private = NULL; + void *junk_impl_private = NULL; + Size mapped_size = 0; + Size junk_mapped_size = 0; + uint32 nitems; + uint32 i; + dsm_control_header *old_control; + + /* + * Try to attach the segment. If this fails, it probably just means that + * the operating system has been rebooted and the segment no longer + * exists, or an unrelated process has used the same shm ID. So just fall + * out quietly. + */ + if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private, + &mapped_address, &mapped_size, DEBUG1)) + return; + + /* + * We've managed to reattach it, but the contents might not be sane. If + * they aren't, we disregard the segment after all. + */ + old_control = (dsm_control_header *) mapped_address; + if (!dsm_control_segment_sane(old_control, mapped_size)) + { + dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private, + &mapped_address, &mapped_size, LOG); + return; + } + + /* + * OK, the control segment looks basically valid, so we can use it to get + * a list of segments that need to be removed. + */ + nitems = old_control->nitems; + for (i = 0; i < nitems; ++i) + { + dsm_handle handle; + uint32 refcnt; + + /* If the reference count is 0, the slot is actually unused. */ + refcnt = old_control->item[i].refcnt; + if (refcnt == 0) + continue; + + /* If it was using the main shmem area, there is nothing to do. */ + handle = old_control->item[i].handle; + if (is_main_region_dsm_handle(handle)) + continue; + + /* Log debugging information. */ + elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)", + handle, refcnt); + + /* Destroy the referenced segment. */ + dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, + &junk_mapped_address, &junk_mapped_size, LOG); + } + + /* Destroy the old control segment, too. */ + elog(DEBUG2, + "cleaning up dynamic shared memory control segment with ID %u", + old_control_handle); + dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private, + &mapped_address, &mapped_size, LOG); +} + +/* + * When we're using the mmap shared memory implementation, "shared memory" + * segments might even manage to survive an operating system reboot. + * But there's no guarantee as to exactly what will survive: some segments + * may survive, and others may not, and the contents of some may be out + * of date. In particular, the control segment may be out of date, so we + * can't rely on it to figure out what to remove. However, since we know + * what directory contains the files we used as shared memory, we can simply + * scan the directory and blow everything away that shouldn't be there. + */ +static void +dsm_cleanup_for_mmap(void) +{ + DIR *dir; + struct dirent *dent; + + /* Scan the directory for something with a name of the correct format. */ + dir = AllocateDir(PG_DYNSHMEM_DIR); + + while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL) + { + if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX, + strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0) + { + char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)]; + + snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name); + + elog(DEBUG2, "removing file \"%s\"", buf); + + /* We found a matching file; so remove it. */ + if (unlink(buf) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", buf))); + } + } + + /* Cleanup complete. */ + FreeDir(dir); +} + +/* + * At shutdown time, we iterate over the control segment and remove all + * remaining dynamic shared memory segments. We avoid throwing errors here; + * the postmaster is shutting down either way, and this is just non-critical + * resource cleanup. + */ +static void +dsm_postmaster_shutdown(int code, Datum arg) +{ + uint32 nitems; + uint32 i; + void *dsm_control_address; + void *junk_mapped_address = NULL; + void *junk_impl_private = NULL; + Size junk_mapped_size = 0; + PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg); + + /* + * If some other backend exited uncleanly, it might have corrupted the + * control segment while it was dying. In that case, we warn and ignore + * the contents of the control segment. This may end up leaving behind + * stray shared memory segments, but there's not much we can do about that + * if the metadata is gone. + */ + nitems = dsm_control->nitems; + if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size)) + { + ereport(LOG, + (errmsg("dynamic shared memory control segment is corrupt"))); + return; + } + + /* Remove any remaining segments. */ + for (i = 0; i < nitems; ++i) + { + dsm_handle handle; + + /* If the reference count is 0, the slot is actually unused. */ + if (dsm_control->item[i].refcnt == 0) + continue; + + handle = dsm_control->item[i].handle; + if (is_main_region_dsm_handle(handle)) + continue; + + /* Log debugging information. */ + elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u", + handle); + + /* Destroy the segment. */ + dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, + &junk_mapped_address, &junk_mapped_size, LOG); + } + + /* Remove the control segment itself. */ + elog(DEBUG2, + "cleaning up dynamic shared memory control segment with ID %u", + dsm_control_handle); + dsm_control_address = dsm_control; + dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0, + &dsm_control_impl_private, &dsm_control_address, + &dsm_control_mapped_size, LOG); + dsm_control = dsm_control_address; + shim->dsm_control = 0; +} + +/* + * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND, + * we must reread the state file and map the control segment; in other cases, + * we'll have inherited the postmaster's mapping and global variables. + */ +static void +dsm_backend_startup(void) +{ +#ifdef EXEC_BACKEND + { + void *control_address = NULL; + + /* Attach control segment. */ + Assert(dsm_control_handle != 0); + dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0, + &dsm_control_impl_private, &control_address, + &dsm_control_mapped_size, ERROR); + dsm_control = control_address; + /* If control segment doesn't look sane, something is badly wrong. */ + if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size)) + { + dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0, + &dsm_control_impl_private, &control_address, + &dsm_control_mapped_size, WARNING); + ereport(FATAL, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("dynamic shared memory control segment is not valid"))); + } + } +#endif + + dsm_init_done = true; +} + +#ifdef EXEC_BACKEND +/* + * When running under EXEC_BACKEND, we get a callback here when the main + * shared memory segment is re-attached, so that we can record the control + * handle retrieved from it. + */ +void +dsm_set_control_handle(dsm_handle h) +{ + Assert(dsm_control_handle == 0 && h != 0); + dsm_control_handle = h; +} +#endif + +/* + * Reserve some space in the main shared memory segment for DSM segments. + */ +size_t +dsm_estimate_size(void) +{ + return 1024 * 1024 * (size_t) min_dynamic_shared_memory; +} + +/* + * Initialize space in the main shared memory segment for DSM segments. + */ +void +dsm_shmem_init(void) +{ + size_t size = dsm_estimate_size(); + bool found; + + if (size == 0) + return; + + dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found); + if (!found) + { + FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin; + size_t first_page = 0; + size_t pages; + + /* Reserve space for the FreePageManager. */ + while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager)) + ++first_page; + + /* Initialize it and give it all the rest of the space. */ + FreePageManagerInitialize(fpm, dsm_main_space_begin); + pages = (size / FPM_PAGE_SIZE) - first_page; + FreePageManagerPut(fpm, first_page, pages); + } +} + +/* + * Create a new dynamic shared memory segment. + * + * If there is a non-NULL CurrentResourceOwner, the new segment is associated + * with it and must be detached before the resource owner releases, or a + * warning will be logged. If CurrentResourceOwner is NULL, the segment + * remains attached until explicitly detached or the session ends. + * Creating with a NULL CurrentResourceOwner is equivalent to creating + * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping. + */ +dsm_segment * +dsm_create(Size size, int flags) +{ + dsm_segment *seg; + uint32 i; + uint32 nitems; + size_t npages = 0; + size_t first_page = 0; + FreePageManager *dsm_main_space_fpm = dsm_main_space_begin; + bool using_main_dsm_region = false; + + /* Unsafe in postmaster (and pointless in a stand-alone backend). */ + Assert(IsUnderPostmaster); + + if (!dsm_init_done) + dsm_backend_startup(); + + /* Create a new segment descriptor. */ + seg = dsm_create_descriptor(); + + /* + * Lock the control segment while we try to allocate from the main shared + * memory area, if configured. + */ + if (dsm_main_space_fpm) + { + npages = size / FPM_PAGE_SIZE; + if (size % FPM_PAGE_SIZE > 0) + ++npages; + + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page)) + { + /* We can carve out a piece of the main shared memory segment. */ + seg->mapped_address = (char *) dsm_main_space_begin + + first_page * FPM_PAGE_SIZE; + seg->mapped_size = npages * FPM_PAGE_SIZE; + using_main_dsm_region = true; + /* We'll choose a handle below. */ + } + } + + if (!using_main_dsm_region) + { + /* + * We need to create a new memory segment. Loop until we find an + * unused segment identifier. + */ + if (dsm_main_space_fpm) + LWLockRelease(DynamicSharedMemoryControlLock); + for (;;) + { + Assert(seg->mapped_address == NULL && seg->mapped_size == 0); + seg->handle = random() << 1; /* Even numbers only */ + if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */ + continue; + if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private, + &seg->mapped_address, &seg->mapped_size, ERROR)) + break; + } + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + } + + /* Search the control segment for an unused slot. */ + nitems = dsm_control->nitems; + for (i = 0; i < nitems; ++i) + { + if (dsm_control->item[i].refcnt == 0) + { + if (using_main_dsm_region) + { + seg->handle = make_main_region_dsm_handle(i); + dsm_control->item[i].first_page = first_page; + dsm_control->item[i].npages = npages; + } + else + Assert(!is_main_region_dsm_handle(seg->handle)); + dsm_control->item[i].handle = seg->handle; + /* refcnt of 1 triggers destruction, so start at 2 */ + dsm_control->item[i].refcnt = 2; + dsm_control->item[i].impl_private_pm_handle = NULL; + dsm_control->item[i].pinned = false; + seg->control_slot = i; + LWLockRelease(DynamicSharedMemoryControlLock); + return seg; + } + } + + /* Verify that we can support an additional mapping. */ + if (nitems >= dsm_control->maxitems) + { + if (using_main_dsm_region) + FreePageManagerPut(dsm_main_space_fpm, first_page, npages); + LWLockRelease(DynamicSharedMemoryControlLock); + if (!using_main_dsm_region) + dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private, + &seg->mapped_address, &seg->mapped_size, WARNING); + if (seg->resowner != NULL) + ResourceOwnerForgetDSM(seg->resowner, seg); + dlist_delete(&seg->node); + pfree(seg); + + if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0) + return NULL; + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("too many dynamic shared memory segments"))); + } + + /* Enter the handle into a new array slot. */ + if (using_main_dsm_region) + { + seg->handle = make_main_region_dsm_handle(nitems); + dsm_control->item[i].first_page = first_page; + dsm_control->item[i].npages = npages; + } + dsm_control->item[nitems].handle = seg->handle; + /* refcnt of 1 triggers destruction, so start at 2 */ + dsm_control->item[nitems].refcnt = 2; + dsm_control->item[nitems].impl_private_pm_handle = NULL; + dsm_control->item[nitems].pinned = false; + seg->control_slot = nitems; + dsm_control->nitems++; + LWLockRelease(DynamicSharedMemoryControlLock); + + return seg; +} + +/* + * Attach a dynamic shared memory segment. + * + * See comments for dsm_segment_handle() for an explanation of how this + * is intended to be used. + * + * This function will return NULL if the segment isn't known to the system. + * This can happen if we're asked to attach the segment, but then everyone + * else detaches it (causing it to be destroyed) before we get around to + * attaching it. + * + * If there is a non-NULL CurrentResourceOwner, the attached segment is + * associated with it and must be detached before the resource owner releases, + * or a warning will be logged. Otherwise the segment remains attached until + * explicitly detached or the session ends. See the note atop dsm_create(). + */ +dsm_segment * +dsm_attach(dsm_handle h) +{ + dsm_segment *seg; + dlist_iter iter; + uint32 i; + uint32 nitems; + + /* Unsafe in postmaster (and pointless in a stand-alone backend). */ + Assert(IsUnderPostmaster); + + if (!dsm_init_done) + dsm_backend_startup(); + + /* + * Since this is just a debugging cross-check, we could leave it out + * altogether, or include it only in assert-enabled builds. But since the + * list of attached segments should normally be very short, let's include + * it always for right now. + * + * If you're hitting this error, you probably want to attempt to find an + * existing mapping via dsm_find_mapping() before calling dsm_attach() to + * create a new one. + */ + dlist_foreach(iter, &dsm_segment_list) + { + seg = dlist_container(dsm_segment, node, iter.cur); + if (seg->handle == h) + elog(ERROR, "can't attach the same segment more than once"); + } + + /* Create a new segment descriptor. */ + seg = dsm_create_descriptor(); + seg->handle = h; + + /* Bump reference count for this segment in shared memory. */ + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + nitems = dsm_control->nitems; + for (i = 0; i < nitems; ++i) + { + /* + * If the reference count is 0, the slot is actually unused. If the + * reference count is 1, the slot is still in use, but the segment is + * in the process of going away; even if the handle matches, another + * slot may already have started using the same handle value by + * coincidence so we have to keep searching. + */ + if (dsm_control->item[i].refcnt <= 1) + continue; + + /* If the handle doesn't match, it's not the slot we want. */ + if (dsm_control->item[i].handle != seg->handle) + continue; + + /* Otherwise we've found a match. */ + dsm_control->item[i].refcnt++; + seg->control_slot = i; + if (is_main_region_dsm_handle(seg->handle)) + { + seg->mapped_address = (char *) dsm_main_space_begin + + dsm_control->item[i].first_page * FPM_PAGE_SIZE; + seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE; + } + break; + } + LWLockRelease(DynamicSharedMemoryControlLock); + + /* + * If we didn't find the handle we're looking for in the control segment, + * it probably means that everyone else who had it mapped, including the + * original creator, died before we got to this point. It's up to the + * caller to decide what to do about that. + */ + if (seg->control_slot == INVALID_CONTROL_SLOT) + { + dsm_detach(seg); + return NULL; + } + + /* Here's where we actually try to map the segment. */ + if (!is_main_region_dsm_handle(seg->handle)) + dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private, + &seg->mapped_address, &seg->mapped_size, ERROR); + + return seg; +} + +/* + * At backend shutdown time, detach any segments that are still attached. + * (This is similar to dsm_detach_all, except that there's no reason to + * unmap the control segment before exiting, so we don't bother.) + */ +void +dsm_backend_shutdown(void) +{ + while (!dlist_is_empty(&dsm_segment_list)) + { + dsm_segment *seg; + + seg = dlist_head_element(dsm_segment, node, &dsm_segment_list); + dsm_detach(seg); + } +} + +/* + * Detach all shared memory segments, including the control segments. This + * should be called, along with PGSharedMemoryDetach, in processes that + * might inherit mappings but are not intended to be connected to dynamic + * shared memory. + */ +void +dsm_detach_all(void) +{ + void *control_address = dsm_control; + + while (!dlist_is_empty(&dsm_segment_list)) + { + dsm_segment *seg; + + seg = dlist_head_element(dsm_segment, node, &dsm_segment_list); + dsm_detach(seg); + } + + if (control_address != NULL) + dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0, + &dsm_control_impl_private, &control_address, + &dsm_control_mapped_size, ERROR); +} + +/* + * Detach from a shared memory segment, destroying the segment if we + * remove the last reference. + * + * This function should never fail. It will often be invoked when aborting + * a transaction, and a further error won't serve any purpose. It's not a + * complete disaster if we fail to unmap or destroy the segment; it means a + * resource leak, but that doesn't necessarily preclude further operations. + */ +void +dsm_detach(dsm_segment *seg) +{ + /* + * Invoke registered callbacks. Just in case one of those callbacks + * throws a further error that brings us back here, pop the callback + * before invoking it, to avoid infinite error recursion. Don't allow + * interrupts while running the individual callbacks in non-error code + * paths, to avoid leaving cleanup work unfinished if we're interrupted by + * a statement timeout or similar. + */ + HOLD_INTERRUPTS(); + while (!slist_is_empty(&seg->on_detach)) + { + slist_node *node; + dsm_segment_detach_callback *cb; + on_dsm_detach_callback function; + Datum arg; + + node = slist_pop_head_node(&seg->on_detach); + cb = slist_container(dsm_segment_detach_callback, node, node); + function = cb->function; + arg = cb->arg; + pfree(cb); + + function(seg, arg); + } + RESUME_INTERRUPTS(); + + /* + * Try to remove the mapping, if one exists. Normally, there will be, but + * maybe not, if we failed partway through a create or attach operation. + * We remove the mapping before decrementing the reference count so that + * the process that sees a zero reference count can be certain that no + * remaining mappings exist. Even if this fails, we pretend that it + * works, because retrying is likely to fail in the same way. + */ + if (seg->mapped_address != NULL) + { + if (!is_main_region_dsm_handle(seg->handle)) + dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private, + &seg->mapped_address, &seg->mapped_size, WARNING); + seg->impl_private = NULL; + seg->mapped_address = NULL; + seg->mapped_size = 0; + } + + /* Reduce reference count, if we previously increased it. */ + if (seg->control_slot != INVALID_CONTROL_SLOT) + { + uint32 refcnt; + uint32 control_slot = seg->control_slot; + + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + Assert(dsm_control->item[control_slot].handle == seg->handle); + Assert(dsm_control->item[control_slot].refcnt > 1); + refcnt = --dsm_control->item[control_slot].refcnt; + seg->control_slot = INVALID_CONTROL_SLOT; + LWLockRelease(DynamicSharedMemoryControlLock); + + /* If new reference count is 1, try to destroy the segment. */ + if (refcnt == 1) + { + /* A pinned segment should never reach 1. */ + Assert(!dsm_control->item[control_slot].pinned); + + /* + * If we fail to destroy the segment here, or are killed before we + * finish doing so, the reference count will remain at 1, which + * will mean that nobody else can attach to the segment. At + * postmaster shutdown time, or when a new postmaster is started + * after a hard kill, another attempt will be made to remove the + * segment. + * + * The main case we're worried about here is being killed by a + * signal before we can finish removing the segment. In that + * case, it's important to be sure that the segment still gets + * removed. If we actually fail to remove the segment for some + * other reason, the postmaster may not have any better luck than + * we did. There's not much we can do about that, though. + */ + if (is_main_region_dsm_handle(seg->handle) || + dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private, + &seg->mapped_address, &seg->mapped_size, WARNING)) + { + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + if (is_main_region_dsm_handle(seg->handle)) + FreePageManagerPut((FreePageManager *) dsm_main_space_begin, + dsm_control->item[control_slot].first_page, + dsm_control->item[control_slot].npages); + Assert(dsm_control->item[control_slot].handle == seg->handle); + Assert(dsm_control->item[control_slot].refcnt == 1); + dsm_control->item[control_slot].refcnt = 0; + LWLockRelease(DynamicSharedMemoryControlLock); + } + } + } + + /* Clean up our remaining backend-private data structures. */ + if (seg->resowner != NULL) + ResourceOwnerForgetDSM(seg->resowner, seg); + dlist_delete(&seg->node); + pfree(seg); +} + +/* + * Keep a dynamic shared memory mapping until end of session. + * + * By default, mappings are owned by the current resource owner, which + * typically means they stick around for the duration of the current query + * only. + */ +void +dsm_pin_mapping(dsm_segment *seg) +{ + if (seg->resowner != NULL) + { + ResourceOwnerForgetDSM(seg->resowner, seg); + seg->resowner = NULL; + } +} + +/* + * Arrange to remove a dynamic shared memory mapping at cleanup time. + * + * dsm_pin_mapping() can be used to preserve a mapping for the entire + * lifetime of a process; this function reverses that decision, making + * the segment owned by the current resource owner. This may be useful + * just before performing some operation that will invalidate the segment + * for future use by this backend. + */ +void +dsm_unpin_mapping(dsm_segment *seg) +{ + Assert(seg->resowner == NULL); + ResourceOwnerEnlargeDSMs(CurrentResourceOwner); + seg->resowner = CurrentResourceOwner; + ResourceOwnerRememberDSM(seg->resowner, seg); +} + +/* + * Keep a dynamic shared memory segment until postmaster shutdown, or until + * dsm_unpin_segment is called. + * + * This function should not be called more than once per segment, unless the + * segment is explicitly unpinned with dsm_unpin_segment in between calls. + * + * Note that this function does not arrange for the current process to + * keep the segment mapped indefinitely; if that behavior is desired, + * dsm_pin_mapping() should be used from each process that needs to + * retain the mapping. + */ +void +dsm_pin_segment(dsm_segment *seg) +{ + void *handle; + + /* + * Bump reference count for this segment in shared memory. This will + * ensure that even if there is no session which is attached to this + * segment, it will remain until postmaster shutdown or an explicit call + * to unpin. + */ + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + if (dsm_control->item[seg->control_slot].pinned) + elog(ERROR, "cannot pin a segment that is already pinned"); + dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle); + dsm_control->item[seg->control_slot].pinned = true; + dsm_control->item[seg->control_slot].refcnt++; + dsm_control->item[seg->control_slot].impl_private_pm_handle = handle; + LWLockRelease(DynamicSharedMemoryControlLock); +} + +/* + * Unpin a dynamic shared memory segment that was previously pinned with + * dsm_pin_segment. This function should not be called unless dsm_pin_segment + * was previously called for this segment. + * + * The argument is a dsm_handle rather than a dsm_segment in case you want + * to unpin a segment to which you haven't attached. This turns out to be + * useful if, for example, a reference to one shared memory segment is stored + * within another shared memory segment. You might want to unpin the + * referenced segment before destroying the referencing segment. + */ +void +dsm_unpin_segment(dsm_handle handle) +{ + uint32 control_slot = INVALID_CONTROL_SLOT; + bool destroy = false; + uint32 i; + + /* Find the control slot for the given handle. */ + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + for (i = 0; i < dsm_control->nitems; ++i) + { + /* Skip unused slots and segments that are concurrently going away. */ + if (dsm_control->item[i].refcnt <= 1) + continue; + + /* If we've found our handle, we can stop searching. */ + if (dsm_control->item[i].handle == handle) + { + control_slot = i; + break; + } + } + + /* + * We should definitely have found the slot, and it should not already be + * in the process of going away, because this function should only be + * called on a segment which is pinned. + */ + if (control_slot == INVALID_CONTROL_SLOT) + elog(ERROR, "cannot unpin unknown segment handle"); + if (!dsm_control->item[control_slot].pinned) + elog(ERROR, "cannot unpin a segment that is not pinned"); + Assert(dsm_control->item[control_slot].refcnt > 1); + + /* + * Allow implementation-specific code to run. We have to do this before + * releasing the lock, because impl_private_pm_handle may get modified by + * dsm_impl_unpin_segment. + */ + dsm_impl_unpin_segment(handle, + &dsm_control->item[control_slot].impl_private_pm_handle); + + /* Note that 1 means no references (0 means unused slot). */ + if (--dsm_control->item[control_slot].refcnt == 1) + destroy = true; + dsm_control->item[control_slot].pinned = false; + + /* Now we can release the lock. */ + LWLockRelease(DynamicSharedMemoryControlLock); + + /* Clean up resources if that was the last reference. */ + if (destroy) + { + void *junk_impl_private = NULL; + void *junk_mapped_address = NULL; + Size junk_mapped_size = 0; + + /* + * For an explanation of how error handling works in this case, see + * comments in dsm_detach. Note that if we reach this point, the + * current process certainly does not have the segment mapped, because + * if it did, the reference count would have still been greater than 1 + * even after releasing the reference count held by the pin. The fact + * that there can't be a dsm_segment for this handle makes it OK to + * pass the mapped size, mapped address, and private data as NULL + * here. + */ + if (is_main_region_dsm_handle(handle) || + dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, + &junk_mapped_address, &junk_mapped_size, WARNING)) + { + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + if (is_main_region_dsm_handle(handle)) + FreePageManagerPut((FreePageManager *) dsm_main_space_begin, + dsm_control->item[control_slot].first_page, + dsm_control->item[control_slot].npages); + Assert(dsm_control->item[control_slot].handle == handle); + Assert(dsm_control->item[control_slot].refcnt == 1); + dsm_control->item[control_slot].refcnt = 0; + LWLockRelease(DynamicSharedMemoryControlLock); + } + } +} + +/* + * Find an existing mapping for a shared memory segment, if there is one. + */ +dsm_segment * +dsm_find_mapping(dsm_handle h) +{ + dlist_iter iter; + dsm_segment *seg; + + dlist_foreach(iter, &dsm_segment_list) + { + seg = dlist_container(dsm_segment, node, iter.cur); + if (seg->handle == h) + return seg; + } + + return NULL; +} + +/* + * Get the address at which a dynamic shared memory segment is mapped. + */ +void * +dsm_segment_address(dsm_segment *seg) +{ + Assert(seg->mapped_address != NULL); + return seg->mapped_address; +} + +/* + * Get the size of a mapping. + */ +Size +dsm_segment_map_length(dsm_segment *seg) +{ + Assert(seg->mapped_address != NULL); + return seg->mapped_size; +} + +/* + * Get a handle for a mapping. + * + * To establish communication via dynamic shared memory between two backends, + * one of them should first call dsm_create() to establish a new shared + * memory mapping. That process should then call dsm_segment_handle() to + * obtain a handle for the mapping, and pass that handle to the + * coordinating backend via some means (e.g. bgw_main_arg, or via the + * main shared memory segment). The recipient, once in possession of the + * handle, should call dsm_attach(). + */ +dsm_handle +dsm_segment_handle(dsm_segment *seg) +{ + return seg->handle; +} + +/* + * Register an on-detach callback for a dynamic shared memory segment. + */ +void +on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg) +{ + dsm_segment_detach_callback *cb; + + cb = MemoryContextAlloc(TopMemoryContext, + sizeof(dsm_segment_detach_callback)); + cb->function = function; + cb->arg = arg; + slist_push_head(&seg->on_detach, &cb->node); +} + +/* + * Unregister an on-detach callback for a dynamic shared memory segment. + */ +void +cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, + Datum arg) +{ + slist_mutable_iter iter; + + slist_foreach_modify(iter, &seg->on_detach) + { + dsm_segment_detach_callback *cb; + + cb = slist_container(dsm_segment_detach_callback, node, iter.cur); + if (cb->function == function && cb->arg == arg) + { + slist_delete_current(&iter); + pfree(cb); + break; + } + } +} + +/* + * Discard all registered on-detach callbacks without executing them. + */ +void +reset_on_dsm_detach(void) +{ + dlist_iter iter; + + dlist_foreach(iter, &dsm_segment_list) + { + dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur); + + /* Throw away explicit on-detach actions one by one. */ + while (!slist_is_empty(&seg->on_detach)) + { + slist_node *node; + dsm_segment_detach_callback *cb; + + node = slist_pop_head_node(&seg->on_detach); + cb = slist_container(dsm_segment_detach_callback, node, node); + pfree(cb); + } + + /* + * Decrementing the reference count is a sort of implicit on-detach + * action; make sure we don't do that, either. + */ + seg->control_slot = INVALID_CONTROL_SLOT; + } +} + +/* + * Create a segment descriptor. + */ +static dsm_segment * +dsm_create_descriptor(void) +{ + dsm_segment *seg; + + if (CurrentResourceOwner) + ResourceOwnerEnlargeDSMs(CurrentResourceOwner); + + seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment)); + dlist_push_head(&dsm_segment_list, &seg->node); + + /* seg->handle must be initialized by the caller */ + seg->control_slot = INVALID_CONTROL_SLOT; + seg->impl_private = NULL; + seg->mapped_address = NULL; + seg->mapped_size = 0; + + seg->resowner = CurrentResourceOwner; + if (CurrentResourceOwner) + ResourceOwnerRememberDSM(CurrentResourceOwner, seg); + + slist_init(&seg->on_detach); + + return seg; +} + +/* + * Sanity check a control segment. + * + * The goal here isn't to detect everything that could possibly be wrong with + * the control segment; there's not enough information for that. Rather, the + * goal is to make sure that someone can iterate over the items in the segment + * without overrunning the end of the mapping and crashing. We also check + * the magic number since, if that's messed up, this may not even be one of + * our segments at all. + */ +static bool +dsm_control_segment_sane(dsm_control_header *control, Size mapped_size) +{ + if (mapped_size < offsetof(dsm_control_header, item)) + return false; /* Mapped size too short to read header. */ + if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC) + return false; /* Magic number doesn't match. */ + if (dsm_control_bytes_needed(control->maxitems) > mapped_size) + return false; /* Max item count won't fit in map. */ + if (control->nitems > control->maxitems) + return false; /* Overfull. */ + return true; +} + +/* + * Compute the number of control-segment bytes needed to store a given + * number of items. + */ +static uint64 +dsm_control_bytes_needed(uint32 nitems) +{ + return offsetof(dsm_control_header, item) + + sizeof(dsm_control_item) * (uint64) nitems; +} + +static inline dsm_handle +make_main_region_dsm_handle(int slot) +{ + dsm_handle handle; + + /* + * We need to create a handle that doesn't collide with any existing extra + * segment created by dsm_impl_op(), so we'll make it odd. It also + * mustn't collide with any other main area pseudo-segment, so we'll + * include the slot number in some of the bits. We also want to make an + * effort to avoid newly created and recently destroyed handles from being + * confused, so we'll make the rest of the bits random. + */ + handle = 1; + handle |= slot << 1; + handle |= random() << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1); + return handle; +} + +static inline bool +is_main_region_dsm_handle(dsm_handle handle) +{ + return handle & 1; +} |