Diffstat (limited to 'src/backend/storage/ipc/dsm.c')
-rw-r--r--  src/backend/storage/ipc/dsm.c | 1248
1 file changed, 1248 insertions, 0 deletions
diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c
new file mode 100644
index 0000000..b461a5f
--- /dev/null
+++ b/src/backend/storage/ipc/dsm.c
@@ -0,0 +1,1248 @@
+/*-------------------------------------------------------------------------
+ *
+ * dsm.c
+ * manage dynamic shared memory segments
+ *
+ * This file provides a set of services to make programming with dynamic
+ * shared memory segments more convenient. Unlike the low-level
+ * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
+ * created using this module will be cleaned up automatically. Mappings
+ * will be removed when the resource owner under which they were created
+ * is cleaned up, unless dsm_pin_mapping() is used, in which case they
+ * have session lifespan. Segments will be removed when there are no
+ * remaining mappings, or at postmaster shutdown in any case. After a
+ * hard postmaster crash, remaining segments will be removed, if they
+ * still exist, at the next postmaster startup.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/storage/ipc/dsm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#ifndef WIN32
+#include <sys/mman.h>
+#endif
+#include <sys/stat.h>
+
+#include "lib/ilist.h"
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
+#include "storage/dsm.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pg_shmem.h"
+#include "utils/freepage.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/resowner_private.h"
+
+#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
+
+#define PG_DYNSHMEM_FIXED_SLOTS 64
+#define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
+
+#define INVALID_CONTROL_SLOT ((uint32) -1)
+
+/* Backend-local tracking for on-detach callbacks. */
+typedef struct dsm_segment_detach_callback
+{
+ on_dsm_detach_callback function;
+ Datum arg;
+ slist_node node;
+} dsm_segment_detach_callback;
+
+/* Backend-local state for a dynamic shared memory segment. */
+struct dsm_segment
+{
+ dlist_node node; /* List link in dsm_segment_list. */
+ ResourceOwner resowner; /* Resource owner. */
+ dsm_handle handle; /* Segment name. */
+ uint32 control_slot; /* Slot in control segment. */
+ void *impl_private; /* Implementation-specific private data. */
+ void *mapped_address; /* Mapping address, or NULL if unmapped. */
+ Size mapped_size; /* Size of our mapping. */
+ slist_head on_detach; /* On-detach callbacks. */
+};
+
+/* Shared-memory state for a dynamic shared memory segment. */
+typedef struct dsm_control_item
+{
+ dsm_handle handle;
+ uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
+ size_t first_page;
+ size_t npages;
+ void *impl_private_pm_handle; /* only needed on Windows */
+ bool pinned;
+} dsm_control_item;
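+
+/*
+ * Illustrative refcnt walk-through (hypothetical sequence): a freshly
+ * created segment starts at refcnt 2 (1 for "the segment exists" plus 1
+ * for the creator's mapping). A second backend attaching raises it to 3,
+ * and dsm_pin_segment() would raise it to 4. As the mappings detach and
+ * the pin is released, the count falls back to 1, at which point whoever
+ * performed the final decrement destroys the segment and resets the
+ * slot's refcnt to 0.
+ */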
+
+/* Layout of the dynamic shared memory control segment. */
+typedef struct dsm_control_header
+{
+ uint32 magic;
+ uint32 nitems;
+ uint32 maxitems;
+ dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
+} dsm_control_header;
+
+static void dsm_cleanup_for_mmap(void);
+static void dsm_postmaster_shutdown(int code, Datum arg);
+static dsm_segment *dsm_create_descriptor(void);
+static bool dsm_control_segment_sane(dsm_control_header *control,
+ Size mapped_size);
+static uint64 dsm_control_bytes_needed(uint32 nitems);
+static inline dsm_handle make_main_region_dsm_handle(int slot);
+static inline bool is_main_region_dsm_handle(dsm_handle handle);
+
+/* Has this backend initialized the dynamic shared memory system yet? */
+static bool dsm_init_done = false;
+
+/* Preallocated DSM space in the main shared memory region. */
+static void *dsm_main_space_begin = NULL;
+
+/*
+ * List of dynamic shared memory segments used by this backend.
+ *
+ * At process exit time, we must decrement the reference count of each
+ * segment we have attached; this list makes it possible to find all such
+ * segments.
+ *
+ * This list should always be empty in the postmaster. We could probably
+ * allow the postmaster to map dynamic shared memory segments before it
+ * begins to start child processes, provided that each process adjusted
+ * the reference counts for those segments in the control segment at
+ * startup time, but there's no obvious need for such a facility, which
+ * would also be complex to handle in the EXEC_BACKEND case. Once the
+ * postmaster has begun spawning children, there's an additional problem:
+ * each new mapping would require an update to the control segment,
+ * which requires locking, in which the postmaster must not be involved.
+ */
+static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
+
+/*
+ * Control segment information.
+ *
+ * Unlike ordinary shared memory segments, the control segment is not
+ * reference counted; instead, it lasts for the postmaster's entire
+ * life cycle. For simplicity, it doesn't have a dsm_segment object either.
+ */
+static dsm_handle dsm_control_handle;
+static dsm_control_header *dsm_control;
+static Size dsm_control_mapped_size = 0;
+static void *dsm_control_impl_private = NULL;
+
+/*
+ * Start up the dynamic shared memory system.
+ *
+ * This is called just once during each cluster lifetime, at postmaster
+ * startup time.
+ */
+void
+dsm_postmaster_startup(PGShmemHeader *shim)
+{
+ void *dsm_control_address = NULL;
+ uint32 maxitems;
+ Size segsize;
+
+ Assert(!IsUnderPostmaster);
+
+ /*
+ * If we're using the mmap implementations, clean up any leftovers.
+ * Cleanup isn't needed on Windows, and happens earlier in startup for
+ * POSIX and System V shared memory, via a direct call to
+ * dsm_cleanup_using_control_segment.
+ */
+ if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
+ dsm_cleanup_for_mmap();
+
+ /* Determine size for new control segment. */
+ maxitems = PG_DYNSHMEM_FIXED_SLOTS
+ + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
+ elog(DEBUG2, "dynamic shared memory system will support %u segments",
+ maxitems);
+ segsize = dsm_control_bytes_needed(maxitems);
+
+ /*
+ * Loop until we find an unused identifier for the new control segment. We
+ * sometimes use 0 as a sentinel value indicating that no control segment
+ * is known to exist, so avoid using that value for a real control
+ * segment.
+ */
+ for (;;)
+ {
+ Assert(dsm_control_address == NULL);
+ Assert(dsm_control_mapped_size == 0);
+ dsm_control_handle = random() << 1; /* Even numbers only */
+ if (dsm_control_handle == DSM_HANDLE_INVALID)
+ continue;
+ if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
+ &dsm_control_impl_private, &dsm_control_address,
+ &dsm_control_mapped_size, ERROR))
+ break;
+ }
+ dsm_control = dsm_control_address;
+ on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
+ elog(DEBUG2,
+ "created dynamic shared memory control segment %u (%zu bytes)",
+ dsm_control_handle, segsize);
+ shim->dsm_control = dsm_control_handle;
+
+ /* Initialize control segment. */
+ dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
+ dsm_control->nitems = 0;
+ dsm_control->maxitems = maxitems;
+}
+
+/*
+ * Determine whether the control segment from the previous postmaster
+ * invocation still exists. If so, remove the dynamic shared memory
+ * segments to which it refers, and then the control segment itself.
+ */
+void
+dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
+{
+ void *mapped_address = NULL;
+ void *junk_mapped_address = NULL;
+ void *impl_private = NULL;
+ void *junk_impl_private = NULL;
+ Size mapped_size = 0;
+ Size junk_mapped_size = 0;
+ uint32 nitems;
+ uint32 i;
+ dsm_control_header *old_control;
+
+ /*
+ * Try to attach the segment. If this fails, it probably just means that
+ * the operating system has been rebooted and the segment no longer
+ * exists, or an unrelated process has used the same shm ID. So just fall
+ * out quietly.
+ */
+ if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
+ &mapped_address, &mapped_size, DEBUG1))
+ return;
+
+ /*
+ * We've managed to reattach it, but the contents might not be sane. If
+ * they aren't, we disregard the segment after all.
+ */
+ old_control = (dsm_control_header *) mapped_address;
+ if (!dsm_control_segment_sane(old_control, mapped_size))
+ {
+ dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
+ &mapped_address, &mapped_size, LOG);
+ return;
+ }
+
+ /*
+ * OK, the control segment looks basically valid, so we can use it to get
+ * a list of segments that need to be removed.
+ */
+ nitems = old_control->nitems;
+ for (i = 0; i < nitems; ++i)
+ {
+ dsm_handle handle;
+ uint32 refcnt;
+
+ /* If the reference count is 0, the slot is actually unused. */
+ refcnt = old_control->item[i].refcnt;
+ if (refcnt == 0)
+ continue;
+
+ /* If it was using the main shmem area, there is nothing to do. */
+ handle = old_control->item[i].handle;
+ if (is_main_region_dsm_handle(handle))
+ continue;
+
+ /* Log debugging information. */
+ elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
+ handle, refcnt);
+
+ /* Destroy the referenced segment. */
+ dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+ &junk_mapped_address, &junk_mapped_size, LOG);
+ }
+
+ /* Destroy the old control segment, too. */
+ elog(DEBUG2,
+ "cleaning up dynamic shared memory control segment with ID %u",
+ old_control_handle);
+ dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
+ &mapped_address, &mapped_size, LOG);
+}
+
+/*
+ * When we're using the mmap shared memory implementation, "shared memory"
+ * segments might even manage to survive an operating system reboot.
+ * But there's no guarantee as to exactly what will survive: some segments
+ * may survive, and others may not, and the contents of some may be out
+ * of date. In particular, the control segment may be out of date, so we
+ * can't rely on it to figure out what to remove. However, since we know
+ * what directory contains the files we used as shared memory, we can simply
+ * scan the directory and blow everything away that shouldn't be there.
+ */
+static void
+dsm_cleanup_for_mmap(void)
+{
+ DIR *dir;
+ struct dirent *dent;
+
+ /* Scan the directory for something with a name of the correct format. */
+ dir = AllocateDir(PG_DYNSHMEM_DIR);
+
+ while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
+ {
+ if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
+ strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
+ {
+ char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
+
+ snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
+
+ elog(DEBUG2, "removing file \"%s\"", buf);
+
+ /* We found a matching file; so remove it. */
+ if (unlink(buf) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove file \"%s\": %m", buf)));
+ }
+ }
+
+ /* Cleanup complete. */
+ FreeDir(dir);
+}
+
+/*
+ * At shutdown time, we iterate over the control segment and remove all
+ * remaining dynamic shared memory segments. We avoid throwing errors here;
+ * the postmaster is shutting down either way, and this is just non-critical
+ * resource cleanup.
+ */
+static void
+dsm_postmaster_shutdown(int code, Datum arg)
+{
+ uint32 nitems;
+ uint32 i;
+ void *dsm_control_address;
+ void *junk_mapped_address = NULL;
+ void *junk_impl_private = NULL;
+ Size junk_mapped_size = 0;
+ PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
+
+ /*
+ * If some other backend exited uncleanly, it might have corrupted the
+ * control segment while it was dying. In that case, we warn and ignore
+ * the contents of the control segment. This may end up leaving behind
+ * stray shared memory segments, but there's not much we can do about that
+ * if the metadata is gone.
+ */
+ nitems = dsm_control->nitems;
+ if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
+ {
+ ereport(LOG,
+ (errmsg("dynamic shared memory control segment is corrupt")));
+ return;
+ }
+
+ /* Remove any remaining segments. */
+ for (i = 0; i < nitems; ++i)
+ {
+ dsm_handle handle;
+
+ /* If the reference count is 0, the slot is actually unused. */
+ if (dsm_control->item[i].refcnt == 0)
+ continue;
+
+ handle = dsm_control->item[i].handle;
+ if (is_main_region_dsm_handle(handle))
+ continue;
+
+ /* Log debugging information. */
+ elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
+ handle);
+
+ /* Destroy the segment. */
+ dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+ &junk_mapped_address, &junk_mapped_size, LOG);
+ }
+
+ /* Remove the control segment itself. */
+ elog(DEBUG2,
+ "cleaning up dynamic shared memory control segment with ID %u",
+ dsm_control_handle);
+ dsm_control_address = dsm_control;
+ dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
+ &dsm_control_impl_private, &dsm_control_address,
+ &dsm_control_mapped_size, LOG);
+ dsm_control = dsm_control_address;
+ shim->dsm_control = 0;
+}
+
+/*
+ * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
+ * we must reread the state file and map the control segment; in other cases,
+ * we'll have inherited the postmaster's mapping and global variables.
+ */
+static void
+dsm_backend_startup(void)
+{
+#ifdef EXEC_BACKEND
+ {
+ void *control_address = NULL;
+
+ /* Attach control segment. */
+ Assert(dsm_control_handle != 0);
+ dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
+ &dsm_control_impl_private, &control_address,
+ &dsm_control_mapped_size, ERROR);
+ dsm_control = control_address;
+ /* If control segment doesn't look sane, something is badly wrong. */
+ if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
+ {
+ dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
+ &dsm_control_impl_private, &control_address,
+ &dsm_control_mapped_size, WARNING);
+ ereport(FATAL,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("dynamic shared memory control segment is not valid")));
+ }
+ }
+#endif
+
+ dsm_init_done = true;
+}
+
+#ifdef EXEC_BACKEND
+/*
+ * When running under EXEC_BACKEND, we get a callback here when the main
+ * shared memory segment is re-attached, so that we can record the control
+ * handle retrieved from it.
+ */
+void
+dsm_set_control_handle(dsm_handle h)
+{
+ Assert(dsm_control_handle == 0 && h != 0);
+ dsm_control_handle = h;
+}
+#endif
+
+/*
+ * Compute the amount of space to reserve in the main shared memory
+ * segment for DSM segments.
+ */
+size_t
+dsm_estimate_size(void)
+{
+ return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
+}
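+
+/*
+ * For example (hypothetical setting): min_dynamic_shared_memory is a size
+ * in megabytes, so a value of 256 causes 1024 * 1024 * 256 = 268435456
+ * bytes to be reserved in the main shared memory segment.
+ */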
+
+/*
+ * Initialize space in the main shared memory segment for DSM segments.
+ */
+void
+dsm_shmem_init(void)
+{
+ size_t size = dsm_estimate_size();
+ bool found;
+
+ if (size == 0)
+ return;
+
+ dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
+ if (!found)
+ {
+ FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
+ size_t first_page = 0;
+ size_t pages;
+
+ /* Reserve space for the FreePageManager. */
+ while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
+ ++first_page;
+
+ /* Initialize it and give it all the rest of the space. */
+ FreePageManagerInitialize(fpm, dsm_main_space_begin);
+ pages = (size / FPM_PAGE_SIZE) - first_page;
+ FreePageManagerPut(fpm, first_page, pages);
+ }
+}
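+
+/*
+ * Sketch of the resulting layout (sizes here are illustrative
+ * assumptions): with FPM_PAGE_SIZE of 4096 and sizeof(FreePageManager)
+ * smaller than one page, first_page ends up as 1, so page 0 holds the
+ * FreePageManager itself and pages 1 .. (size / FPM_PAGE_SIZE) - 1 are
+ * handed to FreePageManagerPut() as allocatable DSM space.
+ */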
+
+/*
+ * Create a new dynamic shared memory segment.
+ *
+ * If there is a non-NULL CurrentResourceOwner, the new segment is associated
+ * with it and must be detached before the resource owner releases, or a
+ * warning will be logged. If CurrentResourceOwner is NULL, the segment
+ * remains attached until explicitly detached or the session ends.
+ * Creating with a NULL CurrentResourceOwner is equivalent to creating
+ * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
+ */
+dsm_segment *
+dsm_create(Size size, int flags)
+{
+ dsm_segment *seg;
+ uint32 i;
+ uint32 nitems;
+ size_t npages = 0;
+ size_t first_page = 0;
+ FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
+ bool using_main_dsm_region = false;
+
+ /* Unsafe in postmaster (and pointless in a stand-alone backend). */
+ Assert(IsUnderPostmaster);
+
+ if (!dsm_init_done)
+ dsm_backend_startup();
+
+ /* Create a new segment descriptor. */
+ seg = dsm_create_descriptor();
+
+ /*
+ * Lock the control segment while we try to allocate from the main shared
+ * memory area, if configured.
+ */
+ if (dsm_main_space_fpm)
+ {
+ npages = size / FPM_PAGE_SIZE;
+ if (size % FPM_PAGE_SIZE > 0)
+ ++npages;
+
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
+ {
+ /* We can carve out a piece of the main shared memory segment. */
+ seg->mapped_address = (char *) dsm_main_space_begin +
+ first_page * FPM_PAGE_SIZE;
+ seg->mapped_size = npages * FPM_PAGE_SIZE;
+ using_main_dsm_region = true;
+ /* We'll choose a handle below. */
+ }
+ }
+
+ if (!using_main_dsm_region)
+ {
+ /*
+ * We need to create a new memory segment. Loop until we find an
+ * unused segment identifier.
+ */
+ if (dsm_main_space_fpm)
+ LWLockRelease(DynamicSharedMemoryControlLock);
+ for (;;)
+ {
+ Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
+ seg->handle = random() << 1; /* Even numbers only */
+ if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
+ continue;
+ if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
+ &seg->mapped_address, &seg->mapped_size, ERROR))
+ break;
+ }
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ }
+
+ /* Search the control segment for an unused slot. */
+ nitems = dsm_control->nitems;
+ for (i = 0; i < nitems; ++i)
+ {
+ if (dsm_control->item[i].refcnt == 0)
+ {
+ if (using_main_dsm_region)
+ {
+ seg->handle = make_main_region_dsm_handle(i);
+ dsm_control->item[i].first_page = first_page;
+ dsm_control->item[i].npages = npages;
+ }
+ else
+ Assert(!is_main_region_dsm_handle(seg->handle));
+ dsm_control->item[i].handle = seg->handle;
+ /* refcnt of 1 triggers destruction, so start at 2 */
+ dsm_control->item[i].refcnt = 2;
+ dsm_control->item[i].impl_private_pm_handle = NULL;
+ dsm_control->item[i].pinned = false;
+ seg->control_slot = i;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+ return seg;
+ }
+ }
+
+ /* Verify that we can support an additional mapping. */
+ if (nitems >= dsm_control->maxitems)
+ {
+ if (using_main_dsm_region)
+ FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
+ LWLockRelease(DynamicSharedMemoryControlLock);
+ if (!using_main_dsm_region)
+ dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
+ &seg->mapped_address, &seg->mapped_size, WARNING);
+ if (seg->resowner != NULL)
+ ResourceOwnerForgetDSM(seg->resowner, seg);
+ dlist_delete(&seg->node);
+ pfree(seg);
+
+ if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
+ return NULL;
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+ errmsg("too many dynamic shared memory segments")));
+ }
+
+ /* Enter the handle into a new array slot. */
+ if (using_main_dsm_region)
+ {
+ seg->handle = make_main_region_dsm_handle(nitems);
+ dsm_control->item[nitems].first_page = first_page;
+ dsm_control->item[nitems].npages = npages;
+ }
+ dsm_control->item[nitems].handle = seg->handle;
+ /* refcnt of 1 triggers destruction, so start at 2 */
+ dsm_control->item[nitems].refcnt = 2;
+ dsm_control->item[nitems].impl_private_pm_handle = NULL;
+ dsm_control->item[nitems].pinned = false;
+ seg->control_slot = nitems;
+ dsm_control->nitems++;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+
+ return seg;
+}
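+
+/*
+ * Usage sketch for a creating backend (hypothetical size and names; the
+ * error handling a real caller would need is omitted):
+ *
+ *    dsm_segment *seg = dsm_create(65536, 0);
+ *    char *space = dsm_segment_address(seg);
+ *    dsm_handle h = dsm_segment_handle(seg);
+ *
+ *    ... initialize *space, then pass h to a cooperating process,
+ *    e.g. via bgw_main_arg or the main shared memory segment ...
+ */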
+
+/*
+ * Attach a dynamic shared memory segment.
+ *
+ * See comments for dsm_segment_handle() for an explanation of how this
+ * is intended to be used.
+ *
+ * This function will return NULL if the segment isn't known to the system.
+ * This can happen if we're asked to attach the segment, but then everyone
+ * else detaches it (causing it to be destroyed) before we get around to
+ * attaching it.
+ *
+ * If there is a non-NULL CurrentResourceOwner, the attached segment is
+ * associated with it and must be detached before the resource owner releases,
+ * or a warning will be logged. Otherwise the segment remains attached until
+ * explicitly detached or the session ends. See the note atop dsm_create().
+ */
+dsm_segment *
+dsm_attach(dsm_handle h)
+{
+ dsm_segment *seg;
+ dlist_iter iter;
+ uint32 i;
+ uint32 nitems;
+
+ /* Unsafe in postmaster (and pointless in a stand-alone backend). */
+ Assert(IsUnderPostmaster);
+
+ if (!dsm_init_done)
+ dsm_backend_startup();
+
+ /*
+ * Since this is just a debugging cross-check, we could leave it out
+ * altogether, or include it only in assert-enabled builds. But since the
+ * list of attached segments should normally be very short, let's include
+ * it always for now.
+ *
+ * If you're hitting this error, you probably want to attempt to find an
+ * existing mapping via dsm_find_mapping() before calling dsm_attach() to
+ * create a new one.
+ */
+ dlist_foreach(iter, &dsm_segment_list)
+ {
+ seg = dlist_container(dsm_segment, node, iter.cur);
+ if (seg->handle == h)
+ elog(ERROR, "can't attach the same segment more than once");
+ }
+
+ /* Create a new segment descriptor. */
+ seg = dsm_create_descriptor();
+ seg->handle = h;
+
+ /* Bump reference count for this segment in shared memory. */
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ nitems = dsm_control->nitems;
+ for (i = 0; i < nitems; ++i)
+ {
+ /*
+ * If the reference count is 0, the slot is actually unused. If the
+ * reference count is 1, the slot is still in use, but the segment is
+ * in the process of going away; even if the handle matches, another
+ * slot may already have started using the same handle value by
+ * coincidence so we have to keep searching.
+ */
+ if (dsm_control->item[i].refcnt <= 1)
+ continue;
+
+ /* If the handle doesn't match, it's not the slot we want. */
+ if (dsm_control->item[i].handle != seg->handle)
+ continue;
+
+ /* Otherwise we've found a match. */
+ dsm_control->item[i].refcnt++;
+ seg->control_slot = i;
+ if (is_main_region_dsm_handle(seg->handle))
+ {
+ seg->mapped_address = (char *) dsm_main_space_begin +
+ dsm_control->item[i].first_page * FPM_PAGE_SIZE;
+ seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
+ }
+ break;
+ }
+ LWLockRelease(DynamicSharedMemoryControlLock);
+
+ /*
+ * If we didn't find the handle we're looking for in the control segment,
+ * it probably means that everyone else who had it mapped, including the
+ * original creator, died before we got to this point. It's up to the
+ * caller to decide what to do about that.
+ */
+ if (seg->control_slot == INVALID_CONTROL_SLOT)
+ {
+ dsm_detach(seg);
+ return NULL;
+ }
+
+ /* Here's where we actually try to map the segment. */
+ if (!is_main_region_dsm_handle(seg->handle))
+ dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
+ &seg->mapped_address, &seg->mapped_size, ERROR);
+
+ return seg;
+}
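+
+/*
+ * Receiver-side sketch (hypothetical; h is a handle obtained from the
+ * creator via dsm_segment_handle()):
+ *
+ *    dsm_segment *seg = dsm_attach(h);
+ *
+ *    if (seg == NULL)
+ *        ereport(ERROR,
+ *                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ *                 errmsg("could not attach to dynamic shared memory segment")));
+ *    mydata = dsm_segment_address(seg);
+ */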
+
+/*
+ * At backend shutdown time, detach any segments that are still attached.
+ * (This is similar to dsm_detach_all, except that there's no reason to
+ * unmap the control segment before exiting, so we don't bother.)
+ */
+void
+dsm_backend_shutdown(void)
+{
+ while (!dlist_is_empty(&dsm_segment_list))
+ {
+ dsm_segment *seg;
+
+ seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
+ dsm_detach(seg);
+ }
+}
+
+/*
+ * Detach all shared memory segments, including the control segment. This
+ * should be called, along with PGSharedMemoryDetach, in processes that
+ * might inherit mappings but are not intended to be connected to dynamic
+ * shared memory.
+ */
+void
+dsm_detach_all(void)
+{
+ void *control_address = dsm_control;
+
+ while (!dlist_is_empty(&dsm_segment_list))
+ {
+ dsm_segment *seg;
+
+ seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
+ dsm_detach(seg);
+ }
+
+ if (control_address != NULL)
+ dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
+ &dsm_control_impl_private, &control_address,
+ &dsm_control_mapped_size, ERROR);
+}
+
+/*
+ * Detach from a shared memory segment, destroying the segment if we
+ * remove the last reference.
+ *
+ * This function should never fail. It will often be invoked when aborting
+ * a transaction, and a further error won't serve any purpose. It's not a
+ * complete disaster if we fail to unmap or destroy the segment; it means a
+ * resource leak, but that doesn't necessarily preclude further operations.
+ */
+void
+dsm_detach(dsm_segment *seg)
+{
+ /*
+ * Invoke registered callbacks. Just in case one of those callbacks
+ * throws a further error that brings us back here, pop the callback
+ * before invoking it, to avoid infinite error recursion. Don't allow
+ * interrupts while running the individual callbacks in non-error code
+ * paths, to avoid leaving cleanup work unfinished if we're interrupted by
+ * a statement timeout or similar.
+ */
+ HOLD_INTERRUPTS();
+ while (!slist_is_empty(&seg->on_detach))
+ {
+ slist_node *node;
+ dsm_segment_detach_callback *cb;
+ on_dsm_detach_callback function;
+ Datum arg;
+
+ node = slist_pop_head_node(&seg->on_detach);
+ cb = slist_container(dsm_segment_detach_callback, node, node);
+ function = cb->function;
+ arg = cb->arg;
+ pfree(cb);
+
+ function(seg, arg);
+ }
+ RESUME_INTERRUPTS();
+
+ /*
+ * Try to remove the mapping, if one exists. Normally there will be one,
+ * but maybe not if we failed partway through a create or attach operation.
+ * We remove the mapping before decrementing the reference count so that
+ * the process that sees a zero reference count can be certain that no
+ * remaining mappings exist. Even if this fails, we pretend that it
+ * works, because retrying is likely to fail in the same way.
+ */
+ if (seg->mapped_address != NULL)
+ {
+ if (!is_main_region_dsm_handle(seg->handle))
+ dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
+ &seg->mapped_address, &seg->mapped_size, WARNING);
+ seg->impl_private = NULL;
+ seg->mapped_address = NULL;
+ seg->mapped_size = 0;
+ }
+
+ /* Reduce reference count, if we previously increased it. */
+ if (seg->control_slot != INVALID_CONTROL_SLOT)
+ {
+ uint32 refcnt;
+ uint32 control_slot = seg->control_slot;
+
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ Assert(dsm_control->item[control_slot].handle == seg->handle);
+ Assert(dsm_control->item[control_slot].refcnt > 1);
+ refcnt = --dsm_control->item[control_slot].refcnt;
+ seg->control_slot = INVALID_CONTROL_SLOT;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+
+ /* If new reference count is 1, try to destroy the segment. */
+ if (refcnt == 1)
+ {
+ /* A pinned segment should never reach 1. */
+ Assert(!dsm_control->item[control_slot].pinned);
+
+ /*
+ * If we fail to destroy the segment here, or are killed before we
+ * finish doing so, the reference count will remain at 1, which
+ * will mean that nobody else can attach to the segment. At
+ * postmaster shutdown time, or when a new postmaster is started
+ * after a hard kill, another attempt will be made to remove the
+ * segment.
+ *
+ * The main case we're worried about here is being killed by a
+ * signal before we can finish removing the segment. In that
+ * case, it's important to be sure that the segment still gets
+ * removed. If we actually fail to remove the segment for some
+ * other reason, the postmaster may not have any better luck than
+ * we did. There's not much we can do about that, though.
+ */
+ if (is_main_region_dsm_handle(seg->handle) ||
+ dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
+ &seg->mapped_address, &seg->mapped_size, WARNING))
+ {
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ if (is_main_region_dsm_handle(seg->handle))
+ FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
+ dsm_control->item[control_slot].first_page,
+ dsm_control->item[control_slot].npages);
+ Assert(dsm_control->item[control_slot].handle == seg->handle);
+ Assert(dsm_control->item[control_slot].refcnt == 1);
+ dsm_control->item[control_slot].refcnt = 0;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+ }
+ }
+ }
+
+ /* Clean up our remaining backend-private data structures. */
+ if (seg->resowner != NULL)
+ ResourceOwnerForgetDSM(seg->resowner, seg);
+ dlist_delete(&seg->node);
+ pfree(seg);
+}
+
+/*
+ * Keep a dynamic shared memory mapping until end of session.
+ *
+ * By default, mappings are owned by the current resource owner, which
+ * typically means they stick around for the duration of the current query
+ * only.
+ */
+void
+dsm_pin_mapping(dsm_segment *seg)
+{
+ if (seg->resowner != NULL)
+ {
+ ResourceOwnerForgetDSM(seg->resowner, seg);
+ seg->resowner = NULL;
+ }
+}
+
+/*
+ * Arrange to remove a dynamic shared memory mapping at cleanup time.
+ *
+ * dsm_pin_mapping() can be used to preserve a mapping for the entire
+ * lifetime of a process; this function reverses that decision, making
+ * the segment owned by the current resource owner. This may be useful
+ * just before performing some operation that will invalidate the segment
+ * for future use by this backend.
+ */
+void
+dsm_unpin_mapping(dsm_segment *seg)
+{
+ Assert(seg->resowner == NULL);
+ ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
+ seg->resowner = CurrentResourceOwner;
+ ResourceOwnerRememberDSM(seg->resowner, seg);
+}
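+
+/*
+ * For example (hypothetical): a backend that caches a segment across
+ * queries might attach once and keep the mapping for the session:
+ *
+ *    seg = dsm_attach(h);
+ *    dsm_pin_mapping(seg);
+ *
+ * and later hand it back to the current resource owner with
+ * dsm_unpin_mapping(seg) just before doing something that invalidates
+ * the segment for this backend.
+ */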
+
+/*
+ * Keep a dynamic shared memory segment until postmaster shutdown, or until
+ * dsm_unpin_segment is called.
+ *
+ * This function should not be called more than once per segment, unless the
+ * segment is explicitly unpinned with dsm_unpin_segment in between calls.
+ *
+ * Note that this function does not arrange for the current process to
+ * keep the segment mapped indefinitely; if that behavior is desired,
+ * dsm_pin_mapping() should be used from each process that needs to
+ * retain the mapping.
+ */
+void
+dsm_pin_segment(dsm_segment *seg)
+{
+ void *handle;
+
+ /*
+ * Bump reference count for this segment in shared memory. This will
+ * ensure that even if there is no session which is attached to this
+ * segment, it will remain until postmaster shutdown or an explicit call
+ * to unpin.
+ */
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ if (dsm_control->item[seg->control_slot].pinned)
+ elog(ERROR, "cannot pin a segment that is already pinned");
+ dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
+ dsm_control->item[seg->control_slot].pinned = true;
+ dsm_control->item[seg->control_slot].refcnt++;
+ dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+}
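+
+/*
+ * Sketch (hypothetical): to make a segment outlive every backend currently
+ * attached to it, e.g. a registry that future workers will look up:
+ *
+ *    seg = dsm_create(size, 0);
+ *    dsm_pin_segment(seg);
+ *    dsm_pin_mapping(seg);
+ *
+ * The segment then persists until dsm_unpin_segment() or postmaster
+ * shutdown, even if this backend exits.
+ */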
+
+/*
+ * Unpin a dynamic shared memory segment that was previously pinned with
+ * dsm_pin_segment. This function should not be called unless dsm_pin_segment
+ * was previously called for this segment.
+ *
+ * The argument is a dsm_handle rather than a dsm_segment in case you want
+ * to unpin a segment to which you haven't attached. This turns out to be
+ * useful if, for example, a reference to one shared memory segment is stored
+ * within another shared memory segment. You might want to unpin the
+ * referenced segment before destroying the referencing segment.
+ */
+void
+dsm_unpin_segment(dsm_handle handle)
+{
+ uint32 control_slot = INVALID_CONTROL_SLOT;
+ bool destroy = false;
+ uint32 i;
+
+ /* Find the control slot for the given handle. */
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ for (i = 0; i < dsm_control->nitems; ++i)
+ {
+ /* Skip unused slots and segments that are concurrently going away. */
+ if (dsm_control->item[i].refcnt <= 1)
+ continue;
+
+ /* If we've found our handle, we can stop searching. */
+ if (dsm_control->item[i].handle == handle)
+ {
+ control_slot = i;
+ break;
+ }
+ }
+
+ /*
+ * We should definitely have found the slot, and it should not already be
+ * in the process of going away, because this function should only be
+ * called on a segment which is pinned.
+ */
+ if (control_slot == INVALID_CONTROL_SLOT)
+ elog(ERROR, "cannot unpin unknown segment handle");
+ if (!dsm_control->item[control_slot].pinned)
+ elog(ERROR, "cannot unpin a segment that is not pinned");
+ Assert(dsm_control->item[control_slot].refcnt > 1);
+
+ /*
+ * Allow implementation-specific code to run. We have to do this before
+ * releasing the lock, because impl_private_pm_handle may get modified by
+ * dsm_impl_unpin_segment.
+ */
+ dsm_impl_unpin_segment(handle,
+ &dsm_control->item[control_slot].impl_private_pm_handle);
+
+ /* Note that 1 means no references (0 means unused slot). */
+ if (--dsm_control->item[control_slot].refcnt == 1)
+ destroy = true;
+ dsm_control->item[control_slot].pinned = false;
+
+ /* Now we can release the lock. */
+ LWLockRelease(DynamicSharedMemoryControlLock);
+
+ /* Clean up resources if that was the last reference. */
+ if (destroy)
+ {
+ void *junk_impl_private = NULL;
+ void *junk_mapped_address = NULL;
+ Size junk_mapped_size = 0;
+
+ /*
+ * For an explanation of how error handling works in this case, see
+ * comments in dsm_detach. Note that if we reach this point, the
+ * current process certainly does not have the segment mapped, because
+ * if it did, the reference count would have still been greater than 1
+ * even after releasing the reference count held by the pin. The fact
+ * that there can't be a dsm_segment for this handle makes it OK to
+ * pass the mapped size, mapped address, and private data as NULL
+ * here.
+ */
+ if (is_main_region_dsm_handle(handle) ||
+ dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+ &junk_mapped_address, &junk_mapped_size, WARNING))
+ {
+ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+ if (is_main_region_dsm_handle(handle))
+ FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
+ dsm_control->item[control_slot].first_page,
+ dsm_control->item[control_slot].npages);
+ Assert(dsm_control->item[control_slot].handle == handle);
+ Assert(dsm_control->item[control_slot].refcnt == 1);
+ dsm_control->item[control_slot].refcnt = 0;
+ LWLockRelease(DynamicSharedMemoryControlLock);
+ }
+ }
+}
+
+/*
+ * Find an existing mapping for a shared memory segment, if there is one.
+ */
+dsm_segment *
+dsm_find_mapping(dsm_handle h)
+{
+ dlist_iter iter;
+ dsm_segment *seg;
+
+ dlist_foreach(iter, &dsm_segment_list)
+ {
+ seg = dlist_container(dsm_segment, node, iter.cur);
+ if (seg->handle == h)
+ return seg;
+ }
+
+ return NULL;
+}
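+
+/*
+ * Sketch of the find-or-attach pattern suggested in dsm_attach()'s header
+ * comment (hypothetical):
+ *
+ *    seg = dsm_find_mapping(h);
+ *    if (seg == NULL)
+ *        seg = dsm_attach(h);
+ */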
+
+/*
+ * Get the address at which a dynamic shared memory segment is mapped.
+ */
+void *
+dsm_segment_address(dsm_segment *seg)
+{
+ Assert(seg->mapped_address != NULL);
+ return seg->mapped_address;
+}
+
+/*
+ * Get the size of a mapping.
+ */
+Size
+dsm_segment_map_length(dsm_segment *seg)
+{
+ Assert(seg->mapped_address != NULL);
+ return seg->mapped_size;
+}
+
+/*
+ * Get a handle for a mapping.
+ *
+ * To establish communication via dynamic shared memory between two backends,
+ * one of them should first call dsm_create() to establish a new shared
+ * memory mapping. That process should then call dsm_segment_handle() to
+ * obtain a handle for the mapping, and pass that handle to the
+ * coordinating backend via some means (e.g. bgw_main_arg, or via the
+ * main shared memory segment). The recipient, once in possession of the
+ * handle, should call dsm_attach().
+ */
+dsm_handle
+dsm_segment_handle(dsm_segment *seg)
+{
+ return seg->handle;
+}
+
+/*
+ * Register an on-detach callback for a dynamic shared memory segment.
+ */
+void
+on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
+{
+ dsm_segment_detach_callback *cb;
+
+ cb = MemoryContextAlloc(TopMemoryContext,
+ sizeof(dsm_segment_detach_callback));
+ cb->function = function;
+ cb->arg = arg;
+ slist_push_head(&seg->on_detach, &cb->node);
+}
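+
+/*
+ * Callback sketch (hypothetical names): callbacks run in dsm_detach()
+ * before the mapping is removed, so they may still inspect the segment's
+ * contents.
+ *
+ *    static void
+ *    my_cleanup_callback(dsm_segment *seg, Datum arg)
+ *    {
+ *        ... release whatever arg points at ...
+ *    }
+ *
+ *    on_dsm_detach(seg, my_cleanup_callback, PointerGetDatum(state));
+ */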
+
+/*
+ * Unregister an on-detach callback for a dynamic shared memory segment.
+ */
+void
+cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
+ Datum arg)
+{
+ slist_mutable_iter iter;
+
+ slist_foreach_modify(iter, &seg->on_detach)
+ {
+ dsm_segment_detach_callback *cb;
+
+ cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
+ if (cb->function == function && cb->arg == arg)
+ {
+ slist_delete_current(&iter);
+ pfree(cb);
+ break;
+ }
+ }
+}
+
+/*
+ * Discard all registered on-detach callbacks without executing them.
+ */
+void
+reset_on_dsm_detach(void)
+{
+ dlist_iter iter;
+
+ dlist_foreach(iter, &dsm_segment_list)
+ {
+ dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
+
+ /* Throw away explicit on-detach actions one by one. */
+ while (!slist_is_empty(&seg->on_detach))
+ {
+ slist_node *node;
+ dsm_segment_detach_callback *cb;
+
+ node = slist_pop_head_node(&seg->on_detach);
+ cb = slist_container(dsm_segment_detach_callback, node, node);
+ pfree(cb);
+ }
+
+ /*
+ * Decrementing the reference count is a sort of implicit on-detach
+ * action; make sure we don't do that, either.
+ */
+ seg->control_slot = INVALID_CONTROL_SLOT;
+ }
+}
+
+/*
+ * Create a segment descriptor.
+ */
+static dsm_segment *
+dsm_create_descriptor(void)
+{
+ dsm_segment *seg;
+
+ if (CurrentResourceOwner)
+ ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
+
+ seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
+ dlist_push_head(&dsm_segment_list, &seg->node);
+
+ /* seg->handle must be initialized by the caller */
+ seg->control_slot = INVALID_CONTROL_SLOT;
+ seg->impl_private = NULL;
+ seg->mapped_address = NULL;
+ seg->mapped_size = 0;
+
+ seg->resowner = CurrentResourceOwner;
+ if (CurrentResourceOwner)
+ ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
+
+ slist_init(&seg->on_detach);
+
+ return seg;
+}
+
+/*
+ * Sanity check a control segment.
+ *
+ * The goal here isn't to detect everything that could possibly be wrong with
+ * the control segment; there's not enough information for that. Rather, the
+ * goal is to make sure that someone can iterate over the items in the segment
+ * without overrunning the end of the mapping and crashing. We also check
+ * the magic number since, if that's messed up, this may not even be one of
+ * our segments at all.
+ */
+static bool
+dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
+{
+ if (mapped_size < offsetof(dsm_control_header, item))
+ return false; /* Mapped size too short to read header. */
+ if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
+ return false; /* Magic number doesn't match. */
+ if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
+ return false; /* Max item count won't fit in map. */
+ if (control->nitems > control->maxitems)
+ return false; /* Overfull. */
+ return true;
+}
+
+/*
+ * Compute the number of control-segment bytes needed to store a given
+ * number of items.
+ */
+static uint64
+dsm_control_bytes_needed(uint32 nitems)
+{
+ return offsetof(dsm_control_header, item)
+ + sizeof(dsm_control_item) * (uint64) nitems;
+}
+
+static inline dsm_handle
+make_main_region_dsm_handle(int slot)
+{
+ dsm_handle handle;
+
+ /*
+ * We need to create a handle that doesn't collide with any existing extra
+ * segment created by dsm_impl_op(), so we'll make it odd. It also
+ * mustn't collide with any other main area pseudo-segment, so we'll
+ * include the slot number in some of the bits. We also want to make an
+ * effort to keep newly created and recently destroyed handles from being
+ * confused, so we'll make the rest of the bits random.
+ */
+ handle = 1;
+ handle |= slot << 1;
+ handle |= random() << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
+ return handle;
+}
+
+static inline bool
+is_main_region_dsm_handle(dsm_handle handle)
+{
+ return handle & 1;
+}
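+
+/*
+ * Worked example (hypothetical maxitems): if dsm_control->maxitems were
+ * 1024, pg_leftmost_one_pos32(1024) is 10, so bit 0 is the main-region
+ * marker, bits 1..10 hold the slot number, and bits 11 and up come from
+ * random(). An odd handle therefore always denotes a main-region
+ * pseudo-segment, while handles created by dsm_impl_op(), being even,
+ * can never collide with one.
+ */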