path: root/src/backend/port/win32_shmem.c
Diffstat (limited to 'src/backend/port/win32_shmem.c')
-rw-r--r--  src/backend/port/win32_shmem.c  599
1 files changed, 599 insertions, 0 deletions
diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c
new file mode 100644
index 0000000..30b0730
--- /dev/null
+++ b/src/backend/port/win32_shmem.c
@@ -0,0 +1,599 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32_shmem.c
+ * Implement shared memory using win32 facilities
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/port/win32_shmem.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "storage/dsm.h"
+#include "storage/ipc.h"
+#include "storage/pg_shmem.h"
+
+/*
+ * Early in a process's life, Windows asynchronously creates threads for the
+ * process's "default thread pool"
+ * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
+ * Occasionally, thread creation allocates a stack after
+ * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
+ * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
+ * if the allocator preferred the just-released region for allocating the new
+ * thread stack. We observed such failures in some Windows Server 2016
+ * configurations. To give the system another region to prefer, reserve and
+ * release an additional, protective region immediately before reserving or
+ * releasing shared memory. The idea is that, if the allocator handed out
+ * REGION1 pages before REGION2 pages on one occasion, it will do so whenever
+ * both regions are free. Windows Server 2016 exhibits that behavior, and a
+ * system behaving differently would have less need to protect
+ * UsedShmemSegAddr. The protective region must be at least large enough for
+ * one thread stack. However, ten times as much is less than 2% of the 32-bit
+ * address space and is negligible relative to the 64-bit address space.
+ */
+#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
+void *ShmemProtectiveRegion = NULL;
+
+HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
+void *UsedShmemSegAddr = NULL;
+static Size UsedShmemSegSize = 0;
+
+static bool EnableLockPagesPrivilege(int elevel);
+static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
+
+/*
+ * Generate the shared memory segment name. Expand the data directory to
+ * generate an identifier unique to this data directory, then replace all
+ * backslashes with forward slashes, since backslashes aren't permitted in
+ * global object names.
+ *
+ * Store the shared memory segment in the Global\ namespace (requires NT4 TSE or
+ * 2000, but that's all we support for other reasons as well), to make sure you can't
+ * open two postmasters in different sessions against the same data directory.
+ *
+ * XXX: What happens with junctions? It's only someone breaking things on purpose,
+ * and this is still better than before, but we might want to do something about
+ * that sometime in the future.
+ */
+static char *
+GetSharedMemName(void)
+{
+ char *retptr;
+ DWORD bufsize;
+ DWORD r;
+ char *cp;
+
+ bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
+ if (bufsize == 0)
+ elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
+ DataDir, GetLastError());
+
+ retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
+ if (retptr == NULL)
+ elog(FATAL, "could not allocate memory for shared memory name");
+
+ strcpy(retptr, "Global\\PostgreSQL:");
+ r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
+ if (r == 0 || r > bufsize)
+ elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
+ DataDir, GetLastError());
+
+ /*
+ * XXX: Intentionally overwriting the Global\ part here. This was not the
+ * original approach, but putting it in the actual Global\ namespace
+ * causes permission errors in a lot of cases, so we leave it in the
+ * default namespace for now.
+ */
+ for (cp = retptr; *cp; cp++)
+ if (*cp == '\\')
+ *cp = '/';
+
+ return retptr;
+}
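
As an illustration (editorial sketch, not part of the patch): for a data directory that expands to C:\data\pg, GetSharedMemName() returns "Global/PostgreSQL:C:/data/pg". Note that the backslash-to-slash loop also rewrites the backslash in "Global\", which is what the XXX comment above refers to. The standalone program below, with a hard-coded stand-in for DataDir, reproduces just that name-mangling.

#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* 18 bytes of prefix, same as in GetSharedMemName() */
	char		name[64] = "Global\\PostgreSQL:";
	const char *datadir = "C:\\data\\pg";	/* stand-in for an expanded DataDir */
	char	   *cp;

	strcat(name, datadir);

	/* Replace every backslash, including the one in "Global\" */
	for (cp = name; *cp; cp++)
		if (*cp == '\\')
			*cp = '/';

	printf("%s\n", name);		/* prints: Global/PostgreSQL:C:/data/pg */
	return 0;
}
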
+
+
+/*
+ * PGSharedMemoryIsInUse
+ *
+ * Is a previously-existing shmem segment still existing and in use?
+ *
+ * The point of this exercise is to detect the case where a prior postmaster
+ * crashed, but it left child backends that are still running. Therefore
+ * we only care about shmem segments that are associated with the intended
+ * DataDir. This is an important consideration since accidental matches of
+ * shmem segment IDs are reasonably common.
+ */
+bool
+PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
+{
+ char *szShareMem;
+ HANDLE hmap;
+
+ szShareMem = GetSharedMemName();
+
+ hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
+
+ free(szShareMem);
+
+ if (hmap == NULL)
+ return false;
+
+ CloseHandle(hmap);
+ return true;
+}
+
+/*
+ * EnableLockPagesPrivilege
+ *
+ * Try to acquire SeLockMemoryPrivilege so we can use large pages.
+ */
+static bool
+EnableLockPagesPrivilege(int elevel)
+{
+ HANDLE hToken;
+ TOKEN_PRIVILEGES tp;
+ LUID luid;
+
+ if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
+ {
+ ereport(elevel,
+ (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
+ errdetail("Failed system call was %s.", "OpenProcessToken")));
+ return FALSE;
+ }
+
+ if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
+ {
+ ereport(elevel,
+ (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
+ errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
+ CloseHandle(hToken);
+ return FALSE;
+ }
+ tp.PrivilegeCount = 1;
+ tp.Privileges[0].Luid = luid;
+ tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+ if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
+ {
+ ereport(elevel,
+ (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
+ errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
+ CloseHandle(hToken);
+ return FALSE;
+ }
+
+ if (GetLastError() != ERROR_SUCCESS)
+ {
+ if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
+ ereport(elevel,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("could not enable Lock Pages in Memory user right"),
+ errhint("Assign Lock Pages in Memory user right to the Windows user account which runs PostgreSQL.")));
+ else
+ ereport(elevel,
+ (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
+ errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
+ CloseHandle(hToken);
+ return FALSE;
+ }
+
+ CloseHandle(hToken);
+
+ return TRUE;
+}
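
A related editorial sketch (not part of the patch): AdjustTokenPrivileges() can only enable a privilege the process token already holds, which is why the errhint above tells the administrator to assign the "Lock Pages in Memory" right. The snippet below, using only documented Win32 calls, checks whether the current token holds SeLockMemoryPrivilege at all, independently of whether it is currently enabled; error-path cleanup is minimal.

#include <windows.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	HANDLE		hToken;
	DWORD		len = 0;
	TOKEN_PRIVILEGES *tps;
	LUID		luid;
	DWORD		i;
	int			found = 0;

	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken))
		return 1;
	if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
		return 1;

	/* First call just reports the required buffer size */
	GetTokenInformation(hToken, TokenPrivileges, NULL, 0, &len);
	tps = malloc(len);
	if (tps == NULL ||
		!GetTokenInformation(hToken, TokenPrivileges, tps, len, &len))
		return 1;

	for (i = 0; i < tps->PrivilegeCount; i++)
		if (tps->Privileges[i].Luid.LowPart == luid.LowPart &&
			tps->Privileges[i].Luid.HighPart == luid.HighPart)
			found = 1;

	printf("Lock Pages in Memory right %s held by this token\n",
		   found ? "is" : "is NOT");

	free(tps);
	CloseHandle(hToken);
	return 0;
}
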
+
+/*
+ * PGSharedMemoryCreate
+ *
+ * Create a shared memory segment of the given size and initialize its
+ * standard header.
+ */
+PGShmemHeader *
+PGSharedMemoryCreate(Size size,
+ PGShmemHeader **shim)
+{
+ void *memAddress;
+ PGShmemHeader *hdr;
+ HANDLE hmap,
+ hmap2;
+ char *szShareMem;
+ int i;
+ DWORD size_high;
+ DWORD size_low;
+ SIZE_T largePageSize = 0;
+ Size orig_size = size;
+ DWORD flProtect = PAGE_READWRITE;
+
+ ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
+ MEM_RESERVE, PAGE_NOACCESS);
+ if (ShmemProtectiveRegion == NULL)
+ elog(FATAL, "could not reserve memory region: error code %lu",
+ GetLastError());
+
+ /* Room for a header? */
+ Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
+
+ szShareMem = GetSharedMemName();
+
+ UsedShmemSegAddr = NULL;
+
+ if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
+ {
+ /* Does the processor support large pages? */
+ largePageSize = GetLargePageMinimum();
+ if (largePageSize == 0)
+ {
+ ereport(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("the processor does not support large pages")));
+ ereport(DEBUG1,
+ (errmsg("disabling huge pages")));
+ }
+ else if (!EnableLockPagesPrivilege(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1))
+ {
+ ereport(DEBUG1,
+ (errmsg("disabling huge pages")));
+ }
+ else
+ {
+ /* Huge pages available and privilege enabled, so turn on */
+ flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
+
+ /* Round size up as appropriate. */
+ if (size % largePageSize != 0)
+ size += largePageSize - (size % largePageSize);
+ }
+ }
+
+retry:
+#ifdef _WIN64
+ size_high = size >> 32;
+#else
+ size_high = 0;
+#endif
+ size_low = (DWORD) size;
+
+ /*
+ * When recycling a shared memory segment, it may take a short while
+ * before it gets dropped from the global namespace. So re-try after
+ * sleeping for a second, and continue retrying 10 times. (both the 1
+ * second time and the 10 retries are completely arbitrary)
+ */
+ for (i = 0; i < 10; i++)
+ {
+ /*
+ * In case CreateFileMapping() doesn't set the error code to 0 on
+ * success
+ */
+ SetLastError(0);
+
+ hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
+ NULL, /* Default security attrs */
+ flProtect,
+ size_high, /* Size Upper 32 Bits */
+ size_low, /* Size Lower 32 bits */
+ szShareMem);
+
+ if (!hmap)
+ {
+ if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
+ huge_pages == HUGE_PAGES_TRY &&
+ (flProtect & SEC_LARGE_PAGES) != 0)
+ {
+ elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
+ "huge pages disabled",
+ size);
+
+ /*
+ * Use the original size, not the rounded-up value, when
+ * falling back to non-huge pages.
+ */
+ size = orig_size;
+ flProtect = PAGE_READWRITE;
+ goto retry;
+ }
+ else
+ ereport(FATAL,
+ (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
+ errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
+ size, szShareMem)));
+ }
+
+ /*
+ * If the segment already existed, CreateFileMapping() will return a
+ * handle to the existing one and set ERROR_ALREADY_EXISTS.
+ */
+ if (GetLastError() == ERROR_ALREADY_EXISTS)
+ {
+ CloseHandle(hmap); /* Close the handle, since we got a valid one
+ * to the previous segment. */
+ hmap = NULL;
+ Sleep(1000);
+ continue;
+ }
+ break;
+ }
+
+ /*
+ * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
+ * shared memory segment exists and we assume it belongs to somebody else.
+ */
+ if (!hmap)
+ ereport(FATAL,
+ (errmsg("pre-existing shared memory block is still in use"),
+ errhint("Check if there are any old server processes still running, and terminate them.")));
+
+ free(szShareMem);
+
+ /*
+ * Make the handle inheritable
+ */
+ if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
+ ereport(FATAL,
+ (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
+ errdetail("Failed system call was DuplicateHandle.")));
+
+ /*
+ * Close the old, non-inheritable handle. If this fails we don't really
+ * care.
+ */
+ if (!CloseHandle(hmap))
+ elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
+
+
+ /*
+ * Get a pointer to the new shared memory segment. Map the whole segment
+ * at once, and let the system decide on the initial address.
+ */
+ memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
+ if (!memAddress)
+ ereport(FATAL,
+ (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
+ errdetail("Failed system call was MapViewOfFileEx.")));
+
+
+
+ /*
+ * OK, we created a new segment. Mark it as created by this process. The
+ * order of assignments here is critical so that another Postgres process
+ * can't see the header as valid but belonging to an invalid PID!
+ */
+ hdr = (PGShmemHeader *) memAddress;
+ hdr->creatorPID = getpid();
+ hdr->magic = PGShmemMagic;
+
+ /*
+ * Initialize space allocation status for segment.
+ */
+ hdr->totalsize = size;
+ hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+ hdr->dsm_control = 0;
+
+ /* Save info for possible future use */
+ UsedShmemSegAddr = memAddress;
+ UsedShmemSegSize = size;
+ UsedShmemSegID = hmap2;
+
+ /* Register on-exit routine to delete the new segment */
+ on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));
+
+ *shim = hdr;
+ return hdr;
+}
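
An editorial sketch (not part of the patch) of the huge-page size rounding performed above, pulled out as a standalone helper. With a typical GetLargePageMinimum() of 2 MB (2097152 bytes), a request for 140000000 bytes is rounded up to 140509184 bytes, i.e. 67 large pages; orig_size keeps the un-rounded value so it can be reused if the large-page mapping fails and the code falls back to regular pages.

#include <stddef.h>

/* Round a requested segment size up to a whole number of large pages. */
static size_t
round_up_to_large_page(size_t size, size_t largePageSize)
{
	if (largePageSize != 0 && size % largePageSize != 0)
		size += largePageSize - (size % largePageSize);
	return size;
}
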
+
+/*
+ * PGSharedMemoryReAttach
+ *
+ * This is called during startup of a postmaster child process to re-attach to
+ * an already existing shared memory segment, using the handle inherited from
+ * the postmaster.
+ *
+ * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
+ * parameters to this routine. The caller must have already restored them to
+ * the postmaster's values.
+ */
+void
+PGSharedMemoryReAttach(void)
+{
+ PGShmemHeader *hdr;
+ void *origUsedShmemSegAddr = UsedShmemSegAddr;
+
+ Assert(ShmemProtectiveRegion != NULL);
+ Assert(UsedShmemSegAddr != NULL);
+ Assert(IsUnderPostmaster);
+
+ /*
+ * Release memory region reservations made by the postmaster
+ */
+ if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
+ elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
+ ShmemProtectiveRegion, GetLastError());
+ if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
+ elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
+ UsedShmemSegAddr, GetLastError());
+
+ hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
+ if (!hdr)
+ elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
+ UsedShmemSegID, UsedShmemSegAddr, GetLastError());
+ if (hdr != origUsedShmemSegAddr)
+ elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
+ hdr, origUsedShmemSegAddr);
+ if (hdr->magic != PGShmemMagic)
+ elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
+ dsm_set_control_handle(hdr->dsm_control);
+
+ UsedShmemSegAddr = hdr; /* probably redundant */
+}
+
+/*
+ * PGSharedMemoryNoReAttach
+ *
+ * This is called during startup of a postmaster child process when we choose
+ * *not* to re-attach to the existing shared memory segment. We must clean up
+ * to leave things in the appropriate state.
+ *
+ * The child process startup logic might or might not call PGSharedMemoryDetach
+ * after this; make sure that it will be a no-op if called.
+ *
+ * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
+ * parameters to this routine. The caller must have already restored them to
+ * the postmaster's values.
+ */
+void
+PGSharedMemoryNoReAttach(void)
+{
+ Assert(ShmemProtectiveRegion != NULL);
+ Assert(UsedShmemSegAddr != NULL);
+ Assert(IsUnderPostmaster);
+
+ /*
+ * Under Windows we will not have mapped the segment, so we don't need to
+ * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
+ */
+ UsedShmemSegAddr = NULL;
+
+ /*
+ * We *must* close the inherited shmem segment handle, else Windows will
+ * consider the existence of this process to mean it can't release the
+ * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
+ */
+ PGSharedMemoryDetach();
+}
+
+/*
+ * PGSharedMemoryDetach
+ *
+ * Detach from the shared memory segment, if still attached. This is not
+ * intended to be called explicitly by the process that originally created the
+ * segment (it will have an on_shmem_exit callback registered to do that).
+ * Rather, this is for subprocesses that have inherited an attachment and want
+ * to get rid of it.
+ *
+ * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
+ * parameters to this routine.
+ */
+void
+PGSharedMemoryDetach(void)
+{
+ /*
+ * Releasing the protective region liberates an unimportant quantity of
+ * address space, but be tidy.
+ */
+ if (ShmemProtectiveRegion != NULL)
+ {
+ if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
+ elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
+ ShmemProtectiveRegion, GetLastError());
+
+ ShmemProtectiveRegion = NULL;
+ }
+
+ /* Unmap the view, if it's mapped */
+ if (UsedShmemSegAddr != NULL)
+ {
+ if (!UnmapViewOfFile(UsedShmemSegAddr))
+ elog(LOG, "could not unmap view of shared memory: error code %lu",
+ GetLastError());
+
+ UsedShmemSegAddr = NULL;
+ }
+
+ /* And close the shmem handle, if we have one */
+ if (UsedShmemSegID != INVALID_HANDLE_VALUE)
+ {
+ if (!CloseHandle(UsedShmemSegID))
+ elog(LOG, "could not close handle to shared memory: error code %lu",
+ GetLastError());
+
+ UsedShmemSegID = INVALID_HANDLE_VALUE;
+ }
+}
+
+
+/*
+ * pgwin32_SharedMemoryDelete
+ *
+ * Detach from and delete the shared memory segment
+ * (called as an on_shmem_exit callback, hence funny argument list)
+ */
+static void
+pgwin32_SharedMemoryDelete(int status, Datum shmId)
+{
+ Assert(DatumGetPointer(shmId) == UsedShmemSegID);
+ PGSharedMemoryDetach();
+}
+
+/*
+ * pgwin32_ReserveSharedMemoryRegion(hChild)
+ *
+ * Reserve the memory region that will be used for shared memory in a child
+ * process. It is called before the child process starts, to make sure the
+ * memory is available.
+ *
+ * Once the child starts, DLLs loading in different order or threads getting
+ * scheduled differently may allocate memory which can conflict with the
+ * address space we need for our shared memory. Reserving the shared memory
+ * region before the child starts, and freeing it only just before we attempt
+ * to attach to the shared memory, forces these allocations into different
+ * address ranges that don't conflict.
+ *
+ * NOTE! This function executes in the postmaster, and should for this
+ * reason not use elog(FATAL) since that would take down the postmaster.
+ */
+int
+pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
+{
+ void *address;
+
+ Assert(ShmemProtectiveRegion != NULL);
+ Assert(UsedShmemSegAddr != NULL);
+ Assert(UsedShmemSegSize != 0);
+
+ /* ShmemProtectiveRegion */
+ address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
+ PROTECTIVE_REGION_SIZE,
+ MEM_RESERVE, PAGE_NOACCESS);
+ if (address == NULL)
+ {
+ /* Don't use FATAL since we're running in the postmaster */
+ elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
+ ShmemProtectiveRegion, hChild, GetLastError());
+ return false;
+ }
+ if (address != ShmemProtectiveRegion)
+ {
+ /*
+ * Should never happen - in theory if allocation granularity causes
+ * strange effects it could, so check just in case.
+ *
+ * Don't use FATAL since we're running in the postmaster.
+ */
+ elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
+ address, ShmemProtectiveRegion);
+ return false;
+ }
+
+ /* UsedShmemSegAddr */
+ address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
+ MEM_RESERVE, PAGE_READWRITE);
+ if (address == NULL)
+ {
+ elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
+ UsedShmemSegAddr, hChild, GetLastError());
+ return false;
+ }
+ if (address != UsedShmemSegAddr)
+ {
+ elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
+ address, UsedShmemSegAddr);
+ return false;
+ }
+
+ return true;
+}
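
Finally, an editorial usage sketch (not part of the patch, and simplified relative to the real postmaster code): the expected calling pattern is to create the child process suspended, reserve both regions in its address space with pgwin32_ReserveSharedMemoryRegion(), and only then let it run, so nothing in the child can grab those addresses first. Everything below except pgwin32_ReserveSharedMemoryRegion() is made up for the illustration, and error handling is minimal.

#include <windows.h>
#include <string.h>

/* Prototype repeated here to keep the sketch self-contained. */
extern int	pgwin32_ReserveSharedMemoryRegion(HANDLE hChild);

static BOOL
start_child_with_reserved_shmem(char *cmdline)
{
	STARTUPINFO si;
	PROCESS_INFORMATION pi;

	memset(&si, 0, sizeof(si));
	si.cb = sizeof(si);

	/* Create the child suspended so it cannot allocate anything yet */
	if (!CreateProcess(NULL, cmdline, NULL, NULL, TRUE,
					   CREATE_SUSPENDED, NULL, NULL, &si, &pi))
		return FALSE;

	/* Reserve ShmemProtectiveRegion and UsedShmemSegAddr in the child */
	if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
	{
		TerminateProcess(pi.hProcess, 255);
		CloseHandle(pi.hProcess);
		CloseHandle(pi.hThread);
		return FALSE;
	}

	/* Safe to let the child run now */
	ResumeThread(pi.hThread);
	CloseHandle(pi.hThread);
	/* the caller keeps pi.hProcess to track the child */
	return TRUE;
}
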