summaryrefslogtreecommitdiffstats
path: root/src/pmdk/src/libpmem2/map_windows.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/pmdk/src/libpmem2/map_windows.c')
-rw-r--r--src/pmdk/src/libpmem2/map_windows.c590
1 files changed, 590 insertions, 0 deletions
diff --git a/src/pmdk/src/libpmem2/map_windows.c b/src/pmdk/src/libpmem2/map_windows.c
new file mode 100644
index 000000000..67901eed3
--- /dev/null
+++ b/src/pmdk/src/libpmem2/map_windows.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2019-2020, Intel Corporation */
+
+/*
+ * map_windows.c -- pmem2_map (Windows)
+ */
+
+#include <stdbool.h>
+
+#include "libpmem2.h"
+
+#include "alloc.h"
+#include "auto_flush.h"
+#include "config.h"
+#include "map.h"
+#include "os_thread.h"
+#include "out.h"
+#include "persist.h"
+#include "pmem2_utils.h"
+#include "source.h"
+#include "sys_util.h"
+#include "util.h"
+
+/* extract the high and low 32-bit halves of a 64-bit value (Win32 API form) */
+#define HIDWORD(x) ((DWORD)((x) >> 32))
+#define LODWORD(x) ((DWORD)((x) & 0xFFFFFFFF))
+
+/* requested CACHE_LINE, available PAGE */
+#define REQ_CL_AVAIL_PG \
+	"requested granularity not available because specified volume is not a direct access (DAX) volume"
+
+/* requested BYTE, available PAGE */
+#define REQ_BY_AVAIL_PG REQ_CL_AVAIL_PG
+
+/* requested BYTE, available CACHE_LINE */
+#define REQ_BY_AVAIL_CL \
+	"requested granularity not available because the platform doesn't support eADR"
+
+/* indicates the cases in which the error cannot occur */
+#define GRAN_IMPOSSIBLE "impossible"
+/* error messages indexed as [requested granularity][available granularity] */
+static const char *granularity_err_msg[3][3] = {
+/* requested granularity / available granularity */
+/* -------------------------------------------------------------------- */
+/*		BYTE		CACHE_LINE		PAGE */
+/* -------------------------------------------------------------------- */
+/* BYTE */ {GRAN_IMPOSSIBLE, REQ_BY_AVAIL_CL, REQ_BY_AVAIL_PG},
+/* CL	*/ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, REQ_CL_AVAIL_PG},
+/* PAGE */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE}};
+
+/*
+ * create_mapping -- create a Windows file mapping object spanning the file
+ * content from its beginning up to offset + length
+ *
+ * Returns the mapping handle on success. On failure returns NULL and stores
+ * the GetLastError() value in *err; on success *err is reset to 0.
+ */
+static HANDLE
+create_mapping(HANDLE hfile, size_t offset, size_t length, DWORD protect,
+	unsigned long *err)
+{
+	/* the mapping object must cover the offset as well as the length */
+	size_t max_size = length + offset;
+
+	SetLastError(0);
+	HANDLE map_handle = CreateFileMapping(hfile,
+			NULL, /* security attributes */
+			protect,
+			HIDWORD(max_size),
+			LODWORD(max_size),
+			NULL);
+
+	*err = GetLastError();
+	if (map_handle == NULL) {
+		ERR("!!CreateFileMapping");
+		return NULL;
+	}
+
+	/* an already existing mapping object is treated as a failure here */
+	if (*err == ERROR_ALREADY_EXISTS) {
+		ERR("!!CreateFileMapping");
+		CloseHandle(map_handle);
+		return NULL;
+	}
+
+	/* if the handle is valid the last error is undefined */
+	*err = 0;
+	return map_handle;
+}
+
+/*
+ * is_direct_access -- check whether the volume backing the given file
+ * handle is a direct access (DAX) volume
+ *
+ * Returns 1 for a DAX volume, 0 otherwise, and a negative pmem2 error
+ * code when the volume information cannot be queried.
+ */
+static int
+is_direct_access(HANDLE fh)
+{
+	DWORD fs_flags;
+
+	if (GetVolumeInformationByHandleW(fh, NULL, 0, NULL, NULL,
+			&fs_flags, NULL, 0) == 0) {
+		ERR("!!GetVolumeInformationByHandleW");
+		/* always return a negative value */
+		return pmem2_lasterror_to_err();
+	}
+
+	return (fs_flags & FILE_DAX_VOLUME) ? 1 : 0;
+}
+
+/*
+ * Lookup helpers for the mappings closest to a reservation region.
+ * NOTE(review): presumably defined in the vm reservation module --
+ * confirm why these are declared here rather than in a shared header.
+ */
+struct pmem2_map *vm_reservation_map_find_closest_prior(
+		struct pmem2_vm_reservation *rsv,
+		size_t reserv_offset, size_t len);
+struct pmem2_map *vm_reservation_map_find_closest_later(
+		struct pmem2_vm_reservation *rsv,
+		size_t reserv_offset, size_t len);
+
+/*
+ * vm_reservation_unmap -- unmap the given region of the reservation while
+ * preserving the underlying placeholder
+ *
+ * Returns PMEM2_E_LENGTH_OUT_OF_RANGE when [addr, addr + length) does not
+ * lie entirely within the reservation, a translated Windows error when
+ * UnmapViewOfFile2 fails, and 0 on success.
+ */
+static int
+vm_reservation_unmap(struct pmem2_vm_reservation *rsv, void *addr,
+		size_t length)
+{
+	char *rsv_start = pmem2_vm_reservation_get_address(rsv);
+	char *rsv_end = rsv_start + pmem2_vm_reservation_get_size(rsv);
+
+	/* the whole region must fall inside the reservation */
+	if ((char *)addr < rsv_start || (char *)addr + length > rsv_end)
+		return PMEM2_E_LENGTH_OUT_OF_RANGE;
+
+	/* release the view but keep the address range reserved */
+	if (!UnmapViewOfFile2(GetCurrentProcess(), addr,
+			MEM_PRESERVE_PLACEHOLDER)) {
+		ERR("!!UnmapViewOfFile2");
+		return pmem2_lasterror_to_err();
+	}
+
+	return 0;
+}
+
+/*
+ * vm_reservation_merge -- merges the given placeholder region with its
+ * neighbouring placeholders
+ *
+ * addr/length describe a region of the reservation that is currently an
+ * unoccupied placeholder; any free space directly before and after it is
+ * coalesced into a single placeholder so that later mappings can split it
+ * to any required size.
+ */
+static int
+vm_reservation_merge(struct pmem2_vm_reservation *rsv, void *addr,
+		size_t length)
+{
+	void *rsv_addr = pmem2_vm_reservation_get_address(rsv);
+	size_t rsv_size = pmem2_vm_reservation_get_size(rsv);
+	size_t rsv_offset = (size_t)addr - (size_t)rsv_addr;
+
+	/*
+	 * After unmapping from the reservation, it is necessary to merge
+	 * the unoccupied neighbours so that the placeholders will be available
+	 * for splitting for the required size of the mapping.
+	 */
+	void *merge_addr = addr;
+	size_t merge_size = length;
+	struct pmem2_map *map = NULL;
+
+	/* extend the region down to the end of the closest prior mapping */
+	if (rsv_offset > 0) {
+		map = vm_reservation_map_find_closest_prior(rsv, rsv_offset,
+				length);
+		if (map) {
+			merge_addr = (char *)map->addr + map->reserved_length;
+			merge_size += (char *)addr - (char *)merge_addr;
+		} else {
+			/* no prior mapping -- merge from the reservation start */
+			merge_addr = rsv_addr;
+			merge_size += rsv_offset;
+		}
+	}
+
+	/* extend the region up to the start of the closest later mapping */
+	if (rsv_offset + length < rsv_size) {
+		map = vm_reservation_map_find_closest_later(rsv, rsv_offset,
+				length);
+		if (map)
+			merge_size += (char *)map->addr - (char *)addr - length;
+		else
+			merge_size += rsv_size - rsv_offset - length;
+	}
+
+	/* coalesce only when there is actually a neighbour to merge with */
+	if ((addr != merge_addr) || (length != merge_size)) {
+		int ret = VirtualFree(merge_addr,
+			merge_size,
+			MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS);
+		if (!ret) {
+			ERR("!!VirtualFree");
+			return pmem2_lasterror_to_err();
+
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * vm_reservation_split - splits the virtual memory reservation into
+ * separate regions
+ *
+ * Carves a placeholder of exactly 'length' bytes covering
+ * [rsv_offset, rsv_offset + length) out of the surrounding free
+ * placeholder, so a later MapViewOfFile3(..., MEM_REPLACE_PLACEHOLDER, ...)
+ * replaces a placeholder of the same size as the mapping.
+ */
+int
+vm_reservation_split(struct pmem2_vm_reservation *rsv, size_t rsv_offset,
+	size_t length)
+{
+	LOG(3, "rsv %p rsv_offset %zu length %zu", rsv, rsv_offset, length);
+
+	void *rsv_addr = pmem2_vm_reservation_get_address(rsv);
+	size_t rsv_size = pmem2_vm_reservation_get_size(rsv);
+
+	LOG(3, "rsv_addr %p rsv_size %zu", rsv_addr, rsv_size);
+
+	/*
+	 * A split is needed only when the target region borders free
+	 * placeholder space: no mapping ends right before rsv_offset, or no
+	 * mapping starts right after rsv_offset + length (while still being
+	 * inside the reservation). Otherwise the placeholder already has the
+	 * exact size.
+	 */
+	if ((rsv_offset > 0 && !vm_reservation_map_find(rsv,
+			rsv_offset - 1, 1)) ||
+			(rsv_offset + length < rsv_size &&
+			!vm_reservation_map_find(rsv,
+			rsv_offset + length, 1))) {
+		/* split the placeholder */
+		int ret = VirtualFree((char *)rsv_addr + rsv_offset,
+			length,
+			MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER);
+		if (!ret) {
+			ERR("!!VirtualFree");
+			ret = pmem2_lasterror_to_err();
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * pmem2_map_new -- map memory according to provided config
+ *
+ * Validates length/offset/granularity settings against the source, creates
+ * a file mapping object, maps a view of it -- either at an OS-chosen address
+ * or into a placeholder carved out of the user-provided virtual memory
+ * reservation -- verifies the achievable store granularity and registers the
+ * mapping. On success *map_ptr points to the new mapping and 0 is returned;
+ * on failure a PMEM2_E_* error code is returned and *map_ptr stays NULL.
+ */
+int
+pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg,
+	const struct pmem2_source *src)
+{
+	LOG(3, "cfg %p src %p map_ptr %p", cfg, src, map_ptr);
+	PMEM2_ERR_CLR();
+
+	int ret = 0;
+	unsigned long err = 0;
+	size_t file_size;
+	*map_ptr = NULL;
+
+	if ((int)cfg->requested_max_granularity == PMEM2_GRANULARITY_INVALID) {
+		ERR(
+			"please define the max granularity requested for the mapping");
+
+		return PMEM2_E_GRANULARITY_NOT_SET;
+	}
+
+	ret = pmem2_source_size(src, &file_size);
+	if (ret)
+		return ret;
+
+	size_t src_alignment;
+	ret = pmem2_source_alignment(src, &src_alignment);
+	if (ret)
+		return ret;
+
+	size_t length;
+	ret = pmem2_config_validate_length(cfg, file_size, src_alignment);
+	if (ret)
+		return ret;
+
+	size_t effective_offset;
+	ret = pmem2_validate_offset(cfg, &effective_offset, src_alignment);
+	if (ret)
+		return ret;
+
+	/* anonymous sources are not backed by a file -- the offset is moot */
+	if (src->type == PMEM2_SOURCE_ANON)
+		effective_offset = 0;
+
+	/* without user-provided length, map to the end of the file */
+	if (cfg->length)
+		length = cfg->length;
+	else
+		length = file_size - effective_offset;
+
+	/* INVALID_HANDLE_VALUE makes CreateFileMapping use the pagefile */
+	HANDLE map_handle = INVALID_HANDLE_VALUE;
+	if (src->type == PMEM2_SOURCE_HANDLE) {
+		map_handle = src->value.handle;
+	} else if (src->type == PMEM2_SOURCE_ANON) {
+		/* no extra settings */
+	} else {
+		ASSERT(0);
+	}
+
+	DWORD proto = PAGE_READWRITE;
+	DWORD access = FILE_MAP_ALL_ACCESS;
+
+	/* Unsupported flag combinations */
+	if ((cfg->protection_flag == PMEM2_PROT_NONE) ||
+			(cfg->protection_flag == PMEM2_PROT_WRITE) ||
+			(cfg->protection_flag == PMEM2_PROT_EXEC) ||
+			(cfg->protection_flag == (PMEM2_PROT_WRITE |
+				PMEM2_PROT_EXEC))) {
+		ERR("Windows does not support "
+			"this protection flag combination.");
+		return PMEM2_E_NOSUPP;
+	}
+
+	/* Translate protection flags into Windows flags */
+	if (cfg->protection_flag & PMEM2_PROT_WRITE) {
+		if (cfg->protection_flag & PMEM2_PROT_EXEC) {
+			proto = PAGE_EXECUTE_READWRITE;
+			access = FILE_MAP_READ | FILE_MAP_WRITE |
+				FILE_MAP_EXECUTE;
+		} else {
+			/*
+			 * Due to the already done exclusion
+			 * of incorrect combinations, PROT_WRITE
+			 * implies PROT_READ
+			 */
+			proto = PAGE_READWRITE;
+			access = FILE_MAP_READ | FILE_MAP_WRITE;
+		}
+	} else if (cfg->protection_flag & PMEM2_PROT_READ) {
+		if (cfg->protection_flag & PMEM2_PROT_EXEC) {
+			proto = PAGE_EXECUTE_READ;
+			access = FILE_MAP_READ | FILE_MAP_EXECUTE;
+		} else {
+			proto = PAGE_READONLY;
+			access = FILE_MAP_READ;
+		}
+	}
+
+	if (cfg->sharing == PMEM2_PRIVATE) {
+		if (cfg->protection_flag & PMEM2_PROT_EXEC) {
+			proto = PAGE_EXECUTE_WRITECOPY;
+			access = FILE_MAP_EXECUTE | FILE_MAP_COPY;
+		} else {
+			/*
+			 * If FILE_MAP_COPY is set,
+			 * protection is changed to read/write
+			 */
+			proto = PAGE_READONLY;
+			access = FILE_MAP_COPY;
+		}
+	}
+
+	/* create a file mapping handle */
+	HANDLE mh = create_mapping(map_handle, effective_offset, length,
+			proto, &err);
+
+	if (!mh) {
+		if (err == ERROR_ALREADY_EXISTS) {
+			ERR("mapping already exists");
+			return PMEM2_E_MAPPING_EXISTS;
+		} else if (err == ERROR_ACCESS_DENIED) {
+			return PMEM2_E_NO_ACCESS;
+		}
+		return pmem2_lasterror_to_err();
+	}
+
+	void *base;
+	void *rsv = cfg->reserv;
+	if (rsv) {
+		void *rsv_addr = pmem2_vm_reservation_get_address(rsv);
+		size_t rsv_size = pmem2_vm_reservation_get_size(rsv);
+		size_t rsv_offset = cfg->reserv_offset;
+
+		if (rsv_offset % Mmap_align) {
+			ret = PMEM2_E_OFFSET_UNALIGNED;
+			ERR(
+				"offset from the beginning of virtual memory "
+				"reservation %zu is not a multiple of %llu",
+				rsv_offset, Mmap_align);
+			goto err_close_mapping_handle;
+		}
+
+		if (rsv_offset + length > rsv_size) {
+			ret = PMEM2_E_LENGTH_OUT_OF_RANGE;
+			/* report rsv_offset -- the value the check used */
+			ERR(
+				"length of the mapping %zu combined with the "
+				"offset into the reservation %zu exceeds virtual "
+				"memory reservation size %zu",
+				length, rsv_offset, rsv_size);
+			goto err_close_mapping_handle;
+		}
+
+		/* paired with vm_reservation_release on the error paths */
+		if (vm_reservation_map_find_acquire(rsv, rsv_offset, length)) {
+			ret = PMEM2_E_MAPPING_EXISTS;
+			ERR(
+				"region of the reservation %p at the offset %zu and "
+				"length %zu is at least partly occupied by other mapping",
+				rsv, rsv_offset, length);
+			goto err_reservation_release;
+		}
+
+		void *addr = (char *)rsv_addr + rsv_offset;
+		/*
+		 * Before mapping to the reservation, it is necessary to split
+		 * the unoccupied region into separate placeholders,
+		 * so that the size to be mapped and the cut out placeholder
+		 * size will be the same.
+		 */
+		ret = vm_reservation_split(rsv, rsv_offset, length);
+		if (ret)
+			goto err_reservation_release;
+
+		/* replace placeholder with a regular mapping */
+		base = MapViewOfFile3(mh,
+			NULL, /* current process */
+			addr, /* addr in reservation */
+			effective_offset,
+			length,
+			MEM_REPLACE_PLACEHOLDER,
+			proto,
+			NULL,
+			0);
+
+		if (base == NULL) {
+			ERR("!!MapViewOfFile3");
+			DWORD ret_windows = GetLastError();
+			if (ret_windows == ERROR_INVALID_ADDRESS)
+				ret = PMEM2_E_MAPPING_EXISTS;
+			else
+				ret = pmem2_lasterror_to_err();
+			/*
+			 * nothing got mapped, so point 'base' at the
+			 * split-out placeholder -- the merge label below must
+			 * coalesce that region, not dereference NULL math
+			 */
+			base = addr;
+			goto err_merge_reservation_regions;
+		}
+
+		ASSERTeq(base, addr);
+	} else {
+		/* obtain a pointer to the mapping view */
+		base = MapViewOfFile(mh,
+			access,
+			HIDWORD(effective_offset),
+			LODWORD(effective_offset),
+			length);
+
+		if (base == NULL) {
+			ERR("!!MapViewOfFile");
+			ret = pmem2_lasterror_to_err();
+			goto err_close_mapping_handle;
+		}
+	}
+
+	/* the view keeps the mapping alive; the handle is no longer needed */
+	if (!CloseHandle(mh)) {
+		ERR("!!CloseHandle");
+		ret = pmem2_lasterror_to_err();
+		goto err_undo_mapping;
+	}
+	/* mark the handle closed so the error path won't close it twice */
+	mh = INVALID_HANDLE_VALUE;
+
+	enum pmem2_granularity available_min_granularity =
+		PMEM2_GRANULARITY_PAGE;
+	if (src->type == PMEM2_SOURCE_HANDLE) {
+		int direct_access = is_direct_access(src->value.handle);
+		if (direct_access < 0) {
+			ret = direct_access;
+			goto err_undo_mapping;
+		}
+
+		bool eADR = (pmem2_auto_flush() == 1);
+		available_min_granularity =
+			get_min_granularity(eADR, direct_access, cfg->sharing);
+	} else if (src->type == PMEM2_SOURCE_ANON) {
+		available_min_granularity = PMEM2_GRANULARITY_BYTE;
+	} else {
+		ASSERT(0);
+	}
+
+	if (available_min_granularity > cfg->requested_max_granularity) {
+		/* named gran_msg to avoid shadowing 'err' declared above */
+		const char *gran_msg = granularity_err_msg
+			[cfg->requested_max_granularity]
+			[available_min_granularity];
+		if (strcmp(gran_msg, GRAN_IMPOSSIBLE) == 0)
+			FATAL(
+				"unhandled granularity error: available_min_granularity: %d" \
+				"requested_max_granularity: %d",
+				available_min_granularity,
+				cfg->requested_max_granularity);
+		ERR("%s", gran_msg);
+		ret = PMEM2_E_GRANULARITY_NOT_SUPPORTED;
+		goto err_undo_mapping;
+	}
+
+	/* prepare pmem2_map structure */
+	struct pmem2_map *map;
+	map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret);
+	if (!map)
+		goto err_undo_mapping;
+
+	map->addr = base;
+	/*
+	 * XXX probably in some cases the reserved length > the content length.
+	 * Maybe it is worth to do the research.
+	 */
+	map->reserved_length = length;
+	map->content_length = length;
+	map->effective_granularity = available_min_granularity;
+	map->reserv = rsv;
+	map->source = *src;
+	pmem2_set_flush_fns(map);
+	pmem2_set_mem_fns(map);
+
+	ret = pmem2_register_mapping(map);
+	if (ret) {
+		goto err_free_map_struct;
+	}
+
+	if (rsv) {
+		ret = vm_reservation_map_register_release(rsv, map);
+		if (ret)
+			goto err_unregister_map;
+	}
+
+	/* return a pointer to the pmem2_map structure */
+	*map_ptr = map;
+
+	return ret;
+
+err_unregister_map:
+	pmem2_unregister_mapping(map);
+err_free_map_struct:
+	/* pmem2_malloc pairs with Free, matching pmem2_map_delete */
+	Free(map);
+err_undo_mapping:
+	/* for a reservation, unmap but keep the placeholder behind */
+	if (rsv)
+		vm_reservation_unmap(rsv, base, length);
+	else
+		UnmapViewOfFile(base);
+err_merge_reservation_regions:
+	if (rsv)
+		vm_reservation_merge(rsv, base, length);
+err_reservation_release:
+	if (rsv)
+		vm_reservation_release(rsv);
+err_close_mapping_handle:
+	/* skipped if the handle was already closed on the success path */
+	if (mh != INVALID_HANDLE_VALUE)
+		CloseHandle(mh);
+	return ret;
+}
+
+/*
+ * pmem2_map_delete -- unmap the specified region
+ *
+ * Unregisters the mapping, releases the mapped view (restoring and
+ * coalescing the reservation placeholder when the mapping lives inside a
+ * reservation) and frees the pmem2_map structure. On success *map_ptr is
+ * set to NULL; on failure the mapping is re-registered and stays usable.
+ */
+int
+pmem2_map_delete(struct pmem2_map **map_ptr)
+{
+	LOG(3, "map_ptr %p", map_ptr);
+	PMEM2_ERR_CLR();
+
+	struct pmem2_map *map = *map_ptr;
+	size_t map_len = map->content_length;
+	void *map_addr = map->addr;
+	struct pmem2_vm_reservation *rsv = map->reserv;
+
+	int ret = pmem2_unregister_mapping(map);
+	if (ret)
+		return ret;
+
+	/* reserved_length 0 presumably means no view exists -- confirm */
+	if (map->reserved_length != 0) {
+		if (rsv) {
+			void *rsv_addr = pmem2_vm_reservation_get_address(rsv);
+			size_t rsv_offset = (size_t)map_addr - (size_t)rsv_addr;
+			/* confirm the mapping is still in the reservation */
+			if (!vm_reservation_map_find_acquire(rsv, rsv_offset,
+					map_len)) {
+				ret = PMEM2_E_MAPPING_NOT_FOUND;
+				goto err_reservation_release;
+			}
+
+			/* unmap the view, keeping the placeholder behind */
+			ret = vm_reservation_unmap(rsv, map->addr,
+					map->reserved_length);
+			if (ret)
+				goto err_reservation_release;
+
+			/* coalesce the placeholder with free neighbours */
+			ret = vm_reservation_merge(rsv, map->addr,
+					map->reserved_length);
+			if (ret)
+				goto err_reservation_release;
+
+			ret = vm_reservation_map_unregister_release(rsv, map);
+			if (ret)
+				goto err_register_map;
+		} else {
+			if (!UnmapViewOfFile(map->addr)) {
+				ERR("!!UnmapViewOfFile");
+				ret = pmem2_lasterror_to_err();
+				goto err_register_map;
+			}
+		}
+	}
+
+	Free(map);
+	*map_ptr = NULL;
+
+	return 0;
+
+err_reservation_release:
+	vm_reservation_release(rsv);
+err_register_map:
+	/* undo the earlier unregistration so the mapping remains valid */
+	pmem2_register_mapping(map);
+	return ret;
+}