From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/pmdk/.cirrus.yml | 10 + src/pmdk/.codecov.yml | 18 + src/pmdk/.gitattributes | 7 + src/pmdk/.github/ISSUE_TEMPLATE.md | 28 + src/pmdk/.github/ISSUE_TEMPLATE/bug_report.md | 49 + src/pmdk/.github/ISSUE_TEMPLATE/feature.md | 26 + src/pmdk/.github/ISSUE_TEMPLATE/question.md | 15 + src/pmdk/.github/workflows/coverity.yml | 41 + src/pmdk/.github/workflows/gha.yml | 155 + src/pmdk/.gitignore | 31 + src/pmdk/.mailmap | 29 + src/pmdk/.skip-doc | 0 src/pmdk/.travis.yml | 42 + src/pmdk/CODING_STYLE.md | 140 + src/pmdk/CONTRIBUTING.md | 153 + src/pmdk/ChangeLog | 866 +++ src/pmdk/LICENSE | 39 + src/pmdk/Makefile | 136 + src/pmdk/README.md | 386 ++ src/pmdk/VERSION | 1 + src/pmdk/appveyor.yml | 95 + src/pmdk/res/PMDK.ico | Bin 0 -> 53067 bytes src/pmdk/src/.clang-format | 33 + src/pmdk/src/.gitignore | 23 + src/pmdk/src/LongPath.manifest | 7 + src/pmdk/src/LongPathSupport.props | 10 + src/pmdk/src/Makefile | 216 + src/pmdk/src/Makefile.inc | 318 + src/pmdk/src/PMDK.sln | 2240 +++++++ src/pmdk/src/README | 16 + src/pmdk/src/common.inc | 400 ++ src/pmdk/src/common/.cstyleignore | 1 + src/pmdk/src/common/Makefile | 15 + src/pmdk/src/common/bad_blocks.c | 264 + src/pmdk/src/common/badblocks.h | 77 + src/pmdk/src/common/common.rc | 80 + src/pmdk/src/common/ctl.c | 578 ++ src/pmdk/src/common/ctl.h | 202 + src/pmdk/src/common/ctl_cow.c | 51 + src/pmdk/src/common/ctl_fallocate.c | 46 + src/pmdk/src/common/ctl_global.h | 33 + src/pmdk/src/common/ctl_prefault.c | 69 + src/pmdk/src/common/ctl_sds.c | 46 + src/pmdk/src/common/dlsym.h | 103 + src/pmdk/src/common/file.c | 618 ++ src/pmdk/src/common/file.h | 115 + src/pmdk/src/common/file_posix.c | 264 + src/pmdk/src/common/file_windows.c | 196 + src/pmdk/src/common/libpmemcommon.vcxproj | 163 + src/pmdk/src/common/libpmemcommon.vcxproj.filters | 149 + src/pmdk/src/common/mmap.c | 504 ++ src/pmdk/src/common/mmap.h | 142 + src/pmdk/src/common/mmap_posix.c | 193 + src/pmdk/src/common/mmap_windows.c | 150 + src/pmdk/src/common/os_deep.h | 27 + src/pmdk/src/common/os_deep_linux.c | 177 + src/pmdk/src/common/os_deep_windows.c | 75 + src/pmdk/src/common/page_size.h | 22 + src/pmdk/src/common/pmemcommon.h | 39 + src/pmdk/src/common/pmemcommon.inc | 55 + src/pmdk/src/common/pool_hdr.c | 345 + src/pmdk/src/common/pool_hdr.h | 259 + src/pmdk/src/common/queue.h | 634 ++ src/pmdk/src/common/rand.c | 124 + src/pmdk/src/common/rand.h | 29 + src/pmdk/src/common/ravl.c | 577 ++ src/pmdk/src/common/ravl.h | 54 + src/pmdk/src/common/set.c | 4439 +++++++++++++ src/pmdk/src/common/set.h | 440 ++ src/pmdk/src/common/set_badblocks.c | 254 + src/pmdk/src/common/set_badblocks.h | 28 + src/pmdk/src/common/shutdown_state.c | 234 + src/pmdk/src/common/shutdown_state.h | 41 + src/pmdk/src/common/sys_util.h | 315 + src/pmdk/src/common/util_pmem.h | 47 + src/pmdk/src/common/uuid.c | 83 + src/pmdk/src/common/uuid.h | 55 + src/pmdk/src/common/uuid_freebsd.c | 24 + src/pmdk/src/common/uuid_linux.c | 49 + src/pmdk/src/common/uuid_windows.c | 23 + src/pmdk/src/common/vec.h | 157 + src/pmdk/src/common/vecq.h | 128 + src/pmdk/src/core/Makefile | 12 + src/pmdk/src/core/alloc.c | 119 + src/pmdk/src/core/alloc.h | 49 + src/pmdk/src/core/errno_freebsd.h | 19 + src/pmdk/src/core/fault_injection.h | 39 + src/pmdk/src/core/fs.h | 51 + src/pmdk/src/core/fs_posix.c | 84 + src/pmdk/src/core/fs_windows.c | 
123 + src/pmdk/src/core/libpmemcore.vcxproj | 135 + src/pmdk/src/core/libpmemcore.vcxproj.filters | 71 + src/pmdk/src/core/os.h | 115 + src/pmdk/src/core/os_posix.c | 353 ++ src/pmdk/src/core/os_thread.h | 181 + src/pmdk/src/core/os_thread_posix.c | 436 ++ src/pmdk/src/core/os_thread_windows.c | 655 ++ src/pmdk/src/core/os_windows.c | 741 +++ src/pmdk/src/core/out.c | 592 ++ src/pmdk/src/core/out.h | 231 + src/pmdk/src/core/pmemcore.h | 44 + src/pmdk/src/core/pmemcore.inc | 41 + src/pmdk/src/core/util.c | 494 ++ src/pmdk/src/core/util.h | 541 ++ src/pmdk/src/core/util_posix.c | 126 + src/pmdk/src/core/util_windows.c | 320 + src/pmdk/src/core/valgrind/.cstyleignore | 5 + src/pmdk/src/core/valgrind/README | 2 + src/pmdk/src/core/valgrind/drd.h | 571 ++ src/pmdk/src/core/valgrind/helgrind.h | 841 +++ src/pmdk/src/core/valgrind/memcheck.h | 320 + src/pmdk/src/core/valgrind/pmemcheck.h | 186 + src/pmdk/src/core/valgrind/valgrind.h | 6647 ++++++++++++++++++++ src/pmdk/src/core/valgrind_internal.h | 478 ++ src/pmdk/src/freebsd/README | 13 + src/pmdk/src/freebsd/include/endian.h | 8 + src/pmdk/src/freebsd/include/features.h | 6 + src/pmdk/src/freebsd/include/linux/kdev_t.h | 6 + src/pmdk/src/freebsd/include/linux/limits.h | 6 + src/pmdk/src/freebsd/include/sys/sysmacros.h | 6 + src/pmdk/src/include/.cstyleignore | 1 + src/pmdk/src/include/README | 27 + src/pmdk/src/include/libpmem.h | 131 + src/pmdk/src/include/libpmem2.h | 272 + src/pmdk/src/include/libpmemblk.h | 164 + src/pmdk/src/include/libpmemlog.h | 152 + src/pmdk/src/include/libpmemobj++/README.md | 2 + src/pmdk/src/include/libpmemobj++/detail/README.md | 2 + src/pmdk/src/include/libpmemobj.h | 26 + src/pmdk/src/include/libpmemobj/action.h | 33 + src/pmdk/src/include/libpmemobj/action_base.h | 74 + src/pmdk/src/include/libpmemobj/atomic.h | 45 + src/pmdk/src/include/libpmemobj/atomic_base.h | 93 + src/pmdk/src/include/libpmemobj/base.h | 299 + src/pmdk/src/include/libpmemobj/ctl.h | 175 + src/pmdk/src/include/libpmemobj/iterator.h | 82 + src/pmdk/src/include/libpmemobj/iterator_base.h | 39 + src/pmdk/src/include/libpmemobj/lists_atomic.h | 164 + .../src/include/libpmemobj/lists_atomic_base.h | 39 + src/pmdk/src/include/libpmemobj/pool.h | 17 + src/pmdk/src/include/libpmemobj/pool_base.h | 105 + src/pmdk/src/include/libpmemobj/thread.h | 71 + src/pmdk/src/include/libpmemobj/tx.h | 185 + src/pmdk/src/include/libpmemobj/tx_base.h | 450 ++ src/pmdk/src/include/libpmemobj/types.h | 205 + src/pmdk/src/include/libpmempool.h | 334 + src/pmdk/src/include/librpmem.h | 98 + src/pmdk/src/include/pmemcompat.h | 63 + src/pmdk/src/libpmem/Makefile | 68 + src/pmdk/src/libpmem/libpmem.c | 125 + src/pmdk/src/libpmem/libpmem.def | 66 + src/pmdk/src/libpmem/libpmem.link.in | 35 + src/pmdk/src/libpmem/libpmem.rc | 12 + src/pmdk/src/libpmem/libpmem.vcxproj | 162 + src/pmdk/src/libpmem/libpmem.vcxproj.filters | 243 + src/pmdk/src/libpmem/libpmem_main.c | 36 + src/pmdk/src/libpmem/pmem.c | 957 +++ src/pmdk/src/libpmem/pmem.h | 58 + src/pmdk/src/libpmem/pmem_posix.c | 81 + src/pmdk/src/libpmem/pmem_windows.c | 215 + src/pmdk/src/libpmem2/Makefile | 66 + src/pmdk/src/libpmem2/aarch64/arm_cacheops.h | 62 + src/pmdk/src/libpmem2/aarch64/flags.inc | 11 + src/pmdk/src/libpmem2/aarch64/flush.h | 31 + src/pmdk/src/libpmem2/aarch64/init.c | 47 + src/pmdk/src/libpmem2/aarch64/sources.inc | 8 + src/pmdk/src/libpmem2/auto_flush.h | 21 + src/pmdk/src/libpmem2/auto_flush_linux.c | 184 + src/pmdk/src/libpmem2/auto_flush_none.c | 16 + src/pmdk/src/libpmem2/auto_flush_windows.c | 
197 + src/pmdk/src/libpmem2/auto_flush_windows.h | 50 + src/pmdk/src/libpmem2/badblocks.c | 41 + src/pmdk/src/libpmem2/badblocks_ndctl.c | 771 +++ src/pmdk/src/libpmem2/badblocks_none.c | 50 + src/pmdk/src/libpmem2/config.c | 218 + src/pmdk/src/libpmem2/config.h | 34 + src/pmdk/src/libpmem2/deep_flush.c | 42 + src/pmdk/src/libpmem2/deep_flush.h | 27 + src/pmdk/src/libpmem2/deep_flush_linux.c | 124 + src/pmdk/src/libpmem2/deep_flush_other.c | 47 + src/pmdk/src/libpmem2/deep_flush_windows.c | 47 + src/pmdk/src/libpmem2/errormsg.c | 97 + src/pmdk/src/libpmem2/extent.h | 37 + src/pmdk/src/libpmem2/extent_linux.c | 164 + src/pmdk/src/libpmem2/extent_none.c | 31 + src/pmdk/src/libpmem2/libpmem2.c | 48 + src/pmdk/src/libpmem2/libpmem2.def | 55 + src/pmdk/src/libpmem2/libpmem2.link.in | 51 + src/pmdk/src/libpmem2/libpmem2.rc | 12 + src/pmdk/src/libpmem2/libpmem2.vcxproj | 144 + src/pmdk/src/libpmem2/libpmem2.vcxproj.filters | 223 + src/pmdk/src/libpmem2/libpmem2_main.c | 28 + src/pmdk/src/libpmem2/map.c | 294 + src/pmdk/src/libpmem2/map.h | 63 + src/pmdk/src/libpmem2/map_posix.c | 609 ++ src/pmdk/src/libpmem2/map_windows.c | 590 ++ src/pmdk/src/libpmem2/memops_generic.c | 339 + src/pmdk/src/libpmem2/persist.c | 610 ++ src/pmdk/src/libpmem2/persist.h | 29 + src/pmdk/src/libpmem2/persist_posix.c | 50 + src/pmdk/src/libpmem2/persist_windows.c | 36 + src/pmdk/src/libpmem2/pmem2.h | 27 + src/pmdk/src/libpmem2/pmem2_arch.h | 59 + src/pmdk/src/libpmem2/pmem2_utils.c | 95 + src/pmdk/src/libpmem2/pmem2_utils.h | 55 + src/pmdk/src/libpmem2/pmem2_utils_linux.c | 70 + src/pmdk/src/libpmem2/pmem2_utils_ndctl.c | 91 + src/pmdk/src/libpmem2/pmem2_utils_none.c | 33 + src/pmdk/src/libpmem2/pmem2_utils_other.c | 59 + src/pmdk/src/libpmem2/ppc64/.cstyleignore | 1 + src/pmdk/src/libpmem2/ppc64/flags.inc | 9 + src/pmdk/src/libpmem2/ppc64/init.c | 66 + src/pmdk/src/libpmem2/ppc64/sources.inc | 4 + src/pmdk/src/libpmem2/ravl_interval.c | 260 + src/pmdk/src/libpmem2/ravl_interval.h | 36 + src/pmdk/src/libpmem2/region_namespace.h | 26 + src/pmdk/src/libpmem2/region_namespace_ndctl.c | 258 + src/pmdk/src/libpmem2/region_namespace_ndctl.h | 32 + src/pmdk/src/libpmem2/region_namespace_none.c | 16 + src/pmdk/src/libpmem2/source.c | 37 + src/pmdk/src/libpmem2/source.h | 49 + src/pmdk/src/libpmem2/source_posix.c | 196 + src/pmdk/src/libpmem2/source_windows.c | 183 + src/pmdk/src/libpmem2/usc_ndctl.c | 149 + src/pmdk/src/libpmem2/usc_none.c | 23 + src/pmdk/src/libpmem2/usc_windows.c | 230 + src/pmdk/src/libpmem2/vm_reservation.c | 294 + src/pmdk/src/libpmem2/vm_reservation.h | 25 + src/pmdk/src/libpmem2/vm_reservation_posix.c | 85 + src/pmdk/src/libpmem2/vm_reservation_windows.c | 120 + src/pmdk/src/libpmem2/x86_64/avx.h | 86 + src/pmdk/src/libpmem2/x86_64/cpu.c | 174 + src/pmdk/src/libpmem2/x86_64/cpu.h | 18 + src/pmdk/src/libpmem2/x86_64/flags.inc | 31 + src/pmdk/src/libpmem2/x86_64/flush.h | 118 + src/pmdk/src/libpmem2/x86_64/init.c | 528 ++ src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx.h | 100 + .../src/libpmem2/x86_64/memcpy/memcpy_avx512f.h | 18 + .../src/libpmem2/x86_64/memcpy/memcpy_nt_avx.c | 443 ++ .../src/libpmem2/x86_64/memcpy/memcpy_nt_avx512f.c | 459 ++ .../src/libpmem2/x86_64/memcpy/memcpy_nt_sse2.c | 428 ++ src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_sse2.h | 116 + src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx.c | 281 + .../src/libpmem2/x86_64/memcpy/memcpy_t_avx512f.c | 438 ++ .../src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c | 246 + src/pmdk/src/libpmem2/x86_64/memcpy_memset.h | 273 + 
src/pmdk/src/libpmem2/x86_64/memset/memset_avx.h | 97 + .../src/libpmem2/x86_64/memset/memset_avx512f.h | 18 + .../src/libpmem2/x86_64/memset/memset_nt_avx.c | 286 + .../src/libpmem2/x86_64/memset/memset_nt_avx512f.c | 282 + .../src/libpmem2/x86_64/memset/memset_nt_sse2.c | 273 + src/pmdk/src/libpmem2/x86_64/memset/memset_sse2.h | 104 + src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx.c | 178 + .../src/libpmem2/x86_64/memset/memset_t_avx512f.c | 285 + .../src/libpmem2/x86_64/memset/memset_t_sse2.c | 153 + src/pmdk/src/libpmem2/x86_64/sources.inc | 30 + src/pmdk/src/libpmemblk/Makefile | 22 + src/pmdk/src/libpmemblk/blk.c | 948 +++ src/pmdk/src/libpmemblk/blk.h | 102 + src/pmdk/src/libpmemblk/btt.c | 2051 ++++++ src/pmdk/src/libpmemblk/btt.h | 59 + src/pmdk/src/libpmemblk/btt_layout.h | 107 + src/pmdk/src/libpmemblk/libpmemblk.c | 200 + src/pmdk/src/libpmemblk/libpmemblk.def | 36 + src/pmdk/src/libpmemblk/libpmemblk.link.in | 28 + src/pmdk/src/libpmemblk/libpmemblk.rc | 12 + src/pmdk/src/libpmemblk/libpmemblk.vcxproj | 133 + src/pmdk/src/libpmemblk/libpmemblk.vcxproj.filters | 217 + src/pmdk/src/libpmemblk/libpmemblk_main.c | 32 + src/pmdk/src/libpmemlog/Makefile | 23 + src/pmdk/src/libpmemlog/libpmemlog.c | 201 + src/pmdk/src/libpmemlog/libpmemlog.def | 36 + src/pmdk/src/libpmemlog/libpmemlog.link.in | 28 + src/pmdk/src/libpmemlog/libpmemlog.rc | 12 + src/pmdk/src/libpmemlog/libpmemlog.vcxproj | 130 + src/pmdk/src/libpmemlog/libpmemlog.vcxproj.filters | 208 + src/pmdk/src/libpmemlog/libpmemlog_main.c | 32 + src/pmdk/src/libpmemlog/log.c | 895 +++ src/pmdk/src/libpmemlog/log.h | 115 + src/pmdk/src/libpmemobj/Makefile | 41 + src/pmdk/src/libpmemobj/alloc_class.c | 636 ++ src/pmdk/src/libpmemobj/alloc_class.h | 79 + src/pmdk/src/libpmemobj/bucket.c | 99 + src/pmdk/src/libpmemobj/bucket.h | 50 + src/pmdk/src/libpmemobj/container.h | 48 + src/pmdk/src/libpmemobj/container_ravl.c | 188 + src/pmdk/src/libpmemobj/container_ravl.h | 23 + src/pmdk/src/libpmemobj/container_seglists.c | 171 + src/pmdk/src/libpmemobj/container_seglists.h | 24 + src/pmdk/src/libpmemobj/critnib.c | 651 ++ src/pmdk/src/libpmemobj/critnib.h | 31 + src/pmdk/src/libpmemobj/ctl_debug.c | 61 + src/pmdk/src/libpmemobj/ctl_debug.h | 22 + src/pmdk/src/libpmemobj/heap.c | 1893 ++++++ src/pmdk/src/libpmemobj/heap.h | 132 + src/pmdk/src/libpmemobj/heap_layout.h | 206 + src/pmdk/src/libpmemobj/lane.c | 572 ++ src/pmdk/src/libpmemobj/lane.h | 149 + src/pmdk/src/libpmemobj/libpmemobj.c | 136 + src/pmdk/src/libpmemobj/libpmemobj.def | 124 + src/pmdk/src/libpmemobj/libpmemobj.link.in | 121 + src/pmdk/src/libpmemobj/libpmemobj.rc | 12 + src/pmdk/src/libpmemobj/libpmemobj.vcxproj | 187 + src/pmdk/src/libpmemobj/libpmemobj.vcxproj.filters | 384 ++ src/pmdk/src/libpmemobj/libpmemobj_main.c | 32 + src/pmdk/src/libpmemobj/list.c | 939 +++ src/pmdk/src/libpmemobj/list.h | 64 + src/pmdk/src/libpmemobj/memblock.c | 1520 +++++ src/pmdk/src/libpmemobj/memblock.h | 306 + src/pmdk/src/libpmemobj/memops.c | 837 +++ src/pmdk/src/libpmemobj/memops.h | 84 + src/pmdk/src/libpmemobj/obj.c | 3447 ++++++++++ src/pmdk/src/libpmemobj/obj.h | 289 + src/pmdk/src/libpmemobj/palloc.c | 1336 ++++ src/pmdk/src/libpmemobj/palloc.h | 113 + src/pmdk/src/libpmemobj/pmalloc.c | 797 +++ src/pmdk/src/libpmemobj/pmalloc.h | 50 + src/pmdk/src/libpmemobj/pmemops.h | 104 + src/pmdk/src/libpmemobj/recycler.c | 303 + src/pmdk/src/libpmemobj/recycler.h | 52 + src/pmdk/src/libpmemobj/stats.c | 151 + src/pmdk/src/libpmemobj/stats.h | 108 + src/pmdk/src/libpmemobj/sync.c | 642 ++ 
src/pmdk/src/libpmemobj/sync.h | 112 + src/pmdk/src/libpmemobj/tx.c | 2375 +++++++ src/pmdk/src/libpmemobj/tx.h | 54 + src/pmdk/src/libpmemobj/ulog.c | 883 +++ src/pmdk/src/libpmemobj/ulog.h | 166 + src/pmdk/src/libpmempool/Makefile | 61 + src/pmdk/src/libpmempool/check.c | 232 + src/pmdk/src/libpmempool/check.h | 30 + src/pmdk/src/libpmempool/check_backup.c | 367 ++ src/pmdk/src/libpmempool/check_bad_blocks.c | 60 + src/pmdk/src/libpmempool/check_blk.c | 237 + src/pmdk/src/libpmempool/check_btt_info.c | 509 ++ src/pmdk/src/libpmempool/check_btt_map_flog.c | 685 ++ src/pmdk/src/libpmempool/check_log.c | 209 + src/pmdk/src/libpmempool/check_pool_hdr.c | 1010 +++ src/pmdk/src/libpmempool/check_sds.c | 289 + src/pmdk/src/libpmempool/check_util.c | 669 ++ src/pmdk/src/libpmempool/check_util.h | 196 + src/pmdk/src/libpmempool/check_write.c | 246 + src/pmdk/src/libpmempool/feature.c | 789 +++ src/pmdk/src/libpmempool/libpmempool.c | 417 ++ src/pmdk/src/libpmempool/libpmempool.def | 32 + src/pmdk/src/libpmempool/libpmempool.link.in | 23 + src/pmdk/src/libpmempool/libpmempool.rc | 12 + src/pmdk/src/libpmempool/libpmempool.vcxproj | 162 + .../src/libpmempool/libpmempool.vcxproj.filters | 253 + src/pmdk/src/libpmempool/libpmempool_main.c | 34 + src/pmdk/src/libpmempool/pmempool.h | 48 + src/pmdk/src/libpmempool/pool.c | 1123 ++++ src/pmdk/src/libpmempool/pool.h | 163 + src/pmdk/src/libpmempool/replica.c | 2503 ++++++++ src/pmdk/src/libpmempool/replica.h | 211 + src/pmdk/src/libpmempool/rm.c | 251 + src/pmdk/src/libpmempool/sync.c | 1646 +++++ src/pmdk/src/libpmempool/transform.c | 1017 +++ src/pmdk/src/librpmem/Makefile | 43 + src/pmdk/src/librpmem/README | 7 + src/pmdk/src/librpmem/librpmem.c | 84 + src/pmdk/src/librpmem/librpmem.link.in | 24 + src/pmdk/src/librpmem/rpmem.c | 914 +++ src/pmdk/src/librpmem/rpmem.h | 34 + src/pmdk/src/librpmem/rpmem_cmd.c | 239 + src/pmdk/src/librpmem/rpmem_cmd.h | 39 + src/pmdk/src/librpmem/rpmem_fip.c | 1987 ++++++ src/pmdk/src/librpmem/rpmem_fip.h | 61 + src/pmdk/src/librpmem/rpmem_obc.c | 677 ++ src/pmdk/src/librpmem/rpmem_obc.h | 47 + src/pmdk/src/librpmem/rpmem_ssh.c | 442 ++ src/pmdk/src/librpmem/rpmem_ssh.h | 34 + src/pmdk/src/librpmem/rpmem_util.c | 239 + src/pmdk/src/librpmem/rpmem_util.h | 47 + src/pmdk/src/libvmem/README.md | 2 + src/pmdk/src/libvmmalloc/README.md | 2 + src/pmdk/src/rpmem_common/Makefile | 33 + src/pmdk/src/rpmem_common/rpmem_common.c | 314 + src/pmdk/src/rpmem_common/rpmem_common.h | 139 + src/pmdk/src/rpmem_common/rpmem_common_log.h | 38 + src/pmdk/src/rpmem_common/rpmem_fip_common.c | 332 + src/pmdk/src/rpmem_common/rpmem_fip_common.h | 89 + src/pmdk/src/rpmem_common/rpmem_fip_lane.h | 127 + src/pmdk/src/rpmem_common/rpmem_fip_msg.h | 146 + src/pmdk/src/rpmem_common/rpmem_proto.h | 545 ++ src/pmdk/src/tools/.gitignore | 7 + src/pmdk/src/tools/Makefile | 45 + src/pmdk/src/tools/Makefile.inc | 342 + src/pmdk/src/tools/daxio/.gitignore | 1 + src/pmdk/src/tools/daxio/Makefile | 36 + src/pmdk/src/tools/daxio/README | 47 + src/pmdk/src/tools/daxio/daxio.c | 607 ++ src/pmdk/src/tools/pmempool/.gitignore | 1 + src/pmdk/src/tools/pmempool/Makefile | 58 + src/pmdk/src/tools/pmempool/README | 306 + .../src/tools/pmempool/bash_completion/pmempool | 168 + src/pmdk/src/tools/pmempool/check.c | 315 + src/pmdk/src/tools/pmempool/check.h | 9 + src/pmdk/src/tools/pmempool/common.c | 1382 ++++ src/pmdk/src/tools/pmempool/common.h | 203 + src/pmdk/src/tools/pmempool/convert.c | 111 + src/pmdk/src/tools/pmempool/convert.h | 11 + 
src/pmdk/src/tools/pmempool/create.c | 668 ++ src/pmdk/src/tools/pmempool/create.h | 9 + src/pmdk/src/tools/pmempool/dump.c | 391 ++ src/pmdk/src/tools/pmempool/dump.h | 9 + src/pmdk/src/tools/pmempool/feature.c | 207 + src/pmdk/src/tools/pmempool/feature.h | 9 + src/pmdk/src/tools/pmempool/info.c | 1034 +++ src/pmdk/src/tools/pmempool/info.h | 166 + src/pmdk/src/tools/pmempool/info_blk.c | 567 ++ src/pmdk/src/tools/pmempool/info_log.c | 160 + src/pmdk/src/tools/pmempool/info_obj.c | 962 +++ src/pmdk/src/tools/pmempool/output.c | 844 +++ src/pmdk/src/tools/pmempool/output.h | 48 + src/pmdk/src/tools/pmempool/pmempool.c | 302 + src/pmdk/src/tools/pmempool/pmempool.rc | Bin 0 -> 3722 bytes src/pmdk/src/tools/pmempool/pmempool.vcxproj | 178 + .../src/tools/pmempool/pmempool.vcxproj.filters | 157 + src/pmdk/src/tools/pmempool/rm.c | 372 ++ src/pmdk/src/tools/pmempool/rm.h | 9 + src/pmdk/src/tools/pmempool/synchronize.c | 157 + src/pmdk/src/tools/pmempool/synchronize.h | 9 + src/pmdk/src/tools/pmempool/transform.c | 160 + src/pmdk/src/tools/pmempool/transform.h | 9 + src/pmdk/src/tools/pmreorder/.gitignore | 3 + src/pmdk/src/tools/pmreorder/Makefile | 16 + .../src/tools/pmreorder/binaryoutputhandler.py | 218 + .../src/tools/pmreorder/consistencycheckwrap.py | 112 + src/pmdk/src/tools/pmreorder/loggingfacility.py | 77 + src/pmdk/src/tools/pmreorder/markerparser.py | 52 + src/pmdk/src/tools/pmreorder/memoryoperations.py | 413 ++ src/pmdk/src/tools/pmreorder/operationfactory.py | 145 + src/pmdk/src/tools/pmreorder/opscontext.py | 68 + src/pmdk/src/tools/pmreorder/pmreorder.py | 88 + src/pmdk/src/tools/pmreorder/reorderengines.py | 341 + src/pmdk/src/tools/pmreorder/reorderexceptions.py | 10 + src/pmdk/src/tools/pmreorder/statemachine.py | 364 ++ src/pmdk/src/tools/pmreorder/utils.py | 102 + src/pmdk/src/tools/rpmemd/.gitignore | 1 + src/pmdk/src/tools/rpmemd/Makefile | 47 + src/pmdk/src/tools/rpmemd/README | 8 + src/pmdk/src/tools/rpmemd/rpmemd.c | 803 +++ src/pmdk/src/tools/rpmemd/rpmemd.h | 8 + src/pmdk/src/tools/rpmemd/rpmemd_config.c | 640 ++ src/pmdk/src/tools/rpmemd/rpmemd_config.h | 45 + src/pmdk/src/tools/rpmemd/rpmemd_db.c | 635 ++ src/pmdk/src/tools/rpmemd/rpmemd_db.h | 33 + src/pmdk/src/tools/rpmemd/rpmemd_fip.c | 1216 ++++ src/pmdk/src/tools/rpmemd/rpmemd_fip.h | 37 + src/pmdk/src/tools/rpmemd/rpmemd_log.c | 250 + src/pmdk/src/tools/rpmemd/rpmemd_log.h | 75 + src/pmdk/src/tools/rpmemd/rpmemd_obc.c | 548 ++ src/pmdk/src/tools/rpmemd/rpmemd_obc.h | 39 + src/pmdk/src/tools/rpmemd/rpmemd_util.c | 119 + src/pmdk/src/tools/rpmemd/rpmemd_util.h | 13 + src/pmdk/src/windows/README | 19 + src/pmdk/src/windows/getopt/.cstyleignore | 2 + src/pmdk/src/windows/getopt/LICENSE.txt | 24 + src/pmdk/src/windows/getopt/README | 9 + src/pmdk/src/windows/getopt/getopt.c | 293 + src/pmdk/src/windows/getopt/getopt.h | 58 + src/pmdk/src/windows/getopt/getopt.vcxproj | 88 + src/pmdk/src/windows/getopt/getopt.vcxproj.filters | 23 + src/pmdk/src/windows/include/.cstyleignore | 1 + src/pmdk/src/windows/include/dirent.h | 6 + src/pmdk/src/windows/include/endian.h | 32 + src/pmdk/src/windows/include/err.h | 41 + src/pmdk/src/windows/include/features.h | 6 + src/pmdk/src/windows/include/libgen.h | 6 + src/pmdk/src/windows/include/linux/limits.h | 15 + src/pmdk/src/windows/include/platform.h | 226 + src/pmdk/src/windows/include/sched.h | 6 + src/pmdk/src/windows/include/strings.h | 6 + src/pmdk/src/windows/include/sys/file.h | 37 + src/pmdk/src/windows/include/sys/mman.h | 46 + src/pmdk/src/windows/include/sys/mount.h 
| 6 + src/pmdk/src/windows/include/sys/param.h | 23 + src/pmdk/src/windows/include/sys/resource.h | 6 + src/pmdk/src/windows/include/sys/statvfs.h | 6 + src/pmdk/src/windows/include/sys/uio.h | 23 + src/pmdk/src/windows/include/sys/wait.h | 6 + src/pmdk/src/windows/include/unistd.h | 136 + src/pmdk/src/windows/include/win_mmap.h | 81 + src/pmdk/src/windows/libs_debug.props | 34 + src/pmdk/src/windows/libs_release.props | 36 + src/pmdk/src/windows/srcversion/srcversion.vcxproj | 108 + src/pmdk/src/windows/win_mmap.c | 1132 ++++ src/pmdk/utils/.gitignore | 1 + src/pmdk/utils/CHECK_WHITESPACE.PS1 | 20 + src/pmdk/utils/CREATE-ZIP.PS1 | 83 + src/pmdk/utils/CSTYLE.ps1 | 37 + src/pmdk/utils/Makefile | 12 + src/pmdk/utils/README | 5 + src/pmdk/utils/SRCVERSION.ps1 | 158 + src/pmdk/utils/build-dpkg.sh | 836 +++ src/pmdk/utils/build-rpm.sh | 263 + src/pmdk/utils/check-area.sh | 74 + src/pmdk/utils/check-commit.sh | 50 + src/pmdk/utils/check-commits.sh | 43 + src/pmdk/utils/check-manpage | 62 + src/pmdk/utils/check-manpages | 16 + src/pmdk/utils/check-os.sh | 30 + src/pmdk/utils/check-shebang.sh | 30 + src/pmdk/utils/check_license/.gitignore | 1 + src/pmdk/utils/check_license/check-headers.sh | 192 + src/pmdk/utils/check_license/check-ms-license.pl | 62 + src/pmdk/utils/check_license/file-exceptions.sh | 7 + src/pmdk/utils/check_sdk_version.py | 76 + src/pmdk/utils/check_whitespace | 210 + src/pmdk/utils/copy-source.sh | 36 + src/pmdk/utils/cstyle | 1037 +++ ...ravis-fix-travisci_build_coverity_scan.sh.patch | 27 + src/pmdk/utils/docker/README | 19 + src/pmdk/utils/docker/build-CI.sh | 143 + src/pmdk/utils/docker/build-local.sh | 111 + src/pmdk/utils/docker/configure-tests.sh | 105 + ...ating-gcov-files-and-turn-off-verbose-log.patch | 37 + src/pmdk/utils/docker/images/Dockerfile.fedora-31 | 120 + .../utils/docker/images/Dockerfile.ubuntu-19.10 | 121 + src/pmdk/utils/docker/images/README | 6 + src/pmdk/utils/docker/images/build-image.sh | 53 + src/pmdk/utils/docker/images/download-scripts.sh | 32 + src/pmdk/utils/docker/images/install-libfabric.sh | 40 + src/pmdk/utils/docker/images/install-libndctl.sh | 60 + src/pmdk/utils/docker/images/install-valgrind.sh | 52 + src/pmdk/utils/docker/images/push-image.sh | 51 + src/pmdk/utils/docker/ppc64le.blacklist | 19 + src/pmdk/utils/docker/prepare-for-build.sh | 25 + src/pmdk/utils/docker/pull-or-rebuild-image.sh | 112 + src/pmdk/utils/docker/run-build-package.sh | 47 + src/pmdk/utils/docker/run-build.sh | 34 + src/pmdk/utils/docker/run-coverage.sh | 52 + src/pmdk/utils/docker/run-coverity.sh | 71 + src/pmdk/utils/docker/run-doc-update.sh | 76 + src/pmdk/utils/docker/set-ci-vars.sh | 96 + src/pmdk/utils/docker/set-vars.sh | 12 + src/pmdk/utils/docker/test_package/.gitignore | 1 + src/pmdk/utils/docker/test_package/Makefile | 21 + src/pmdk/utils/docker/test_package/README | 6 + src/pmdk/utils/docker/test_package/test_package.c | 41 + src/pmdk/utils/docker/valid-branches.sh | 12 + src/pmdk/utils/get_aliases.sh | 110 + src/pmdk/utils/git-years | 8 + src/pmdk/utils/libpmem.pc.in | 9 + src/pmdk/utils/libpmem2.pc.in | 9 + src/pmdk/utils/libpmemblk.pc.in | 9 + src/pmdk/utils/libpmemlog.pc.in | 9 + src/pmdk/utils/libpmemobj.pc.in | 10 + src/pmdk/utils/libpmempool.pc.in | 10 + src/pmdk/utils/librpmem.pc.in | 9 + src/pmdk/utils/magic-install.sh | 15 + src/pmdk/utils/magic-uninstall.sh | 21 + src/pmdk/utils/md2man.sh | 67 + src/pmdk/utils/os-banned | 63 + src/pmdk/utils/pkg-common.sh | 56 + src/pmdk/utils/pkg-config.sh | 17 + src/pmdk/utils/pmdk.magic | 15 + 
src/pmdk/utils/pmdk.spec.in | 710 +++ src/pmdk/utils/ps_analyze.ps1 | 31 + src/pmdk/utils/sort_solution | 128 + src/pmdk/utils/style_check.sh | 137 + src/pmdk/utils/version.sh | 62 + 563 files changed, 124242 insertions(+) create mode 100644 src/pmdk/.cirrus.yml create mode 100644 src/pmdk/.codecov.yml create mode 100644 src/pmdk/.gitattributes create mode 100644 src/pmdk/.github/ISSUE_TEMPLATE.md create mode 100644 src/pmdk/.github/ISSUE_TEMPLATE/bug_report.md create mode 100644 src/pmdk/.github/ISSUE_TEMPLATE/feature.md create mode 100644 src/pmdk/.github/ISSUE_TEMPLATE/question.md create mode 100644 src/pmdk/.github/workflows/coverity.yml create mode 100644 src/pmdk/.github/workflows/gha.yml create mode 100644 src/pmdk/.gitignore create mode 100644 src/pmdk/.mailmap create mode 100644 src/pmdk/.skip-doc create mode 100644 src/pmdk/.travis.yml create mode 100644 src/pmdk/CODING_STYLE.md create mode 100644 src/pmdk/CONTRIBUTING.md create mode 100644 src/pmdk/ChangeLog create mode 100644 src/pmdk/LICENSE create mode 100644 src/pmdk/Makefile create mode 100644 src/pmdk/README.md create mode 100644 src/pmdk/VERSION create mode 100644 src/pmdk/appveyor.yml create mode 100644 src/pmdk/res/PMDK.ico create mode 100644 src/pmdk/src/.clang-format create mode 100644 src/pmdk/src/.gitignore create mode 100644 src/pmdk/src/LongPath.manifest create mode 100644 src/pmdk/src/LongPathSupport.props create mode 100644 src/pmdk/src/Makefile create mode 100644 src/pmdk/src/Makefile.inc create mode 100644 src/pmdk/src/PMDK.sln create mode 100644 src/pmdk/src/README create mode 100644 src/pmdk/src/common.inc create mode 100644 src/pmdk/src/common/.cstyleignore create mode 100644 src/pmdk/src/common/Makefile create mode 100644 src/pmdk/src/common/bad_blocks.c create mode 100644 src/pmdk/src/common/badblocks.h create mode 100644 src/pmdk/src/common/common.rc create mode 100644 src/pmdk/src/common/ctl.c create mode 100644 src/pmdk/src/common/ctl.h create mode 100644 src/pmdk/src/common/ctl_cow.c create mode 100644 src/pmdk/src/common/ctl_fallocate.c create mode 100644 src/pmdk/src/common/ctl_global.h create mode 100644 src/pmdk/src/common/ctl_prefault.c create mode 100644 src/pmdk/src/common/ctl_sds.c create mode 100644 src/pmdk/src/common/dlsym.h create mode 100644 src/pmdk/src/common/file.c create mode 100644 src/pmdk/src/common/file.h create mode 100644 src/pmdk/src/common/file_posix.c create mode 100644 src/pmdk/src/common/file_windows.c create mode 100644 src/pmdk/src/common/libpmemcommon.vcxproj create mode 100644 src/pmdk/src/common/libpmemcommon.vcxproj.filters create mode 100644 src/pmdk/src/common/mmap.c create mode 100644 src/pmdk/src/common/mmap.h create mode 100644 src/pmdk/src/common/mmap_posix.c create mode 100644 src/pmdk/src/common/mmap_windows.c create mode 100644 src/pmdk/src/common/os_deep.h create mode 100644 src/pmdk/src/common/os_deep_linux.c create mode 100644 src/pmdk/src/common/os_deep_windows.c create mode 100644 src/pmdk/src/common/page_size.h create mode 100644 src/pmdk/src/common/pmemcommon.h create mode 100644 src/pmdk/src/common/pmemcommon.inc create mode 100644 src/pmdk/src/common/pool_hdr.c create mode 100644 src/pmdk/src/common/pool_hdr.h create mode 100644 src/pmdk/src/common/queue.h create mode 100644 src/pmdk/src/common/rand.c create mode 100644 src/pmdk/src/common/rand.h create mode 100644 src/pmdk/src/common/ravl.c create mode 100644 src/pmdk/src/common/ravl.h create mode 100644 src/pmdk/src/common/set.c create mode 100644 src/pmdk/src/common/set.h create mode 100644 
src/pmdk/src/common/set_badblocks.c create mode 100644 src/pmdk/src/common/set_badblocks.h create mode 100644 src/pmdk/src/common/shutdown_state.c create mode 100644 src/pmdk/src/common/shutdown_state.h create mode 100644 src/pmdk/src/common/sys_util.h create mode 100644 src/pmdk/src/common/util_pmem.h create mode 100644 src/pmdk/src/common/uuid.c create mode 100644 src/pmdk/src/common/uuid.h create mode 100644 src/pmdk/src/common/uuid_freebsd.c create mode 100644 src/pmdk/src/common/uuid_linux.c create mode 100644 src/pmdk/src/common/uuid_windows.c create mode 100644 src/pmdk/src/common/vec.h create mode 100644 src/pmdk/src/common/vecq.h create mode 100644 src/pmdk/src/core/Makefile create mode 100644 src/pmdk/src/core/alloc.c create mode 100644 src/pmdk/src/core/alloc.h create mode 100644 src/pmdk/src/core/errno_freebsd.h create mode 100644 src/pmdk/src/core/fault_injection.h create mode 100644 src/pmdk/src/core/fs.h create mode 100644 src/pmdk/src/core/fs_posix.c create mode 100644 src/pmdk/src/core/fs_windows.c create mode 100644 src/pmdk/src/core/libpmemcore.vcxproj create mode 100644 src/pmdk/src/core/libpmemcore.vcxproj.filters create mode 100644 src/pmdk/src/core/os.h create mode 100644 src/pmdk/src/core/os_posix.c create mode 100644 src/pmdk/src/core/os_thread.h create mode 100644 src/pmdk/src/core/os_thread_posix.c create mode 100644 src/pmdk/src/core/os_thread_windows.c create mode 100644 src/pmdk/src/core/os_windows.c create mode 100644 src/pmdk/src/core/out.c create mode 100644 src/pmdk/src/core/out.h create mode 100644 src/pmdk/src/core/pmemcore.h create mode 100644 src/pmdk/src/core/pmemcore.inc create mode 100644 src/pmdk/src/core/util.c create mode 100644 src/pmdk/src/core/util.h create mode 100644 src/pmdk/src/core/util_posix.c create mode 100644 src/pmdk/src/core/util_windows.c create mode 100644 src/pmdk/src/core/valgrind/.cstyleignore create mode 100644 src/pmdk/src/core/valgrind/README create mode 100644 src/pmdk/src/core/valgrind/drd.h create mode 100644 src/pmdk/src/core/valgrind/helgrind.h create mode 100644 src/pmdk/src/core/valgrind/memcheck.h create mode 100644 src/pmdk/src/core/valgrind/pmemcheck.h create mode 100644 src/pmdk/src/core/valgrind/valgrind.h create mode 100644 src/pmdk/src/core/valgrind_internal.h create mode 100644 src/pmdk/src/freebsd/README create mode 100644 src/pmdk/src/freebsd/include/endian.h create mode 100644 src/pmdk/src/freebsd/include/features.h create mode 100644 src/pmdk/src/freebsd/include/linux/kdev_t.h create mode 100644 src/pmdk/src/freebsd/include/linux/limits.h create mode 100644 src/pmdk/src/freebsd/include/sys/sysmacros.h create mode 100644 src/pmdk/src/include/.cstyleignore create mode 100644 src/pmdk/src/include/README create mode 100644 src/pmdk/src/include/libpmem.h create mode 100644 src/pmdk/src/include/libpmem2.h create mode 100644 src/pmdk/src/include/libpmemblk.h create mode 100644 src/pmdk/src/include/libpmemlog.h create mode 100644 src/pmdk/src/include/libpmemobj++/README.md create mode 100644 src/pmdk/src/include/libpmemobj++/detail/README.md create mode 100644 src/pmdk/src/include/libpmemobj.h create mode 100644 src/pmdk/src/include/libpmemobj/action.h create mode 100644 src/pmdk/src/include/libpmemobj/action_base.h create mode 100644 src/pmdk/src/include/libpmemobj/atomic.h create mode 100644 src/pmdk/src/include/libpmemobj/atomic_base.h create mode 100644 src/pmdk/src/include/libpmemobj/base.h create mode 100644 src/pmdk/src/include/libpmemobj/ctl.h create mode 100644 src/pmdk/src/include/libpmemobj/iterator.h 
create mode 100644 src/pmdk/src/include/libpmemobj/iterator_base.h create mode 100644 src/pmdk/src/include/libpmemobj/lists_atomic.h create mode 100644 src/pmdk/src/include/libpmemobj/lists_atomic_base.h create mode 100644 src/pmdk/src/include/libpmemobj/pool.h create mode 100644 src/pmdk/src/include/libpmemobj/pool_base.h create mode 100644 src/pmdk/src/include/libpmemobj/thread.h create mode 100644 src/pmdk/src/include/libpmemobj/tx.h create mode 100644 src/pmdk/src/include/libpmemobj/tx_base.h create mode 100644 src/pmdk/src/include/libpmemobj/types.h create mode 100644 src/pmdk/src/include/libpmempool.h create mode 100644 src/pmdk/src/include/librpmem.h create mode 100644 src/pmdk/src/include/pmemcompat.h create mode 100644 src/pmdk/src/libpmem/Makefile create mode 100644 src/pmdk/src/libpmem/libpmem.c create mode 100644 src/pmdk/src/libpmem/libpmem.def create mode 100644 src/pmdk/src/libpmem/libpmem.link.in create mode 100644 src/pmdk/src/libpmem/libpmem.rc create mode 100644 src/pmdk/src/libpmem/libpmem.vcxproj create mode 100644 src/pmdk/src/libpmem/libpmem.vcxproj.filters create mode 100644 src/pmdk/src/libpmem/libpmem_main.c create mode 100644 src/pmdk/src/libpmem/pmem.c create mode 100644 src/pmdk/src/libpmem/pmem.h create mode 100644 src/pmdk/src/libpmem/pmem_posix.c create mode 100644 src/pmdk/src/libpmem/pmem_windows.c create mode 100644 src/pmdk/src/libpmem2/Makefile create mode 100644 src/pmdk/src/libpmem2/aarch64/arm_cacheops.h create mode 100644 src/pmdk/src/libpmem2/aarch64/flags.inc create mode 100644 src/pmdk/src/libpmem2/aarch64/flush.h create mode 100644 src/pmdk/src/libpmem2/aarch64/init.c create mode 100644 src/pmdk/src/libpmem2/aarch64/sources.inc create mode 100644 src/pmdk/src/libpmem2/auto_flush.h create mode 100644 src/pmdk/src/libpmem2/auto_flush_linux.c create mode 100644 src/pmdk/src/libpmem2/auto_flush_none.c create mode 100644 src/pmdk/src/libpmem2/auto_flush_windows.c create mode 100644 src/pmdk/src/libpmem2/auto_flush_windows.h create mode 100644 src/pmdk/src/libpmem2/badblocks.c create mode 100644 src/pmdk/src/libpmem2/badblocks_ndctl.c create mode 100644 src/pmdk/src/libpmem2/badblocks_none.c create mode 100644 src/pmdk/src/libpmem2/config.c create mode 100644 src/pmdk/src/libpmem2/config.h create mode 100644 src/pmdk/src/libpmem2/deep_flush.c create mode 100644 src/pmdk/src/libpmem2/deep_flush.h create mode 100644 src/pmdk/src/libpmem2/deep_flush_linux.c create mode 100644 src/pmdk/src/libpmem2/deep_flush_other.c create mode 100644 src/pmdk/src/libpmem2/deep_flush_windows.c create mode 100644 src/pmdk/src/libpmem2/errormsg.c create mode 100644 src/pmdk/src/libpmem2/extent.h create mode 100644 src/pmdk/src/libpmem2/extent_linux.c create mode 100644 src/pmdk/src/libpmem2/extent_none.c create mode 100644 src/pmdk/src/libpmem2/libpmem2.c create mode 100644 src/pmdk/src/libpmem2/libpmem2.def create mode 100644 src/pmdk/src/libpmem2/libpmem2.link.in create mode 100644 src/pmdk/src/libpmem2/libpmem2.rc create mode 100644 src/pmdk/src/libpmem2/libpmem2.vcxproj create mode 100644 src/pmdk/src/libpmem2/libpmem2.vcxproj.filters create mode 100644 src/pmdk/src/libpmem2/libpmem2_main.c create mode 100644 src/pmdk/src/libpmem2/map.c create mode 100644 src/pmdk/src/libpmem2/map.h create mode 100644 src/pmdk/src/libpmem2/map_posix.c create mode 100644 src/pmdk/src/libpmem2/map_windows.c create mode 100644 src/pmdk/src/libpmem2/memops_generic.c create mode 100644 src/pmdk/src/libpmem2/persist.c create mode 100644 src/pmdk/src/libpmem2/persist.h create mode 100644 
src/pmdk/src/libpmem2/persist_posix.c create mode 100644 src/pmdk/src/libpmem2/persist_windows.c create mode 100644 src/pmdk/src/libpmem2/pmem2.h create mode 100644 src/pmdk/src/libpmem2/pmem2_arch.h create mode 100644 src/pmdk/src/libpmem2/pmem2_utils.c create mode 100644 src/pmdk/src/libpmem2/pmem2_utils.h create mode 100644 src/pmdk/src/libpmem2/pmem2_utils_linux.c create mode 100644 src/pmdk/src/libpmem2/pmem2_utils_ndctl.c create mode 100644 src/pmdk/src/libpmem2/pmem2_utils_none.c create mode 100644 src/pmdk/src/libpmem2/pmem2_utils_other.c create mode 100644 src/pmdk/src/libpmem2/ppc64/.cstyleignore create mode 100644 src/pmdk/src/libpmem2/ppc64/flags.inc create mode 100644 src/pmdk/src/libpmem2/ppc64/init.c create mode 100644 src/pmdk/src/libpmem2/ppc64/sources.inc create mode 100644 src/pmdk/src/libpmem2/ravl_interval.c create mode 100644 src/pmdk/src/libpmem2/ravl_interval.h create mode 100644 src/pmdk/src/libpmem2/region_namespace.h create mode 100644 src/pmdk/src/libpmem2/region_namespace_ndctl.c create mode 100644 src/pmdk/src/libpmem2/region_namespace_ndctl.h create mode 100644 src/pmdk/src/libpmem2/region_namespace_none.c create mode 100644 src/pmdk/src/libpmem2/source.c create mode 100644 src/pmdk/src/libpmem2/source.h create mode 100644 src/pmdk/src/libpmem2/source_posix.c create mode 100644 src/pmdk/src/libpmem2/source_windows.c create mode 100644 src/pmdk/src/libpmem2/usc_ndctl.c create mode 100644 src/pmdk/src/libpmem2/usc_none.c create mode 100644 src/pmdk/src/libpmem2/usc_windows.c create mode 100644 src/pmdk/src/libpmem2/vm_reservation.c create mode 100644 src/pmdk/src/libpmem2/vm_reservation.h create mode 100644 src/pmdk/src/libpmem2/vm_reservation_posix.c create mode 100644 src/pmdk/src/libpmem2/vm_reservation_windows.c create mode 100644 src/pmdk/src/libpmem2/x86_64/avx.h create mode 100644 src/pmdk/src/libpmem2/x86_64/cpu.c create mode 100644 src/pmdk/src/libpmem2/x86_64/cpu.h create mode 100644 src/pmdk/src/libpmem2/x86_64/flags.inc create mode 100644 src/pmdk/src/libpmem2/x86_64/flush.h create mode 100644 src/pmdk/src/libpmem2/x86_64/init.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx512f.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx512f.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_sse2.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_sse2.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx512f.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memcpy_memset.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_avx.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_avx512f.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx512f.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_nt_sse2.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_sse2.h create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx512f.c create mode 100644 src/pmdk/src/libpmem2/x86_64/memset/memset_t_sse2.c create mode 100644 src/pmdk/src/libpmem2/x86_64/sources.inc create mode 100644 
src/pmdk/src/libpmemblk/Makefile create mode 100644 src/pmdk/src/libpmemblk/blk.c create mode 100644 src/pmdk/src/libpmemblk/blk.h create mode 100644 src/pmdk/src/libpmemblk/btt.c create mode 100644 src/pmdk/src/libpmemblk/btt.h create mode 100644 src/pmdk/src/libpmemblk/btt_layout.h create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.c create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.def create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.link.in create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.rc create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.vcxproj create mode 100644 src/pmdk/src/libpmemblk/libpmemblk.vcxproj.filters create mode 100644 src/pmdk/src/libpmemblk/libpmemblk_main.c create mode 100644 src/pmdk/src/libpmemlog/Makefile create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.c create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.def create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.link.in create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.rc create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.vcxproj create mode 100644 src/pmdk/src/libpmemlog/libpmemlog.vcxproj.filters create mode 100644 src/pmdk/src/libpmemlog/libpmemlog_main.c create mode 100644 src/pmdk/src/libpmemlog/log.c create mode 100644 src/pmdk/src/libpmemlog/log.h create mode 100644 src/pmdk/src/libpmemobj/Makefile create mode 100644 src/pmdk/src/libpmemobj/alloc_class.c create mode 100644 src/pmdk/src/libpmemobj/alloc_class.h create mode 100644 src/pmdk/src/libpmemobj/bucket.c create mode 100644 src/pmdk/src/libpmemobj/bucket.h create mode 100644 src/pmdk/src/libpmemobj/container.h create mode 100644 src/pmdk/src/libpmemobj/container_ravl.c create mode 100644 src/pmdk/src/libpmemobj/container_ravl.h create mode 100644 src/pmdk/src/libpmemobj/container_seglists.c create mode 100644 src/pmdk/src/libpmemobj/container_seglists.h create mode 100644 src/pmdk/src/libpmemobj/critnib.c create mode 100644 src/pmdk/src/libpmemobj/critnib.h create mode 100644 src/pmdk/src/libpmemobj/ctl_debug.c create mode 100644 src/pmdk/src/libpmemobj/ctl_debug.h create mode 100644 src/pmdk/src/libpmemobj/heap.c create mode 100644 src/pmdk/src/libpmemobj/heap.h create mode 100644 src/pmdk/src/libpmemobj/heap_layout.h create mode 100644 src/pmdk/src/libpmemobj/lane.c create mode 100644 src/pmdk/src/libpmemobj/lane.h create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.c create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.def create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.link.in create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.rc create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.vcxproj create mode 100644 src/pmdk/src/libpmemobj/libpmemobj.vcxproj.filters create mode 100644 src/pmdk/src/libpmemobj/libpmemobj_main.c create mode 100644 src/pmdk/src/libpmemobj/list.c create mode 100644 src/pmdk/src/libpmemobj/list.h create mode 100644 src/pmdk/src/libpmemobj/memblock.c create mode 100644 src/pmdk/src/libpmemobj/memblock.h create mode 100644 src/pmdk/src/libpmemobj/memops.c create mode 100644 src/pmdk/src/libpmemobj/memops.h create mode 100644 src/pmdk/src/libpmemobj/obj.c create mode 100644 src/pmdk/src/libpmemobj/obj.h create mode 100644 src/pmdk/src/libpmemobj/palloc.c create mode 100644 src/pmdk/src/libpmemobj/palloc.h create mode 100644 src/pmdk/src/libpmemobj/pmalloc.c create mode 100644 src/pmdk/src/libpmemobj/pmalloc.h create mode 100644 src/pmdk/src/libpmemobj/pmemops.h create mode 100644 src/pmdk/src/libpmemobj/recycler.c create mode 100644 src/pmdk/src/libpmemobj/recycler.h create mode 
100644 src/pmdk/src/libpmemobj/stats.c create mode 100644 src/pmdk/src/libpmemobj/stats.h create mode 100644 src/pmdk/src/libpmemobj/sync.c create mode 100644 src/pmdk/src/libpmemobj/sync.h create mode 100644 src/pmdk/src/libpmemobj/tx.c create mode 100644 src/pmdk/src/libpmemobj/tx.h create mode 100644 src/pmdk/src/libpmemobj/ulog.c create mode 100644 src/pmdk/src/libpmemobj/ulog.h create mode 100644 src/pmdk/src/libpmempool/Makefile create mode 100644 src/pmdk/src/libpmempool/check.c create mode 100644 src/pmdk/src/libpmempool/check.h create mode 100644 src/pmdk/src/libpmempool/check_backup.c create mode 100644 src/pmdk/src/libpmempool/check_bad_blocks.c create mode 100644 src/pmdk/src/libpmempool/check_blk.c create mode 100644 src/pmdk/src/libpmempool/check_btt_info.c create mode 100644 src/pmdk/src/libpmempool/check_btt_map_flog.c create mode 100644 src/pmdk/src/libpmempool/check_log.c create mode 100644 src/pmdk/src/libpmempool/check_pool_hdr.c create mode 100644 src/pmdk/src/libpmempool/check_sds.c create mode 100644 src/pmdk/src/libpmempool/check_util.c create mode 100644 src/pmdk/src/libpmempool/check_util.h create mode 100644 src/pmdk/src/libpmempool/check_write.c create mode 100644 src/pmdk/src/libpmempool/feature.c create mode 100644 src/pmdk/src/libpmempool/libpmempool.c create mode 100644 src/pmdk/src/libpmempool/libpmempool.def create mode 100644 src/pmdk/src/libpmempool/libpmempool.link.in create mode 100644 src/pmdk/src/libpmempool/libpmempool.rc create mode 100644 src/pmdk/src/libpmempool/libpmempool.vcxproj create mode 100644 src/pmdk/src/libpmempool/libpmempool.vcxproj.filters create mode 100644 src/pmdk/src/libpmempool/libpmempool_main.c create mode 100644 src/pmdk/src/libpmempool/pmempool.h create mode 100644 src/pmdk/src/libpmempool/pool.c create mode 100644 src/pmdk/src/libpmempool/pool.h create mode 100644 src/pmdk/src/libpmempool/replica.c create mode 100644 src/pmdk/src/libpmempool/replica.h create mode 100644 src/pmdk/src/libpmempool/rm.c create mode 100644 src/pmdk/src/libpmempool/sync.c create mode 100644 src/pmdk/src/libpmempool/transform.c create mode 100644 src/pmdk/src/librpmem/Makefile create mode 100644 src/pmdk/src/librpmem/README create mode 100644 src/pmdk/src/librpmem/librpmem.c create mode 100644 src/pmdk/src/librpmem/librpmem.link.in create mode 100644 src/pmdk/src/librpmem/rpmem.c create mode 100644 src/pmdk/src/librpmem/rpmem.h create mode 100644 src/pmdk/src/librpmem/rpmem_cmd.c create mode 100644 src/pmdk/src/librpmem/rpmem_cmd.h create mode 100644 src/pmdk/src/librpmem/rpmem_fip.c create mode 100644 src/pmdk/src/librpmem/rpmem_fip.h create mode 100644 src/pmdk/src/librpmem/rpmem_obc.c create mode 100644 src/pmdk/src/librpmem/rpmem_obc.h create mode 100644 src/pmdk/src/librpmem/rpmem_ssh.c create mode 100644 src/pmdk/src/librpmem/rpmem_ssh.h create mode 100644 src/pmdk/src/librpmem/rpmem_util.c create mode 100644 src/pmdk/src/librpmem/rpmem_util.h create mode 100644 src/pmdk/src/libvmem/README.md create mode 100644 src/pmdk/src/libvmmalloc/README.md create mode 100644 src/pmdk/src/rpmem_common/Makefile create mode 100644 src/pmdk/src/rpmem_common/rpmem_common.c create mode 100644 src/pmdk/src/rpmem_common/rpmem_common.h create mode 100644 src/pmdk/src/rpmem_common/rpmem_common_log.h create mode 100644 src/pmdk/src/rpmem_common/rpmem_fip_common.c create mode 100644 src/pmdk/src/rpmem_common/rpmem_fip_common.h create mode 100644 src/pmdk/src/rpmem_common/rpmem_fip_lane.h create mode 100644 src/pmdk/src/rpmem_common/rpmem_fip_msg.h create mode 
100644 src/pmdk/src/rpmem_common/rpmem_proto.h create mode 100644 src/pmdk/src/tools/.gitignore create mode 100644 src/pmdk/src/tools/Makefile create mode 100644 src/pmdk/src/tools/Makefile.inc create mode 100644 src/pmdk/src/tools/daxio/.gitignore create mode 100644 src/pmdk/src/tools/daxio/Makefile create mode 100644 src/pmdk/src/tools/daxio/README create mode 100644 src/pmdk/src/tools/daxio/daxio.c create mode 100644 src/pmdk/src/tools/pmempool/.gitignore create mode 100644 src/pmdk/src/tools/pmempool/Makefile create mode 100644 src/pmdk/src/tools/pmempool/README create mode 100644 src/pmdk/src/tools/pmempool/bash_completion/pmempool create mode 100644 src/pmdk/src/tools/pmempool/check.c create mode 100644 src/pmdk/src/tools/pmempool/check.h create mode 100644 src/pmdk/src/tools/pmempool/common.c create mode 100644 src/pmdk/src/tools/pmempool/common.h create mode 100644 src/pmdk/src/tools/pmempool/convert.c create mode 100644 src/pmdk/src/tools/pmempool/convert.h create mode 100644 src/pmdk/src/tools/pmempool/create.c create mode 100644 src/pmdk/src/tools/pmempool/create.h create mode 100644 src/pmdk/src/tools/pmempool/dump.c create mode 100644 src/pmdk/src/tools/pmempool/dump.h create mode 100644 src/pmdk/src/tools/pmempool/feature.c create mode 100644 src/pmdk/src/tools/pmempool/feature.h create mode 100644 src/pmdk/src/tools/pmempool/info.c create mode 100644 src/pmdk/src/tools/pmempool/info.h create mode 100644 src/pmdk/src/tools/pmempool/info_blk.c create mode 100644 src/pmdk/src/tools/pmempool/info_log.c create mode 100644 src/pmdk/src/tools/pmempool/info_obj.c create mode 100644 src/pmdk/src/tools/pmempool/output.c create mode 100644 src/pmdk/src/tools/pmempool/output.h create mode 100644 src/pmdk/src/tools/pmempool/pmempool.c create mode 100644 src/pmdk/src/tools/pmempool/pmempool.rc create mode 100644 src/pmdk/src/tools/pmempool/pmempool.vcxproj create mode 100644 src/pmdk/src/tools/pmempool/pmempool.vcxproj.filters create mode 100644 src/pmdk/src/tools/pmempool/rm.c create mode 100644 src/pmdk/src/tools/pmempool/rm.h create mode 100644 src/pmdk/src/tools/pmempool/synchronize.c create mode 100644 src/pmdk/src/tools/pmempool/synchronize.h create mode 100644 src/pmdk/src/tools/pmempool/transform.c create mode 100644 src/pmdk/src/tools/pmempool/transform.h create mode 100644 src/pmdk/src/tools/pmreorder/.gitignore create mode 100644 src/pmdk/src/tools/pmreorder/Makefile create mode 100644 src/pmdk/src/tools/pmreorder/binaryoutputhandler.py create mode 100644 src/pmdk/src/tools/pmreorder/consistencycheckwrap.py create mode 100644 src/pmdk/src/tools/pmreorder/loggingfacility.py create mode 100644 src/pmdk/src/tools/pmreorder/markerparser.py create mode 100644 src/pmdk/src/tools/pmreorder/memoryoperations.py create mode 100644 src/pmdk/src/tools/pmreorder/operationfactory.py create mode 100644 src/pmdk/src/tools/pmreorder/opscontext.py create mode 100644 src/pmdk/src/tools/pmreorder/pmreorder.py create mode 100644 src/pmdk/src/tools/pmreorder/reorderengines.py create mode 100644 src/pmdk/src/tools/pmreorder/reorderexceptions.py create mode 100644 src/pmdk/src/tools/pmreorder/statemachine.py create mode 100644 src/pmdk/src/tools/pmreorder/utils.py create mode 100644 src/pmdk/src/tools/rpmemd/.gitignore create mode 100644 src/pmdk/src/tools/rpmemd/Makefile create mode 100644 src/pmdk/src/tools/rpmemd/README create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_config.c create mode 
100644 src/pmdk/src/tools/rpmemd/rpmemd_config.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_db.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_db.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_fip.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_fip.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_log.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_log.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_obc.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_obc.h create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_util.c create mode 100644 src/pmdk/src/tools/rpmemd/rpmemd_util.h create mode 100644 src/pmdk/src/windows/README create mode 100644 src/pmdk/src/windows/getopt/.cstyleignore create mode 100644 src/pmdk/src/windows/getopt/LICENSE.txt create mode 100644 src/pmdk/src/windows/getopt/README create mode 100644 src/pmdk/src/windows/getopt/getopt.c create mode 100644 src/pmdk/src/windows/getopt/getopt.h create mode 100644 src/pmdk/src/windows/getopt/getopt.vcxproj create mode 100644 src/pmdk/src/windows/getopt/getopt.vcxproj.filters create mode 100644 src/pmdk/src/windows/include/.cstyleignore create mode 100644 src/pmdk/src/windows/include/dirent.h create mode 100644 src/pmdk/src/windows/include/endian.h create mode 100644 src/pmdk/src/windows/include/err.h create mode 100644 src/pmdk/src/windows/include/features.h create mode 100644 src/pmdk/src/windows/include/libgen.h create mode 100644 src/pmdk/src/windows/include/linux/limits.h create mode 100644 src/pmdk/src/windows/include/platform.h create mode 100644 src/pmdk/src/windows/include/sched.h create mode 100644 src/pmdk/src/windows/include/strings.h create mode 100644 src/pmdk/src/windows/include/sys/file.h create mode 100644 src/pmdk/src/windows/include/sys/mman.h create mode 100644 src/pmdk/src/windows/include/sys/mount.h create mode 100644 src/pmdk/src/windows/include/sys/param.h create mode 100644 src/pmdk/src/windows/include/sys/resource.h create mode 100644 src/pmdk/src/windows/include/sys/statvfs.h create mode 100644 src/pmdk/src/windows/include/sys/uio.h create mode 100644 src/pmdk/src/windows/include/sys/wait.h create mode 100644 src/pmdk/src/windows/include/unistd.h create mode 100644 src/pmdk/src/windows/include/win_mmap.h create mode 100644 src/pmdk/src/windows/libs_debug.props create mode 100644 src/pmdk/src/windows/libs_release.props create mode 100644 src/pmdk/src/windows/srcversion/srcversion.vcxproj create mode 100644 src/pmdk/src/windows/win_mmap.c create mode 100644 src/pmdk/utils/.gitignore create mode 100644 src/pmdk/utils/CHECK_WHITESPACE.PS1 create mode 100644 src/pmdk/utils/CREATE-ZIP.PS1 create mode 100644 src/pmdk/utils/CSTYLE.ps1 create mode 100644 src/pmdk/utils/Makefile create mode 100644 src/pmdk/utils/README create mode 100644 src/pmdk/utils/SRCVERSION.ps1 create mode 100755 src/pmdk/utils/build-dpkg.sh create mode 100755 src/pmdk/utils/build-rpm.sh create mode 100755 src/pmdk/utils/check-area.sh create mode 100755 src/pmdk/utils/check-commit.sh create mode 100755 src/pmdk/utils/check-commits.sh create mode 100755 src/pmdk/utils/check-manpage create mode 100755 src/pmdk/utils/check-manpages create mode 100755 src/pmdk/utils/check-os.sh create mode 100755 src/pmdk/utils/check-shebang.sh create mode 100644 src/pmdk/utils/check_license/.gitignore create mode 100755 src/pmdk/utils/check_license/check-headers.sh create mode 100755 src/pmdk/utils/check_license/check-ms-license.pl create mode 100755 src/pmdk/utils/check_license/file-exceptions.sh create mode 100755 
src/pmdk/utils/check_sdk_version.py create mode 100755 src/pmdk/utils/check_whitespace create mode 100755 src/pmdk/utils/copy-source.sh create mode 100755 src/pmdk/utils/cstyle create mode 100644 src/pmdk/utils/docker/0001-travis-fix-travisci_build_coverity_scan.sh.patch create mode 100644 src/pmdk/utils/docker/README create mode 100755 src/pmdk/utils/docker/build-CI.sh create mode 100755 src/pmdk/utils/docker/build-local.sh create mode 100755 src/pmdk/utils/docker/configure-tests.sh create mode 100644 src/pmdk/utils/docker/images/0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch create mode 100644 src/pmdk/utils/docker/images/Dockerfile.fedora-31 create mode 100644 src/pmdk/utils/docker/images/Dockerfile.ubuntu-19.10 create mode 100644 src/pmdk/utils/docker/images/README create mode 100755 src/pmdk/utils/docker/images/build-image.sh create mode 100755 src/pmdk/utils/docker/images/download-scripts.sh create mode 100755 src/pmdk/utils/docker/images/install-libfabric.sh create mode 100755 src/pmdk/utils/docker/images/install-libndctl.sh create mode 100755 src/pmdk/utils/docker/images/install-valgrind.sh create mode 100755 src/pmdk/utils/docker/images/push-image.sh create mode 100644 src/pmdk/utils/docker/ppc64le.blacklist create mode 100755 src/pmdk/utils/docker/prepare-for-build.sh create mode 100755 src/pmdk/utils/docker/pull-or-rebuild-image.sh create mode 100755 src/pmdk/utils/docker/run-build-package.sh create mode 100755 src/pmdk/utils/docker/run-build.sh create mode 100755 src/pmdk/utils/docker/run-coverage.sh create mode 100755 src/pmdk/utils/docker/run-coverity.sh create mode 100755 src/pmdk/utils/docker/run-doc-update.sh create mode 100755 src/pmdk/utils/docker/set-ci-vars.sh create mode 100755 src/pmdk/utils/docker/set-vars.sh create mode 100644 src/pmdk/utils/docker/test_package/.gitignore create mode 100644 src/pmdk/utils/docker/test_package/Makefile create mode 100644 src/pmdk/utils/docker/test_package/README create mode 100644 src/pmdk/utils/docker/test_package/test_package.c create mode 100755 src/pmdk/utils/docker/valid-branches.sh create mode 100755 src/pmdk/utils/get_aliases.sh create mode 100755 src/pmdk/utils/git-years create mode 100644 src/pmdk/utils/libpmem.pc.in create mode 100644 src/pmdk/utils/libpmem2.pc.in create mode 100644 src/pmdk/utils/libpmemblk.pc.in create mode 100644 src/pmdk/utils/libpmemlog.pc.in create mode 100644 src/pmdk/utils/libpmemobj.pc.in create mode 100644 src/pmdk/utils/libpmempool.pc.in create mode 100644 src/pmdk/utils/librpmem.pc.in create mode 100644 src/pmdk/utils/magic-install.sh create mode 100644 src/pmdk/utils/magic-uninstall.sh create mode 100755 src/pmdk/utils/md2man.sh create mode 100644 src/pmdk/utils/os-banned create mode 100644 src/pmdk/utils/pkg-common.sh create mode 100644 src/pmdk/utils/pkg-config.sh create mode 100644 src/pmdk/utils/pmdk.magic create mode 100644 src/pmdk/utils/pmdk.spec.in create mode 100644 src/pmdk/utils/ps_analyze.ps1 create mode 100755 src/pmdk/utils/sort_solution create mode 100755 src/pmdk/utils/style_check.sh create mode 100755 src/pmdk/utils/version.sh
(limited to 'src/pmdk')

diff --git a/src/pmdk/.cirrus.yml b/src/pmdk/.cirrus.yml
new file mode 100644
index 000000000..caabe833e
--- /dev/null
+++ b/src/pmdk/.cirrus.yml
@@ -0,0 +1,10 @@
+freebsd_instance:
+  image: freebsd-12-1-release-amd64
+
+task:
+  install_script: ASSUME_ALWAYS_YES=yes pkg bootstrap -f;
+    pkg install -y
+    autoconf bash binutils coreutils e2fsprogs-libuuid
+    git gmake libunwind ncurses pkgconf hs-pandoc
+
+  script: CFLAGS="-Wno-unused-value" gmake
\ No newline at end of file
diff --git a/src/pmdk/.codecov.yml b/src/pmdk/.codecov.yml
new file mode 100644
index 000000000..48ec0af20
--- /dev/null
+++ b/src/pmdk/.codecov.yml
@@ -0,0 +1,18 @@
+ignore:
+  - src/windows/
+  - src/test/
+  - src/common/valgrind/
+  - src/benchmarks/
+
+comment:
+  layout: "diff"
+  behavior: default
+  require_changes: yes
+
+parsers:
+  gcov:
+    branch_detection:
+      conditional: false
+      loop: false
+      method: false
+      macro: false
diff --git a/src/pmdk/.gitattributes b/src/pmdk/.gitattributes
new file mode 100644
index 000000000..5a466f795
--- /dev/null
+++ b/src/pmdk/.gitattributes
@@ -0,0 +1,7 @@
+* text=auto eol=lf
+*.jpg binary
+*.png binary
+*.gif binary
+*.ico binary
+*.match text -whitespace
+GIT_VERSION export-subst
diff --git a/src/pmdk/.github/ISSUE_TEMPLATE.md b/src/pmdk/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 000000000..ecf4d6168
--- /dev/null
+++ b/src/pmdk/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,28 @@
+# GENERAL ISSUE:
+
+## Bug Report
+
+- PMDK package version(s):
+- OS(es) version(s):
+- ndctl version(s):
+- kernel version(s):
+- compiler, libraries, packaging and other related tools version(s):
+
+
+## Describe the issue:
+
+
+
+## Actual behavior:
+
+
+
+## Expected behavior:
+
+
+
+## Additional information about Priority and Help Requested:
+
+Are you willing to submit a pull request with a proposed change? (Yes, No)
+
+Requested priority: (Showstopper, High, Medium, Low)
diff --git a/src/pmdk/.github/ISSUE_TEMPLATE/bug_report.md b/src/pmdk/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 000000000..528efcafa
--- /dev/null
+++ b/src/pmdk/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,49 @@
+---
+name: Bug report
+about: Did you find a bug in PMDK? Please let us know.
+labels: "Type: Bug"
+---
+
+
+# ISSUE:
+
+## Environment Information
+
+- PMDK package version(s):
+- OS(es) version(s):
+- ndctl version(s):
+- kernel version(s):
+- compiler, libraries, packaging and other related tools version(s):
+
+
+## Please provide a reproduction of the bug:
+
+
+
+## How often is the bug revealed: (always, often, rare):
+
+
+
+## Actual behavior:
+
+
+
+## Expected behavior:
+
+
+
+## Details
+
+
+
+## Additional information about Priority and Help Requested:
+
+Are you willing to submit a pull request with a proposed change? (Yes, No)
+
+Requested priority: (Showstopper, High, Medium, Low)
diff --git a/src/pmdk/.github/ISSUE_TEMPLATE/feature.md b/src/pmdk/.github/ISSUE_TEMPLATE/feature.md
new file mode 100644
index 000000000..12d6e406b
--- /dev/null
+++ b/src/pmdk/.github/ISSUE_TEMPLATE/feature.md
@@ -0,0 +1,26 @@
+---
+name: Feature
+about: Request a new feature.
+labels: "Type: Feature"
+---
+# FEAT:
+
+## Rationale
+
+
+
+## Description
+
+
+
+## API Changes
+
+
+
+## Implementation details
+
+
+
+## Meta
+
+
diff --git a/src/pmdk/.github/ISSUE_TEMPLATE/question.md b/src/pmdk/.github/ISSUE_TEMPLATE/question.md
new file mode 100644
index 000000000..3e4e452f9
--- /dev/null
+++ b/src/pmdk/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,15 @@
+---
+name: Question
+about: Do you have a question regarding PMDK? Don't hesitate to ask.
+labels: "Type: Question" +--- +# QUESTION: + +## Details + + + + diff --git a/src/pmdk/.github/workflows/coverity.yml b/src/pmdk/.github/workflows/coverity.yml new file mode 100644 index 000000000..f69783b93 --- /dev/null +++ b/src/pmdk/.github/workflows/coverity.yml @@ -0,0 +1,41 @@ + +name: Coverity + +on: + schedule: + # run this job at 00:00 UTC every day + - cron: '0 0 * * *' + +env: + GITHUB_REPO: pmem/pmdk + DOCKERHUB_REPO: pmem/pmdk + +jobs: + linux: + name: Linux + runs-on: ubuntu-latest + env: + COVERITY_SCAN_NOTIFICATION_EMAIL: ${{ secrets.COVERITY_SCAN_NOTIFICATION_EMAIL }} + COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} + HOST_WORKDIR: /home/runner/work/pmdk/pmdk + WORKDIR: utils/docker + PMDK_CC: gcc + PMDK_CXX: g++ + MAKE_PKG: 0 + REMOTE_TESTS: 1 + VALGRIND: 1 + strategy: + matrix: + CONFIG: ["COVERITY=1 OS=ubuntu OS_VER=19.10"] + steps: + - name: Print out the current date and time + run: date + + - name: Clone the git repo + uses: actions/checkout@v2 + + - name: Pull or rebuild the image + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh + + - name: Run the build + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build-CI.sh diff --git a/src/pmdk/.github/workflows/gha.yml b/src/pmdk/.github/workflows/gha.yml new file mode 100644 index 000000000..402d4f878 --- /dev/null +++ b/src/pmdk/.github/workflows/gha.yml @@ -0,0 +1,155 @@ + +name: PMDK +on: [push, pull_request] + +env: + GITHUB_REPO: pmem/pmdk + DOCKERHUB_REPO: pmem/pmdk + +jobs: + linux: + name: Linux + runs-on: ubuntu-latest + env: + DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }} + DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }} + DOC_UPDATE_GITHUB_TOKEN: ${{ secrets.DOC_UPDATE_GITHUB_TOKEN }} + HOST_WORKDIR: /home/runner/work/pmdk/pmdk + WORKDIR: utils/docker + PMDK_CC: gcc + PMDK_CXX: g++ + MAKE_PKG: 0 + REMOTE_TESTS: 1 + VALGRIND: 1 + SRC_CHECKERS: 0 + strategy: + matrix: + CONFIG: ["N=1 OS=ubuntu OS_VER=19.10 FAULT_INJECTION=1 TEST_BUILD=debug", + "N=2 OS=ubuntu OS_VER=19.10 FAULT_INJECTION=1 TEST_BUILD=nondebug UBSAN=1", + "N=3 OS=ubuntu OS_VER=19.10 PMDK_CC=clang PMDK_CXX=clang++ TEST_BUILD=debug SRC_CHECKERS=1", + "N=4 OS=ubuntu OS_VER=19.10 PMDK_CC=clang PMDK_CXX=clang++ TEST_BUILD=nondebug", + "N=5 OS=fedora OS_VER=31 PMDK_CC=clang PMDK_CXX=clang++ TEST_BUILD=debug", + "N=6 OS=fedora OS_VER=31 PMDK_CC=clang PMDK_CXX=clang++ TEST_BUILD=nondebug AUTO_DOC_UPDATE=1", + "N=7 OS=fedora OS_VER=31 MAKE_PKG=1 EXPERIMENTAL=y REMOTE_TESTS=0 VALGRIND=0 PUSH_IMAGE=1", + "N=8 OS=ubuntu OS_VER=19.10 MAKE_PKG=1 EXPERIMENTAL=y REMOTE_TESTS=0 VALGRIND=0 NDCTL_ENABLE=n PUSH_IMAGE=1", + "N=9 OS=ubuntu OS_VER=19.10 MAKE_PKG=1 EXPERIMENTAL=y REMOTE_TESTS=0 VALGRIND=0 NDCTL_ENABLE=n PMDK_CC=clang PMDK_CXX=clang++", + "N=10 OS=ubuntu OS_VER=19.10 COVERAGE=1 FAULT_INJECTION=1 TEST_BUILD=debug"] + steps: + - name: Clone the git repo + uses: actions/checkout@v2 + with: + fetch-depth: 50 + + - name: Pull or rebuild the image + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh + + - name: Run the build + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build-CI.sh + + - name: Push the image + run: cd $WORKDIR && source ./set-vars.sh && ${{ matrix.CONFIG }} /bin/bash -c "if [[ -f ${CI_FILE_PUSH_IMAGE_TO_REPO} ]]; then images/push-image.sh; fi" + + windows: + name: Windows + runs-on: windows-latest + env: + platform: x64 + solutionname: PMDK.sln + ex_solutionname: Examples.sln + msbuild: "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Enterprise\\MSBuild\\Current\\Bin" + # Platform 
Toolset for Visual Studio 2019 + platform_toolset: "v142" + perl: "C:\\Strawberry\\perl\\bin" + strategy: + matrix: + CONFIG: [Debug, Release] + steps: + - name: Update Path + run: | + echo "::add-path::$Env:msbuild" + echo "::add-path::$Env:perl" + + - name: Clone the git repo + uses: actions/checkout@v2 + + - name: Unshallow it + run: git fetch --prune --unshallow + + - name: Various debug checks (cstyle, whitespace etc.) + run: | + if ("${{ matrix.CONFIG }}" -eq "Release") + { + Install-Module PsScriptAnalyzer -Force + utils/CSTYLE.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + utils/CHECK_WHITESPACE.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + utils/ps_analyze.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + perl utils/sort_solution check + if ($LASTEXITCODE -ne 0) { + exit 1 + } + ./utils/check_sdk_version.py -d . + if ($LASTEXITCODE -ne 0) { + exit 1 + } + } + + - name: Build + run: | + msbuild src\$Env:solutionname -property:Configuration=${{ matrix.CONFIG }},PlatformToolset=$Env:platform_toolset -m -v:m + msbuild src\examples\$Env:ex_solutionname -property:Configuration=${{ matrix.CONFIG }},PlatformToolset=$Env:platform_toolset -m -v:m + + - name: Create ZIP archive + run: utils/CREATE-ZIP.ps1 -b ${{ matrix.CONFIG }} + + - name: Run tests + shell: powershell + run: | + if ($true) { + cd src\test + echo "`$Env:NON_PMEM_FS_DIR = `"C:\temp`"" >> testconfig.ps1 + echo "`$Env:PMEM_FS_DIR = `"C:\temp`"" >> testconfig.ps1 + echo "`$Env:PMEM_FS_DIR_FORCE_PMEM = `"1`"" >> testconfig.ps1 + echo "`$Env:PMDK_NO_ABORT_MSG = `"1`"" >> testconfig.ps1 + echo "`$Env:TM = `"1`"" >> testconfig.ps1 + write-output "config = { + 'unittest_log_level': 1, + 'cacheline_fs_dir': 'C:\\temp', + 'force_cacheline': True, + 'page_fs_dir': 'C:\\temp', + 'force_page': False, + 'byte_fs_dir': 'C:\\temp', + 'force_byte': True, + 'tm': True, + 'test_type': 'check', + 'granularity': 'all', + 'fs_dir_force_pmem': 1, + 'keep_going': False, + 'timeout': '4m', + 'build': 'debug', + 'force_enable': None, + 'fail_on_skip': False, + 'enable_admin_tests': False, + }" | out-file "testconfig.py" -encoding utf8 + + if ("${{ matrix.CONFIG }}" -eq "Debug") { + ./RUNTESTS.ps1 -b debug -o 4m + if ($?) { + python ./RUNTESTS.py -b debug + } + } + if ("${{ matrix.CONFIG }}" -eq "Release") { + ./RUNTESTS.ps1 -b nondebug -o 4m + if ($?) 
{ + python ./RUNTESTS.py -b release + } + } + } diff --git a/src/pmdk/.gitignore b/src/pmdk/.gitignore new file mode 100644 index 000000000..20aa349e8 --- /dev/null +++ b/src/pmdk/.gitignore @@ -0,0 +1,31 @@ +.* +!.github +!.gitignore +!.gitattributes +!.cirrus.yml +!.clang-format +!.travis.yml +!.mailmap +!.cstyleignore +!.codecov.yml + +*~ +*.swp +*.o +make.out +core +a.out +nbproject/ +/rpmbuild/ +/dpkgbuild/ +/rpm/ +/dpkg/ +/user.mk +*.user +~* +*.db +*.htmp +*.hpptmp +*.aps +tags +*.link diff --git a/src/pmdk/.mailmap b/src/pmdk/.mailmap new file mode 100644 index 000000000..50848d517 --- /dev/null +++ b/src/pmdk/.mailmap @@ -0,0 +1,29 @@ +Daria Lewandowska +Gábor Buella +Grzegorz Brzeziński +Hu Wan +Igor Chorążewicz +Jacob Chang +Jan M Michalski +Kamil Diedrich +Kamil Diedrich +Krzysztof Czuryło +Lukasz Dorau +Lukasz Dorau +Łukasz Godlewski +Łukasz Godlewski +Łukasz Plewa +Łukasz Stolarczuk +Łukasz Stolarczuk +Maciej Ramotowski +Michał Biesek +Oksana Sałyk +Oksana Sałyk +Paul Luse +Paweł Lebioda +Piotr Balcer +Sławomir Pawłowski +Tomasz Kapela +Weronika Lewandowska +Weronika Lewandowska +Wojciech Uss diff --git a/src/pmdk/.skip-doc b/src/pmdk/.skip-doc new file mode 100644 index 000000000..e69de29bb diff --git a/src/pmdk/.travis.yml b/src/pmdk/.travis.yml new file mode 100644 index 000000000..caa2313e2 --- /dev/null +++ b/src/pmdk/.travis.yml @@ -0,0 +1,42 @@ +os: linux +dist: bionic +arch: + - ppc64le + +language: c + +services: + - docker + +env: + global: + - GITHUB_REPO=pmem/pmdk + - DOCKERHUB_REPO=pmem/pmdk + - OS=ubuntu + - OS_VER=19.10 + - MAKE_PKG=0 + - PMDK_CC=gcc + - PMDK_CXX=g++ + - REMOTE_TESTS=1 + - VALGRIND=1 + - SRC_CHECKERS=0 + - EXPERIMENTAL=n + jobs: + - FAULT_INJECTION=1 TEST_BUILD=debug PUSH_IMAGE=1 + - OS=fedora OS_VER=31 PMDK_CC=clang PMDK_CXX=clang++ TEST_BUILD=nondebug PUSH_IMAGE=1 + - MAKE_PKG=1 REMOTE_TESTS=0 VALGRIND=0 + - MAKE_PKG=1 REMOTE_TESTS=0 VALGRIND=0 OS=fedora OS_VER=31 + - COVERAGE=1 FAULT_INJECTION=1 TEST_BUILD=debug + +before_install: + - echo $TRAVIS_COMMIT_RANGE + - export HOST_WORKDIR=`pwd` + - cd utils/docker + - ./pull-or-rebuild-image.sh + +script: + - ./build-CI.sh + +after_success: + - source ./set-vars.sh + - if [[ -f $CI_FILE_PUSH_IMAGE_TO_REPO ]]; then ./images/push-image.sh; fi diff --git a/src/pmdk/CODING_STYLE.md b/src/pmdk/CODING_STYLE.md new file mode 100644 index 000000000..9b0cfa736 --- /dev/null +++ b/src/pmdk/CODING_STYLE.md @@ -0,0 +1,140 @@ +# C Style and Coding Standards for Persistent Memory Development Kit + +This document defines the coding standards and conventions for writing +PMDK code. To ensure readability and consistency within the code, +the contributed code must adhere to the rules below. + +### Introduction +The Persistent Memory Development Kit coding style is quite similar to the style +used for the SunOS product. +A full description of that standard can be found +[here.](https://www.cis.upenn.edu/~lee/06cse480/data/cstyle.ms.pdf) + +This document does not cover the entire set of recommendations and formatting rules +used in writing PMDK code, but rather focuses on some PMDK-specific conventions, +not described in the document mentioned above, as well as the ones the violation +of which is most frequently observed during the code review. +Also, keep in mind that more important than the particular style is **consistency** +of coding style. So, when modifying the existing code, the changes should be +coded in the same style as the file being modified. 
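As a brief, non-authoritative illustration of the conventions spelled out in the sections below (tab indentation, a short description comment preceding each function, `static` for functions with local scope, the `*` placed next to the variable name, and the `XXX` tag for known shortcomings), here is a minimal hypothetical helper; the identifiers are invented for this example only:

```c
/*
 * util_count_used -- (internal) count the non-NULL entries in a table
 */
static unsigned
util_count_used(void **table, unsigned nentries)
{
	unsigned count = 0;

	/* XXX: switch to a bitmap if tables ever grow large */
	for (unsigned i = 0; i < nentries; ++i) {
		if (table[i] != NULL)
			count++;
	}

	return count;
}
```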
+ +### Code formatting +Most of the common stylistic errors can be detected by the +[style checker program](https://github.com/pmem/pmdk/blob/master/utils/cstyle) +included in the repo. +Simply run `make cstyle` or `CSTYLE.ps1` to verify if your code is well-formatted. + +Here is the list of the most important rules: +- The limit of line length is 80 characters. +- Indent the code with TABs, not spaces. Tab width is 8 characters. +- Do not break user-visible strings (even when they are longer than 80 characters) +- Put each variable declaration in a separate line. +- Do not use C++ comments (`//`). +- Spaces around operators are mandatory. +- No whitespace is allowed at the end of line. +- For multi-line macros, do not put whitespace before `\` character. +- Precede definition of each function with a brief, non-trivial description. +(Usually a single line is enough.) +- Use `XXX` tag to indicate a hack, problematic code, or something to be done. +- For pointer variables, place the `*` close to the variable name not pointer type. +- Avoid unnecessary variable initialization. +- Never type `unsigned int` - just use `unsigned` in such case. +Same with `long int` and `long`, etc. +- Sized types like `uint32_t`, `int64_t` should be used when there is an on-media format. +Otherwise, just use `unsigned`, `long`, etc. +- Functions with local scope must be declared as `static`. + +### License & copyright +- Make sure you have the right to submit your contribution under the BSD license, +especially if it is based upon previous work. +See [CONTRIBUTING.md](https://github.com/pmem/pmdk/blob/master/CONTRIBUTING.md) for details. +- A copy of the [BSD-style License](https://github.com/pmem/pmdk/blob/master/LICENSE) +must be placed at the beginning of each source file, script or man page +(Obviously, it does not apply to README's, Visual Studio projects and \*.match files.) +- When adding a new file to the repo, or when making a contribution to an existing +file, feel free to put your copyright string on top of it. + +### Naming convention +- Keep identifier names short, but meaningful. One-letter variables are discouraged. +- Use proper prefix for function name, depending on the module it belongs to. +- Use *under_score* pattern for function/variable names. Please, do not use +CamelCase or Hungarian notation. +- UPPERCASE constant/macro/enum names. +- Capitalize first letter for variables with global or module-level scope. +- Avoid using `l` as a variable name, because it is hard to distinguish `l` from `1` +on some displays. + +### Multi-OS support (Linux/FreeBSD/Windows) +- Do not add `#ifdef ` sections lightly. They should be treated as technical +debt and avoided when possible. +- Use `_WIN32` macro for conditional directives when including code using +Windows-specific API. +- Use `__FreeBSD__` macro for conditional directives for FreeBSD-specific code. +- Use `_MSC_VER` macro for conditional directives when including code using VC++ +or gcc specific extensions. +- In case of large portions of code (i.e. a whole function) that have different +implementation for each OS, consider moving them to separate files. +(i.e. *xxx_linux.c*, *xxx_freebsd.c* and *xxx_windows.c*) +- Keep in mind that `long int` is always 32-bit in VC++, even when building for +64-bit platforms. Remember to use `long long` types whenever it applies, as well +as proper formatting strings and type suffixes (i.. `%llu`, `ULL`). +- Standard compliant solutions should be used in preference of compiler-specific ones. +(i.e. 
static inline functions versus statement expressions) +- Do not use formatting strings that are not supported by Windows implementations +of printf()/scanf() family. (like `%m`) +- It is recommended to use `PRI*` and `SCN*` macros in printf()/scanf() functions +for width-based integral types (`uint32_t`, `int64_t`, etc.). + +### Debug traces and assertions +- Put `LOG(3, ...)` at the beginning of each function. Consider using higher +log level for most frequently called routines. +- Make use of `COMPILE_ERROR_ON` and `ASSERT*` macros. +- Use `ERR()` macro to log error messages. + +### Unit tests +- There **must** be unit tests provided for each new function/module added. +- Test scripts **must** start with `#!/usr/bin/env ` for portability between Linux and FreeBSD. +- Please, see [this](https://github.com/pmem/pmdk/blob/master/src/test/README) +and [that](https://github.com/pmem/pmdk/blob/master/src/test/unittest/README) +document to get familiar with +our test framework and the guidelines on how to write and run unit tests. + +### Commit messages +All commit lines (entered when you run `git commit`) must follow the common +conventions for git commit messages: +- The first line is a short summary, no longer than **50 characters,** starting + with an area name and then a colon. There should be no period after + the short summary. +- Valid area names are: **pmem, pmem2, obj, blk, log, + test, doc, daxio, pmreorder, pool** (for *libpmempool* and *pmempool*), **rpmem** + (for *librpmem* and *rpmemd*), **benchmark, examples, core** and **common** (for everything else). +- It is acceptable for the short summary to be the only thing in the commit + message if it is a trivial change. Otherwise, the second line must be + a blank line. +- Starting at the third line, additional information is given in complete + English sentences and, optionally, bulleted points. This content must not + extend beyond **column 72.** +- The English sentences should be written in the imperative, so you say + "Fix bug X" instead of "Fixed bug X" or "Fixes bug X". +- Bullet points should use hanging indents when they take up more than + one line (see example below). +- There can be any number of paragraphs, separated by a blank line, as many + as it takes to describe the change. +- Any references to GitHub issues are at the end of the commit message. + +For example, here is a properly-formatted commit message: +``` +doc: fix code formatting in man pages + +This section contains paragraph style text with complete English +sentences. There can be as many paragraphs as necessary. + +- Bullet points are typically sentence fragments + +- The first word of the bullet point is usually capitalized and + if the point is long, it is continued with a hanging indent + +- The sentence fragments don't typically end with a period + +Ref: pmem/issues#1 +``` diff --git a/src/pmdk/CONTRIBUTING.md b/src/pmdk/CONTRIBUTING.md new file mode 100644 index 000000000..420411d89 --- /dev/null +++ b/src/pmdk/CONTRIBUTING.md @@ -0,0 +1,153 @@ +# Contributing to the Persistent Memory Development Kit + +Down below you'll find instructions on how to contribute to the +Persistent Memory Development Kit. + +Your contributions are most welcome! You'll find it is best to begin +with a conversation about your changes, rather than just writing a bunch +of code and contributing it out of the blue. 
+There are several good ways to suggest new features, offer to add a feature, +or just begin a dialog about the Persistent Memory Development Kit: + +* Open an issue in our [GitHub Issues Database](https://github.com/pmem/pmdk/issues) +* Suggest a feature, ask a question, start a discussion, etc. in our [pmem Google group](https://groups.google.com/group/pmem) +* Chat with members of the PMDK team real-time on the **#pmem** IRC channel on [OFTC](https://www.oftc.net) + +**NOTE: If you do decide to implement code changes and contribute them, +please make sure you agree your contribution can be made available +under the [BSD-style License used for the Persistent Memory Development Kit](https://github.com/pmem/pmdk/blob/master/LICENSE).** + +**NOTE: Submitting your changes also means that you certify the following:** + +``` +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +In case of any doubt, the gatekeeper may ask you to certify the above in writing, +i.e. via email or by including a `Signed-off-by:` line at the bottom +of your commit comments. + +To improve tracking of who is the author of the contribution, we kindly ask you +to use your real name (not an alias) when committing your changes to the +Persistent Memory Development Kit: +``` +Author: Random J Developer +``` + +### Code Contributions + +Please feel free to use the forums mentioned above to ask +for comments & questions on your code before submitting +a pull request. The Persistent Memory Development Kit project uses the common +*fork and merge* workflow used by most GitHub-hosted projects. +The [Git Workflow blog article](https://pmem.io/2014/09/09/git-workflow.html) +describes our workflow in more detail. + +#### Linux/FreeBSD + +Before contributing please remember to run: +``` + $ make cstyle +``` + +This will check all C/C++ files in the tree for style issues. To check C++ +files you have to have clang-format version 6.0, otherwise they will be +skipped. If you want to run this target automatically at build time, you can +pass CSTYLEON=1 to make. If you want cstyle to be run, but not fail the build, +pass CSTYLEON=2 to make. +There is also a target for automatic C++ code formatting, to do this run: +``` + $ make format +``` + +There are cases, when you might have several clang-format-X.Y binaries and either +no clang-format or it pointing to an older version. 
In such case run: +``` + $ make CLANG_FORMAT=/path/to/clang-format cstyle|format +``` + +#### Windows + +On Windows to check the code for style issues, please run: +``` + $ pmdk\utils\CSTYLE.ps1 +``` + +To check or format C++ files, you may use a standalone Visual Studio plugin +for clang-format. The plugin installer can be downloaded from +[LLVM Builds](https://llvm.org/builds) page. + +If you are actively working on an PMDK feature, please let other +developers know by [creating an issue](https://github.com/pmem/pmdk/issues). +Use the template `Feature` and assign it to yourself (due to the way +GitHub permissions work, you may have to ask a team member to assign it to you). + +### Bug Reports + +Bugs for the PMDK project are tracked in our +[GitHub Issues Database](https://github.com/pmem/pmdk/issues). + +When reporting a new bug, please use `New issue` button, pick proper template and fill +in all fields. Provide as much information as possible, including the product version: + +#### PMDK version + +Put the release name of the version of PMDK running when the +bug was discovered in a bug comment. If you saw this bug in multiple PMDK +versions, please put at least the most recent version and list the others +if necessary. +- Stable release names are in the form `#.#` (where `#` represents + an integer); for example `0.3`. +- Release names from working versions look like `#.#+b#` (adding a build #) + or `#.#-rc#` (adding a release candidate number) +If PMDK was built from source, the version number can be retrieved +from git using this command: `git describe` + +For binary PMDK releases, use the entire package name. +For RPMs, use `rpm -q pmdk` to display the name. +For Deb packages, run `dpkg-query -W pmdk` and use the +second (version) string. + +#### Priority + +Requested priority describes the urgency to resolve a defect and establishes +the time frame for providing a verified resolution. Priorities are defined as: + +* **P1**: Showstopper bug, requiring a resolution before the next release of the +library. +* **P2**: High-priority bug, requiring a resolution although it may be decided +that the bug does not prevent the next release of the library. +* **P3**: Medium-priority bug. The expectation is that the bug will be +evaluated and a plan will be made for when the bug will be resolved. +* **P4**: Low-priority bug, the least urgent. Fixed when the resources are available. + +### Other issues + +On our issues page we also gather feature requests and questions. Templates to use +are `Feature` and `Question`, respectively. They should help deliver a meaningful +description of a feature or ask a question to us (remember though we have +different means of communication, as described at the top of the page). diff --git a/src/pmdk/ChangeLog b/src/pmdk/ChangeLog new file mode 100644 index 000000000..a56648424 --- /dev/null +++ b/src/pmdk/ChangeLog @@ -0,0 +1,866 @@ +Wed Oct 28 2020 Piotr Balcer + + * Version 1.10 + + This release introduces a new stable PMDK library, libpmem2, which + is the next major release of libpmem. + This library has an entirely new, but familiar, API that addresses many + shortcomings of the previous version, while retaining all of its + functionality. To learn more, see https://pmem.io/pmdk/libpmem2/ or + libpmem2(7). + The old library, libpmem, is still going to be maintained for the + foreseeable future, but we'd like to encourage any new applications to + leverage libpmem2. 
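To give a flavor of the new API, here is a minimal sketch of mapping a file and persisting a store with the 1.10-era libpmem2 entry points: a config plus a source produce a mapping, and the appropriate flush routine is queried from that mapping. The path is hypothetical and error handling is elided:

```c
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <libpmem2.h>

int
main(void)
{
	struct pmem2_config *cfg;
	struct pmem2_source *src;
	struct pmem2_map *map;

	int fd = open("/mnt/pmem/file", O_RDWR);

	pmem2_config_new(&cfg);
	pmem2_source_from_fd(&src, fd);
	pmem2_config_set_required_store_granularity(cfg,
			PMEM2_GRANULARITY_PAGE);

	pmem2_map(cfg, src, &map);

	char *addr = pmem2_map_get_address(map);
	pmem2_persist_fn persist = pmem2_get_persist_fn(map);

	strcpy(addr, "hello, libpmem2");
	persist(addr, strlen(addr) + 1);	/* flushes as the platform requires */

	pmem2_unmap(&map);
	pmem2_source_delete(&src);
	pmem2_config_delete(&cfg);
	close(fd);
	return 0;
}
```

Unlike libpmem, the library does not guess: if the requested store granularity cannot be satisfied, the mapping call fails instead of silently falling back.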
+ +Wed Sep 16 2020 Piotr Balcer + + * Version 1.9.1 + + Detailed list of bug fixes: + - common: fix LIBFABRIC flags + - common: Add runtime SDS check and disable + - pool: disable SDS check if not supported + - obj: fix failure atomicity bug in huge allocs + - obj: add missing drain after ulog processing + +Fri Jul 3 2020 Piotr Balcer + + * Version 1.9 + + This release: + - Switches the default instruction set for memcpy, memmove and memset + implementations on x86 to AVX512, and introduces numerous performance + improvements for those operations on AVX and SSE2 fallback paths. + - Optimizes transactional allocations in libpmemobj by avoiding one + extraneous cache miss and reducing the amount of work required to perform + a reservation. + - Introduces a new API in libpmemobj, pmemobj_tx_set_failure_behavior, + that enables the application to control the behavior of aborting + transactions. + - Improves performance of pool creation on Windows by avoiding expensive + physical page allocation during file allocation. + - Stabilizes support for ppc64. + + Other changes: + - pmem: mem[cpy|set] optimization when eADR is available + - obj: detect msync failures in non-pmem variants of mem[cpy|move|set] + + Notable bug fixes: + - core: fix Last_errormsg_get when NO_LIBPTHREAD is defined + - pmem: read Unsafe Shutdown Count from region instead of interleave set + - common: fix deep_flushes failing on platforms that don't need them + - pmem: fix data cache flush on ppc64 + - obj: fix run allocated recalculation + +Fri Jan 31 2020 Marcin Ślusarz + + * Version 1.8 + + This release: + - Introduces new API in libpmemobj for user-assisted defragmentation + (see pmemobj_defrag man page for details). + - Introduces experimental support for PPC64LE architecture. + - Introduces new flag in libpmemobj for opting-out of transaction aborts + on failure (POBJ_TX_NO_ABORT), along with new variants of existing APIs + that didn't accept flags (pmemobj_tx_xfree, pmemobj_tx_xpublish, + pmemobj_tx_xlock, pmemobj_tx_xlog_append_buffer, pmemobj_tx_xstrdup, + pmemobj_tx_xwcsdup). + - Moves out libvmem and libvmmalloc to the new repository + (https://github.com/pmem/vmem). + + Other changes: + - obj: introduce new statistics useful for defrag + - obj: introduce transient/persistent statistics enabling + - obj: introduce pmemobj_tx_(set/get)_user_data funcs + - obj: introduce pmemobj_(set/get)_user_data funcs + - obj: disable workaround for offsetof() since VS 15.5 in public header + - common: drop support for libndctl < 63 on Linux + - pool: rename -N --no-exec to -d --dry-run + + Notable bug fixes: + - obj: fix zone size calculations + - obj: fix potential NULL-dereference in ulog_store + - obj: fix unintended recursive locking during palloc + - obj: fix lock release order in palloc publish + - obj: fix transient redo log of more than 64 entries + - obj: fix capacity ulog calculation + - obj: fix check of unaligned capacity size + - rpmem: add a missing case for GPSPM + FLUSH_STRICT + - pmem: fix pmemcheck support on platforms with eADR + - pool: fix possible memory leak + - rpmem: fix possible memory leak in rpmemd_config_read + - rpmem: fix possible memory leak in rpmemd_log_init + - rpmem: fix possible use-after-free + +Mon Sep 30 2019 Marcin Ślusarz + + * Version 1.7 + + This release: + - Introduces new APIs in libpmemobj for managing space used by transactions. 
+ (see pmemobj_tx_log_append_buffer man page for details) + - Introduces new APIs in librpmem, splitting rpmem_persist into rpmem_flush + and rpmem_drain, allowing applications to use the flush + drain model + already known from libpmem. (libpmemobj does not use this feature yet) + - Optimizes large libpmemobj transactions by significantly reducing + the amount of memory modified at the commit phase. + - Optimizes tracking of libpmemobj reservations. + - Adds new flags for libpmemobj's pmemobj_tx_xadd_range[_direct] API: + POBJ_XADD_NO_SNAPSHOT and POBJ_XADD_ASSUME_INITIALIZED, allowing + applications to optimize how memory is tracked by the library. + + To support some of the above changes the libpmemobj on-media layout had + to be changed, which means that old pools have to be converted using + pmdk-convert >= 1.7. + + Other changes: + - obj: fix merging of ranges when NOFLUSH flag is used (pmem/issues#1100) + - rpmem: fix closing of ssh connection (pmem/issues#995, pmem/issues#1060) + - obj: abort transaction on pmemobj_tx_publish failure + + Internal changes: + - test: fault injection tests for pmemblk, pmemlog, and pmemobj + - test: improved Python testing framework + - test: support real pmem in bad blocks tests + - common: allow not building examples and benchmarks + +Tue Aug 27 2019 Marcin Ślusarz + + * Version 1.6.1 + + This release fixes possible pool corruptions on Windows + (see https://github.com/pmem/pmdk/pull/3728 for details), + improves compatibility with newer Linux kernels with + respect to Device DAX detection, fixes pmemobj space management + for large pools, improves compatibility with newer toolchains, + incorporates build fixes for FreeBSD and fixes a number of + smaller bugs. + + Detailed list of bug fixes: + - common: (win) fix possible pool file coruption (pmem/issues#972, + pmem/issues#715, pmem/issues#603) + - common: implement correct / robust device_dax_alignment + (pmem/issues#1071) + - obj: fix recycler not locating unused chunks + - doc: update pmemobj_tx_lock documentation wrt behavior on fail + - common: fix persistent domain detection (pmem/issues#1093) + - common: vecq: fix a pointer-to-struct aliasing violation + (crash on arm64) + - common: fix minor issues related to ndctl linking + - obj: drop recursion from pmemobj_next + - common: fix bug in badblock file error handling + - obj: fix handling of malloc failures + - common: fix handling of malloc failures (ctl) + - jemalloc: fix build with gcc 9 + - obj: don't overwrite errno when palloc_heap_check_remote fails + - doc: fix pmreorder emit log macro + - rpmem: change order of rpmem init (pmem/issues#1043) + - common: Fix build failure due to unused macro PAGE_SIZE + - common: support older versions of pkg-config + - tools: link with release variant of pmemcommon + - common: add PMDK prefix to local copy of queue.h (pmem/issues#990) + - rpmem: switch to using an open coded basename (FreeBSD) + - common: posix_fallocate: guard against integer underflow in + check (FreeBSD) + - test: support Valgrind 3.15 + - test: skip if fi_info is missing + - test: (win) fix sparsefile error handling + - test: fix libpmempool_feature tests that match logs + - test: remove vmem_delete test (pmem/issues#1074) + - test: adjust matchfiles in vmem_valgrind_region test (pmem/issues#1087) + - test: remove old log files for windows (pmem/issues#1013) + - test: remove invalid expect_normal_exit (pmem/issues#1092) + - test: suppress ld leak (pmem/issues#1098) + - test: Expose necessary symbols in libvmmalloc_dummy_funcs + 
(FreeBSD) + - test: fix tests failing because `tput` fails (FreeBSD) + - test: avoid obj_critnib_mt taking very long on many-core machines + - test: deal with libndctl's path without build system + - test: overwrite old log in pmempool_create/TEST14.PS1 + - test: fix match files in tests which use dax devices + - test: fix match file in rpmem_addr_ext test + - test: fix pmempool_check test + +Wed Aug 28 2019 Marcin Ślusarz + + * Version 1.5.2 + + This release fixes possible pool corruptions on Windows + (see https://github.com/pmem/pmdk/pull/3728 for details), + improves compatibility with newer Linux kernels with + respect to Device DAX detection, fixes pmemobj space management + for large pools, improves compatibility with newer toolchains + and fixes a number of smaller bugs. + + Detailed list of bug fixes: + - common: (win) fix possible pool file coruption (pmem/issues#972, + pmem/issues#715, pmem/issues#603) + - common: implement correct / robust device_dax_alignment + (pmem/issues#1071) + - obj: fix crash after large undo log recovery + - obj: fix recycler not locating unused chunks + - doc: update pmemobj_tx_lock documentation wrt behavior on fail + - common: fix build of rpm packages on suse (pmem/issues#1023) + - common: fix persistent domain detection (pmem/issues#1093) + - common: vecq: fix a pointer-to-struct aliasing violation + (crash on arm64) + - rpmem: lock file prior to unlink (pmem/issues#833) + - common: fix for pool_set error handling (pmem/issues#1036) + - pmreorder: fix handling of store drain flush drain pattern + - obj: fix possible memory leak in tx_add_lock + - pool: free bad_block vector + - common: fix bug in badblock file error handling + - obj: fix handling of malloc failures + - common: fix handling of malloc failures (ctl) + - jemalloc: fix build with gcc 9 + - obj: don't overwrite errno when palloc_heap_check_remote fails + - doc: fix typos in pmreorder configuration + - doc: fix pmreorder emit log macro + - tools: link with release variant of pmemcommon + - test: support Valgrind 3.15 + - test: skip if fi_info is missing + - test: split test obj_tx_lock into two test cases (pmem/issues#1027) + - test: (win) fix sparsefile error handling + - test: fix libpmempool_feature tests that match logs + - test: remove vmem_delete test (pmem/issues#1074) + - test: adjust matchfiles in vmem_valgrind_region test (pmem/issues#1087) + - test: remove old log files for windows (pmem/issues#1013) + - test: remove invalid expect_normal_exit (pmem/issues#1092) + - test: suppress ld leak (pmem/issues#1098) + - test: fix failing pmemdetect on Windows + - test: fix match files in tests which use dax devices + - test: fix pmempool_check test + +Fri Aug 30 2019 Marcin Ślusarz + + * Version 1.4.3 + + This release fixes possible pool corruptions on Windows + (see https://github.com/pmem/pmdk/pull/3728 for details) + and improves compatibility with newer Linux kernels with + respect to Device DAX detection. 
+ + Bug fixes: + - common: (win) fix possible pool file coruption (pmem/issues#972, + pmem/issues#715, pmem/issues#603) + - common: implement correct / robust device_dax_alignment + (pmem/issues#1071) + - common: fix device dax detection + - obj: fix pmemobj_check for pools with some sizes (pmem/issues#975) + - obj: fix type numbers for pmemobj_list_insert_new + - obj: fix pmemobj_tx_lock error handling + - obj: fix possible memory leak in tx_add_lock + - common: fix ctl_load_config during libpmemobj initialization + (pmem/issues#917) + - common: win: fix getopt returning "option is ambiguous" + - common: fix persistent domain detection (pmem/issues#1093) + - pool: do not copy same regions in update_uuids + - test: split test obj_tx_lock into two test cases + - test: remove checking errno in obj_tx_add_range_direct + - test: remove invalid expect_normal_exit + - test: fix int overflow in pmem_deep_persist test + - test: fix pmempool_check test + - test: (win) fix a few issues related to long paths + +Tue Aug 27 2019 Marcin Ślusarz + + * Version 1.3.3 + Bug fixes: + - pmem: fix clflush bit position + - common: implement correct / robust device_dax_alignment + - common: fix device dax detection + - common: fix library dependencies (pmem/issues#767) + - common: use rpm-config CFLAGS/LDFLAGS when building packages + (pmem/issues#768) + - test: fix vmmalloc_malloc_hooks (pmem/issues#773) + - test: fix compilation with clang-5.0 (pmem/issues#783) + - pool: fix set convert of v3 -> v4 + - common: generate pkg-config files on make install + (pmem/issues#610) + - common: fix dependencies for Debian's dev packages + - test: add missing include in unittest.h + - common: (win) fix timed locks + - common: provide src version in GitHub tarballs + - common: fix free function in tls + +Tue Aug 27 2019 Marcin Ślusarz + + * Version 1.2.4 + Bug fixes: + - common: fix device dax detection (compatibility with newer kernels) + +Tue Mar 26 2019 Marcin Ślusarz + + * Version 1.6 + + This release: + - Enables unsafe shutdown and bad block detection on Linux + on systems with libndctl >= 63. It is expected that systems with + libndctl >= 63 has necessary kernel support (Linux >= 4.20). + However, due to bugs in libndctl = 63 and Linux = 4.20, it is + recommended to use libndctl >= 64.1 and Linux >= 5.0.4. + On systems with libndctl < 63, PMDK uses old superuser-only + interfaces. Support for old or new interfaces is chosen at BUILD + time. + - Introduces arena control interface in pmemobj, allowing applications + to tweak performance and scalability of heap operations. See + pmemobj_ctl_get man page ("heap" namespace) for details. + - Introduces copy_on_write mode, which allows testing applications + using pmemobj with pmreorder. See pmemobj_ctl_get man page + ("copy_on_write" namespace) for details. + + Other changes: + - allocate file space when creating a pool on existing file + (pmem/issues#167) + - initial support for testing using fault injection + - initial Python test framework + - improve performance of pmemobj_pool_by_ptr + + Bug fixes: + - common: work around tmpfs bug during pool creation + (pmem/issues#1018) + - pool: race-free pmempool create --max-size + - obj: don't modify remote pools in pmemobj_check + +Tue Feb 19 2019 Marcin Ślusarz + + * Version 1.5.1 + + This release fixes minor bugs and improves compatibility with newer + tool chains. 
+ + Notable bug fixes: + - common: make detection of device-dax instances more robust + - obj: fix pmemobj_check for pools with some sizes + - obj: don't use anon struct in an union (public header) + - obj: fix pmemobj_tx_lock error handling + - obj: don't use braces in an expression with clang (public header) + - obj: suppress pmemcheck warnings for statistics + - pmreorder: fix markers nontype issue + +Fri Oct 26 2018 Marcin Ślusarz + + * Version 1.5 + + This release has had two major focus areas - performance and RAS + (Reliability, Availability and Serviceability). Beyond that, it introduces + new APIs, new tools and many other improvements. As a side effect + of performance optimizations, the libpmemobj on-media layout had to be + changed, which means that old pools have to be converted using pmdk-convert. + libpmemcto experiment has been finished and removed from the tree. + + For more details, please see https://pmem.io/2018/10/22/release-1-5.html. + + New features: + - common: unsafe shutdown detection (SDS) + - common: detection and repair of uncorrectable memory errors (bad blocks) + - pool: new "feature" subcommand for enabling and disabling detection of + unsafe shutdown and uncorrectable memory errors + - common: auto flush detection on Windows (on Linux since 1.4) + - pmreorder: new tool for verification of persistent memory algorithms + - obj: new on media layout + - pmem/obj: new flexible memcpy|memmove|memset API + - obj: new flushing APIs: pmemobj_xpersist, pmemobj_xflush + (PMEMOBJ_F_RELAXED) + - rpmem: new flag RPMEM_PERSIST_RELAXED for rpmem_persist + - obj: lazily initialized volatile variables (pmemobj_volatile) + (EXPERIMENTAL) + - obj: allocation classes with alignment + - obj: new action APIs: pmemobj_defer_free, POBJ_XRESERVE_NEW, + POBJ_XRESERVE_ALLOC + - blk/log: new "ctl" API + + Optimizations: + - obj: major performance improvements for AEP NVDIMMs + - obj: better space utilization for small allocations + - common: call msync only on one page for deep drain + + Other changes: + - cto: removed + - obj: remove actions limit + - common: new dependency on libndctl on Linux + - pmempool: "convert" subcommand is now a wrapper around pmdk-convert + (please see https://github.com/pmem/pmdk-convert) + - obj: C++ bindings have been moved to a new repository + (please see https://github.com/pmem/libpmemobj-cpp) + + Bug fixes: + - obj: fix type numbers for pmemobj_list_insert_new + - pmem: fix inconsistency in pmem_is_pmem + - common: fix windows mmap destruction + - daxio: fix checking and adjusting length + - common: fix long paths support on Windows + +Thu Aug 16 2018 Marcin Ślusarz + + * Version 1.4.2 + + This release fixes the way PMDK reports its version via + pkg-config files. + + Bug fixes: + - common: fix reported version + - doc: use single "-" in NAME section (pmem/issues#914) + +Fri Jun 29 2018 Marcin Ślusarz + + * Version 1.4.1 + + In 1.4 development cycle, we created new daxio utility + (command line tool for performing I/O on Device-DAX), but due + to some complications we had to disable it just before + the 1.4 release. + In 1.4.1 we finally enable it. Daxio depends on ndctl v60.1. 
+ + Bug fixes: + - pmem: fix clflush bit position + - obj: fix invalid OOMs when zones are fully packed + - obj: don't register undo logs twice in memcheck + - pool: fix bash completion script + - pool: fix incorrect errno after transform + - obj: fix clang-7 compilation + - obj: test for msync failures in non-pmem path + - doc: add missing field to alloc class entry point + - common: (win) fix timed locks + - common: provide src version in GitHub tarballs + - common: fix free function in tls + - common: fix double close + - test: allow testing installed libraries + - test: fix Valgrind vs stripped libraries issue + - test: fix dependencies between tests and tools + - test: fix races on make pcheck -jN + - test: use libvmmalloc.so.1 + - test: fix incorrect number of required dax devices + - test: add suppression for leak in ld.so + - test: fail if memcheck detects overlapping chunks + - test: simplify time measurements in obj_sync + - benchmark: check lseek() return value + - examples: catch exceptions in map_cli + +Thu Mar 29 2018 Krzysztof Czurylo + + * Version 1.4 + This is the first release of PMDK under a new name. + The NVML project has been renamed to PMDK (Persistent Memory + Development Kit). This is only the project/repo name change + and it does not affect the names of the PMDK packages. + See this blog article for more details on the reasons and + impact of the name change: + https://pmem.io/2017/12/11/NVML-is-now-PMDK.html + + New features: + - common: support for concatenated Device-DAX devices + with 2M/1G alignment + - common: add support for MAP_SYNC flag + - common: always enable Valgrind instrumentation (pmem/issues#292) + - common: pool set options / headerless pools + - pmem: add support for "deep flush" operation + - rpmem: add rpmem_deep_persist + - doc: split man pages and add per-function aliases (pmem/issues#385) + + Optimizations: + - pmem: skip CPU cache flushing when eADR is available + (no Windows support yet) + - pmem: add AVX512F support in pmem_memcpy/memset (pmem/issues#656) + + Bug fixes: + - common: fix library dependencies (pmem/issues#767, RHBZ #1539564) + - common: use rpm-config CFLAGS/LDFLAGS when building packages + (pmem/issues#768, RHBZ #1539564) + - common: do not unload librpmem on close (pmem/issues#776) + - common: fix NULL check in os_fopen (pmem/issues#813) + - common: fix missing version in .pc files + - obj: fix cancel of huge allocations (pmem/issues#726) + - obj: fix error handling in pmemobj_open (pmem/issues#750) + - obj: validate pe_offset in pmemobj_list_* APIs (pmem/issues#772) + - obj: fix add_range with size == 0 (pmem/issues#781) + - log: add check for negative iovcnt (pmem/issues#690) + - rpmem: limit maximum number of lanes (pmem/issues#609) + - rpmem: change order of memory registration (pmem/issues#655) + - rpmem: fix removing remote pools (pmem/issues#721) + - pool: fix error handling (pmem/issues#643) + - pool: fix sync with switched parts (pmem/issues#730) + - pool: fix sync with missing replica (pmem/issues#731) + - pool: fix detection of Device DAX size (pmem/issues#805) + - pool: fail pmempool_sync if there are no replicas (pmem/issues#816) + - benchmark: fix calculating standard deviation (pmem/issues#318) + - doc: clarify pmem_is_pmem behavior (pmem/issues#719) + - doc: clarify pmemobj_root behavior (pmem/issues#733) + + Experimental features: + - common: port PMDK to FreeBSD + - common: add experimental support for aarch64 + - obj: introduce allocation classes + - obj: introduce two-phase heap ops (reserve/publish) 
+ (pmem/issues#380, pmem/issues#415) + - obj: provide basic heap statistics (pmem/issues#676) + - obj: implement run-time pool extending (pmem/issues#382) + - cto: add close-to-open persistence library (pmem/issues#192) + + The following features are disabled by default, until + ndctl v60.0 is available: + - daxio: add utility to perform I/O on Device-DAX + - RAS: unsafe shutdown detection/handling + +Wed Dec 20 2017 Krzysztof Czurylo + + * Version 1.3.1 + Bug fixes: + - rpmem: fix issues reported by Coverity + - rpmem: fix read error handling + - rpmem: add fip monitor (pmem/issues#597) + - test: add rpmemd termination handling test + - cpp: fix pop.persist function in obj_cpp_ptr + - rpmem: return failure for a failed allocation + - rpmem: fix potential memory leak + - common: fix available rm options msg (pmem/issues#651) + - pool: fix pmempool_get_max_size + - obj: fix potential deadlock during realloc + (pmem/issues#635, pmem/issues#636, pmem/issues#637) + - obj: initialize TLS data + - rpmem: fix cleanup if fork() failed (pmem/issues#634) + - obj: fix bogus OOM after exhausting first zone + +Thu Jul 13 2017 Krzysztof Czurylo + + * Version 1.3 + This release introduces some useful features and optimizations + in libpmemobj. Most of them are experimental and controlled by + the new pmemobj_ctl APIs. For details, please check the feature + requests identified by the issue numbers listed next to the items below. + + Other important changes are related to performance tuning and + stabilization of librpmem library, which is used by libpmemobj to get + remote access to persistent memory and to provide basic data replication + over RDMA. The librpmem is still considered experimental. + + NVML for Windows is feature complete (except for libvmmalloc). + This release includes the support for Unicode, long paths and + the NVML installer. 
+ + New features: + - common: add support for concatenated DAX Devices + - common: add Unicode support on Windows + - common: add long path support on Windows + - common: add NVML installer for Windows + - pmem: make pmem_is_pmem() true for Device DAX only + - obj: add pmemobj_wcsdup()/pmemobj_tx_wcsdup() APIs + - obj: export non-inlined pmemobj_direct() + - obj: add PMEMOBJ_NLANES env variable + - cpp: introduce the allocator + - cpp: add wstring version of C++ entry points + - vmem: add vmem_wcsdup() API entry + - pool: add pmempool_rm() function (pmem/issues#307) + - pool: add --force flag for create command (pmem/issues#529) + - benchmark: add a minimal execution time option + - benchmark: add thread affinity option + - benchmark: print 99% and 99.9% percentiles + - doc: separate Linux/Windows version of web-based man pages + + Optimizations: + - obj: cache _pobj_cached_pool in pmemobj_direct() + - obj: optimize thread utilization of buckets + - obj: stop grabbing a lock when querying pool ptr + - rpmem: use multiple endpoints + + Bug fixes: + - common: fix issues reported by static code analyzers + - pmem: fix mmap() implementation on Windows + - pmem: fix mapping addr/length alignment on Windows + - pmem: fix PMEM_MMAP_HINT implementation on Windows + - pmem: fix pmem_is_pmem() on invalid memory ranges + - pmem: fix wrong is_pmem returned by pmem_map_file() + - pmem: fix mprotect() for private mappings on Windows + - pmem: modify pmem_is_pmem() behavior for len==0 + - obj: add failsafe to prevent allocs in constructor + - cpp: fix swap implementation + - cpp: fix sync primitives' constructors + - cpp: fix wrong pointer type in the allocator + - cpp: return persistent_ptr::swap to being public + - pool: treat invalid answer as 'n' + - pool: unify flags value for dry run + - pool: transform for remote replicas + - rpmem: persistency method detection + - benchmark: fix time measurement + + Experimental features/optimizations: + - obj: pmemobjctl - statistics and control submodule + (pmem/issues#194, pmem/issues#211) + - obj: zero-overhead allocations - customizable alloc header + (pmem/issues#347) + - obj: flexible run size index (pmem/issues#377) + - obj: dynamic range cache (pmem/issues#378) + - obj: asynchronous post-commit (pmem/issues#381) + - obj: configurable object cache (pmem/issues#515) + - obj: add cache size and threshold tx params + - obj: add CTL var for suppressing expensive checks + - rpmem: add rpmem_set_attr() API entry + - rpmem: switch to libfabric v1.4.2 + +Thu May 18 2017 Krzysztof Czurylo + + * Version 1.2.3 + Bug fixes: + - test: extend timeout for selected tests + - test: reduce number of operations in obj_tx_mt + - test: define cfree() as free() in vmmalloc_calloc + + Other changes: + - common: move Docker images to new repo + +Sat Apr 15 2017 Krzysztof Czurylo + + * Version 1.2.2 + Bug fixes: + - pmempool: fix mapping type in pool_params_parse + - test: limit number of arenas in vmem_stats + - test: do not run pool_lock test as root + - common: fix pkg-config files + - common: fix building packages for Debian + +Tue Feb 21 2017 Krzysztof Czurylo + + * Version 1.2.1 + This NVML release changes the behavior of pmem_is_pmem() on Linux. + The pmem_is_pmem() function will now return true only if the entire + range is mapped directly from Device DAX (/dev/daxX.Y) without an + intervening file system, and only if the corresponding file mapping + was created with pmem_map_file(). See libpmem(7) for details. 
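In practice this means an application should take the is_pmem answer from pmem_map_file() itself rather than probing arbitrary ranges. A minimal sketch of the intended pattern (hypothetical device path, error handling mostly elided):

```c
#include <libpmem.h>

int
main(void)
{
	size_t mapped_len;
	int is_pmem;

	/* for Device DAX, length 0 and no flags map the whole device */
	char *addr = pmem_map_file("/dev/dax0.0", 0, 0, 0,
			&mapped_len, &is_pmem);
	if (addr == NULL)
		return 1;

	addr[0] = 42;

	if (is_pmem)
		pmem_persist(addr, mapped_len);	/* user-space flush */
	else
		pmem_msync(addr, mapped_len);	/* fall back to msync(2) */

	pmem_unmap(addr, mapped_len);
	return 0;
}
```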
+ + Bug fixes: + - jemalloc: fix test compilation on Fedora 26 (rawhide) + - test: fix cpp test compilation on Fedora 26 (rawhide) + - common: use same queue.h on linux and windows + - common: queue.h clang static analyzer fix + - common: fix path handling in build-dpkg.sh + - test: fix match files in pmempool_transform/TEST8 + +Fri Dec 30 2016 Krzysztof Czurylo + + * Version 1.2 - Windows Technical Preview #1 + This is the first Technical Preview release of NVML for Windows. + It is based on NVML 1.2 version, but not all the 1.2 features + are ported to Windows. In particular, Device DAX and remote access + to persistent memory (librpmem) are not supported by design. + + NOTE: This release has not gone through the full validation cycle, + but only through some basic tests on Travis and AppVeyor. + Thus, it cannot be assumed "Production quality" and should not + be used in production environments. + + Besides several minor improvements and bug fixes, all the other + changes since NVML 1.2 release were related to Windows support: + - win: port libvmem (and jemalloc) + - win: benchmarks Windows port + - win: fix mapping files of unaligned length + - win: clean up possible race condition in mmap_init() + - win: enable QueryVirtualMemoryInformation() in pmem_is_pmem() + - test: check open handles at START/DONE + - test: port all the remaining unit tests + (scope, pmem_map, obj_debug, util_poolset, pmempool_*) + - win: add resource files for versioning + + Known issues and limitations of Windows version of NVML: + - Unicode support is missing. The UTF/USC-encoded file paths + or pool set files may not be handled correctly. + - The libvmmalloc library is not ported yet. + - The on-media format of pmem pools is not portable at the moment. + The pmem pools created using Windows version of NVM libraries + cannot be open on Linux and vice versa. + - Despite the fact the current version of NVML would work + with any recent version of Windows OS, to take full + advantage of PMEM and NVML features and to benefit from + the PMEM performance, the recommended platforms needs be + equipped with the real NVDIMMs hardware and should support + the native, Microsoft's implementation of DAX-enabled file + system (i.e. Windows Server 2016 or later). + In case of using NVML with older versions of Windows or with + the custom implementation of PMEM/DAX drivers, the performance + might not be satisfactory. Please, contact the provider + of PMEM/DAX drivers for your platform to get the customized + version of NVML in such case. + +Thu Dec 15 2016 Krzysztof Czurylo + + * Version 1.2 + This NVML release causes a "flag day" for libpmemobj. + The pmemobj pools built under NVML 1.1 are incompatible with pools built + under NVML 1.2 and later. This is because an issue was discovered with + the alignment of locks (pmem/issues#358) and, although rare, the issue potentially + impacts program correctness, making the fix mandatory. + The major version number of the pmemobj pool layout and the version + of the libpmemobj API is changed to prevent the use of the potentially + incorrect layout. + + Other key changes introduced in this release: + - Add Device DAX support, providing that "optimized flush" mechanism + defined in SNIA NVM Programming Model can safely be used, even + if PMEM-aware file system supporting that model is not available, + or if the user does not want to use the file system for some reason. + - Add a package for libpmemobj C++ bindings. + C++ API is no longer considered experimental. 
+ Web-based documentation for C++ API is available on https://pmem.io. + - Add "sync" and "transform" commands to pmempool utility. + The "sync" command allows one to recover missing or corrupted part(s) + of a pool set from a healthy replica, while the "transform" command + is a convenient way for modifying the structure of an existing + pool set, i.e. by adding or removing replicas. + - Add experimental support for remote access to persistent memory and + basic remote data replication over RDMA (librpmem). Experimental + support for remote replicas is also provided by libpmemobj library. + + New features: + - common: add Device DAX support (pmem/issues#197) + - obj: add C++ bindings package (libpmemobj++-devel) + - obj: add TOID_OFFSETOF macro + - pmempool: add "sync" and "transform" commands + (pmem/issues#172, pmem/issues#196) + + Bug fixes: + - obj: force alignment of pmem lock structures (pmem/issues#358) + - blk: cast translation entry to uint64_t when calculating data offset + - obj: fix Valgrind instrumentation of chunk headers and cancelled + allocations + - obj: set error message when user called pmemobj_tx_abort() + - obj: fix status returned by pmemobj_list_insert() (pmem/issues#226) + - obj: defer allocation of global structures + + Optimizations: + - obj: fast path for pmemobj_pool_by_ptr() when inside a transaction + - obj: simplify and optimize allocation class generation + + Experimental features: + - rpmem: add support for remote access to persistent memory and basic + remote data replication over RDMA + - libpmempool: add pmempool_sync() and pmempool_transform() (pmem/issues#196) + - obj: introduce pmemobj_oid() + - obj: add pmemobj_tx_xalloc()/pmemobj_tx_xadd_range() APIs and + the corresponding macros + - obj: add transaction stage transition callbacks + +Thu Jun 23 2016 Krzysztof Czurylo + + * Version 1.1 + This NVML release introduces a new version of libpmemobj pool layout. + Internal undo log structure has been modified to improve performance + of pmemobj transactions. Memory pools created with older versions + of the libpmemobj library must be converted to the new format using + "pmempool convert" command. See pmempool-convert(1) for details. + + A new "libpmempool" library is available, providing support for + off-line pool management and diagnostics. Initially it provides only + "check" and "repair" operations for log and blk memory pools, and for + BTT devices. + + Other changes: + - pmem: deprecate PCOMMIT + - blk: match BTT Flog initialization with Linux NVDIMM BTT + - pmem: defer pmem_is_pmem() initialization (pmem/issues#158) + - obj: add TOID_TYPEOF macro + + Bug fixes: + - doc: update description of valid file size units (pmem/issues#133) + - pmempool: fix --version short option in man page (pmem/issues#135) + - pmempool: print usage when running rm without arg (pmem/issues#136) + - cpp: clarify polymorphism in persistent_ptr (pmem/issues#150) + - obj: let the before flag be any non-zero value (pmem/issues#151) + - obj: fix compare array pptr to nullptr (pmem/issues#152) + - obj: cpp pool.get_root() fix (pmem/issues#156) + - log/blk: set errno if replica section is specified (pmem/issues#161) + - cpp: change exception message (pmem/issues#163) + - doc: remove duplicated words in man page (pmem/issues#164) + - common: always append EXTRA_CFLAGS after our CFLAGS + + Experimental features: + - Implementation of C++ bindings for libpmempobj is complete. + Web-based documentation for C++ API is available on https://pmem.io. 
+ Note that C++ API is still considered experimental. Do not use it + in production environments. + - Porting NVML to Windows is in progress. There are MS Visual Studio + solution/projects available, allowing to compile libpmem, libpmemlog, + libpmemblk and libpmemobj on Windows, but the libraries are not fully + functional and most of the test are not enabled yet. + +Thu Apr 07 2016 Krzysztof Czurylo + + * Version 1.0 + The API of six libraries (libpmem, libpmemblk, libpmemlog, + libpmemobj, libvmem, libvmmalloc) is complete and stable. + The on-media layout of persistent memory pools will be maintained + from this point, and if changed it will be backward compatible. + Man pages are all complete. + This release has been validated to "Production quality". + + For the purpose of new features planned for next releases of NVML + there have been some API modifications made: + - pmem: pmem_map replaced with pmem_map_file + - log/blk: 'off_t' substituted with 'long long' + - obj: type numbers extended to 64-bit + - obj: new entry points and macros added: + pmemobj_tx_errno, pmemobj_tx_lock, pmemobj_mutex_timedlock, + TX_ADD_DIRECT, TX_ADD_FIELD_DIRECT, TX_SET_DIRECT + + Other key changes since version 0.4 include: + - common: updated/fixed installation scripts + - common: eliminated dependency on libuuid + - pmem: CPU features/ISA detection using CPUID + - obj: improved error handling + - obj: atomic allocation fails if constructor returns error + - obj: multiple performance optimizations + - obj: object store refactoring + - obj: additional examples and benchmarks + + This release also introduces a prototype implementation + of C++ bindings for libpmemobj. Note that C++ API is still + experimental and should not be used in production environments. + +Fri Dec 04 2015 Krzysztof Czurylo + + * Version 0.4 + This NVML version primarily focuses on improving code quality + and reliability. In addition to a couple of bug fixes, + the changes include: + - benchmarks for libpmemobj, libpmemblk and libvmem + - additional pmemobj tests and examples + - pool mapping address randomization + - added pmempool "rm" command + - eliminated libpmem dependency on libpthread + - enabled extra warnings + - minor performance improvements + Man pages are all complete. + This release is considered "Beta quality" by the team, having + been thoroughly validated, including significant performance analysis. + The pmempool command does not yet support "check" and "repair" + operations for pmemobj type pools. + +Sun Sep 13 2015 Andy Rudoff + + * Version 0.3 + NVML is now feature complete, adding support for: + - pool sets + - pmemobj local replication (active/passive) + - experimental valgrind support + - pmempool support for all pool types + Man pages are all complete. + This release is considered "Alpha quality" by the team, having + gone through significant validation but only some performance + analysis at this point. + +Tue Jun 30 2015 Andy Rudoff + + * Version 0.2 + NVML now consists of six libraries: + - libpmem (basic flushing, etc) + - libpmemblk, libpmemlog, libpmemobj (transactions) + - libvmem, libvmmalloc (volatile use of pmem) + The "pmempool" command is available for managing pmem files. + Man pages for all the above are complete. + The only things documented in man pages but not implemented are: + - pmem sets (ability to spread a pool over a set of files) + - replication (coming for libpmemobj) + The pmempool command does not yet support pmemobj type pools. 
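To recall what the transactional pool libraries listed above look like in use, here is a minimal libpmemlog sketch; the path is hypothetical and PMEMLOG_MIN_POOL is the library's minimum pool size. pmemlog_append() adds the record atomically:

```c
#include <libpmemlog.h>

int
main(void)
{
	PMEMlogpool *plp = pmemlog_create("/mnt/pmem/log.pool",
			PMEMLOG_MIN_POOL, 0666);
	if (plp == NULL)
		return 1;

	const char rec[] = "appended atomically\n";
	if (pmemlog_append(plp, rec, sizeof(rec) - 1) < 0) {
		pmemlog_close(plp);
		return 1;
	}

	pmemlog_close(plp);
	return 0;
}
```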
+ +Thu Sep 11 2014 Andy Rudoff + + * Version 0.1 + Initial development done in 0.1 builds diff --git a/src/pmdk/LICENSE b/src/pmdk/LICENSE new file mode 100644 index 000000000..c80f5dfb5 --- /dev/null +++ b/src/pmdk/LICENSE @@ -0,0 +1,39 @@ +SPDX-License-Identifier: BSD-3-Clause +Copyright 2014-2020, Intel Corporation + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Everything in this source tree is covered by the previous license +with the following exceptions: + +* src/core/valgrind/valgrind.h, src/core/valgrind/memcheck.h, + src/core/valgrind/helgrind.h, src/core/valgrind/drd.h are covered by + another similar BSD license variant, contained in those files. + +* utils/cstyle (used only during development) licensed under CDDL. diff --git a/src/pmdk/Makefile b/src/pmdk/Makefile new file mode 100644 index 000000000..a44fe088c --- /dev/null +++ b/src/pmdk/Makefile @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# Makefile -- top-level Makefile for PMDK +# +# Use "make" to build the library. +# +# Use "make doc" to build documentation. +# +# Use "make test" to build unit tests. Add "SKIP_SYNC_REMOTES=y" to skip +# or "FORCE_SYNC_REMOTES=y" to force syncing remote nodes if any is defined. +# +# Use "make check" to run unit tests. +# +# Use "make check-remote" to run only remote unit tests. +# +# Use "make clean" to delete all intermediate files (*.o, etc). +# +# Use "make clobber" to delete everything re-buildable (binaries, etc.). +# +# Use "make cstyle" to run cstyle on all C source files +# +# Use "make check-license" to check copyright and license in all source files +# +# Use "make rpm" to build rpm packages +# +# Use "make dpkg" to build dpkg packages +# +# Use "make source DESTDIR=path_to_dir" to copy source files +# from HEAD to 'path_to_dir/pmdk' directory. +# +# As root, use "make install" to install the library in the usual +# locations (/usr/local/lib, /usr/local/include, and /usr/local/share/man). 
+# You can provide custom directory prefix for installation using +# DESTDIR variable e.g.: "make install DESTDIR=/opt" +# You can override the prefix within DESTDIR using prefix variable +# e.g.: "make install prefix=/usr" + +include src/common.inc + +RPM_BUILDDIR=rpmbuild +DPKG_BUILDDIR=dpkgbuild +EXPERIMENTAL ?= n +BUILD_PACKAGE_CHECK ?= y +BUILD_RPMEM ?= y +TEST_CONFIG_FILE ?= "$(CURDIR)"/src/test/testconfig.sh +DOC ?= y + +rpm : override DESTDIR="$(CURDIR)/$(RPM_BUILDDIR)" +dpkg: override DESTDIR="$(CURDIR)/$(DPKG_BUILDDIR)" +rpm dpkg: override prefix=/usr + +all: doc + $(MAKE) -C src $@ + +doc: +ifeq ($(DOC),y) + test -f .skip-doc || $(MAKE) -C doc all +endif + +clean: + $(MAKE) -C src $@ +ifeq ($(DOC),y) + test -f .skip-doc || $(MAKE) -C doc $@ +endif + $(RM) -r $(RPM_BUILDDIR) $(DPKG_BUILDDIR) + $(RM) -f $(GIT_VERSION) + +clobber: + $(MAKE) -C src $@ +ifeq ($(DOC),y) + test -f .skip-doc || $(MAKE) -C doc $@ +endif + $(RM) -r $(RPM_BUILDDIR) $(DPKG_BUILDDIR) rpm dpkg + $(RM) -f $(GIT_VERSION) + +require-rpmem: +ifneq ($(BUILD_RPMEM),y) + $(error ERROR: cannot run remote tests because $(BUILD_RPMEM_INFO)) +endif + +check-remote: require-rpmem all + $(MAKE) -C src $@ + +test check pcheck pycheck: all + $(MAKE) -C src $@ + +check pcheck pycheck: check-doc + +cstyle: + test -d .git && utils/check-commits.sh + $(MAKE) -C src $@ + $(MAKE) -C utils $@ + @echo Checking files for whitespace issues... + @utils/check_whitespace -g + @echo Done. + +format: + $(MAKE) -C src $@ + @echo Done. + +check-license: + @utils/check_license/check-headers.sh $(TOP) BSD-3-Clause + @echo Done. + +check-doc: doc + BUILD_RPMEM="$(BUILD_RPMEM)" utils/check-manpages + +sparse: + $(MAKE) -C src sparse + +source: clobber + $(if "$(DESTDIR)", , $(error Please provide DESTDIR variable)) + +utils/copy-source.sh "$(DESTDIR)" $(SRCVERSION) + +pkg-clean: + $(RM) -r "$(DESTDIR)" + +rpm dpkg: pkg-clean + $(MAKE) source DESTDIR="$(DESTDIR)" + +utils/build-$@.sh -t $(SRCVERSION) -s "$(DESTDIR)"/pmdk -w "$(DESTDIR)" -o $(CURDIR)/$@\ + -e $(EXPERIMENTAL) -c $(BUILD_PACKAGE_CHECK) -r $(BUILD_RPMEM)\ + -f $(TEST_CONFIG_FILE) -n $(NDCTL_ENABLE) + +install: all + +install uninstall: + $(MAKE) -C src $@ +ifeq ($(DOC),y) + $(MAKE) -C doc $@ +endif + +.PHONY: all clean clobber test check cstyle check-license install uninstall\ + source rpm dpkg pkg-clean pcheck check-remote format doc require-rpmem\ + $(SUBDIRS) diff --git a/src/pmdk/README.md b/src/pmdk/README.md new file mode 100644 index 000000000..fcbaea83d --- /dev/null +++ b/src/pmdk/README.md @@ -0,0 +1,386 @@ +# **PMDK: Persistent Memory Development Kit** + +[![Travis build status](https://travis-ci.org/pmem/pmdk.svg?branch=master)](https://travis-ci.org/pmem/pmdk) +[![GHA build status](https://github.com/pmem/pmdk/workflows/PMDK/badge.svg?branch=master)](https://github.com/pmem/pmdk/actions) +[![Appveyor build status](https://ci.appveyor.com/api/projects/status/u2l1db7ucl5ktq10/branch/master?svg=true&pr=false)](https://ci.appveyor.com/project/pmem/pmdk/branch/master) +[![Cirrus build status](https://api.cirrus-ci.com/github/pmem/pmdk.svg)](https://cirrus-ci.com/github/pmem/pmdk/master) +[![Coverity Scan Build Status](https://img.shields.io/coverity/scan/3015.svg)](https://scan.coverity.com/projects/pmem-pmdk) +[![Coverage Status](https://codecov.io/github/pmem/pmdk/coverage.svg?branch=master)](https://codecov.io/gh/pmem/pmdk/branch/master) +[![PMDK release 
version](https://img.shields.io/github/release/pmem/pmdk.svg?sort=semver)](https://github.com/pmem/pmdk/releases/latest) +[![Packaging status](https://repology.org/badge/tiny-repos/pmdk.svg)](https://repology.org/project/pmdk/versions) + +The **Persistent Memory Development Kit (PMDK)** is a collection of libraries and tools for System Administrators and Application Developers to simplify managing and accessing persistent memory devices. For more information, see https://pmem.io. + +To install PMDK libraries, either install pre-built packages, which we build for every stable release, or clone the tree and build it yourself. **Pre-built** packages can be found in popular Linux distribution package repositories, or you can check out our recent stable releases on our [github release page](https://github.com/pmem/pmdk/releases). Specific installation instructions are outlined below. + +Bugs and feature requests for this repo are tracked in our [GitHub Issues Database](https://github.com/pmem/pmdk/issues). + +## Contents +1. [Libraries and Utilities](#libraries-and-utilities) +2. [Getting Started](#getting-started) +3. [Version Conventions](#version-conventions) +4. [Pre-Built Packages for Windows](#pre-built-packages-for-windows) +5. [Dependencies](#dependencies) + * [Linux](#linux) + * [Windows](#windows) + * [FreeBSD](#freebsd) +6. [Building PMDK on Linux or FreeBSD](#building-pmdk-on-linux-or-freebsd) + * [Make Options](#make-options) + * [Testing Libraries](#testing-libraries-on-linux-and-freebsd) + * [Memory Management Tools](#memory-management-tools) +7. [Building PMDK on Windows](#building-pmdk-on-windows) + * [Testing Libraries](#testing-libraries-on-windows) +8. [Experimental Packages](#experimental-packages) + * [librpmem and rpmemd packages](#the-librpmem-and-rpmemd-packages) + * [Experimental support for 64-bit ARM](#experimental-support-for-64-bit-arm) +9. [Contact Us](#contact-us) + +## Libraries and Utilities +Available Libraries: +- [libpmem](https://pmem.io/pmdk/libpmem/): provides low level persistent memory support + +- [libpmemobj](https://pmem.io/pmdk/libpmemobj/): provides a transactional object store, providing memory allocation, transactions, and general facilities for persistent memory programming. + +- [libpmemblk](https://pmem.io/pmdk/libpmemblk/): supports arrays of pmem-resident blocks, all the same size, that are atomically updated. + +- [libpmemlog](https://pmem.io/pmdk/libpmemlog/): provides a pmem-resident log file. + +- [libpmempool](https://pmem.io/pmdk/libpmempool/): provides support for off-line pool management and diagnostics. + +- [librpmem](https://pmem.io/pmdk/librpmem/)1: provides low-level support for remote access to persistent memory utilizing RDMA-capable RNICs. + +If you're looking for *libvmem* and *libvmmalloc*, they have been moved to a +[separate repository](https://github.com/pmem/vmem). + +Available Utilities: + +- [pmempool](https://pmem.io/pmdk/pmempool/): Manage and analyze persistent memory pools with this stand-alone utility + +- [pmemcheck](https://pmem.io/2015/07/17/pmemcheck-basic.html): Use dynamic runtime analysis with an enhanced version of Valgrind for use with persistent memory. + +Currently these libraries only work on 64-bit Linux, Windows2, and 64-bit FreeBSD 11+3. +For information on how these libraries are licensed, see our [LICENSE](LICENSE) file. + +>1 Not supported on Windows. +> +>2 PMDK for Windows is feature complete, but not yet considered production quality. 
+> +>3 DAX and **libfabric** are not yet supported in FreeBSD, so at this time PMDK is available as a technical preview release for development purposes. + +## Getting Started + +Getting Started with Persistent Memory Programming is a tutorial series created by Intel Architect Andy Rudoff. In this tutorial, you will be introduced to persistent memory programming and learn how to apply it to your applications. +- Part 1: [What is Persistent Memory?](https://software.intel.com/en-us/persistent-memory/get-started/series) +- Part 2: [Describing The SNIA Programming Model](https://software.intel.com/en-us/videos/the-nvm-programming-model-persistent-memory-programming-series) +- Part 3: [Introduction to PMDK Libraries](https://software.intel.com/en-us/videos/intro-to-the-nvm-libraries-persistent-memory-programming-series) +- Part 4: [Thinking Transactionally](https://software.intel.com/en-us/videos/thinking-transactionally-persistent-memory-programming-series) +- Part 5: [A C++ Example](https://software.intel.com/en-us/videos/a-c-example-persistent-memory-programming-series) + +Additionally, we recommend reading [Introduction to Programming with Persistent Memory from Intel](https://software.intel.com/en-us/articles/introduction-to-programming-with-persistent-memory-from-intel) + +## Version Conventions + +- **Builds** are tagged something like `0.2+b1`, which means _Build 1 on top of version 0.2_ +- **Release Candidates** have a `-rc{version}` tag, e.g. `0.2-rc3`, meaning _Release Candidate 3 for version 0.2_ +- **Stable Releases** use a _major.minor_ tag like `0.2` + +## Pre-Built Packages for Windows + +The recommended and easiest way to install PMDK on Windows is to use Microsoft vcpkg. Vcpkg is an open source tool and ecosystem created for library management. + +To install the latest PMDK release and link it to your Visual Studio solution, you first need to clone and set up vcpkg on your machine as described on the [vcpkg github page](https://github.com/Microsoft/vcpkg) in the **Quick Start** section. + +In brief: + +``` + > git clone https://github.com/Microsoft/vcpkg + > cd vcpkg + > .\bootstrap-vcpkg.bat + > .\vcpkg integrate install + > .\vcpkg install pmdk:x64-windows +``` + +The last command can take a while, as it builds and installs PMDK. + +After successful completion of all of the above steps, the libraries are ready +to be used in Visual Studio and no additional configuration is required. +Just open VS with your existing project or create a new one +(remember to use the **x64** platform) and then include the headers in your project as you always do. + +## Dependencies + +Required packages for each supported OS are listed below. Note that some tests and example applications require additional packages, but missing ones do not interrupt the build; an appropriate message is displayed instead. For details please read the DEPENDENCIES section in the appropriate README file. + +See our **[Dockerfiles](utils/docker/images)** +to get an idea of what packages are required to build the entire PMDK, +with all the tests and examples, on the _Travis-CI_ system.
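+ +On Linux, before starting a build, you can optionally verify upfront that the ndctl packages listed in the next section are recent enough. This is an illustrative pre-check only; it assumes your distribution ships the libndctl/libdaxctl pkg-config files: +``` + $ pkg-config --exists --print-errors "libndctl >= 63" "libdaxctl >= 63" +``` +A zero exit status means both version requirements below are satisfied.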
+ +### Linux + +You will need to install the following required packages on the build system: + +* **autoconf** +* **pkg-config** +* **libndctl-devel** (v63 or later)1 +* **libdaxctl-devel** (v63 or later) + +The following packages are required only by selected PMDK components +or features: + +* **libfabric** (v1.4.2 or later) -- required by **librpmem** + +>1 PMDK depends on libndctl to support RAS features. It is possible +to disable this support by passing NDCTL_ENABLE=n to "make", but we strongly +discourage users from doing that. Disabling NDCTL strips PMDK of the ability to +detect hardware failures, which may lead to silent data corruption. +For information on how to disable RAS at runtime for kernels prior to 5.0.4 please +see https://github.com/pmem/pmdk/issues/4207. + +### Windows + +* **MS Visual Studio 2015** +* [Windows SDK 10.0.17134.12](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk) +* **Windows, version >= 1803** +* **perl** (e.g. [StrawberryPerl](http://strawberryperl.com/)) +* **PowerShell 5** + +### FreeBSD + +* **autoconf** +* **bash** +* **binutils** +* **coreutils** +* **e2fsprogs-libuuid** +* **gmake** +* **libunwind** +* **ncurses**4 +* **pkgconf** + +>4 The pkg version of ncurses is required for proper operation; the base version included in FreeBSD is not sufficient. + +## Building PMDK on Linux or FreeBSD + +To build from source, clone this tree: +``` + $ git clone https://github.com/pmem/pmdk + $ cd pmdk +``` + +For a stable version, check out a [release tag](https://github.com/pmem/pmdk/releases) as follows. Otherwise skip this step to build the latest development release. +``` + $ git checkout tags/1.10 +``` + +Once the build system is set up, the Persistent Memory Development Kit is built using the `make` command at the top level: +``` + $ make +``` +For FreeBSD, use `gmake` rather than `make`. + +By default, all code is built with the `-Werror` flag, which fails +the whole build when the compiler emits any warning. This is very useful during +development, but can be annoying in deployment. If you want to **disable -Werror**, +use the EXTRA_CFLAGS variable: +``` + $ make EXTRA_CFLAGS="-Wno-error" +``` +>or +``` + $ make EXTRA_CFLAGS="-Wno-error=$(type-of-warning)" +``` + +### Make Options + +There are many options that can be passed to `make`. If you want to invoke make with the same variables multiple times, you can create a user.mk file in the top level directory and put all variables there. +For example: +``` + $ cat user.mk + EXTRA_CFLAGS_RELEASE = -ggdb -fno-omit-frame-pointer + PATH += :$HOME/valgrind/bin +``` +This feature is intended to be used only by developers and it may not work for all variables. Please do not file bug reports about it. Just fix it and make a PR. + +**Built-in tests** can be compiled and run with a different compiler. To do this, you must provide the `CC` and `CXX` variables. These variables are independent and setting `CC=clang` does not set `CXX=clang++`. +For example: +``` + $ make CC=clang CXX=clang++ +``` +Once make completes, all the libraries and examples are built. You can play with the library within the build tree, or install it locally on your machine. For information about running different types of tests, please refer to the [src/test/README](src/test/README). + +**Installing the library** is convenient since it installs man pages and libraries in the standard system locations: +``` + (as root...)
+ + # make install +``` + +To install this library into **other locations**, you can use the `prefix` variable, e.g.: +``` + $ make install prefix=/usr/local +``` +This will install files to /usr/local/lib, /usr/local/include, and /usr/local/share/man. + +**Preparing the library for packaging** can be done using the DESTDIR variable, e.g.: +``` + $ make install DESTDIR=/tmp +``` +This will install files to /tmp/usr/lib, /tmp/usr/include, and /tmp/usr/share/man. + +**Man pages** (groff files) are generated as part of the `install` rule. To generate the documentation separately, run: +``` + $ make doc +``` +This call requires **pandoc**. Pandoc is provided by the hs-pandoc package on FreeBSD. + +**Installing a copy of the source tree** can be done by specifying the path where you want it installed: +``` + $ make source DESTDIR=some_path +``` +For this example, it will be installed at $(DESTDIR)/pmdk. + +**Building rpm packages** on rpm-based distributions is done by: +``` + $ make rpm +``` + +To build rpm packages without running tests: +``` + $ make BUILD_PACKAGE_CHECK=n rpm +``` +This requires **rpmbuild** to be installed. + +**Building dpkg packages** on Debian-based distributions is done by: +``` + $ make dpkg +``` + +To build dpkg packages without running tests: +``` + $ make BUILD_PACKAGE_CHECK=n dpkg +``` +This requires **devscripts** to be installed. + +### Testing Libraries on Linux and FreeBSD + +Before running the tests, you may need to prepare a test configuration file (src/test/testconfig.sh). Please see the available configuration settings in the example file [src/test/testconfig.sh.example](src/test/testconfig.sh.example). + +To build and run the **unit tests**: +``` + $ make check +``` + +To run a specific **subset of tests**, run for example: +``` + $ make check TEST_TYPE=short TEST_BUILD=debug TEST_FS=pmem +``` + +To **modify the timeout** used for **check**-type tests, run: +``` + $ make check TEST_TIME=1m +``` +This will set the timeout to 1 minute. + +Please refer to the **src/test/README** for more details on how to +run different types of tests. + +### Memory Management Tools + +The PMDK libraries support standard Valgrind DRD, Helgrind and Memcheck, as well as a PM-aware version of [Valgrind](https://github.com/pmem/valgrind) (not yet available for FreeBSD). By default, support for all tools is enabled. If you wish to disable it, supply the compiler with the **VG_\<TOOL\>_ENABLED** flag set to 0, for example: +``` + $ make EXTRA_CFLAGS=-DVG_MEMCHECK_ENABLED=0 +``` + +The **VALGRIND_ENABLED** flag, when set to 0, disables all Valgrind tools +(drd, helgrind, memcheck and pmemcheck). + +The **SANITIZE** flag allows the libraries to be tested with various +sanitizers. For example, to test the libraries with AddressSanitizer +and UndefinedBehaviorSanitizer, run: +``` + $ make SANITIZE=address,undefined clobber check +``` + +## Building PMDK on Windows + +Clone the PMDK tree and open the solution: +``` + > git clone https://github.com/pmem/pmdk + > cd pmdk/src + > devenv PMDK.sln +``` + +Select the desired configuration (Debug or Release) and build the solution +(e.g. by pressing Ctrl-Shift-B). + +### Testing Libraries on Windows + +Before running the tests, you may need to prepare a test configuration file (src/test/testconfig.ps1). Please see the available configuration settings in the example file [src/test/testconfig.ps1.example](src/test/testconfig.ps1.example).
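+ +For instance, a minimal testconfig.ps1 might contain the following (illustrative values; these are the same variables the project's AppVeyor CI writes into its test configuration): +``` + $Env:PMEM_FS_DIR = "C:\temp" + $Env:NON_PMEM_FS_DIR = "C:\temp" + $Env:PMEM_FS_DIR_FORCE_PMEM = "1" +``` +Here PMEM_FS_DIR points at a directory treated as persistent memory, NON_PMEM_FS_DIR at a directory on a regular file system, and setting PMEM_FS_DIR_FORCE_PMEM to "1" makes the tests treat PMEM_FS_DIR as pmem even when it is only emulated.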
+ +To **run the unit tests**, open the PowerShell console and type: +``` + > cd pmdk/src/test + > RUNTESTS.ps1 +``` + +To run a specific **subset of tests**, run for example: +``` + > RUNTESTS.ps1 -b debug -t short +``` + +To run **just one test**, run for example: +``` + > RUNTESTS.ps1 -b debug -i pmem_is_pmem +``` + +To **modify the timeout**, run: +``` + > RUNTESTS.ps1 -o 3m +``` +This will set the timeout to 3 minutes. + +To **display all the possible options**, run: +``` + > RUNTESTS.ps1 -h +``` + +Please refer to the **[src/test/README](src/test/README)** for more details on how to run different types of tests. + +## Experimental Packages + +Some components in the source tree are treated as experimental. By default, +those components are built but not installed (and thus not included in +packages). + +If you want to build/install experimental packages, run: +``` + $ make EXPERIMENTAL=y [install,rpm,dpkg] +``` + +### The librpmem and rpmemd packages + +**NOTE:** +The **libfabric** package required to build **librpmem** and **rpmemd** is +not yet available on stable Debian-based distributions. This makes it +impossible to create Debian packages. + +If you want to build Debian packages of **librpmem** and **rpmemd**, run: +``` + $ make RPMEM_DPKG=y dpkg +``` + +### Experimental Support for 64-bit ARM + +Initial support for 64-bit ARM processors is provided, +currently only for aarch64. All the PMDK libraries except **librpmem** +can be built for 64-bit ARM. The examples, tools and benchmarks +are not ported yet and may not build on ARM cores. + +**NOTE:** +The support for ARM processors is highly experimental. The libraries +are only validated to "early access" quality with the Cortex-A53 processor. + +## Contact Us + +For more information on these libraries, contact +Piotr Balcer (piotr.balcer@intel.com), +Andy Rudoff (andy.rudoff@intel.com), or post to our +[Google group](https://groups.google.com/group/pmem). diff --git a/src/pmdk/VERSION b/src/pmdk/VERSION new file mode 100644 index 000000000..c044b1a32 --- /dev/null +++ b/src/pmdk/VERSION @@ -0,0 +1 @@ +1.10 diff --git a/src/pmdk/appveyor.yml b/src/pmdk/appveyor.yml new file mode 100644 index 000000000..c9e9a2953 --- /dev/null +++ b/src/pmdk/appveyor.yml @@ -0,0 +1,95 @@ +version: 1.4.{build} +os: Visual Studio 2019 +platform: x64 + +install: +- ps: Install-PackageProvider -Name NuGet -Force +- ps: Install-Module PsScriptAnalyzer -Force + +configuration: +- Debug +- Release + +environment: + solutionname: PMDK.sln + ex_solutionname: Examples.sln + +matrix: + fast_finish: true + +before_build: +- ps: >- + if ($Env:CONFIGURATION -eq "Release") { + utils/CSTYLE.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + utils/CHECK_WHITESPACE.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + utils/ps_analyze.ps1 + if ($LASTEXITCODE -ne 0) { + exit 1 + } + ./utils/check_sdk_version.py -d .
+ if ($LASTEXITCODE -ne 0) { + exit 1 + } + } + +build_script: +- ps: msbuild src\$Env:solutionname /property:Configuration=$Env:CONFIGURATION /m /v:m +- ps: msbuild src\examples\$Env:ex_solutionname /property:Configuration=$Env:CONFIGURATION /m /v:m + +after_build: +- ps: utils/CREATE-ZIP.ps1 -b $Env:CONFIGURATION + +test_script: +- ps: >- + if ($true) { + cd src\test + md C:\temp + echo "`$Env:NON_PMEM_FS_DIR = `"C:\temp`"" >> testconfig.ps1 + echo "`$Env:PMEM_FS_DIR = `"C:\temp`"" >> testconfig.ps1 + echo "`$Env:PMEM_FS_DIR_FORCE_PMEM = `"1`"" >> testconfig.ps1 + echo "`$Env:PMDK_NO_ABORT_MSG = `"1`"" >> testconfig.ps1 + echo "`$Env:TM = `"1`"" >> testconfig.ps1 + + write-output "config = { + 'unittest_log_level': 1, + 'cacheline_fs_dir': 'C:\\temp', + 'force_cacheline': True, + 'page_fs_dir': 'C:\\temp', + 'force_page': False, + 'byte_fs_dir': 'C:\\temp', + 'force_byte': True, + 'tm': True, + 'test_type': 'check', + 'granularity': 'all', + 'fs_dir_force_pmem': 1, + 'keep_going': False, + 'timeout': '4m', + 'build': 'debug', + 'force_enable': None, + 'fail_on_skip': False, + 'enable_admin_tests': False, + }" | out-file "testconfig.py" -encoding utf8 + + if ($Env:CONFIGURATION -eq "Debug") { + ./RUNTESTS.ps1 -b debug -o 4m + if ($?) { + ./RUNTESTS.py -b debug + } + } + if ($Env:CONFIGURATION -eq "Release") { + ./RUNTESTS.ps1 -b nondebug -o 4m + if ($?) { + ./RUNTESTS.py -b release + } + } + } + +artifacts: +- path: 'src\x64\*.zip' + name: PMDK diff --git a/src/pmdk/res/PMDK.ico b/src/pmdk/res/PMDK.ico new file mode 100644 index 000000000..8c176219d Binary files /dev/null and b/src/pmdk/res/PMDK.ico differ diff --git a/src/pmdk/src/.clang-format b/src/pmdk/src/.clang-format new file mode 100644 index 000000000..331f7db85 --- /dev/null +++ b/src/pmdk/src/.clang-format @@ -0,0 +1,33 @@ +BasedOnStyle: LLVM +IndentWidth: 8 +UseTab: Always +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +AllowShortIfStatementsOnASingleLine: false +IndentCaseLabels: false +AlwaysBreakAfterDefinitionReturnType: true +SpaceBeforeParens: ControlStatements +SpacesBeforeTrailingComments: 1 +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +PointerAlignment: Right +ContinuationIndentWidth: 8 +AlignOperands: false +IndentCaseLabels: true +ConstructorInitializerAllOnOneLineOrOnePerLine: true +AlwaysBreakTemplateDeclarations: true +AccessModifierOffset: -8 +AllowShortBlocksOnASingleLine: false +AllowShortFunctionsOnASingleLine: false +BreakStringLiterals: false diff --git a/src/pmdk/src/.gitignore b/src/pmdk/src/.gitignore new file mode 100644 index 000000000..c02fddd5a --- /dev/null +++ b/src/pmdk/src/.gitignore @@ -0,0 +1,23 @@ +!/core/ + +*.so +*.so.* +*.a +*.pc +tags +TAGS +cscope.in.out +cscope.out +cscope.po.out +debug/ +nondebug/ +*.sdf +*.opensdf +*.opendb +*.log +*.suo +*.vcxproj.user +.vs/ +x64/ +Generated files/ +srcversion.h diff --git a/src/pmdk/src/LongPath.manifest b/src/pmdk/src/LongPath.manifest new file mode 100644 index 000000000..bc1b92570 --- /dev/null +++ b/src/pmdk/src/LongPath.manifest @@ -0,0 +1,7 @@ + + + + true + + + \ No newline at end of file diff --git a/src/pmdk/src/LongPathSupport.props b/src/pmdk/src/LongPathSupport.props new file mode 100644 index 000000000..821a1b872 --- /dev/null +++ b/src/pmdk/src/LongPathSupport.props @@ 
-0,0 +1,10 @@ + + + + + + + $(SolutionDir)LongPath.manifest + + + \ No newline at end of file diff --git a/src/pmdk/src/Makefile b/src/pmdk/src/Makefile new file mode 100644 index 000000000..7e208046c --- /dev/null +++ b/src/pmdk/src/Makefile @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# src/Makefile -- Makefile for PMDK +# +TOP := $(dir $(lastword $(MAKEFILE_LIST))).. +include $(TOP)/src/common.inc + +# core targets +TARGETS = libpmem libpmemblk libpmemlog libpmemobj libpmempool tools libpmem2 +# make all targets +ALL_TARGETS = $(TARGETS) common core librpmem +# all available targets +POSSIBLE_TARGETS = $(TARGETS) common core librpmem rpmem_common test\ + examples benchmarks + +SCOPE_DIRS = $(TARGETS) common core librpmem rpmem_common + +DEBUG_RELEASE_TARGETS = common core libpmem libpmemblk libpmemlog libpmemobj\ + libpmempool librpmem libpmem2 +RELEASE_TARGETS = tools + +ifneq ($(BUILD_EXAMPLES),n) + ALL_TARGETS += examples + RELEASE_TARGETS += examples +endif + +ifneq ($(BUILD_BENCHMARKS),n) + ALL_TARGETS += benchmarks + RELEASE_TARGETS += benchmarks +endif + +CLEAN_TARGETS = $(POSSIBLE_TARGETS) +CLOBBER_TARGETS = $(POSSIBLE_TARGETS) +CSTYLE_TARGETS = $(POSSIBLE_TARGETS) +INSTALL_TARGETS = $(TARGETS) +SPARSE_TARGETS = $(POSSIBLE_TARGETS) +EXAMPLES_TARGETS = $(TARGETS) librpmem + +HEADERS_DESTDIR = $(DESTDIR)$(includedir) +HEADERS_INSTALL = include/libpmem.h\ + include/libpmemobj.h include/libpmempool.h\ + include/libpmemblk.h include/libpmemlog.h\ + include/libpmem2.h + +OBJ_HEADERS_INSTALL = include/libpmemobj/*.h + +PKG_CONFIG_DESTDIR = $(DESTDIR)$(pkgconfigdir) +PKG_CONFIG_COMMON = common.pc +PKG_CONFIG_FILES = libpmem.pc libpmemobj.pc libpmemlog.pc libpmemblk.pc\ + libpmempool.pc libpmem2.pc +PMREORDER_BIN = $(DESTDIR)$(bindir) +PMREORDER_SRC = tools/pmreorder +PMREORDER_FILES = $(PMREORDER_SRC)/*.py +PMREORDER_DESTDIR = $(DESTDIR)$(datarootdir)/pmreorder +PMREORDER_ROOTDIR = $(datarootdir)/pmreorder + +ifeq ($(BUILD_RPMEM),y) + PKG_CONFIG_FILES += librpmem.pc + HEADERS_INSTALL += include/librpmem.h + INSTALL_TARGETS += librpmem +endif + +rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)\ + $(filter $(subst *,%,$2),$d))) + +SCOPE_SRC_DIRS = $(SCOPE_DIRS) include +SCOPE_HDR_DIRS = $(SCOPE_DIRS) include + +SCOPE_SRC_FILES = $(foreach d, $(SCOPE_SRC_DIRS), $(wildcard $(d)/*.c)) +SCOPE_HDR_FILES = $(foreach d, $(SCOPE_HDR_DIRS), $(wildcard $(D)/*.h)) +SCOPEFILES = $(SCOPE_SRC_FILES) $(SCOPE_HDR_FILES) + +# include/lib*.h - skip include/pmemcompat.h +HEADERS =\ + $(foreach f, $(wildcard\ + freebsd/include/*.h\ + freebsd/include/*/*.h\ + include/lib*.h\ + include/libpmemobj/*.h\ + windows/include/*.h\ + windows/include/*/*.h\ + ), $(f)) + +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addprefix debug/, $(addsuffix tmp, $(HEADERS))) +endif + +SCRIPTS = $(call rwildcard,,*.sh) + +debug/%.htmp: %.h + $(call check-cstyle, $<, $@) + +debug/%.hpptmp: %.hpp + $(call check-cstyle, $<, $@) + +all: $(TMP_HEADERS) $(ALL_TARGETS) +install: $(INSTALL_TARGETS:=-install) +uninstall: $(INSTALL_TARGETS:=-uninstall) +clean: $(CLEAN_TARGETS:=-clean) +clobber: $(CLOBBER_TARGETS:=-clobber) +cstyle: $(CSTYLE_TARGETS:=-cstyle) +format: $(CSTYLE_TARGETS:=-format) +examples benchmarks: $(EXAMPLES_TARGETS) +benchmarks: examples +sparse: $(SPARSE_TARGETS:=-sparse) + +custom_build = $(DEBUG)$(OBJDIR) + +tools: libpmem libpmemblk libpmemlog libpmemobj libpmempool +libpmemblk libpmemlog libpmemobj: libpmem +libpmempool: 
libpmemblk +benchmarks test tools: common core + +pkg-cfg-common: + @printf "version=%s\nlibdir=%s\nprefix=%s\nrasdeps=%s\n" "$(SRCVERSION)" "$(libdir)" "$(prefix)" "$(LIBNDCTL_PKG_CONFIG_DEPS_VAR)" > $(PKG_CONFIG_COMMON) + +$(PKG_CONFIG_COMMON): pkg-cfg-common + +%.pc: $(PKG_CONFIG_COMMON) $(TOP)/utils/%.pc.in + @echo Generating $@ + @cat $(PKG_CONFIG_COMMON) > $@ + @cat $(TOP)/utils/$@.in >> $@ + +pkg-config: $(PKG_CONFIG_FILES) + +%-install: % + $(MAKE) -C $^ install + $(MAKE) -C $^ install DEBUG=1 + +$(eval $(call sub-target,$(INSTALL_TARGETS),uninstall,y)) +$(eval $(call sub-target,$(CLEAN_TARGETS),clean,y)) +$(eval $(call sub-target,$(CLOBBER_TARGETS),clobber,y)) +$(eval $(call sub-target,$(CSTYLE_TARGETS),cstyle,n)) +$(eval $(call sub-target,$(CSTYLE_TARGETS),format,n)) +$(eval $(call sub-target,$(SPARSE_TARGETS),sparse,n)) + +$(DEBUG_RELEASE_TARGETS): + $(MAKE) -C $@ +ifeq ($(custom_build),) + $(MAKE) -C $@ DEBUG=1 +endif + +$(RELEASE_TARGETS): + $(MAKE) -C $@ + +df: + @echo ==================================================================================== + @if [ -e test/testconfig.sh ]; then \ + bash -c '. test/testconfig.sh; \ + if [ -n "$$PMEM_FS_DIR" ]; then df -h "$$PMEM_FS_DIR"; stat -f "$$PMEM_FS_DIR"; fi; \ + if [ -n "$$NON_PMEM_FS_DIR" ]; then df -h "$$NON_PMEM_FS_DIR"; stat -f "$$NON_PMEM_FS_DIR";fi'; \ + fi + @echo ==================================================================================== + +test: all + $(MAKE) -C test test + +check pcheck pycheck: test df + $(MAKE) -C test $@ + +require-rpmem: +ifneq ($(BUILD_RPMEM),y) + $(error ERROR: cannot run remote tests because $(BUILD_RPMEM_INFO)) +endif + +check-remote: require-rpmem test + $(MAKE) -C test $@ + +# Re-generate pkg-config files on 'make install' (not on 'make all'), +# to handle the case when prefix is specified only for 'install'. +# Clean up generated files when done. 
+install: all pkg-config + install -d $(HEADERS_DESTDIR) + install -p -m 0644 $(HEADERS_INSTALL) $(HEADERS_DESTDIR) + install -d $(HEADERS_DESTDIR)/libpmemobj + install -p -m 0644 $(OBJ_HEADERS_INSTALL) $(HEADERS_DESTDIR)/libpmemobj + install -d $(PKG_CONFIG_DESTDIR) + install -p -m 0644 $(PKG_CONFIG_FILES) $(PKG_CONFIG_DESTDIR) + install -d $(PMREORDER_DESTDIR) + install -p -m 0644 $(PMREORDER_FILES) $(PMREORDER_DESTDIR) + @echo "#!/bin/sh" > $(PMREORDER_SRC)/pmreorder + @echo "exec python3 $(PMREORDER_ROOTDIR)/pmreorder.py $(SRCVERSION) \$$*" >> $(PMREORDER_SRC)/pmreorder + install -d $(PMREORDER_BIN) + install -p $(PMREORDER_SRC)/pmreorder $(PMREORDER_BIN) + $(RM) $(PKG_CONFIG_FILES) + +uninstall: + $(foreach f, $(HEADERS_INSTALL), $(RM) $(HEADERS_DESTDIR)/$(notdir $(f))) + $(foreach f, $(OBJ_HEADERS_INSTALL), $(RM) $(HEADERS_DESTDIR)/libpmemobj/$(notdir $(f))) + $(foreach f, $(PKG_CONFIG_FILES), $(RM) $(PKG_CONFIG_DESTDIR)/$(notdir $(f))) + $(foreach f, $(PMREORDER_FILES), $(RM) $(PMREORDER_DESTDIR)/$(notdir $(f))) + $(RM) $(PMREORDER_BIN)/pmreorder + +cstyle: + $(STYLE_CHECK) check $(HEADERS) + $(CHECK_SHEBANG) $(SCRIPTS) + +format: + $(STYLE_CHECK) format $(HEADERS) + +cscope: + cscope -q -b $(SCOPEFILES) + ctags -e $(SCOPEFILES) + +clean-here: + $(RM) tags cscope.in.out cscope.out cscope.po.out *.pc $(TMP_HEADERS) + +clean: clean-here + +clobber: clean-here + +.PHONY: all install uninstall clean clobber cstyle format test check pcheck\ + cscope $(ALL_TARGETS)\ + pkg-config check-remote clean-here pkg-cfg-common require-rpmem diff --git a/src/pmdk/src/Makefile.inc b/src/pmdk/src/Makefile.inc new file mode 100644 index 000000000..04ab0192f --- /dev/null +++ b/src/pmdk/src/Makefile.inc @@ -0,0 +1,318 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation +# +# src/Makefile.inc -- common Makefile rules for PMDK +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST))).. + +include $(TOP)/src/common.inc + +INCLUDE = $(TOP)/src/include + +RPMEM_COMMON = $(TOP)/src/rpmem_common +vpath %.c $(RPMEM_COMMON) + +COMMON = $(TOP)/src/common +vpath %.c $(COMMON) + +CORE = $(TOP)/src/core +vpath %.c $(CORE) + +PMEM2 = $(TOP)/src/libpmem2 +vpath %.c $(PMEM2) + +INCS += -I../include -I../common/ -I../core/ $(OS_INCS) + +# default CFLAGS +DEFAULT_CFLAGS += -std=gnu99 +DEFAULT_CFLAGS += -Wall +DEFAULT_CFLAGS += -Werror +DEFAULT_CFLAGS += -Wmissing-prototypes +DEFAULT_CFLAGS += -Wpointer-arith +DEFAULT_CFLAGS += -Wsign-conversion +DEFAULT_CFLAGS += -Wsign-compare + +ifeq ($(WCONVERSION_AVAILABLE), y) +DEFAULT_CFLAGS += -Wconversion +endif + +ifeq ($(IS_ICC), n) +DEFAULT_CFLAGS += -Wunused-macros +DEFAULT_CFLAGS += -Wmissing-field-initializers +endif + +ifeq ($(WUNREACHABLE_CODE_RETURN_AVAILABLE), y) +DEFAULT_CFLAGS += -Wunreachable-code-return +endif + +ifeq ($(WMISSING_VARIABLE_DECLARATIONS_AVAILABLE), y) +DEFAULT_CFLAGS += -Wmissing-variable-declarations +endif + +ifeq ($(WFLOAT_EQUAL_AVAILABLE), y) +DEFAULT_CFLAGS += -Wfloat-equal +endif + +ifeq ($(WSWITCH_DEFAULT_AVAILABLE), y) +DEFAULT_CFLAGS += -Wswitch-default +endif + +ifeq ($(WCAST_FUNCTION_TYPE_AVAILABLE), y) +DEFAULT_CFLAGS += -Wcast-function-type +endif + +ifeq ($(WSTRINGOP_TRUNCATION_AVAILABLE), y) +DEFAULT_CFLAGS += -DSTRINGOP_TRUNCATION_SUPPORTED +endif + +ifeq ($(DEBUG),1) +# Undefine _FORTIFY_SOURCE in case it's set in system-default or +# user-defined CFLAGS as it conflicts with -O0. 
+DEBUG_CFLAGS += -Wp,-U_FORTIFY_SOURCE +DEBUG_CFLAGS += -O0 -ggdb -DDEBUG +LIB_SUBDIR = /pmdk_debug +OBJDIR = debug +else +DEFAULT_CFLAGS += -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 +LIB_SUBDIR = +OBJDIR = nondebug +endif + +# use defaults, if system or user-defined CFLAGS are not specified +CFLAGS ?= $(DEFAULT_CFLAGS) + +CFLAGS += -std=gnu99 +CFLAGS += -fno-common +CFLAGS += -pthread +CFLAGS += -DSRCVERSION=\"$(SRCVERSION)\" + +ifeq ($(OS_DIMM),ndctl) +CFLAGS += -DSDS_ENABLED +CFLAGS += $(OS_DIMM_CFLAG) +endif + +ifeq ($(COVERAGE),1) +CFLAGS += $(GCOV_CFLAGS) +LDFLAGS += $(GCOV_LDFLAGS) +LIBS += $(GCOV_LIBS) +endif + +ifeq ($(VALGRIND),0) +CFLAGS += -DVALGRIND_ENABLED=0 +CXXFLAGS += -DVALGRIND_ENABLED=0 +endif + +ifeq ($(FAULT_INJECTION),1) +CFLAGS += -DFAULT_INJECTION=1 +CXXFLAGS += -DFAULT_INJECTION=1 +endif + +ifneq ($(SANITIZE),) +CFLAGS += -fsanitize=$(tsanitize) +LDFLAGS += -fsanitize=$(tsanitize) +endif + +CFLAGS += $(EXTRA_CFLAGS) + +ifeq ($(DEBUG),1) +CFLAGS += $(EXTRA_CFLAGS_DEBUG) $(DEBUG_CFLAGS) +else +CFLAGS += $(EXTRA_CFLAGS_RELEASE) +endif + +LDFLAGS += -Wl,-z,relro -Wl,--fatal-warnings -Wl,--warn-common $(EXTRA_LDFLAGS) + +ifneq ($(NORPATH),1) +LDFLAGS += -Wl,-rpath=$(libdir)$(LIB_SUBDIR):$(LIBFABRIC_LD_LIBRARY_PATHS):$(LIBNDCTL_LD_LIBRARY_PATHS) +endif + +ifeq ($(LIBRT_NEEDED), y) +LIBS += -lrt +endif + +define arch32_error_msg + +################################################## +### 32-bit builds of PMDK are not supported! ### +### Please, use 64-bit platform/compiler. ### +################################################## + +endef + +TESTCMD := $(CC) $(CFLAGS) -dM -E -x c /dev/null -o /dev/null +TESTBUILD := $(shell $(TESTCMD) && echo 1 || echo 0) +ifneq ($(TESTBUILD), 1) +$(error "$(TESTCMD)" failed) +endif + +ifeq ($(filter $(ARCH), x86_64 aarch64 ppc64),) +$(error unsupported architecture: $(ARCH)) +endif + +LP64 := $(shell $(CC) $(CFLAGS) -dM -E -x c /dev/null | grep -Ec "__SIZEOF_LONG__.+8|__SIZEOF_POINTER__.+8" ) +ifneq ($(LP64), 2) +$(error $(arch32_error_msg)) +endif + +LIBS_DESTDIR = $(DESTDIR)$(libdir)$(LIB_SUBDIR) + +DIRNAME = $(shell basename $(CURDIR)) + +ifeq ($(OBJDIR),$(abspath $(OBJDIR))) +objdir = $(OBJDIR)/$(DIRNAME) +else +objdir = ../$(OBJDIR)/$(DIRNAME) +endif + +LIB_OUTDIR ?= $(objdir)/.. 
+ +ifneq ($(LIB_OUTDIR),) +LDFLAGS += -L$(LIB_OUTDIR) +endif + +ifneq ($(SOURCE),) +_OBJS = $(SOURCE:.c=.o) +_OBJS_COMMON = $(patsubst $(COMMON)/%, %, $(_OBJS)) +_OBJS_CORE = $(patsubst $(CORE)/%, %, $(_OBJS_COMMON)) +_OBJS_PMEM2 = $(patsubst $(PMEM2)/%, %, $(_OBJS_CORE)) +_OBJS_RPMEM_COMMON = $(patsubst $(RPMEM_COMMON)/%, %, $(_OBJS_PMEM2)) +OBJS += $(addprefix $(objdir)/, $(_OBJS_RPMEM_COMMON)) +endif + +ifneq ($(HEADERS),) +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addsuffix tmp, $(HEADERS)) +TMP_HEADERS := $(addprefix $(objdir)/, $(TMP_HEADERS)) +endif +endif + +ifneq ($(LIBRARY_NAME),) +LIB_NAME = lib$(LIBRARY_NAME) +endif + +ifneq ($(LIBRARY_SO_VERSION),) +LIB_LINK = $(LIB_NAME).link +LIB_SONAME = $(LIB_NAME).so.$(LIBRARY_SO_VERSION) +LIB_SO = $(LIB_OUTDIR)/$(LIB_NAME).so + +LIB_SO_SONAME = $(LIB_SO).$(LIBRARY_SO_VERSION) + +ifneq ($(LIBRARY_VERSION),) +LIB_SO_REAL = $(LIB_SO_SONAME).$(LIBRARY_VERSION) +else +$(error LIBRARY_VERSION not set) +endif + +TARGET_LIBS = $(LIB_SO_REAL) +TARGET_LINKS = $(LIB_SO_SONAME) $(LIB_SO) +endif + +ifneq ($(LIB_NAME),) +LIB_AR = $(LIB_OUTDIR)/$(LIB_NAME).a +LIB_AR_UNSCOPED = $(objdir)/$(LIB_NAME)_unscoped.o +LIB_AR_ALL = $(objdir)/$(LIB_NAME)_all.o +TARGET_LIBS += $(LIB_AR) +endif + +ifneq ($(EXTRA_TARGETS),) +EXTRA_TARGETS_CLEAN = $(EXTRA_TARGETS:=-clean) +EXTRA_TARGETS_CLOBBER = $(EXTRA_TARGETS:=-clobber) +endif + +PMEMLOG_PRIV_OBJ=$(LIB_OUTDIR)/libpmemlog/libpmemlog_unscoped.o +PMEMBLK_PRIV_OBJ=$(LIB_OUTDIR)/libpmemblk/libpmemblk_unscoped.o + +ifneq ($(LIBPMEMLOG_PRIV_FUNCS),) +OBJS += pmemlog_priv_funcs.o +endif + +ifneq ($(LIBPMEMBLK_PRIV_FUNCS),) +OBJS += pmemblk_priv_funcs.o +endif + +MAKEFILE_DEPS=../Makefile.inc Makefile $(TOP)/src/common.inc + +all: $(objdir) $(LIB_OUTDIR) $(EXTRA_TARGETS) $(LIB_AR) $(LIB_SO_SONAME) $(LIB_SO_REAL) $(LIB_SO) $(TMP_HEADERS) + +$(objdir) $(LIB_OUTDIR): + $(MKDIR) -p $@ + +$(LIB_SO_REAL): $(OBJS) $(EXTRA_OBJS) $(LIB_LINK) $(MAKEFILE_DEPS) + $(CC) $(LDFLAGS) -shared -Wl,--version-script=$(LIB_LINK),-soname,$(LIB_SONAME) -o $@ $(OBJS) $(EXTRA_OBJS) $(LIBS) + +$(LIB_SO_SONAME): $(LIB_SO_REAL) $(MAKEFILE_DEPS) + $(LN) -sf $(shell basename $<) $@ + +$(LIB_SO): $(LIB_SO_SONAME) $(MAKEFILE_DEPS) + $(LN) -sf $(shell basename $<) $@ + +$(LIB_AR_UNSCOPED): $(OBJS) $(EXTRA_OBJS) $(MAKEFILE_DEPS) + $(LD) -o $@ -r $(OBJS) $(EXTRA_OBJS) + +ifeq ($(LIB_LINK),) +$(LIB_AR_ALL): $(LIB_AR_UNSCOPED) $(MAKEFILE_DEPS) + $(OBJCOPY) $< $@ +else +$(LIB_AR_ALL): $(LIB_AR_UNSCOPED) $(LIB_LINK) $(MAKEFILE_DEPS) + $(OBJCOPY) --localize-hidden `sed -n 's/^ *\([a-zA-Z0-9_]*\);$$/-G \1/p' $(LIB_LINK)` $< $@ +endif + +$(LIB_AR): $(LIB_AR_ALL) $(MAKEFILE_DEPS) + $(AR) rv $@ $(LIB_AR_ALL) + +$(PMEMBLK_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemblk + +install: all +ifneq ($(LIBRARY_NAME),) + $(INSTALL) -d $(LIBS_DESTDIR) + $(INSTALL) -p -m 0755 $(TARGET_LIBS) $(LIBS_DESTDIR) + $(CP) -d $(TARGET_LINKS) $(LIBS_DESTDIR) +endif + +uninstall: +ifneq ($(LIBRARY_NAME),) + $(foreach f, $(TARGET_LIBS), $(RM) $(LIBS_DESTDIR)/$(notdir $(f))) + $(foreach f, $(TARGET_LINKS), $(RM) $(LIBS_DESTDIR)/$(notdir $(f))) +endif + +clean: $(EXTRA_TARGETS_CLEAN) +ifneq ($(LIBRARY_NAME),) + $(RM) $(OBJS) $(TMP_HEADERS) + $(RM) $(LIB_AR_ALL) $(LIB_AR_UNSCOPED) +endif + +clobber: clean $(EXTRA_TARGETS_CLOBBER) +ifneq ($(LIBRARY_NAME),) + $(RM) $(LIB_AR) $(LIB_SO_SONAME) $(LIB_SO_REAL) $(LIB_SO) + $(RM) -r $(objdir)/.deps + $(RM) -f *.link +endif + +$(eval $(cstyle-rule)) + +$(objdir)/%.o: %.c $(MAKEFILE_DEPS) + $(call check-cstyle, $<) + @mkdir -p 
$(objdir)/.deps + $(CC) -MD -c -o $@ $(CFLAGS) $(INCS) -fPIC $(call coverage-path, $<) + $(call check-os, $@, $<) + $(create-deps) + +sparse: + $(if $(SOURCE), $(sparse-c)) + +$(objdir)/%.htmp: %.h + $(call check-cstyle, $<, $@) + +.PHONY: all clean clobber install uninstall cstyle + +-include $(objdir)/.deps/*.P + +%.link: %.link.in +ifeq ($(FAULT_INJECTION),1) + @sed 's/fault_injection;/$(LIBRARY_NAME)_inject_fault_at;\n\t\t$(LIBRARY_NAME)_fault_injection_enabled;/g' $< > $@_temp +else + @sed '/fault_injection;/d' $< > $@_temp +endif + @mv $@_temp $@ diff --git a/src/pmdk/src/PMDK.sln b/src/pmdk/src/PMDK.sln new file mode 100644 index 000000000..eb8d486d7 --- /dev/null +++ b/src/pmdk/src/PMDK.sln @@ -0,0 +1,2240 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.26730.15 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmemlog_minimal", "examples\libpmemobj\pmemlog\obj_pmemlog_minimal.vcxproj", "{0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_arenas", "test\obj_ctl_arenas\obj_ctl_arenas.vcxproj", "{019F5586-5558-4C87-B319-85906D4AE407}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_movnt_align", "test\pmem_movnt_align\pmem_movnt_align.vcxproj", "{025E7D51-41F2-4CBA-956E-C37A4443DB1B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "full_copy", "examples\libpmem\full_copy.vcxproj", "{0287C3DC-AE03-4714-AAFF-C52F062ECA6F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "traces_custom_function", "test\traces_custom_function\traces_custom_function.vcxproj", "{02BC3B44-C7F1-4793-86C1-6F36CA8A7F53}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_stats", "test\obj_ctl_stats\obj_ctl_stats.vcxproj", "{03228F84-4F41-4BCC-8C2D-F329DC87B289}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_memblock", "test\obj_memblock\obj_memblock.vcxproj", "{0388E945-A655-41A7-AF27-8981CEE0E49A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_direct_volatile", "test\obj_direct_volatile\obj_direct_volatile.vcxproj", "{03B54A12-7793-4827-B820-C07491F7F45E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_callbacks", "test\obj_tx_callbacks\obj_tx_callbacks.vcxproj", "{0529575C-F6E8-44FD-BB82-82A29948D0F2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "out_err_mt", "test\out_err_mt\out_err_mt.vcxproj", "{063037B2-CA35-4520-811C-19D9C4ED891E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmemlog_macros", "examples\libpmemobj\pmemlog\obj_pmemlog_macros.vcxproj", "{06877FED-15BA-421F-85C9-1A964FB97446}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_mt", "test\obj_tx_mt\obj_tx_mt.vcxproj", "{0703E813-9CC8-4DEA-AA33-42B099CD172D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_heap_interrupt", "test\obj_heap_interrupt\obj_heap_interrupt.vcxproj", "{07A153D9-DF17-4DE8-A3C2-EBF171B961AE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_parse_size", "test\util_parse_size\util_parse_size.vcxproj", "{08B62E36-63D2-4FF1-A605-4BBABAEE73FB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_include", "test\log_include\log_include.vcxproj", "{0A049EAD-652F-4E20-8026-90FD99AEE77A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmemlog", 
"libpmemlog\libpmemlog.vcxproj", "{0B1818EB-BDC8-4865-964F-DB8BF05CFD86}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reader", "examples\libpmemobj\string_store\reader.vcxproj", "{0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Examples", "Examples", "{0CC6D525-806E-433F-AB4A-6CFD546418B1}" + ProjectSection(SolutionItems) = preProject + examples\ex_common.h = examples\ex_common.h + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_critnib", "test\obj_critnib\obj_critnib.vcxproj", "{0CDCEB97-3270-4939-A290-EA2D3BE34B0C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_deep_persist", "test\pmem_deep_persist\pmem_deep_persist.vcxproj", "{0D4E38EF-A9D5-4797-8994-5DBB1125C9EA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_perror", "test\pmem2_perror\pmem2_perror.vcxproj", "{0DF30DE0-7F7D-43D3-940A-809EC27D3061}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "btree", "examples\libpmemobj\btree.vcxproj", "{0FB8F0FD-276C-413B-97A8-67ABE0C9043B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_direct", "test\obj_direct\obj_direct.vcxproj", "{10469175-EEF7-44A0-9961-AC4E45EFD800}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_movnt", "test\pmem2_movnt\pmem2_movnt.vcxproj", "{10B732EF-1783-4B61-B431-36BA5A2A3C9C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pi", "examples\libpmemobj\pi.vcxproj", "{11D76FBC-DFAA-4B31-9DB0-206E171E3F94}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmemspoil", "test\tools\pmemspoil\pmemspoil.vcxproj", "{11E158AE-C85A-4A6E-B66A-ED2994709276}" + ProjectSection(ProjectDependencies) = postProject + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_map_file", "test\pmem_map_file\pmem_map_file.vcxproj", "{12A1A3EF-202C-4DD0-9B5A-F5126CAB078F}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmem", "libpmem", "{1434B17C-6165-4D42-BEA1-5A7730D5A6BB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_recreate", "test\obj_recreate\obj_recreate.vcxproj", "{1464398A-100F-4518-BDB9-939A6362B6CF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dllview", "test\tools\dllview\dllview.vcxproj", "{179BEB5A-2C90-44F5-A734-FA756A5E668C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rbtree_map", "examples\libpmemobj\tree_map\rbtree_map.vcxproj", "{17A4B817-68B1-4719-A9EF-BD8FAB747DE6}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "pmempool", "pmempool", "{181A4234-282C-41F0-85C2-2B7697B3CB1A}" + ProjectSection(SolutionItems) = preProject + ..\doc\pmempool\pmempool-check.1.md = ..\doc\pmempool\pmempool-check.1.md + ..\doc\pmempool\pmempool-convert.1.md = ..\doc\pmempool\pmempool-convert.1.md + ..\doc\pmempool\pmempool-create.1.md = ..\doc\pmempool\pmempool-create.1.md + ..\doc\pmempool\pmempool-dump.1.md = 
..\doc\pmempool\pmempool-dump.1.md + ..\doc\pmempool\pmempool-info.1.md = ..\doc\pmempool\pmempool-info.1.md + ..\doc\pmempool\pmempool-rm.1.md = ..\doc\pmempool\pmempool-rm.1.md + ..\doc\pmempool\pmempool-sync.1.md = ..\doc\pmempool\pmempool-sync.1.md + ..\doc\pmempool\pmempool-transform.1.md = ..\doc\pmempool\pmempool-transform.1.md + ..\doc\pmempool\pmempool.1.md = ..\doc\pmempool\pmempool.1.md + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_non_zero", "test\blk_non_zero\blk_non_zero.vcxproj", "{18E90E1A-F2E0-40DF-9900-A14E560C9EB4}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {F7C6C6B6-4142-4C82-8699-4A9D8183181B} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemlog", "libpmemlog", "{1A36B57B-2E88-4D81-89C0-F575C9895E36}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cmpmap", "test\tools\cmpmap\cmpmap.vcxproj", "{1B871BA2-3F70-4BC9-9DF4-725EB07F6628}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_libpmemblk", "test\ex_libpmemblk\ex_libpmemblk.vcxproj", "{1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmemobj", "libpmemobj\libpmemobj.vcxproj", "{1BAA1617-93AE-4196-8A1A-BD492FB18AEF}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "map_multiple_files", "examples\libpmem2\map_multiple_files\map_multiple_files.vcxproj", "{1BFBAFED-A9CE-49AF-AB2C-84199E391EE6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_persist", "test\pmem2_persist\pmem2_persist.vcxproj", "{1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "writer", "examples\libpmemobj\string_store_tx_type\writer.vcxproj", "{1EB3DE5B-6357-498D-8CAC-EEC0209EA454}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "win_lists", "test\win_lists\win_lists.vcxproj", "{1F2E1C51-2B14-4047-BE6D-52E00FC3C780}" + ProjectSection(ProjectDependencies) = postProject + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_recovery", "test\obj_recovery\obj_recovery.vcxproj", "{2498FCDA-E2CC-43EF-9A35-8CD63F253171}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bttdevice", "test\bttdevice\bttdevice.vcxproj", "{25758581-DD46-4AE4-99D9-11E736F72AD1}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_transform", "test\pmempool_transform\pmempool_transform.vcxproj", "{26166DF1-3C94-44AF-9075-BA31DCD2F6BB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_test_win", "test\libpmempool_api_win\libpmempool_test_win.vcxproj", "{27FA11C6-431D-41D1-A417-FAB7C4F93DCA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_toid", "test\obj_toid\obj_toid.vcxproj", "{296F3C5D-3951-423E-8E2F-FD4A37958C72}" +EndProject 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_include", "test\blk_include\blk_include.vcxproj", "{29D9376B-DC36-4940-83F1-A7CBE38A2103}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_dump", "test\pmempool_dump\pmempool_dump.vcxproj", "{2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "out_err_mt_win", "test\out_err_mt_win\out_err_mt_win.vcxproj", "{2B1A5104-A324-4D02-B5C7-D021FB8F880C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_fragmentation2", "test\obj_fragmentation2\obj_fragmentation2.vcxproj", "{2B2DE575-1422-4FBF-97BE-35AEDA0AB465}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_include", "test\pmem_include\pmem_include.vcxproj", "{2B7772E6-9DAA-4F38-B0BC-7B2399366325}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "poolset", "poolset", "{2C24CC4F-B340-467D-908F-1BF2C69BC79F}" + ProjectSection(SolutionItems) = preProject + ..\doc\poolset\poolset.5.md = ..\doc\poolset\poolset.5.md + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lists", "examples\libpmemobj\lists.vcxproj", "{2CD7408E-2F60-43C3-ACEB-C7D58CDD8462}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_locks", "test\obj_locks\obj_locks.vcxproj", "{2DE6B085-3C19-49B1-894A-AD9376000E09}" + ProjectSection(ProjectDependencies) = postProject + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_action", "test\obj_action\obj_action.vcxproj", "{2ED26FDA-3C4E-4514-B387-5E77C302FF71}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmemdetect", "test\tools\pmemdetect\pmemdetect.vcxproj", "{2EFFC590-BF5E-46A2-AF04-E67E1D571D2E}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmempool", "libpmempool", "{2F543422-4B8A-4898-BE6B-590F52B4E9D1}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmemcore", "core\libpmemcore.vcxproj", "{2FA3155B-6F26-4D15-AC03-9D82D48DBC42}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bttcreate", "test\tools\bttcreate\bttcreate.vcxproj", "{3142CB13-CADA-48D3-9A25-E6ACB243760A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_map_file_trunc", "test\pmem_map_file_trunc\pmem_map_file_trunc.vcxproj", "{34DB4951-DA08-45F1-938D-B08E5FF5AB46}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_source", "test\pmem2_source\pmem2_source.vcxproj", "{34F31D9D-3D33-4C09-85A3-4749A8AB8EBB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "list_map", "examples\libpmemobj\list_map\list_map.vcxproj", "{3799BA67-3C4F-4AE0-85DC-5BAAEA01A180}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "traces_pmem", "test\traces_pmem\traces_pmem.vcxproj", "{3B23831B-E5DE-4A62-9D0B-27D0D9F293F4}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_source_alignment", "test\pmem2_source_alignment\pmem2_source_alignment.vcxproj", "{3B44D717-EEDE-470A-B631-C9D6BFE4ADF2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_pool", "test\log_pool\log_pool.vcxproj", 
"{3CF270CD-0F56-48E3-AD84-82F369C568BF}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} = {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log", "examples\libpmem2\log\log.vcxproj", "{3EC20BDD-2E48-4291-A9EE-D0675AF77C7F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sparsefile", "test\tools\sparsefile\sparsefile.vcxproj", "{3EC30D6A-BDA4-4971-879A-8814204EAE31}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pool", "test\obj_pool\obj_pool.vcxproj", "{3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rtree_map", "examples\libpmemobj\tree_map\rtree_map.vcxproj", "{3ED56E55-84A6-422C-A8D4-A8439FB8F245}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_alloc", "test\obj_alloc\obj_alloc.vcxproj", "{42B97D47-F800-4100-BFA2-B3AC357E8B6B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_info", "test\pmempool_info\pmempool_info.vcxproj", "{42CCEF95-5ADD-460C-967E-DD5B2C744943}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "getopt", "test\getopt\getopt.vcxproj", "{433F7840-C597-4950-84C9-E4FF7DF6A298}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "sys", "sys", "{45027FC5-4A32-47BD-AC5B-66CC7616B1D2}" + ProjectSection(SolutionItems) = preProject + windows\include\sys\file.h = windows\include\sys\file.h + windows\include\sys\mman.h = windows\include\sys\mman.h + windows\include\sys\mount.h = windows\include\sys\mount.h + windows\include\sys\param.h = windows\include\sys\param.h + windows\include\sys\resource.h = windows\include\sys\resource.h + windows\include\sys\statvfs.h = windows\include\sys\statvfs.h + windows\include\sys\uio.h = windows\include\sys\uio.h + windows\include\sys\wait.h = windows\include\sys\wait.h + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_vm_reservation", "test\pmem2_vm_reservation\pmem2_vm_reservation.vcxproj", "{46629F21-089C-4205-B2F8-E01748ECE517}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctl_cow", "test\ctl_cow\ctl_cow.vcxproj", "{46B82069-10BE-432A-8D93-F4D995148555}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pool_lookup", "test\obj_pool_lookup\obj_pool_lookup.vcxproj", "{4850F425-9128-4E91-973C-5AE7BD97395B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmemcommon", "common\libpmemcommon.vcxproj", "{492BAA3D-0D5D-478E-9765-500463AE69AA}" + ProjectSection(ProjectDependencies) = postProject + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmap", "examples\libpmemobj\map\libmap.vcxproj", "{49A7CC5A-D5E7-4A07-917F-C6918B982BE8}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "util", "util", "{4C291EEB-3874-4724-9CC2-1335D13FF0EE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_alloc", "test\obj_tx_alloc\obj_tx_alloc.vcxproj", "{4C429783-0B01-449F-A36F-C2019233890B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmemalloc", "test\tools\pmemalloc\pmemalloc.vcxproj", "{4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF}" + 
ProjectSection(ProjectDependencies) = postProject + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_include", "test\libpmempool_include\libpmempool_include.vcxproj", "{4E334022-7A71-4197-9E15-878F7EFC877E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_memmove", "test\pmem2_memmove\pmem2_memmove.vcxproj", "{4EE3C4D6-F707-4A05-8032-8FC2A44D29E8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_walker", "test\log_walker\log_walker.vcxproj", "{4FB4FF90-4E92-4CFB-A01F-C73D6861CA03}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_poolset_parse", "test\util_poolset_parse\util_poolset_parse.vcxproj", "{50FD1E47-2131-48D2-9435-5CB28DF6B15A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asset_checkout", "examples\libpmemblk\assetdb\asset_checkout.vcxproj", "{513C4CFA-BD5B-4470-BA93-F6D43778A754}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "arch_flags", "test\arch_flags\arch_flags.vcxproj", "{53115A01-460C-4339-A2C8-AE1323A6E7EA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mmap", "test\mmap\mmap.vcxproj", "{5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1}" + ProjectSection(ProjectDependencies) = postProject + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_mem_ext", "test\pmem2_mem_ext\pmem2_mem_ext.vcxproj", "{5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asset_checkin", "examples\libpmemblk\assetdb\asset_checkin.vcxproj", "{581B3A58-F3F0-4765-91E5-D0C82816A528}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_basic_integration", "test\obj_basic_integration\obj_basic_integration.vcxproj", "{58386481-30B7-40FC-96AF-0723A4A7B228}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "pmempool", "pmempool", "{59AB6976-D16B-48D0-8D16-94360D3FE51D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reader", "examples\libpmemobj\string_store_tx\reader.vcxproj", "{59D7A9CD-9912-40E4-96E1-8A873F777F62}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_map_prot", "test\pmem2_map_prot\pmem2_map_prot.vcxproj", "{59D9E21C-57D7-4D18-B792-24738BD26DE4}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_memmove", "test\pmem_memmove\pmem_memmove.vcxproj", "{5A391A14-8E29-4788-93FC-EDADED31D32F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_map_file_win", "test\pmem_map_file_win\pmem_map_file_win.vcxproj", "{5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "data_store", "examples\libpmemobj\map\data_store.vcxproj", "{5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_many_size_allocs", "test\obj_many_size_allocs\obj_many_size_allocs.vcxproj", "{5D362DB7-D2BD-4907-AAD8-4B8627E72282}" + ProjectSection(ProjectDependencies) = postProject + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} 
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmemlog_simple", "examples\libpmemobj\pmemlog\obj_pmemlog_simple.vcxproj", "{5DB2E259-0D19-4A89-B8EC-B2912F39924D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "unsafe_shutdown", "examples\libpmem2\unsafe_shutdown\unsafe_shutdown.vcxproj", "{5E005D50-1C73-4E52-B295-864BB9AF7AC6}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_user_data", "test\obj_tx_user_data\obj_tx_user_data.vcxproj", "{5E7305DB-93E6-448B-AE44-90EAF916A776}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_sds", "test\util_sds\util_sds.vcxproj", "{5EC35099-9777-45E8-9520-EB2EE75BDF88}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_rm", "test\libpmempool_rm\libpmempool_rm.vcxproj", "{5F2B687A-1B42-439C-AEEC-135DD22FB851}"
+	ProjectSection(ProjectDependencies) = postProject
+		{1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_is_pmem_windows", "test\pmem_is_pmem_windows\pmem_is_pmem_windows.vcxproj", "{5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmemlog", "examples\libpmemobj\pmemlog\obj_pmemlog.vcxproj", "{60206D22-E132-4695-8486-10BECA32C5CC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_backup", "test\libpmempool_backup\libpmempool_backup.vcxproj", "{60B463D4-8CD5-4BF6-A25B-01BE13B87590}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_fragmentation", "test\obj_fragmentation\obj_fragmentation.vcxproj", "{60EF55C7-8399-4543-B5B2-3AE2C532C67E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_rw_mt", "test\blk_rw_mt\blk_rw_mt.vcxproj", "{628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pool_lock", "test\obj_pool_lock\obj_pool_lock.vcxproj", "{63B8184D-85E0-4E6A-9729-558C567D1D1D}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemobj", "libpmemobj", "{63C9B3F8-437D-4AD9-B32D-D04AE38C35B6}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_strdup", "test\obj_tx_strdup\obj_tx_strdup.vcxproj", "{643B82A1-D009-46A9-92A0-2883399B05C2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_sync", "test\obj_sync\obj_sync.vcxproj", "{6516D6CF-8000-4341-9487-312BC83EE370}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmalloc_basic", "test\obj_pmalloc_basic\obj_pmalloc_basic.vcxproj", "{65D92D98-97E1-48F7-AEF6-75221CF48EA4}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_memcpy", "test\pmem_memcpy\pmem_memcpy.vcxproj", "{673277EC-D26B-414D-92E3-84EE873316A8}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_memset", "test\pmem2_memset\pmem2_memset.vcxproj", "{6770917C-5B8E-49F1-9297-163FAB76DAFB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_rm_win", "test\libpmempool_rm_win\libpmempool_rm_win.vcxproj", "{67AC1343-98FD-4143-92C0-559C55F749F5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_rw", "test\blk_rw\blk_rw.vcxproj", "{6851356E-A5D9-46A6-8262-A7E208729F18}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {F7C6C6B6-4142-4C82-8699-4A9D8183181B}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "win_common", "test\win_common\win_common.vcxproj", "{6AE1B8BE-D46A-4E99-87A2-F160FB950DCA}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_poolset_size", "test\util_poolset_size\util_poolset_size.vcxproj", "{6B492754-9F80-44B3-A2A7-1D98AF06F3B2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_macro", "test\obj_list_macro\obj_list_macro.vcxproj", "{6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "string_store_tx", "string_store_tx", "{6D63CDF1-F62C-4614-AD8A-95B0A63AA070}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "set_funcs", "test\set_funcs\set_funcs.vcxproj", "{6D7C1169-3246-465F-B630-ECFEF4F3179A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_alignment", "test\obj_ctl_alignment\obj_ctl_alignment.vcxproj", "{6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_unmap", "test\pmem_unmap\pmem_unmap.vcxproj", "{6EC93484-AAF3-487E-84E4-5ABFBA0AFC53}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_poolset", "test\util_poolset\util_poolset.vcxproj", "{6F06A19B-0921-4B71-A3A5-B350B5FFEADB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_locks_abort", "test\obj_tx_locks_abort\obj_tx_locks_abort.vcxproj", "{6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_feature", "test\libpmempool_feature\libpmempool_feature.vcxproj", "{6F776280-B383-4DCE-8F42-9670164D038D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_out_of_memory", "test\obj_out_of_memory\obj_out_of_memory.vcxproj", "{70EE1D40-0C65-4985-8EFC-BD40EE3A89B2}"
+	ProjectSection(ProjectDependencies) = postProject
+		{1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_file_open", "test\util_file_open\util_file_open.vcxproj", "{715EADD7-0FFE-4F1F-94E7-49302968DF79}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_check", "test\obj_check\obj_check.vcxproj", "{71D182E0-345A-4375-B0FA-3536821B0EE3}"
+	ProjectSection(ProjectDependencies) = postProject
+		{1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "array", "examples\libpmemobj\array\array.vcxproj", "{7264C8F6-73FB-4830-9306-1558D3EAC71B}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list", "test\obj_list\obj_list.vcxproj", "{729E3905-FF7D-49C5-9871-6D35D839183E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_ravl", "test\util_ravl\util_ravl.vcxproj", "{72C9DB46-C665-48AD-B805-BA885B40CA3E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "writer", "examples\libpmemobj\string_store_tx\writer.vcxproj", "{7337E34A-97B0-44FC-988B-7E6AE7E0FBBF}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_memops", "test\obj_memops\obj_memops.vcxproj", "{740ED97D-005F-4F58-98B2-4EF5EF5776E8}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{746BA101-5C93-42A5-AC7A-64DCEB186572}"
+	ProjectSection(SolutionItems) = preProject
+		test\match = test\match
+		test\RUNTESTLIB.PS1 = test\RUNTESTLIB.PS1
+		test\RUNTESTS.ps1 = test\RUNTESTS.ps1
+		test\unittest\unittest.ps1 = test\unittest\unittest.ps1
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reader", "examples\libpmemobj\string_store_tx_type\reader.vcxproj", "{74D655D5-F661-4887-A1EB-5A6222AF5FCA}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_critnib_mt", "test\obj_critnib_mt\obj_critnib_mt.vcxproj", "{7701627C-CFD9-48F6-942E-EAACC8D057FA}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "linux", "linux", "{774627B7-6532-4464-AEE4-02F72CA44F95}"
+	ProjectSection(SolutionItems) = preProject
+		windows\include\linux\limits.h = windows\include\linux\limits.h
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemblk", "libpmemblk", "{7783BC49-A25B-468B-A6F8-AB6B39A91C65}"
+	ProjectSection(SolutionItems) = preProject
+		..\doc\libpmemblk\libpmemblk.7.md = ..\doc\libpmemblk\libpmemblk.7.md
+		..\doc\libpmemblk\pmemblk_bsize.3.md = ..\doc\libpmemblk\pmemblk_bsize.3.md
+		..\doc\libpmemblk\pmemblk_create.3.md = ..\doc\libpmemblk\pmemblk_create.3.md
+		..\doc\libpmemblk\pmemblk_read.3.md = ..\doc\libpmemblk\pmemblk_read.3.md
+		..\doc\libpmemblk\pmemblk_set_zero.3.md = ..\doc\libpmemblk\pmemblk_set_zero.3.md
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_pool_lock", "test\blk_pool_lock\blk_pool_lock.vcxproj", "{779425B1-2211-499B-A7CC-4F9EC6CB0D25}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {F7C6C6B6-4142-4C82-8699-4A9D8183181B}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "btree_map", "examples\libpmemobj\tree_map\btree_map.vcxproj", "{79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_extend", "test\obj_extend\obj_extend.vcxproj", "{7ABF755C-821B-49CD-8EDE-83C16594FF7F}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool", "tools\pmempool\pmempool.vcxproj", "{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA}"
+	ProjectSection(ProjectDependencies) = postProject
+		{492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA}
+		{901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_libpmem", "test\ex_libpmem\ex_libpmem.vcxproj", "{7DFEB4A5-8B04-4302-9D09-8144918FCF81}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_free", "test\obj_tx_free\obj_tx_free.vcxproj", "{7F51CD29-3BCD-4DD8-B327-F384B5A616D1}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asset_list", "examples\libpmemblk\assetdb\asset_list.vcxproj", "{8008010F-8718-4C5F-86B2-195AEBF73422}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "manpage", "examples\libpmemblk\manpage.vcxproj", "{8010BBB0-C71B-4EFF-95EB-65C01E5EC197}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_pool_win", "test\blk_pool_win\blk_pool_win.vcxproj", "{80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "win_poolset_unmap", "test\win_poolset_unmap\win_poolset_unmap.vcxproj", "{810DB909-6581-42D8-9616-906888F12149}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{853D45D8-980C-4991-B62A-DAC6FD245402}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_heap", "test\obj_heap\obj_heap.vcxproj", "{85D4076B-896B-4EBB-8F3A-8B44C24CD452}"
+	ProjectSection(ProjectDependencies) = postProject
+		{1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_debug", "test\obj_debug\obj_debug.vcxproj", "{85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_heap_state", "test\obj_heap_state\obj_heap_state.vcxproj", "{86EE22CC-6D3C-4F81-ADC8-394946F0DA81}"
+	ProjectSection(ProjectDependencies) = postProject
+		{1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}
+		{9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45}
+		{CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681}
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tools", "Tools", "{877E7D1D-8150-4FE5-A139-B6FBCEAEC393}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemobj", "libpmemobj", "{87A32959-E477-4CD5-8A1C-C85646D806B2}"
+	ProjectSection(SolutionItems) = preProject
+		..\doc\libpmemobj\libpmemobj.7.md = ..\doc\libpmemobj\libpmemobj.7.md
+		..\doc\libpmemobj\oid_is_null.3.md = ..\doc\libpmemobj\oid_is_null.3.md
+		..\doc\libpmemobj\pmemobj_action.3.md = ..\doc\libpmemobj\pmemobj_action.3.md
+		..\doc\libpmemobj\pmemobj_alloc.3.md = ..\doc\libpmemobj\pmemobj_alloc.3.md
+		..\doc\libpmemobj\pmemobj_ctl_get.3.md = ..\doc\libpmemobj\pmemobj_ctl_get.3.md
+		..\doc\libpmemobj\pmemobj_first.3.md = ..\doc\libpmemobj\pmemobj_first.3.md
+		..\doc\libpmemobj\pmemobj_list_insert.3.md = ..\doc\libpmemobj\pmemobj_list_insert.3.md
+		..\doc\libpmemobj\pmemobj_memcpy_persist.3.md = ..\doc\libpmemobj\pmemobj_memcpy_persist.3.md
+		..\doc\libpmemobj\pmemobj_mutex_zero.3.md = ..\doc\libpmemobj\pmemobj_mutex_zero.3.md
+		..\doc\libpmemobj\pmemobj_open.3.md = ..\doc\libpmemobj\pmemobj_open.3.md
+		..\doc\libpmemobj\pmemobj_root.3.md = ..\doc\libpmemobj\pmemobj_root.3.md
+		..\doc\libpmemobj\pmemobj_tx_add_range.3.md = ..\doc\libpmemobj\pmemobj_tx_add_range.3.md
+		..\doc\libpmemobj\pmemobj_tx_alloc.3.md = ..\doc\libpmemobj\pmemobj_tx_alloc.3.md
+		..\doc\libpmemobj\pmemobj_tx_begin.3.md = ..\doc\libpmemobj\pmemobj_tx_begin.3.md
+		..\doc\libpmemobj\pobj_layout_begin.3.md = ..\doc\libpmemobj\pobj_layout_begin.3.md
+		..\doc\libpmemobj\pobj_list_head.3.md = ..\doc\libpmemobj\pobj_list_head.3.md
+		..\doc\libpmemobj\toid_declare.3.md = ..\doc\libpmemobj\toid_declare.3.md
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmalloc_oom_mt", "test\obj_pmalloc_oom_mt\obj_pmalloc_oom_mt.vcxproj", "{88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_add_range_direct", "test\obj_tx_add_range_direct\obj_tx_add_range_direct.vcxproj", "{89F947CA-DDEF-4131-8AFB-584ABA4A1302}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "out_err", "test\out_err\out_err.vcxproj", "{8A0FA780-068A-4534-AA2F-4FF4CF977AF2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_bucket", "test\obj_bucket\obj_bucket.vcxproj", "{8A4872D7-A234-4B9B-8215-82C6BB15F3A2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmemblk", "examples\libpmemobj\pmemblk\obj_pmemblk.vcxproj", "{8C42CA7C-1543-4F1B-A55F-28CD419C7D35}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_oid_thread", "test\obj_oid_thread\obj_oid_thread.vcxproj", "{8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_persist_count", "test\obj_persist_count\obj_persist_count.vcxproj", "{8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_flow", "test\obj_tx_flow\obj_tx_flow.vcxproj", "{8E374371-30E1-4623-8755-2A2F3742170B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "srcversion", "windows\srcversion\srcversion.vcxproj", "{901F04DB-E1A5-4A41-8B81-9D31C19ACD59}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "getopt", "windows\getopt\getopt.vcxproj", "{9186EAC4-2F34-4F17-B940-6585D7869BCD}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemlog", "libpmemlog", "{91C30620-70CA-46C7-AC71-71F3C602690E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_realloc", "test\obj_realloc\obj_realloc.vcxproj", "{91E19AEB-7B75-43E0-B8B4-D2BB60D839EA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_movnt_align", "test\pmem2_movnt_align\pmem2_movnt_align.vcxproj", "{9233FC80-B51C-4A89-AF58-5AE86C068F6A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_create", "test\pmempool_create\pmempool_create.vcxproj", "{92388A20-50FC-45F8-89E3-71F1618EFABB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat_incompat_features", "test\compat_incompat_features\compat_incompat_features.vcxproj", "{924B2937-0B53-4DC6-B7E1-5F3102728F89}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_pool", "test\blk_pool\blk_pool.vcxproj", "{95B683BD-B9DC-400F-9BC0-8F1505F08BF5}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {F7C6C6B6-4142-4C82-8699-4A9D8183181B} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Windows", "Windows", "{95FAF291-03D1-42FC-9C10-424D551D475D}" + ProjectSection(SolutionItems) = preProject + common\common.rc = common\common.rc + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_movnt", "test\pmem_movnt\pmem_movnt.vcxproj", "{96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_cpuid", "test\util_cpuid\util_cpuid.vcxproj", "{98ACBE5D-1A92-46F9-AA81-533412172952}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_rm", "test\pmempool_rm\pmempool_rm.vcxproj", "{99F7F00F-1DE5-45EA-992B-64BA282FAC76}" +EndProject 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_uuid_generate", "test\util_uuid_generate\util_uuid_generate.vcxproj", "{9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "include", "include", "{9A8482A7-BF0C-423D-8266-189456ED41F6}" + ProjectSection(SolutionItems) = preProject + windows\include\dirent.h = windows\include\dirent.h + windows\include\endian.h = windows\include\endian.h + windows\include\err.h = windows\include\err.h + windows\include\features.h = windows\include\features.h + windows\include\libgen.h = windows\include\libgen.h + windows\include\platform.h = windows\include\platform.h + include\pmemcompat.h = include\pmemcompat.h + windows\include\sched.h = windows\include\sched.h + windows\include\srcversion.h = windows\include\srcversion.h + windows\include\strings.h = windows\include\strings.h + windows\include\unistd.h = windows\include\unistd.h + windows\include\win_mmap.h = windows\include\win_mmap.h + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_realloc", "test\obj_tx_realloc\obj_tx_realloc.vcxproj", "{9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Benchmarks", "Benchmarks", "{9C37B8CC-F810-4787-924D-65BC227091A3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_invalid", "test\obj_tx_invalid\obj_tx_invalid.vcxproj", "{9D9E33EB-4C24-4646-A3FB-35DA17247917}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmem", "libpmem\libpmem.vcxproj", "{9E9E3D25-2139-4A5D-9200-18148DDEAD45}" + ProjectSection(ProjectDependencies) = postProject + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "manpage", "examples\libpmemlog\manpage.vcxproj", "{9FF51F3E-AF36-4F45-A797-C5F03A090298}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmalloc_mt", "test\obj_pmalloc_mt\obj_pmalloc_mt.vcxproj", "{9FF62356-30B4-42A1-8DC7-45262A18DD44}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmem2", "libpmem2", "{A14A4556-9092-430D-B9CA-B2B1223D56CB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gran_detecto", "test\tools\gran_detecto\gran_detecto.vcxproj", "{A18B076A-CE8C-49A6-8B80-F02843E4BF0A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ddmap", "test\tools\ddmap\ddmap.vcxproj", "{A216BF23-FC5C-4426-BF20-8568A2AA5FA0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_test", "test\libpmempool_api\libpmempool_test.vcxproj", "{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_nblock", "test\blk_nblock\blk_nblock.vcxproj", "{A38EFCDB-53D6-4474-97F3-0DDC6CE70D76}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {F7C6C6B6-4142-4C82-8699-4A9D8183181B} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_check_version", "test\libpmempool_check_version\libpmempool_check_version.vcxproj", "{A39D1640-8DBA-450D-9103-2533C248991A}" +EndProject 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "out_err_win", "test\out_err_win\out_err_win.vcxproj", "{A57D9365-172E-4782-ADC6-82A594E30943}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_alloc_class_config", "test\obj_ctl_alloc_class_config\obj_ctl_alloc_class_config.vcxproj", "{A79E3093-B157-4B09-BABD-29266EA16407}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "addlog", "examples\libpmemlog\logfile\addlog.vcxproj", "{A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "basic", "examples\libpmem2\basic\basic.vcxproj", "{A9ADD224-1755-407F-906D-C13EC37FF7B0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_include", "test\obj_include\obj_include.vcxproj", "{AB15A115-E429-4123-BEBF-206FBA4CF615}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_sync", "test\libpmempool_sync\libpmempool_sync.vcxproj", "{AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_defrag_advanced", "test\obj_defrag_advanced\obj_defrag_advanced.vcxproj", "{AE952763-5C84-43FC-B344-CACC950F056C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "signal_handle", "test\signal_handle\signal_handle.vcxproj", "{AE9E908D-BAEC-491F-9914-436B3CE35E94}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_config", "test\obj_ctl_config\obj_ctl_config.vcxproj", "{AEAA72CD-E060-417C-9CA1-49B4738384E0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_feature", "test\pmempool_feature\pmempool_feature.vcxproj", "{AF038868-2432-4159-A62F-941F11D12C5D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "checksum", "test\checksum\checksum.vcxproj", "{AF0B7480-EBE3-486B-B0C8-134910BC9324}" + ProjectSection(ProjectDependencies) = postProject + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_transform_win", "test\libpmempool_transform_win\libpmempool_transform_win.vcxproj", "{B30C6212-A160-405A-8FE7-340E721738A2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmemwrite", "test\tools\pmemwrite\pmemwrite.vcxproj", "{B35BFA09-DE68-483B-AB61-8790E8F060A8}" + ProjectSection(ProjectDependencies) = postProject + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_memset", "test\pmem_memset\pmem_memset.vcxproj", "{B36F115C-8139-4C35-A3E7-E6BF9F3DA793}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_heap_size", "test\obj_ctl_heap_size\obj_ctl_heap_size.vcxproj", "{B379539C-E130-460D-AE82-4EBDD1A97845}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_mem", "test\obj_mem\obj_mem.vcxproj", "{B3AF8A19-5802-4A34-9157-27BBE4E53C0A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_linkedlist", "test\ex_linkedlist\ex_linkedlist.vcxproj", "{B440BB05-37A8-42EA-98D3-D83EB113E497}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_compat", "test\pmem2_compat\pmem2_compat.vcxproj", "{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_include", "test\pmem2_include\pmem2_include.vcxproj", "{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF}" +EndProject 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_sync_win", "test\libpmempool_sync_win\libpmempool_sync_win.vcxproj", "{B6DA6617-D98F-4A4D-A7C4-A317212924BF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tools_pmemspoil", "test\pmemspoil\pmemspoil.vcxproj", "{B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pool_win", "test\obj_pool_win\obj_pool_win.vcxproj", "{B775480C-5B32-4F64-B026-47367280EC56}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "windows", "windows", "{B870D8A6-12CD-4DD0-B843-833695C2310A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_recovery", "test\obj_list_recovery\obj_list_recovery.vcxproj", "{B887EA26-846C-4D6A-B0E4-432487506BC7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemlog", "libpmemlog", "{B8A4320D-E9A3-4F89-A8AA-B16D746C158A}" + ProjectSection(SolutionItems) = preProject + ..\doc\libpmemlog\libpmemlog.7.md = ..\doc\libpmemlog\libpmemlog.7.md + ..\doc\libpmemlog\pmemlog_append.3.md = ..\doc\libpmemlog\pmemlog_append.3.md + ..\doc\libpmemlog\pmemlog_create.3.md = ..\doc\libpmemlog\pmemlog_create.3.md + ..\doc\libpmemlog\pmemlog_nbyte.3.md = ..\doc\libpmemlog\pmemlog_nbyte.3.md + ..\doc\libpmemlog\pmemlog_tell.3.md = ..\doc\libpmemlog\pmemlog_tell.3.md + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "buffons_needle_problem", "examples\libpmemobj\buffons_needle_problem.vcxproj", "{BA0EF7F5-BE6C-4B61-9D5F-1480462EE001}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_first_next", "test\obj_first_next\obj_first_next.vcxproj", "{BABC6427-E533-4DCF-91E3-B5B2ED253F46}" + ProjectSection(ProjectDependencies) = postProject + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_move", "test\obj_list_move\obj_list_move.vcxproj", "{BAE107BA-7618-4972-8188-2D3CDAAE0453}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_layout", "test\obj_layout\obj_layout.vcxproj", "{BB1120CF-B721-4EF9-8735-58F76AE51D2F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mapcli", "examples\libpmemobj\map\mapcli.vcxproj", "{BB248BAC-6E1B-433C-A254-75140A273AB5}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "map", "map", "{BD6CC700-B36B-435B-BAF9-FC5AFCD766C9}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctree_map", "examples\libpmemobj\tree_map\ctree_map.vcxproj", "{BE18F227-A9F0-4B38-B689-4E2F9F09CA5F}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmem2", "libpmem2", "{BEA6AC7C-831D-44EF-AD61-DA65A448CC9B}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemblk", "libpmemblk", "{BFBAB433-860E-4A28-96E3-A4B7AFE3B297}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "string_store", "string_store", "{BFEDF709-A700-4769-9056-ACA934D828A8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scope", "test\scope\scope.vcxproj", "{C0E811E0-8942-4CFD-A817-74D99E9E6577}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_insert", "test\obj_list_insert\obj_list_insert.vcxproj", 
"{C2C36D03-26EE-4BD8-8FFC-86CFE16C1218}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_libpmem2", "test\ex_libpmem2\ex_libpmem2.vcxproj", "{C2D5E690-748B-4138-B572-1774B99A8572}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_strdup", "test\obj_strdup\obj_strdup.vcxproj", "{C2F94489-A483-4C44-B8A7-11A75F6AEC66}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ulog_size", "test\obj_ulog_size\obj_ulog_size.vcxproj", "{C35052AF-2383-4F9C-B18B-55A01829F2BF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "printlog", "examples\libpmemlog\logfile\printlog.vcxproj", "{C3CEE34C-29E0-4A22-B258-3FBAF662AA19}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_sync", "test\pmempool_sync\pmempool_sync.vcxproj", "{C5E8B8DB-2507-4904-847F-A52196B075F0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_integration", "test\pmem2_integration\pmem2_integration.vcxproj", "{C7025EE1-57E5-44B9-A4F5-3CB059601FC3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_pool_win", "test\log_pool_win\log_pool_win.vcxproj", "{C71DAF3E-9361-4723-93E2-C475D1D0C0D0}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemblk", "libpmemblk", "{C721EFBD-45DC-479E-9B99-E62FCC1FC6E5}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asset_load", "examples\libpmemblk\assetdb\asset_load.vcxproj", "{C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_is_absolute", "test\util_is_absolute\util_is_absolute.vcxproj", "{C973CD39-D63B-4F5C-BE1D-DED17388B5A4}" + ProjectSection(ProjectDependencies) = postProject + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "traces", "test\traces\traces.vcxproj", "{CA4BBB24-D33E-42E2-A495-F10D80DE8C1D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmembench", "benchmarks\pmembench.vcxproj", "{CB906E89-1313-4929-AFF7-86FBF1CC301F}" + ProjectSection(ProjectDependencies) = postProject + {99F7F00F-1DE5-45EA-992B-64BA282FAC76} = {99F7F00F-1DE5-45EA-992B-64BA282FAC76} + {67AC1343-98FD-4143-92C0-559C55F749F5} = {67AC1343-98FD-4143-92C0-559C55F749F5} + {5F2B687A-1B42-439C-AEEC-135DD22FB851} = {5F2B687A-1B42-439C-AEEC-135DD22FB851} + {CF9A0883-6334-44C7-AC29-349468C78E27} = {CF9A0883-6334-44C7-AC29-349468C78E27} + {7DC3B3DD-73ED-4602-9AF3-8D7053620DEA} = {7DC3B3DD-73ED-4602-9AF3-8D7053620DEA} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_lane", "test\obj_lane\obj_lane.vcxproj", "{CCA9B681-D10B-45E4-98CC-531503D2EDE8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_check", "test\pmempool_check\pmempool_check.vcxproj", "{CDD9DFC6-5C3D-42F7-B822-FE29A1C21752}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libut", "test\unittest\libut.vcxproj", "{CE3F2DFB-8470-4802-AD37-21CAF6CB2681}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool", "libpmempool\libpmempool.vcxproj", "{CF9A0883-6334-44C7-AC29-349468C78E27}" + ProjectSection(ProjectDependencies) = postProject + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + 
{901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_zones", "test\obj_zones\obj_zones.vcxproj", "{CF9F4CEA-EC66-4E78-A086-107EB29E0637}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple_copy", "examples\libpmem\simple_copy.vcxproj", "{D062166F-0EC7-4C13-A772-0C7157EEFE41}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_pmalloc_rand_mt", "test\obj_pmalloc_rand_mt\obj_pmalloc_rand_mt.vcxproj", "{D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tools_pmemobjcli", "test\pmemobjcli\pmemobjcli.vcxproj", "{D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "advanced", "examples\libpmem2\advanced\advanced.vcxproj", "{D2964B88-EB05-4EBF-ACDA-44596FBFECB6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmemobjcli", "test\tools\pmemobjcli\pmemobjcli.vcxproj", "{D2C30C7E-A7D3-487A-956E-418CECAFFE8E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fifo", "examples\libpmemobj\linkedlist\fifo.vcxproj", "{D3A99F36-4B72-4766-ABCD-CCEDC26DD139}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmempool_help", "test\pmempool_help\pmempool_help.vcxproj", "{D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_memcpy", "test\pmem2_memcpy\pmem2_memcpy.vcxproj", "{D43FCFB6-97D2-44B2-8577-94B43B97D7CA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "redo", "examples\libpmem2\redo\redo.vcxproj", "{D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_file_create", "test\util_file_create\util_file_create.vcxproj", "{D829DB63-E046-474D-8EA3-43A6659294D8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_libpmemlog", "test\ex_libpmemlog\ex_libpmemlog.vcxproj", "{D8317F1D-7A70-4A39-977A-EAB05A04A87B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_lock", "test\obj_tx_lock\obj_tx_lock.vcxproj", "{D88187D2-1977-4C5F-B0CD-83C69BD6C1BC}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hashmap_tx", "examples\libpmemobj\hashmap\hashmap_tx.vcxproj", "{D93A2683-6D99-4F18-B378-91195D23E007}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_map", "test\pmem2_map\pmem2_map.vcxproj", "{D9A70E35-0C85-4A09-ACA8-B15B21B66F50}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "blk_recovery", "test\blk_recovery\blk_recovery.vcxproj", "{DB68AB21-510B-4BA1-9E6F-E5731D8647BC}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_config", "test\pmem2_config\pmem2_config.vcxproj", "{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_source_size", "test\pmem2_source_size\pmem2_source_size.vcxproj", "{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_has_auto_flush_win", "test\pmem_has_auto_flush_win\pmem_has_auto_flush_win.vcxproj", "{DEA3CD0A-8781-4ABE-9A7D-00B91132FED0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_ctl_alloc_class", "test\obj_ctl_alloc_class\obj_ctl_alloc_class.vcxproj", "{E07C9A5F-B2E4-44FB-AA87-FBC885AC955D}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", 
"{E23BB160-006E-44F2-8FB4-3A2240BBC20C}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "string_store_tx_type", "string_store_tx_type", "{E3229AF7-1FA2-4632-BB0B-B74F709F1A33}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem_is_pmem", "test\pmem_is_pmem\pmem_is_pmem.vcxproj", "{E4E2EC33-7902-45D0-9C3C-ADBAFA46874A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_poolset_foreach", "test\util_poolset_foreach\util_poolset_foreach.vcxproj", "{E648732D-78FA-427A-928C-9A59222D37B7}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_map_from_existing", "test\pmem2_map_from_existing\pmem2_map_from_existing.vcxproj", "{E660218B-3B2D-4378-A2CD-78B865764CF1}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_pool_lock", "test\log_pool_lock\log_pool_lock.vcxproj", "{E68DEB59-C709-4945-AF80-EEBCADDED944}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_constructor", "test\obj_constructor\obj_constructor.vcxproj", "{E7691F81-86EF-467D-82E1-F5B9416386F9}" + ProjectSection(ProjectDependencies) = postProject + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_ctl", "test\util_ctl\util_ctl.vcxproj", "{E796AA20-D664-4D05-ABD9-C93A4FBE3E5C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_bttdev", "test\libpmempool_bttdev\libpmempool_bttdev.vcxproj", "{E85E017F-04C0-4716-BF21-949C82C68912}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_recovery", "test\log_recovery\log_recovery.vcxproj", "{E901B756-EA72-4B8D-967F-85F109D0D1DE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_locks", "test\obj_tx_locks\obj_tx_locks.vcxproj", "{E9E079D6-25BF-46E3-8075-7D733303DD59}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_map_flog", "test\libpmempool_map_flog\libpmempool_map_flog.vcxproj", "{ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "manpage", "examples\libpmemobj\manpage.vcxproj", "{EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_sds", "test\obj_sds\obj_sds.vcxproj", "{EDD5FA29-69AF-445F-842A-132E65D3C92B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_granularity", "test\pmem2_granularity\pmem2_granularity.vcxproj", "{EF951090-8938-4F7D-8674-7F6FB1F2C25E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "win_mmap_dtor", "test\win_mmap_dtor\win_mmap_dtor.vcxproj", "{F03DABEE-A03E-4437-BFD3-D012836F2D94}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tools", "Tools", "{F09A0864-9221-47AD-872F-D4538104D747}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_is_poolset", "test\util_is_poolset\util_is_poolset.vcxproj", "{F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "win_signal", "test\win_signal\win_signal.vcxproj", "{F13108C4-4C86-4D56-A317-A4E5892A8AF7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Documentation", "Documentation", "{F18C84B3-7898-4324-9D75-99A6048F442D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_tx_add_range", "test\obj_tx_add_range\obj_tx_add_range.vcxproj", 
"{F3E5650D-834E-45E6-90C7-3FC2AA954929}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmemobj", "libpmemobj", "{F42C09CD-ABA5-4DA9-8383-5EA40FA4D763}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmem2", "libpmem2\libpmem2.vcxproj", "{F596C36C-5C96-4F08-B420-8908AF500954}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "writer", "examples\libpmemobj\string_store\writer.vcxproj", "{F5D850C9-D353-4B84-99BC-E336C231018C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hashmap_rp", "examples\libpmemobj\hashmap\hashmap_rp.vcxproj", "{F5E2F6C4-19BA-497A-B754-232E4666E647}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hashmap_atomic", "examples\libpmemobj\hashmap\hashmap_atomic.vcxproj", "{F5E2F6C4-19BA-497A-B754-232E469BE647}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ex_libpmemobj", "test\ex_libpmemobj\ex_libpmemobj.vcxproj", "{F63FB47F-1DCE-48E5-9CBD-F3E0A354472B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pmem2_deep_flush", "test\pmem2_deep_flush\pmem2_deep_flush.vcxproj", "{F7508935-C65A-4521-88E3-76AB24F2978D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmemblk", "libpmemblk\libpmemblk.vcxproj", "{F7C6C6B6-4142-4C82-8699-4A9D8183181B}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {492BAA3D-0D5D-478E-9765-500463AE69AA} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libpmem", "libpmem", "{F8373EDD-1B9E-462D-BF23-55638E23E98B}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Utils", "Utils", "{F8CCA5AE-2D75-4C79-BEAB-2588CD5956C8}" + ProjectSection(SolutionItems) = preProject + ..\appveyor.yml = ..\appveyor.yml + ..\utils\CHECK_WHITESPACE.PS1 = ..\utils\CHECK_WHITESPACE.PS1 + ..\utils\CREATE-ZIP.PS1 = ..\utils\CREATE-ZIP.PS1 + ..\utils\cstyle = ..\utils\cstyle + ..\utils\CSTYLE.ps1 = ..\utils\CSTYLE.ps1 + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libpmempool_transform", "test\libpmempool_transform\libpmempool_transform.vcxproj", "{FB2D2B18-E616-4639-8593-0E1AF2DA01A8}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "log_basic", "test\log_basic\log_basic.vcxproj", "{FBB77433-639E-42DC-9355-EA94CAE294D2}" + ProjectSection(ProjectDependencies) = postProject + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {9E9E3D25-2139-4A5D-9200-18148DDEAD45} + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} = {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_root", "test\obj_root\obj_root.vcxproj", "{FC2248F5-3E9E-495B-9767-87F59614047C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpufd", "test\tools\cpufd\cpufd.vcxproj", "{FC998FE5-C843-42BA-9731-F46DB02F1853}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "manpage", "examples\libpmem\manpage.vcxproj", "{FCD0587A-4504-4F5E-8E9C-468CC03D250A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_vec", "test\util_vec\util_vec.vcxproj", "{FD726AA3-D4FA-4597-B435-08CC7752888C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_is_zeroed", 
"test\util_is_zeroed\util_is_zeroed.vcxproj", "{FD726AA3-D4FA-4597-B435-08CC7752888D}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util_vecq", "test\util_vecq\util_vecq.vcxproj", "{FD726AA3-D4FA-4597-B435-08CC7752888E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mmap_fixed", "test\mmap_fixed\mmap_fixed.vcxproj", "{FEA09B48-34C2-4963-8A5A-F97BDA136D72}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_list_remove", "test\obj_list_remove\obj_list_remove.vcxproj", "{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_defrag", "test\obj_defrag\obj_defrag.vcxproj", "{FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235}.Debug|x64.ActiveCfg = Debug|x64 + {0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235}.Debug|x64.Build.0 = Debug|x64 + {0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235}.Release|x64.ActiveCfg = Release|x64 + {0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235}.Release|x64.Build.0 = Release|x64 + {019F5586-5558-4C87-B319-85906D4AE407}.Debug|x64.ActiveCfg = Debug|x64 + {019F5586-5558-4C87-B319-85906D4AE407}.Debug|x64.Build.0 = Debug|x64 + {019F5586-5558-4C87-B319-85906D4AE407}.Release|x64.ActiveCfg = Release|x64 + {019F5586-5558-4C87-B319-85906D4AE407}.Release|x64.Build.0 = Release|x64 + {025E7D51-41F2-4CBA-956E-C37A4443DB1B}.Debug|x64.ActiveCfg = Debug|x64 + {025E7D51-41F2-4CBA-956E-C37A4443DB1B}.Debug|x64.Build.0 = Debug|x64 + {025E7D51-41F2-4CBA-956E-C37A4443DB1B}.Release|x64.ActiveCfg = Release|x64 + {025E7D51-41F2-4CBA-956E-C37A4443DB1B}.Release|x64.Build.0 = Release|x64 + {0287C3DC-AE03-4714-AAFF-C52F062ECA6F}.Debug|x64.ActiveCfg = Debug|x64 + {0287C3DC-AE03-4714-AAFF-C52F062ECA6F}.Debug|x64.Build.0 = Debug|x64 + {0287C3DC-AE03-4714-AAFF-C52F062ECA6F}.Release|x64.ActiveCfg = Release|x64 + {0287C3DC-AE03-4714-AAFF-C52F062ECA6F}.Release|x64.Build.0 = Release|x64 + {02BC3B44-C7F1-4793-86C1-6F36CA8A7F53}.Debug|x64.ActiveCfg = Debug|x64 + {02BC3B44-C7F1-4793-86C1-6F36CA8A7F53}.Debug|x64.Build.0 = Debug|x64 + {02BC3B44-C7F1-4793-86C1-6F36CA8A7F53}.Release|x64.ActiveCfg = Release|x64 + {03228F84-4F41-4BCC-8C2D-F329DC87B289}.Debug|x64.ActiveCfg = Debug|x64 + {03228F84-4F41-4BCC-8C2D-F329DC87B289}.Debug|x64.Build.0 = Debug|x64 + {03228F84-4F41-4BCC-8C2D-F329DC87B289}.Release|x64.ActiveCfg = Release|x64 + {03228F84-4F41-4BCC-8C2D-F329DC87B289}.Release|x64.Build.0 = Release|x64 + {0388E945-A655-41A7-AF27-8981CEE0E49A}.Debug|x64.ActiveCfg = Debug|x64 + {0388E945-A655-41A7-AF27-8981CEE0E49A}.Debug|x64.Build.0 = Debug|x64 + {0388E945-A655-41A7-AF27-8981CEE0E49A}.Release|x64.ActiveCfg = Release|x64 + {0388E945-A655-41A7-AF27-8981CEE0E49A}.Release|x64.Build.0 = Release|x64 + {03B54A12-7793-4827-B820-C07491F7F45E}.Debug|x64.ActiveCfg = Debug|x64 + {03B54A12-7793-4827-B820-C07491F7F45E}.Debug|x64.Build.0 = Debug|x64 + {03B54A12-7793-4827-B820-C07491F7F45E}.Release|x64.ActiveCfg = Release|x64 + {03B54A12-7793-4827-B820-C07491F7F45E}.Release|x64.Build.0 = Release|x64 + {0529575C-F6E8-44FD-BB82-82A29948D0F2}.Debug|x64.ActiveCfg = Debug|x64 + {0529575C-F6E8-44FD-BB82-82A29948D0F2}.Debug|x64.Build.0 = Debug|x64 + {0529575C-F6E8-44FD-BB82-82A29948D0F2}.Release|x64.ActiveCfg = Release|x64 + {0529575C-F6E8-44FD-BB82-82A29948D0F2}.Release|x64.Build.0 = Release|x64 + 
{063037B2-CA35-4520-811C-19D9C4ED891E}.Debug|x64.ActiveCfg = Debug|x64 + {063037B2-CA35-4520-811C-19D9C4ED891E}.Debug|x64.Build.0 = Debug|x64 + {063037B2-CA35-4520-811C-19D9C4ED891E}.Release|x64.ActiveCfg = Release|x64 + {063037B2-CA35-4520-811C-19D9C4ED891E}.Release|x64.Build.0 = Release|x64 + {06877FED-15BA-421F-85C9-1A964FB97446}.Debug|x64.ActiveCfg = Debug|x64 + {06877FED-15BA-421F-85C9-1A964FB97446}.Debug|x64.Build.0 = Debug|x64 + {06877FED-15BA-421F-85C9-1A964FB97446}.Release|x64.ActiveCfg = Release|x64 + {06877FED-15BA-421F-85C9-1A964FB97446}.Release|x64.Build.0 = Release|x64 + {0703E813-9CC8-4DEA-AA33-42B099CD172D}.Debug|x64.ActiveCfg = Debug|x64 + {0703E813-9CC8-4DEA-AA33-42B099CD172D}.Debug|x64.Build.0 = Debug|x64 + {0703E813-9CC8-4DEA-AA33-42B099CD172D}.Release|x64.ActiveCfg = Release|x64 + {0703E813-9CC8-4DEA-AA33-42B099CD172D}.Release|x64.Build.0 = Release|x64 + {07A153D9-DF17-4DE8-A3C2-EBF171B961AE}.Debug|x64.ActiveCfg = Debug|x64 + {07A153D9-DF17-4DE8-A3C2-EBF171B961AE}.Debug|x64.Build.0 = Debug|x64 + {07A153D9-DF17-4DE8-A3C2-EBF171B961AE}.Release|x64.ActiveCfg = Release|x64 + {07A153D9-DF17-4DE8-A3C2-EBF171B961AE}.Release|x64.Build.0 = Release|x64 + {08B62E36-63D2-4FF1-A605-4BBABAEE73FB}.Debug|x64.ActiveCfg = Debug|x64 + {08B62E36-63D2-4FF1-A605-4BBABAEE73FB}.Debug|x64.Build.0 = Debug|x64 + {08B62E36-63D2-4FF1-A605-4BBABAEE73FB}.Release|x64.ActiveCfg = Release|x64 + {08B62E36-63D2-4FF1-A605-4BBABAEE73FB}.Release|x64.Build.0 = Release|x64 + {0A049EAD-652F-4E20-8026-90FD99AEE77A}.Debug|x64.ActiveCfg = Debug|x64 + {0A049EAD-652F-4E20-8026-90FD99AEE77A}.Debug|x64.Build.0 = Debug|x64 + {0A049EAD-652F-4E20-8026-90FD99AEE77A}.Release|x64.ActiveCfg = Release|x64 + {0A049EAD-652F-4E20-8026-90FD99AEE77A}.Release|x64.Build.0 = Release|x64 + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86}.Debug|x64.ActiveCfg = Debug|x64 + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86}.Debug|x64.Build.0 = Debug|x64 + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86}.Release|x64.ActiveCfg = Release|x64 + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86}.Release|x64.Build.0 = Release|x64 + {0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03}.Debug|x64.ActiveCfg = Debug|x64 + {0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03}.Debug|x64.Build.0 = Debug|x64 + {0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03}.Release|x64.ActiveCfg = Release|x64 + {0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03}.Release|x64.Build.0 = Release|x64 + {0CDCEB97-3270-4939-A290-EA2D3BE34B0C}.Debug|x64.ActiveCfg = Debug|x64 + {0CDCEB97-3270-4939-A290-EA2D3BE34B0C}.Debug|x64.Build.0 = Debug|x64 + {0CDCEB97-3270-4939-A290-EA2D3BE34B0C}.Release|x64.ActiveCfg = Release|x64 + {0CDCEB97-3270-4939-A290-EA2D3BE34B0C}.Release|x64.Build.0 = Release|x64 + {0D4E38EF-A9D5-4797-8994-5DBB1125C9EA}.Debug|x64.ActiveCfg = Debug|x64 + {0D4E38EF-A9D5-4797-8994-5DBB1125C9EA}.Debug|x64.Build.0 = Debug|x64 + {0D4E38EF-A9D5-4797-8994-5DBB1125C9EA}.Release|x64.ActiveCfg = Release|x64 + {0D4E38EF-A9D5-4797-8994-5DBB1125C9EA}.Release|x64.Build.0 = Release|x64 + {0DF30DE0-7F7D-43D3-940A-809EC27D3061}.Debug|x64.ActiveCfg = Debug|x64 + {0DF30DE0-7F7D-43D3-940A-809EC27D3061}.Debug|x64.Build.0 = Debug|x64 + {0DF30DE0-7F7D-43D3-940A-809EC27D3061}.Release|x64.ActiveCfg = Release|x64 + {0DF30DE0-7F7D-43D3-940A-809EC27D3061}.Release|x64.Build.0 = Release|x64 + {0FB8F0FD-276C-413B-97A8-67ABE0C9043B}.Debug|x64.ActiveCfg = Debug|x64 + {0FB8F0FD-276C-413B-97A8-67ABE0C9043B}.Debug|x64.Build.0 = Debug|x64 + {0FB8F0FD-276C-413B-97A8-67ABE0C9043B}.Release|x64.ActiveCfg = Release|x64 + {0FB8F0FD-276C-413B-97A8-67ABE0C9043B}.Release|x64.Build.0 = Release|x64 + 
{10469175-EEF7-44A0-9961-AC4E45EFD800}.Debug|x64.ActiveCfg = Debug|x64 + {10469175-EEF7-44A0-9961-AC4E45EFD800}.Debug|x64.Build.0 = Debug|x64 + {10469175-EEF7-44A0-9961-AC4E45EFD800}.Release|x64.ActiveCfg = Release|x64 + {10469175-EEF7-44A0-9961-AC4E45EFD800}.Release|x64.Build.0 = Release|x64 + {10B732EF-1783-4B61-B431-36BA5A2A3C9C}.Debug|x64.ActiveCfg = Debug|x64 + {10B732EF-1783-4B61-B431-36BA5A2A3C9C}.Debug|x64.Build.0 = Debug|x64 + {10B732EF-1783-4B61-B431-36BA5A2A3C9C}.Release|x64.ActiveCfg = Release|x64 + {10B732EF-1783-4B61-B431-36BA5A2A3C9C}.Release|x64.Build.0 = Release|x64 + {11D76FBC-DFAA-4B31-9DB0-206E171E3F94}.Debug|x64.ActiveCfg = Debug|x64 + {11D76FBC-DFAA-4B31-9DB0-206E171E3F94}.Debug|x64.Build.0 = Debug|x64 + {11D76FBC-DFAA-4B31-9DB0-206E171E3F94}.Release|x64.ActiveCfg = Release|x64 + {11D76FBC-DFAA-4B31-9DB0-206E171E3F94}.Release|x64.Build.0 = Release|x64 + {11E158AE-C85A-4A6E-B66A-ED2994709276}.Debug|x64.ActiveCfg = Debug|x64 + {11E158AE-C85A-4A6E-B66A-ED2994709276}.Debug|x64.Build.0 = Debug|x64 + {11E158AE-C85A-4A6E-B66A-ED2994709276}.Release|x64.ActiveCfg = Release|x64 + {11E158AE-C85A-4A6E-B66A-ED2994709276}.Release|x64.Build.0 = Release|x64 + {12A1A3EF-202C-4DD0-9B5A-F5126CAB078F}.Debug|x64.ActiveCfg = Debug|x64 + {12A1A3EF-202C-4DD0-9B5A-F5126CAB078F}.Debug|x64.Build.0 = Debug|x64 + {12A1A3EF-202C-4DD0-9B5A-F5126CAB078F}.Release|x64.ActiveCfg = Release|x64 + {12A1A3EF-202C-4DD0-9B5A-F5126CAB078F}.Release|x64.Build.0 = Release|x64 + {1464398A-100F-4518-BDB9-939A6362B6CF}.Debug|x64.ActiveCfg = Debug|x64 + {1464398A-100F-4518-BDB9-939A6362B6CF}.Debug|x64.Build.0 = Debug|x64 + {1464398A-100F-4518-BDB9-939A6362B6CF}.Release|x64.ActiveCfg = Release|x64 + {1464398A-100F-4518-BDB9-939A6362B6CF}.Release|x64.Build.0 = Release|x64 + {179BEB5A-2C90-44F5-A734-FA756A5E668C}.Debug|x64.ActiveCfg = Debug|x64 + {179BEB5A-2C90-44F5-A734-FA756A5E668C}.Debug|x64.Build.0 = Debug|x64 + {179BEB5A-2C90-44F5-A734-FA756A5E668C}.Release|x64.ActiveCfg = Release|x64 + {179BEB5A-2C90-44F5-A734-FA756A5E668C}.Release|x64.Build.0 = Release|x64 + {17A4B817-68B1-4719-A9EF-BD8FAB747DE6}.Debug|x64.ActiveCfg = Debug|x64 + {17A4B817-68B1-4719-A9EF-BD8FAB747DE6}.Debug|x64.Build.0 = Debug|x64 + {17A4B817-68B1-4719-A9EF-BD8FAB747DE6}.Release|x64.ActiveCfg = Release|x64 + {17A4B817-68B1-4719-A9EF-BD8FAB747DE6}.Release|x64.Build.0 = Release|x64 + {18E90E1A-F2E0-40DF-9900-A14E560C9EB4}.Debug|x64.ActiveCfg = Debug|x64 + {18E90E1A-F2E0-40DF-9900-A14E560C9EB4}.Debug|x64.Build.0 = Debug|x64 + {18E90E1A-F2E0-40DF-9900-A14E560C9EB4}.Release|x64.ActiveCfg = Release|x64 + {18E90E1A-F2E0-40DF-9900-A14E560C9EB4}.Release|x64.Build.0 = Release|x64 + {1B871BA2-3F70-4BC9-9DF4-725EB07F6628}.Debug|x64.ActiveCfg = Debug|x64 + {1B871BA2-3F70-4BC9-9DF4-725EB07F6628}.Debug|x64.Build.0 = Debug|x64 + {1B871BA2-3F70-4BC9-9DF4-725EB07F6628}.Release|x64.ActiveCfg = Release|x64 + {1B871BA2-3F70-4BC9-9DF4-725EB07F6628}.Release|x64.Build.0 = Release|x64 + {1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE}.Debug|x64.ActiveCfg = Debug|x64 + {1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE}.Debug|x64.Build.0 = Debug|x64 + {1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE}.Release|x64.ActiveCfg = Release|x64 + {1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE}.Release|x64.Build.0 = Release|x64 + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}.Debug|x64.ActiveCfg = Debug|x64 + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}.Debug|x64.Build.0 = Debug|x64 + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}.Release|x64.ActiveCfg = Release|x64 + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}.Release|x64.Build.0 = Release|x64 + 
{1BFBAFED-A9CE-49AF-AB2C-84199E391EE6}.Debug|x64.ActiveCfg = Debug|x64 + {1BFBAFED-A9CE-49AF-AB2C-84199E391EE6}.Debug|x64.Build.0 = Debug|x64 + {1BFBAFED-A9CE-49AF-AB2C-84199E391EE6}.Release|x64.ActiveCfg = Release|x64 + {1BFBAFED-A9CE-49AF-AB2C-84199E391EE6}.Release|x64.Build.0 = Release|x64 + {1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437}.Debug|x64.ActiveCfg = Debug|x64 + {1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437}.Debug|x64.Build.0 = Debug|x64 + {1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437}.Release|x64.ActiveCfg = Release|x64 + {1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437}.Release|x64.Build.0 = Release|x64 + {1EB3DE5B-6357-498D-8CAC-EEC0209EA454}.Debug|x64.ActiveCfg = Debug|x64 + {1EB3DE5B-6357-498D-8CAC-EEC0209EA454}.Debug|x64.Build.0 = Debug|x64 + {1EB3DE5B-6357-498D-8CAC-EEC0209EA454}.Release|x64.ActiveCfg = Release|x64 + {1EB3DE5B-6357-498D-8CAC-EEC0209EA454}.Release|x64.Build.0 = Release|x64 + {1F2E1C51-2B14-4047-BE6D-52E00FC3C780}.Debug|x64.ActiveCfg = Debug|x64 + {1F2E1C51-2B14-4047-BE6D-52E00FC3C780}.Debug|x64.Build.0 = Debug|x64 + {1F2E1C51-2B14-4047-BE6D-52E00FC3C780}.Release|x64.ActiveCfg = Release|x64 + {1F2E1C51-2B14-4047-BE6D-52E00FC3C780}.Release|x64.Build.0 = Release|x64 + {2498FCDA-E2CC-43EF-9A35-8CD63F253171}.Debug|x64.ActiveCfg = Debug|x64 + {2498FCDA-E2CC-43EF-9A35-8CD63F253171}.Debug|x64.Build.0 = Debug|x64 + {2498FCDA-E2CC-43EF-9A35-8CD63F253171}.Release|x64.ActiveCfg = Release|x64 + {2498FCDA-E2CC-43EF-9A35-8CD63F253171}.Release|x64.Build.0 = Release|x64 + {25758581-DD46-4AE4-99D9-11E736F72AD1}.Debug|x64.ActiveCfg = Debug|x64 + {25758581-DD46-4AE4-99D9-11E736F72AD1}.Debug|x64.Build.0 = Debug|x64 + {25758581-DD46-4AE4-99D9-11E736F72AD1}.Release|x64.ActiveCfg = Release|x64 + {25758581-DD46-4AE4-99D9-11E736F72AD1}.Release|x64.Build.0 = Release|x64 + {26166DF1-3C94-44AF-9075-BA31DCD2F6BB}.Debug|x64.ActiveCfg = Debug|x64 + {26166DF1-3C94-44AF-9075-BA31DCD2F6BB}.Debug|x64.Build.0 = Debug|x64 + {26166DF1-3C94-44AF-9075-BA31DCD2F6BB}.Release|x64.ActiveCfg = Release|x64 + {26166DF1-3C94-44AF-9075-BA31DCD2F6BB}.Release|x64.Build.0 = Release|x64 + {27FA11C6-431D-41D1-A417-FAB7C4F93DCA}.Debug|x64.ActiveCfg = Debug|x64 + {27FA11C6-431D-41D1-A417-FAB7C4F93DCA}.Debug|x64.Build.0 = Debug|x64 + {27FA11C6-431D-41D1-A417-FAB7C4F93DCA}.Release|x64.ActiveCfg = Release|x64 + {27FA11C6-431D-41D1-A417-FAB7C4F93DCA}.Release|x64.Build.0 = Release|x64 + {296F3C5D-3951-423E-8E2F-FD4A37958C72}.Debug|x64.ActiveCfg = Debug|x64 + {296F3C5D-3951-423E-8E2F-FD4A37958C72}.Debug|x64.Build.0 = Debug|x64 + {296F3C5D-3951-423E-8E2F-FD4A37958C72}.Release|x64.ActiveCfg = Release|x64 + {296F3C5D-3951-423E-8E2F-FD4A37958C72}.Release|x64.Build.0 = Release|x64 + {29D9376B-DC36-4940-83F1-A7CBE38A2103}.Debug|x64.ActiveCfg = Debug|x64 + {29D9376B-DC36-4940-83F1-A7CBE38A2103}.Debug|x64.Build.0 = Debug|x64 + {29D9376B-DC36-4940-83F1-A7CBE38A2103}.Release|x64.ActiveCfg = Release|x64 + {29D9376B-DC36-4940-83F1-A7CBE38A2103}.Release|x64.Build.0 = Release|x64 + {2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00}.Debug|x64.ActiveCfg = Debug|x64 + {2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00}.Debug|x64.Build.0 = Debug|x64 + {2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00}.Release|x64.ActiveCfg = Release|x64 + {2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00}.Release|x64.Build.0 = Release|x64 + {2B1A5104-A324-4D02-B5C7-D021FB8F880C}.Debug|x64.ActiveCfg = Debug|x64 + {2B1A5104-A324-4D02-B5C7-D021FB8F880C}.Debug|x64.Build.0 = Debug|x64 + {2B1A5104-A324-4D02-B5C7-D021FB8F880C}.Release|x64.ActiveCfg = Release|x64 + {2B1A5104-A324-4D02-B5C7-D021FB8F880C}.Release|x64.Build.0 = Release|x64 + 
+{2B2DE575-1422-4FBF-97BE-35AEDA0AB465}.Debug|x64.ActiveCfg = Debug|x64
+{2B2DE575-1422-4FBF-97BE-35AEDA0AB465}.Debug|x64.Build.0 = Debug|x64
+{2B2DE575-1422-4FBF-97BE-35AEDA0AB465}.Release|x64.ActiveCfg = Release|x64
+{2B2DE575-1422-4FBF-97BE-35AEDA0AB465}.Release|x64.Build.0 = Release|x64
+{2B7772E6-9DAA-4F38-B0BC-7B2399366325}.Debug|x64.ActiveCfg = Debug|x64
+{2B7772E6-9DAA-4F38-B0BC-7B2399366325}.Debug|x64.Build.0 = Debug|x64
+{2B7772E6-9DAA-4F38-B0BC-7B2399366325}.Release|x64.ActiveCfg = Release|x64
+{2B7772E6-9DAA-4F38-B0BC-7B2399366325}.Release|x64.Build.0 = Release|x64
+{2CD7408E-2F60-43C3-ACEB-C7D58CDD8462}.Debug|x64.ActiveCfg = Debug|x64
+{2CD7408E-2F60-43C3-ACEB-C7D58CDD8462}.Debug|x64.Build.0 = Debug|x64
+{2CD7408E-2F60-43C3-ACEB-C7D58CDD8462}.Release|x64.ActiveCfg = Release|x64
+{2CD7408E-2F60-43C3-ACEB-C7D58CDD8462}.Release|x64.Build.0 = Release|x64
+{2DE6B085-3C19-49B1-894A-AD9376000E09}.Debug|x64.ActiveCfg = Debug|x64
+{2DE6B085-3C19-49B1-894A-AD9376000E09}.Debug|x64.Build.0 = Debug|x64
+{2DE6B085-3C19-49B1-894A-AD9376000E09}.Release|x64.ActiveCfg = Release|x64
+{2DE6B085-3C19-49B1-894A-AD9376000E09}.Release|x64.Build.0 = Release|x64
+{2ED26FDA-3C4E-4514-B387-5E77C302FF71}.Debug|x64.ActiveCfg = Debug|x64
+{2ED26FDA-3C4E-4514-B387-5E77C302FF71}.Debug|x64.Build.0 = Debug|x64
+{2ED26FDA-3C4E-4514-B387-5E77C302FF71}.Release|x64.ActiveCfg = Release|x64
+{2ED26FDA-3C4E-4514-B387-5E77C302FF71}.Release|x64.Build.0 = Release|x64
+{2EFFC590-BF5E-46A2-AF04-E67E1D571D2E}.Debug|x64.ActiveCfg = Debug|x64
+{2EFFC590-BF5E-46A2-AF04-E67E1D571D2E}.Debug|x64.Build.0 = Debug|x64
+{2EFFC590-BF5E-46A2-AF04-E67E1D571D2E}.Release|x64.ActiveCfg = Release|x64
+{2EFFC590-BF5E-46A2-AF04-E67E1D571D2E}.Release|x64.Build.0 = Release|x64
+{2FA3155B-6F26-4D15-AC03-9D82D48DBC42}.Debug|x64.ActiveCfg = Debug|x64
+{2FA3155B-6F26-4D15-AC03-9D82D48DBC42}.Debug|x64.Build.0 = Debug|x64
+{2FA3155B-6F26-4D15-AC03-9D82D48DBC42}.Release|x64.ActiveCfg = Release|x64
+{2FA3155B-6F26-4D15-AC03-9D82D48DBC42}.Release|x64.Build.0 = Release|x64
+{3142CB13-CADA-48D3-9A25-E6ACB243760A}.Debug|x64.ActiveCfg = Debug|x64
+{3142CB13-CADA-48D3-9A25-E6ACB243760A}.Debug|x64.Build.0 = Debug|x64
+{3142CB13-CADA-48D3-9A25-E6ACB243760A}.Release|x64.ActiveCfg = Release|x64
+{3142CB13-CADA-48D3-9A25-E6ACB243760A}.Release|x64.Build.0 = Release|x64
+{34DB4951-DA08-45F1-938D-B08E5FF5AB46}.Debug|x64.ActiveCfg = Debug|x64
+{34DB4951-DA08-45F1-938D-B08E5FF5AB46}.Debug|x64.Build.0 = Debug|x64
+{34DB4951-DA08-45F1-938D-B08E5FF5AB46}.Release|x64.ActiveCfg = Release|x64
+{34DB4951-DA08-45F1-938D-B08E5FF5AB46}.Release|x64.Build.0 = Release|x64
+{34F31D9D-3D33-4C09-85A3-4749A8AB8EBB}.Debug|x64.ActiveCfg = Debug|x64
+{34F31D9D-3D33-4C09-85A3-4749A8AB8EBB}.Debug|x64.Build.0 = Debug|x64
+{34F31D9D-3D33-4C09-85A3-4749A8AB8EBB}.Release|x64.ActiveCfg = Release|x64
+{34F31D9D-3D33-4C09-85A3-4749A8AB8EBB}.Release|x64.Build.0 = Release|x64
+{3799BA67-3C4F-4AE0-85DC-5BAAEA01A180}.Debug|x64.ActiveCfg = Debug|x64
+{3799BA67-3C4F-4AE0-85DC-5BAAEA01A180}.Debug|x64.Build.0 = Debug|x64
+{3799BA67-3C4F-4AE0-85DC-5BAAEA01A180}.Release|x64.ActiveCfg = Release|x64
+{3799BA67-3C4F-4AE0-85DC-5BAAEA01A180}.Release|x64.Build.0 = Release|x64
+{3B23831B-E5DE-4A62-9D0B-27D0D9F293F4}.Debug|x64.ActiveCfg = Debug|x64
+{3B23831B-E5DE-4A62-9D0B-27D0D9F293F4}.Debug|x64.Build.0 = Debug|x64
+{3B23831B-E5DE-4A62-9D0B-27D0D9F293F4}.Release|x64.ActiveCfg = Release|x64
+{3B23831B-E5DE-4A62-9D0B-27D0D9F293F4}.Release|x64.Build.0 = Release|x64
+{3B44D717-EEDE-470A-B631-C9D6BFE4ADF2}.Debug|x64.ActiveCfg = Debug|x64
+{3B44D717-EEDE-470A-B631-C9D6BFE4ADF2}.Debug|x64.Build.0 = Debug|x64
+{3B44D717-EEDE-470A-B631-C9D6BFE4ADF2}.Release|x64.ActiveCfg = Release|x64
+{3B44D717-EEDE-470A-B631-C9D6BFE4ADF2}.Release|x64.Build.0 = Release|x64
+{3CF270CD-0F56-48E3-AD84-82F369C568BF}.Debug|x64.ActiveCfg = Debug|x64
+{3CF270CD-0F56-48E3-AD84-82F369C568BF}.Debug|x64.Build.0 = Debug|x64
+{3CF270CD-0F56-48E3-AD84-82F369C568BF}.Release|x64.ActiveCfg = Release|x64
+{3CF270CD-0F56-48E3-AD84-82F369C568BF}.Release|x64.Build.0 = Release|x64
+{3EC20BDD-2E48-4291-A9EE-D0675AF77C7F}.Debug|x64.ActiveCfg = Debug|x64
+{3EC20BDD-2E48-4291-A9EE-D0675AF77C7F}.Debug|x64.Build.0 = Debug|x64
+{3EC20BDD-2E48-4291-A9EE-D0675AF77C7F}.Release|x64.ActiveCfg = Release|x64
+{3EC20BDD-2E48-4291-A9EE-D0675AF77C7F}.Release|x64.Build.0 = Release|x64
+{3EC30D6A-BDA4-4971-879A-8814204EAE31}.Debug|x64.ActiveCfg = Debug|x64
+{3EC30D6A-BDA4-4971-879A-8814204EAE31}.Debug|x64.Build.0 = Debug|x64
+{3EC30D6A-BDA4-4971-879A-8814204EAE31}.Release|x64.ActiveCfg = Release|x64
+{3EC30D6A-BDA4-4971-879A-8814204EAE31}.Release|x64.Build.0 = Release|x64
+{3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78}.Debug|x64.ActiveCfg = Debug|x64
+{3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78}.Debug|x64.Build.0 = Debug|x64
+{3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78}.Release|x64.ActiveCfg = Release|x64
+{3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78}.Release|x64.Build.0 = Release|x64
+{3ED56E55-84A6-422C-A8D4-A8439FB8F245}.Debug|x64.ActiveCfg = Debug|x64
+{3ED56E55-84A6-422C-A8D4-A8439FB8F245}.Debug|x64.Build.0 = Debug|x64
+{3ED56E55-84A6-422C-A8D4-A8439FB8F245}.Release|x64.ActiveCfg = Release|x64
+{3ED56E55-84A6-422C-A8D4-A8439FB8F245}.Release|x64.Build.0 = Release|x64
+{42B97D47-F800-4100-BFA2-B3AC357E8B6B}.Debug|x64.ActiveCfg = Debug|x64
+{42B97D47-F800-4100-BFA2-B3AC357E8B6B}.Debug|x64.Build.0 = Debug|x64
+{42B97D47-F800-4100-BFA2-B3AC357E8B6B}.Release|x64.ActiveCfg = Release|x64
+{42B97D47-F800-4100-BFA2-B3AC357E8B6B}.Release|x64.Build.0 = Release|x64
+{42CCEF95-5ADD-460C-967E-DD5B2C744943}.Debug|x64.ActiveCfg = Debug|x64
+{42CCEF95-5ADD-460C-967E-DD5B2C744943}.Debug|x64.Build.0 = Debug|x64
+{42CCEF95-5ADD-460C-967E-DD5B2C744943}.Release|x64.ActiveCfg = Release|x64
+{42CCEF95-5ADD-460C-967E-DD5B2C744943}.Release|x64.Build.0 = Release|x64
+{433F7840-C597-4950-84C9-E4FF7DF6A298}.Debug|x64.ActiveCfg = Debug|x64
+{433F7840-C597-4950-84C9-E4FF7DF6A298}.Debug|x64.Build.0 = Debug|x64
+{433F7840-C597-4950-84C9-E4FF7DF6A298}.Release|x64.ActiveCfg = Release|x64
+{433F7840-C597-4950-84C9-E4FF7DF6A298}.Release|x64.Build.0 = Release|x64
+{46629F21-089C-4205-B2F8-E01748ECE517}.Debug|x64.ActiveCfg = Debug|x64
+{46629F21-089C-4205-B2F8-E01748ECE517}.Debug|x64.Build.0 = Debug|x64
+{46629F21-089C-4205-B2F8-E01748ECE517}.Release|x64.ActiveCfg = Release|x64
+{46629F21-089C-4205-B2F8-E01748ECE517}.Release|x64.Build.0 = Release|x64
+{46B82069-10BE-432A-8D93-F4D995148555}.Debug|x64.ActiveCfg = Debug|x64
+{46B82069-10BE-432A-8D93-F4D995148555}.Debug|x64.Build.0 = Debug|x64
+{46B82069-10BE-432A-8D93-F4D995148555}.Release|x64.ActiveCfg = Release|x64
+{46B82069-10BE-432A-8D93-F4D995148555}.Release|x64.Build.0 = Release|x64
+{4850F425-9128-4E91-973C-5AE7BD97395B}.Debug|x64.ActiveCfg = Debug|x64
+{4850F425-9128-4E91-973C-5AE7BD97395B}.Debug|x64.Build.0 = Debug|x64
+{4850F425-9128-4E91-973C-5AE7BD97395B}.Release|x64.ActiveCfg = Release|x64
+{4850F425-9128-4E91-973C-5AE7BD97395B}.Release|x64.Build.0 = Release|x64
+{492BAA3D-0D5D-478E-9765-500463AE69AA}.Debug|x64.ActiveCfg = Debug|x64
+{492BAA3D-0D5D-478E-9765-500463AE69AA}.Debug|x64.Build.0 = Debug|x64
+{492BAA3D-0D5D-478E-9765-500463AE69AA}.Release|x64.ActiveCfg = Release|x64
+{492BAA3D-0D5D-478E-9765-500463AE69AA}.Release|x64.Build.0 = Release|x64
+{49A7CC5A-D5E7-4A07-917F-C6918B982BE8}.Debug|x64.ActiveCfg = Debug|x64
+{49A7CC5A-D5E7-4A07-917F-C6918B982BE8}.Debug|x64.Build.0 = Debug|x64
+{49A7CC5A-D5E7-4A07-917F-C6918B982BE8}.Release|x64.ActiveCfg = Release|x64
+{49A7CC5A-D5E7-4A07-917F-C6918B982BE8}.Release|x64.Build.0 = Release|x64
+{4C429783-0B01-449F-A36F-C2019233890B}.Debug|x64.ActiveCfg = Debug|x64
+{4C429783-0B01-449F-A36F-C2019233890B}.Debug|x64.Build.0 = Debug|x64
+{4C429783-0B01-449F-A36F-C2019233890B}.Release|x64.ActiveCfg = Release|x64
+{4C429783-0B01-449F-A36F-C2019233890B}.Release|x64.Build.0 = Release|x64
+{4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF}.Debug|x64.ActiveCfg = Debug|x64
+{4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF}.Debug|x64.Build.0 = Debug|x64
+{4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF}.Release|x64.ActiveCfg = Release|x64
+{4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF}.Release|x64.Build.0 = Release|x64
+{4E334022-7A71-4197-9E15-878F7EFC877E}.Debug|x64.ActiveCfg = Debug|x64
+{4E334022-7A71-4197-9E15-878F7EFC877E}.Debug|x64.Build.0 = Debug|x64
+{4E334022-7A71-4197-9E15-878F7EFC877E}.Release|x64.ActiveCfg = Release|x64
+{4E334022-7A71-4197-9E15-878F7EFC877E}.Release|x64.Build.0 = Release|x64
+{4EE3C4D6-F707-4A05-8032-8FC2A44D29E8}.Debug|x64.ActiveCfg = Debug|x64
+{4EE3C4D6-F707-4A05-8032-8FC2A44D29E8}.Debug|x64.Build.0 = Debug|x64
+{4EE3C4D6-F707-4A05-8032-8FC2A44D29E8}.Release|x64.ActiveCfg = Release|x64
+{4EE3C4D6-F707-4A05-8032-8FC2A44D29E8}.Release|x64.Build.0 = Release|x64
+{4FB4FF90-4E92-4CFB-A01F-C73D6861CA03}.Debug|x64.ActiveCfg = Debug|x64
+{4FB4FF90-4E92-4CFB-A01F-C73D6861CA03}.Debug|x64.Build.0 = Debug|x64
+{4FB4FF90-4E92-4CFB-A01F-C73D6861CA03}.Release|x64.ActiveCfg = Release|x64
+{4FB4FF90-4E92-4CFB-A01F-C73D6861CA03}.Release|x64.Build.0 = Release|x64
+{50FD1E47-2131-48D2-9435-5CB28DF6B15A}.Debug|x64.ActiveCfg = Debug|x64
+{50FD1E47-2131-48D2-9435-5CB28DF6B15A}.Debug|x64.Build.0 = Debug|x64
+{50FD1E47-2131-48D2-9435-5CB28DF6B15A}.Release|x64.ActiveCfg = Release|x64
+{50FD1E47-2131-48D2-9435-5CB28DF6B15A}.Release|x64.Build.0 = Release|x64
+{513C4CFA-BD5B-4470-BA93-F6D43778A754}.Debug|x64.ActiveCfg = Debug|x64
+{513C4CFA-BD5B-4470-BA93-F6D43778A754}.Debug|x64.Build.0 = Debug|x64
+{513C4CFA-BD5B-4470-BA93-F6D43778A754}.Release|x64.ActiveCfg = Release|x64
+{513C4CFA-BD5B-4470-BA93-F6D43778A754}.Release|x64.Build.0 = Release|x64
+{53115A01-460C-4339-A2C8-AE1323A6E7EA}.Debug|x64.ActiveCfg = Debug|x64
+{53115A01-460C-4339-A2C8-AE1323A6E7EA}.Debug|x64.Build.0 = Debug|x64
+{53115A01-460C-4339-A2C8-AE1323A6E7EA}.Release|x64.ActiveCfg = Release|x64
+{53115A01-460C-4339-A2C8-AE1323A6E7EA}.Release|x64.Build.0 = Release|x64
+{5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1}.Debug|x64.ActiveCfg = Debug|x64
+{5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1}.Debug|x64.Build.0 = Debug|x64
+{5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1}.Release|x64.ActiveCfg = Release|x64
+{5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1}.Release|x64.Build.0 = Release|x64
+{5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A}.Debug|x64.ActiveCfg = Debug|x64
+{5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A}.Debug|x64.Build.0 = Debug|x64
+{5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A}.Release|x64.ActiveCfg = Release|x64
+{5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A}.Release|x64.Build.0 = Release|x64
+{581B3A58-F3F0-4765-91E5-D0C82816A528}.Debug|x64.ActiveCfg = Debug|x64
+{581B3A58-F3F0-4765-91E5-D0C82816A528}.Debug|x64.Build.0 = Debug|x64
+{581B3A58-F3F0-4765-91E5-D0C82816A528}.Release|x64.ActiveCfg = Release|x64
+{581B3A58-F3F0-4765-91E5-D0C82816A528}.Release|x64.Build.0 = Release|x64
+{58386481-30B7-40FC-96AF-0723A4A7B228}.Debug|x64.ActiveCfg = Debug|x64
+{58386481-30B7-40FC-96AF-0723A4A7B228}.Debug|x64.Build.0 = Debug|x64
+{58386481-30B7-40FC-96AF-0723A4A7B228}.Release|x64.ActiveCfg = Release|x64
+{58386481-30B7-40FC-96AF-0723A4A7B228}.Release|x64.Build.0 = Release|x64
+{59D7A9CD-9912-40E4-96E1-8A873F777F62}.Debug|x64.ActiveCfg = Debug|x64
+{59D7A9CD-9912-40E4-96E1-8A873F777F62}.Debug|x64.Build.0 = Debug|x64
+{59D7A9CD-9912-40E4-96E1-8A873F777F62}.Release|x64.ActiveCfg = Release|x64
+{59D7A9CD-9912-40E4-96E1-8A873F777F62}.Release|x64.Build.0 = Release|x64
+{59D9E21C-57D7-4D18-B792-24738BD26DE4}.Debug|x64.ActiveCfg = Debug|x64
+{59D9E21C-57D7-4D18-B792-24738BD26DE4}.Debug|x64.Build.0 = Debug|x64
+{59D9E21C-57D7-4D18-B792-24738BD26DE4}.Release|x64.ActiveCfg = Release|x64
+{59D9E21C-57D7-4D18-B792-24738BD26DE4}.Release|x64.Build.0 = Release|x64
+{5A391A14-8E29-4788-93FC-EDADED31D32F}.Debug|x64.ActiveCfg = Debug|x64
+{5A391A14-8E29-4788-93FC-EDADED31D32F}.Debug|x64.Build.0 = Debug|x64
+{5A391A14-8E29-4788-93FC-EDADED31D32F}.Release|x64.ActiveCfg = Release|x64
+{5A391A14-8E29-4788-93FC-EDADED31D32F}.Release|x64.Build.0 = Release|x64
+{5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF}.Debug|x64.ActiveCfg = Debug|x64
+{5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF}.Debug|x64.Build.0 = Debug|x64
+{5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF}.Release|x64.ActiveCfg = Release|x64
+{5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF}.Release|x64.Build.0 = Release|x64
+{5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3}.Debug|x64.ActiveCfg = Debug|x64
+{5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3}.Debug|x64.Build.0 = Debug|x64
+{5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3}.Release|x64.ActiveCfg = Release|x64
+{5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3}.Release|x64.Build.0 = Release|x64
+{5D362DB7-D2BD-4907-AAD8-4B8627E72282}.Debug|x64.ActiveCfg = Debug|x64
+{5D362DB7-D2BD-4907-AAD8-4B8627E72282}.Debug|x64.Build.0 = Debug|x64
+{5D362DB7-D2BD-4907-AAD8-4B8627E72282}.Release|x64.ActiveCfg = Release|x64
+{5D362DB7-D2BD-4907-AAD8-4B8627E72282}.Release|x64.Build.0 = Release|x64
+{5DB2E259-0D19-4A89-B8EC-B2912F39924D}.Debug|x64.ActiveCfg = Debug|x64
+{5DB2E259-0D19-4A89-B8EC-B2912F39924D}.Debug|x64.Build.0 = Debug|x64
+{5DB2E259-0D19-4A89-B8EC-B2912F39924D}.Release|x64.ActiveCfg = Release|x64
+{5DB2E259-0D19-4A89-B8EC-B2912F39924D}.Release|x64.Build.0 = Release|x64
+{5E005D50-1C73-4E52-B295-864BB9AF7AC6}.Debug|x64.ActiveCfg = Debug|x64
+{5E005D50-1C73-4E52-B295-864BB9AF7AC6}.Debug|x64.Build.0 = Debug|x64
+{5E005D50-1C73-4E52-B295-864BB9AF7AC6}.Release|x64.ActiveCfg = Release|x64
+{5E005D50-1C73-4E52-B295-864BB9AF7AC6}.Release|x64.Build.0 = Release|x64
+{5E7305DB-93E6-448B-AE44-90EAF916A776}.Debug|x64.ActiveCfg = Debug|x64
+{5E7305DB-93E6-448B-AE44-90EAF916A776}.Debug|x64.Build.0 = Debug|x64
+{5E7305DB-93E6-448B-AE44-90EAF916A776}.Release|x64.ActiveCfg = Release|x64
+{5E7305DB-93E6-448B-AE44-90EAF916A776}.Release|x64.Build.0 = Release|x64
+{5EC35099-9777-45E8-9520-EB2EE75BDF88}.Debug|x64.ActiveCfg = Debug|x64
+{5EC35099-9777-45E8-9520-EB2EE75BDF88}.Debug|x64.Build.0 = Debug|x64
+{5EC35099-9777-45E8-9520-EB2EE75BDF88}.Release|x64.ActiveCfg = Release|x64
+{5EC35099-9777-45E8-9520-EB2EE75BDF88}.Release|x64.Build.0 = Release|x64
+{5F2B687A-1B42-439C-AEEC-135DD22FB851}.Debug|x64.ActiveCfg = Debug|x64
+{5F2B687A-1B42-439C-AEEC-135DD22FB851}.Debug|x64.Build.0 = Debug|x64
+{5F2B687A-1B42-439C-AEEC-135DD22FB851}.Release|x64.ActiveCfg = Release|x64
+{5F2B687A-1B42-439C-AEEC-135DD22FB851}.Release|x64.Build.0 = Release|x64
+{5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C}.Debug|x64.ActiveCfg = Debug|x64
+{5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C}.Debug|x64.Build.0 = Debug|x64
+{5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C}.Release|x64.ActiveCfg = Release|x64
+{5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C}.Release|x64.Build.0 = Release|x64
+{60206D22-E132-4695-8486-10BECA32C5CC}.Debug|x64.ActiveCfg = Debug|x64
+{60206D22-E132-4695-8486-10BECA32C5CC}.Debug|x64.Build.0 = Debug|x64
+{60206D22-E132-4695-8486-10BECA32C5CC}.Release|x64.ActiveCfg = Release|x64
+{60206D22-E132-4695-8486-10BECA32C5CC}.Release|x64.Build.0 = Release|x64
+{60B463D4-8CD5-4BF6-A25B-01BE13B87590}.Debug|x64.ActiveCfg = Debug|x64
+{60B463D4-8CD5-4BF6-A25B-01BE13B87590}.Debug|x64.Build.0 = Debug|x64
+{60B463D4-8CD5-4BF6-A25B-01BE13B87590}.Release|x64.ActiveCfg = Release|x64
+{60B463D4-8CD5-4BF6-A25B-01BE13B87590}.Release|x64.Build.0 = Release|x64
+{60EF55C7-8399-4543-B5B2-3AE2C532C67E}.Debug|x64.ActiveCfg = Debug|x64
+{60EF55C7-8399-4543-B5B2-3AE2C532C67E}.Debug|x64.Build.0 = Debug|x64
+{60EF55C7-8399-4543-B5B2-3AE2C532C67E}.Release|x64.ActiveCfg = Release|x64
+{60EF55C7-8399-4543-B5B2-3AE2C532C67E}.Release|x64.Build.0 = Release|x64
+{628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0}.Debug|x64.ActiveCfg = Debug|x64
+{628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0}.Debug|x64.Build.0 = Debug|x64
+{628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0}.Release|x64.ActiveCfg = Release|x64
+{628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0}.Release|x64.Build.0 = Release|x64
+{63B8184D-85E0-4E6A-9729-558C567D1D1D}.Debug|x64.ActiveCfg = Debug|x64
+{63B8184D-85E0-4E6A-9729-558C567D1D1D}.Debug|x64.Build.0 = Debug|x64
+{63B8184D-85E0-4E6A-9729-558C567D1D1D}.Release|x64.ActiveCfg = Release|x64
+{63B8184D-85E0-4E6A-9729-558C567D1D1D}.Release|x64.Build.0 = Release|x64
+{643B82A1-D009-46A9-92A0-2883399B05C2}.Debug|x64.ActiveCfg = Debug|x64
+{643B82A1-D009-46A9-92A0-2883399B05C2}.Debug|x64.Build.0 = Debug|x64
+{643B82A1-D009-46A9-92A0-2883399B05C2}.Release|x64.ActiveCfg = Release|x64
+{643B82A1-D009-46A9-92A0-2883399B05C2}.Release|x64.Build.0 = Release|x64
+{6516D6CF-8000-4341-9487-312BC83EE370}.Debug|x64.ActiveCfg = Debug|x64
+{6516D6CF-8000-4341-9487-312BC83EE370}.Debug|x64.Build.0 = Debug|x64
+{6516D6CF-8000-4341-9487-312BC83EE370}.Release|x64.ActiveCfg = Release|x64
+{6516D6CF-8000-4341-9487-312BC83EE370}.Release|x64.Build.0 = Release|x64
+{65D92D98-97E1-48F7-AEF6-75221CF48EA4}.Debug|x64.ActiveCfg = Debug|x64
+{65D92D98-97E1-48F7-AEF6-75221CF48EA4}.Debug|x64.Build.0 = Debug|x64
+{65D92D98-97E1-48F7-AEF6-75221CF48EA4}.Release|x64.ActiveCfg = Release|x64
+{65D92D98-97E1-48F7-AEF6-75221CF48EA4}.Release|x64.Build.0 = Release|x64
+{673277EC-D26B-414D-92E3-84EE873316A8}.Debug|x64.ActiveCfg = Debug|x64
+{673277EC-D26B-414D-92E3-84EE873316A8}.Debug|x64.Build.0 = Debug|x64
+{673277EC-D26B-414D-92E3-84EE873316A8}.Release|x64.ActiveCfg = Release|x64
+{673277EC-D26B-414D-92E3-84EE873316A8}.Release|x64.Build.0 = Release|x64
+{6770917C-5B8E-49F1-9297-163FAB76DAFB}.Debug|x64.ActiveCfg = Debug|x64
+{6770917C-5B8E-49F1-9297-163FAB76DAFB}.Debug|x64.Build.0 = Debug|x64
+{6770917C-5B8E-49F1-9297-163FAB76DAFB}.Release|x64.ActiveCfg = Release|x64
+{6770917C-5B8E-49F1-9297-163FAB76DAFB}.Release|x64.Build.0 = Release|x64
+{67AC1343-98FD-4143-92C0-559C55F749F5}.Debug|x64.ActiveCfg = Debug|x64
+{67AC1343-98FD-4143-92C0-559C55F749F5}.Debug|x64.Build.0 = Debug|x64
+{67AC1343-98FD-4143-92C0-559C55F749F5}.Release|x64.ActiveCfg = Release|x64
+{67AC1343-98FD-4143-92C0-559C55F749F5}.Release|x64.Build.0 = Release|x64
+{6851356E-A5D9-46A6-8262-A7E208729F18}.Debug|x64.ActiveCfg = Debug|x64
+{6851356E-A5D9-46A6-8262-A7E208729F18}.Debug|x64.Build.0 = Debug|x64
+{6851356E-A5D9-46A6-8262-A7E208729F18}.Release|x64.ActiveCfg = Release|x64
+{6851356E-A5D9-46A6-8262-A7E208729F18}.Release|x64.Build.0 = Release|x64
+{6AE1B8BE-D46A-4E99-87A2-F160FB950DCA}.Debug|x64.ActiveCfg = Debug|x64
+{6AE1B8BE-D46A-4E99-87A2-F160FB950DCA}.Debug|x64.Build.0 = Debug|x64
+{6AE1B8BE-D46A-4E99-87A2-F160FB950DCA}.Release|x64.ActiveCfg = Release|x64
+{6AE1B8BE-D46A-4E99-87A2-F160FB950DCA}.Release|x64.Build.0 = Release|x64
+{6B492754-9F80-44B3-A2A7-1D98AF06F3B2}.Debug|x64.ActiveCfg = Debug|x64
+{6B492754-9F80-44B3-A2A7-1D98AF06F3B2}.Debug|x64.Build.0 = Debug|x64
+{6B492754-9F80-44B3-A2A7-1D98AF06F3B2}.Release|x64.ActiveCfg = Release|x64
+{6B492754-9F80-44B3-A2A7-1D98AF06F3B2}.Release|x64.Build.0 = Release|x64
+{6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408}.Debug|x64.ActiveCfg = Debug|x64
+{6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408}.Debug|x64.Build.0 = Debug|x64
+{6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408}.Release|x64.ActiveCfg = Release|x64
+{6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408}.Release|x64.Build.0 = Release|x64
+{6D7C1169-3246-465F-B630-ECFEF4F3179A}.Debug|x64.ActiveCfg = Debug|x64
+{6D7C1169-3246-465F-B630-ECFEF4F3179A}.Debug|x64.Build.0 = Debug|x64
+{6D7C1169-3246-465F-B630-ECFEF4F3179A}.Release|x64.ActiveCfg = Release|x64
+{6D7C1169-3246-465F-B630-ECFEF4F3179A}.Release|x64.Build.0 = Release|x64
+{6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89}.Debug|x64.ActiveCfg = Debug|x64
+{6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89}.Debug|x64.Build.0 = Debug|x64
+{6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89}.Release|x64.ActiveCfg = Release|x64
+{6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89}.Release|x64.Build.0 = Release|x64
+{6EC93484-AAF3-487E-84E4-5ABFBA0AFC53}.Debug|x64.ActiveCfg = Debug|x64
+{6EC93484-AAF3-487E-84E4-5ABFBA0AFC53}.Debug|x64.Build.0 = Debug|x64
+{6EC93484-AAF3-487E-84E4-5ABFBA0AFC53}.Release|x64.ActiveCfg = Release|x64
+{6EC93484-AAF3-487E-84E4-5ABFBA0AFC53}.Release|x64.Build.0 = Release|x64
+{6F06A19B-0921-4B71-A3A5-B350B5FFEADB}.Debug|x64.ActiveCfg = Debug|x64
+{6F06A19B-0921-4B71-A3A5-B350B5FFEADB}.Debug|x64.Build.0 = Debug|x64
+{6F06A19B-0921-4B71-A3A5-B350B5FFEADB}.Release|x64.ActiveCfg = Release|x64
+{6F06A19B-0921-4B71-A3A5-B350B5FFEADB}.Release|x64.Build.0 = Release|x64
+{6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB}.Debug|x64.ActiveCfg = Debug|x64
+{6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB}.Debug|x64.Build.0 = Debug|x64
+{6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB}.Release|x64.ActiveCfg = Release|x64
+{6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB}.Release|x64.Build.0 = Release|x64
+{6F776280-B383-4DCE-8F42-9670164D038D}.Debug|x64.ActiveCfg = Debug|x64
+{6F776280-B383-4DCE-8F42-9670164D038D}.Debug|x64.Build.0 = Debug|x64
+{6F776280-B383-4DCE-8F42-9670164D038D}.Release|x64.ActiveCfg = Release|x64
+{6F776280-B383-4DCE-8F42-9670164D038D}.Release|x64.Build.0 = Release|x64
+{70EE1D40-0C65-4985-8EFC-BD40EE3A89B2}.Debug|x64.ActiveCfg = Debug|x64
+{70EE1D40-0C65-4985-8EFC-BD40EE3A89B2}.Debug|x64.Build.0 = Debug|x64
+{70EE1D40-0C65-4985-8EFC-BD40EE3A89B2}.Release|x64.ActiveCfg = Release|x64
+{70EE1D40-0C65-4985-8EFC-BD40EE3A89B2}.Release|x64.Build.0 = Release|x64
+{715EADD7-0FFE-4F1F-94E7-49302968DF79}.Debug|x64.ActiveCfg = Debug|x64
+{715EADD7-0FFE-4F1F-94E7-49302968DF79}.Debug|x64.Build.0 = Debug|x64
+{715EADD7-0FFE-4F1F-94E7-49302968DF79}.Release|x64.ActiveCfg = Release|x64
+{715EADD7-0FFE-4F1F-94E7-49302968DF79}.Release|x64.Build.0 = Release|x64
+{71D182E0-345A-4375-B0FA-3536821B0EE3}.Debug|x64.ActiveCfg = Debug|x64
+{71D182E0-345A-4375-B0FA-3536821B0EE3}.Debug|x64.Build.0 = Debug|x64
+{71D182E0-345A-4375-B0FA-3536821B0EE3}.Release|x64.ActiveCfg = Release|x64
+{71D182E0-345A-4375-B0FA-3536821B0EE3}.Release|x64.Build.0 = Release|x64
+{7264C8F6-73FB-4830-9306-1558D3EAC71B}.Debug|x64.ActiveCfg = Debug|x64
+{7264C8F6-73FB-4830-9306-1558D3EAC71B}.Debug|x64.Build.0 = Debug|x64
+{7264C8F6-73FB-4830-9306-1558D3EAC71B}.Release|x64.ActiveCfg = Release|x64
+{7264C8F6-73FB-4830-9306-1558D3EAC71B}.Release|x64.Build.0 = Release|x64
+{729E3905-FF7D-49C5-9871-6D35D839183E}.Debug|x64.ActiveCfg = Debug|x64
+{729E3905-FF7D-49C5-9871-6D35D839183E}.Debug|x64.Build.0 = Debug|x64
+{729E3905-FF7D-49C5-9871-6D35D839183E}.Release|x64.ActiveCfg = Release|x64
+{729E3905-FF7D-49C5-9871-6D35D839183E}.Release|x64.Build.0 = Release|x64
+{72C9DB46-C665-48AD-B805-BA885B40CA3E}.Debug|x64.ActiveCfg = Debug|x64
+{72C9DB46-C665-48AD-B805-BA885B40CA3E}.Debug|x64.Build.0 = Debug|x64
+{72C9DB46-C665-48AD-B805-BA885B40CA3E}.Release|x64.ActiveCfg = Release|x64
+{72C9DB46-C665-48AD-B805-BA885B40CA3E}.Release|x64.Build.0 = Release|x64
+{7337E34A-97B0-44FC-988B-7E6AE7E0FBBF}.Debug|x64.ActiveCfg = Debug|x64
+{7337E34A-97B0-44FC-988B-7E6AE7E0FBBF}.Debug|x64.Build.0 = Debug|x64
+{7337E34A-97B0-44FC-988B-7E6AE7E0FBBF}.Release|x64.ActiveCfg = Release|x64
+{7337E34A-97B0-44FC-988B-7E6AE7E0FBBF}.Release|x64.Build.0 = Release|x64
+{740ED97D-005F-4F58-98B2-4EF5EF5776E8}.Debug|x64.ActiveCfg = Debug|x64
+{740ED97D-005F-4F58-98B2-4EF5EF5776E8}.Debug|x64.Build.0 = Debug|x64
+{740ED97D-005F-4F58-98B2-4EF5EF5776E8}.Release|x64.ActiveCfg = Release|x64
+{740ED97D-005F-4F58-98B2-4EF5EF5776E8}.Release|x64.Build.0 = Release|x64
+{74D655D5-F661-4887-A1EB-5A6222AF5FCA}.Debug|x64.ActiveCfg = Debug|x64
+{74D655D5-F661-4887-A1EB-5A6222AF5FCA}.Debug|x64.Build.0 = Debug|x64
+{74D655D5-F661-4887-A1EB-5A6222AF5FCA}.Release|x64.ActiveCfg = Release|x64
+{74D655D5-F661-4887-A1EB-5A6222AF5FCA}.Release|x64.Build.0 = Release|x64
+{7701627C-CFD9-48F6-942E-EAACC8D057FA}.Debug|x64.ActiveCfg = Debug|x64
+{7701627C-CFD9-48F6-942E-EAACC8D057FA}.Debug|x64.Build.0 = Debug|x64
+{7701627C-CFD9-48F6-942E-EAACC8D057FA}.Release|x64.ActiveCfg = Release|x64
+{7701627C-CFD9-48F6-942E-EAACC8D057FA}.Release|x64.Build.0 = Release|x64
+{779425B1-2211-499B-A7CC-4F9EC6CB0D25}.Debug|x64.ActiveCfg = Debug|x64
+{779425B1-2211-499B-A7CC-4F9EC6CB0D25}.Debug|x64.Build.0 = Debug|x64
+{779425B1-2211-499B-A7CC-4F9EC6CB0D25}.Release|x64.ActiveCfg = Release|x64
+{779425B1-2211-499B-A7CC-4F9EC6CB0D25}.Release|x64.Build.0 = Release|x64
+{79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9}.Debug|x64.ActiveCfg = Debug|x64
+{79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9}.Debug|x64.Build.0 = Debug|x64
+{79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9}.Release|x64.ActiveCfg = Release|x64
+{79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9}.Release|x64.Build.0 = Release|x64
+{7ABF755C-821B-49CD-8EDE-83C16594FF7F}.Debug|x64.ActiveCfg = Debug|x64
+{7ABF755C-821B-49CD-8EDE-83C16594FF7F}.Debug|x64.Build.0 = Debug|x64
+{7ABF755C-821B-49CD-8EDE-83C16594FF7F}.Release|x64.ActiveCfg = Release|x64
+{7ABF755C-821B-49CD-8EDE-83C16594FF7F}.Release|x64.Build.0 = Release|x64
+{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA}.Debug|x64.ActiveCfg = Debug|x64
+{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA}.Debug|x64.Build.0 = Debug|x64
+{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA}.Release|x64.ActiveCfg = Release|x64
+{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA}.Release|x64.Build.0 = Release|x64
+{7DFEB4A5-8B04-4302-9D09-8144918FCF81}.Debug|x64.ActiveCfg = Debug|x64
+{7DFEB4A5-8B04-4302-9D09-8144918FCF81}.Debug|x64.Build.0 = Debug|x64
+{7DFEB4A5-8B04-4302-9D09-8144918FCF81}.Release|x64.ActiveCfg = Release|x64
+{7DFEB4A5-8B04-4302-9D09-8144918FCF81}.Release|x64.Build.0 = Release|x64
+{7F51CD29-3BCD-4DD8-B327-F384B5A616D1}.Debug|x64.ActiveCfg = Debug|x64
+{7F51CD29-3BCD-4DD8-B327-F384B5A616D1}.Debug|x64.Build.0 = Debug|x64
+{7F51CD29-3BCD-4DD8-B327-F384B5A616D1}.Release|x64.ActiveCfg = Release|x64
+{7F51CD29-3BCD-4DD8-B327-F384B5A616D1}.Release|x64.Build.0 = Release|x64
+{8008010F-8718-4C5F-86B2-195AEBF73422}.Debug|x64.ActiveCfg = Debug|x64
+{8008010F-8718-4C5F-86B2-195AEBF73422}.Debug|x64.Build.0 = Debug|x64
+{8008010F-8718-4C5F-86B2-195AEBF73422}.Release|x64.ActiveCfg = Release|x64
+{8008010F-8718-4C5F-86B2-195AEBF73422}.Release|x64.Build.0 = Release|x64
+{8010BBB0-C71B-4EFF-95EB-65C01E5EC197}.Debug|x64.ActiveCfg = Debug|x64
+{8010BBB0-C71B-4EFF-95EB-65C01E5EC197}.Debug|x64.Build.0 = Debug|x64
+{8010BBB0-C71B-4EFF-95EB-65C01E5EC197}.Release|x64.ActiveCfg = Release|x64
+{8010BBB0-C71B-4EFF-95EB-65C01E5EC197}.Release|x64.Build.0 = Release|x64
+{80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7}.Debug|x64.ActiveCfg = Debug|x64
+{80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7}.Debug|x64.Build.0 = Debug|x64
+{80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7}.Release|x64.ActiveCfg = Release|x64
+{80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7}.Release|x64.Build.0 = Release|x64
+{810DB909-6581-42D8-9616-906888F12149}.Debug|x64.ActiveCfg = Debug|x64
+{810DB909-6581-42D8-9616-906888F12149}.Debug|x64.Build.0 = Debug|x64
+{810DB909-6581-42D8-9616-906888F12149}.Release|x64.ActiveCfg = Release|x64
+{810DB909-6581-42D8-9616-906888F12149}.Release|x64.Build.0 = Release|x64
+{85D4076B-896B-4EBB-8F3A-8B44C24CD452}.Debug|x64.ActiveCfg = Debug|x64
+{85D4076B-896B-4EBB-8F3A-8B44C24CD452}.Debug|x64.Build.0 = Debug|x64
+{85D4076B-896B-4EBB-8F3A-8B44C24CD452}.Release|x64.ActiveCfg = Release|x64
+{85D4076B-896B-4EBB-8F3A-8B44C24CD452}.Release|x64.Build.0 = Release|x64
+{85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3}.Debug|x64.ActiveCfg = Debug|x64
+{85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3}.Debug|x64.Build.0 = Debug|x64
+{85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3}.Release|x64.ActiveCfg = Release|x64
+{85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3}.Release|x64.Build.0 = Release|x64
+{86EE22CC-6D3C-4F81-ADC8-394946F0DA81}.Debug|x64.ActiveCfg = Debug|x64
+{86EE22CC-6D3C-4F81-ADC8-394946F0DA81}.Debug|x64.Build.0 = Debug|x64
+{86EE22CC-6D3C-4F81-ADC8-394946F0DA81}.Release|x64.ActiveCfg = Release|x64
+{86EE22CC-6D3C-4F81-ADC8-394946F0DA81}.Release|x64.Build.0 = Release|x64
+{88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC}.Debug|x64.ActiveCfg = Debug|x64
+{88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC}.Debug|x64.Build.0 = Debug|x64
+{88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC}.Release|x64.ActiveCfg = Release|x64
+{88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC}.Release|x64.Build.0 = Release|x64
+{89F947CA-DDEF-4131-8AFB-584ABA4A1302}.Debug|x64.ActiveCfg = Debug|x64
+{89F947CA-DDEF-4131-8AFB-584ABA4A1302}.Debug|x64.Build.0 = Debug|x64
+{89F947CA-DDEF-4131-8AFB-584ABA4A1302}.Release|x64.ActiveCfg = Release|x64
+{89F947CA-DDEF-4131-8AFB-584ABA4A1302}.Release|x64.Build.0 = Release|x64
+{8A0FA780-068A-4534-AA2F-4FF4CF977AF2}.Debug|x64.ActiveCfg = Debug|x64
+{8A0FA780-068A-4534-AA2F-4FF4CF977AF2}.Debug|x64.Build.0 = Debug|x64
+{8A0FA780-068A-4534-AA2F-4FF4CF977AF2}.Release|x64.ActiveCfg = Release|x64
+{8A4872D7-A234-4B9B-8215-82C6BB15F3A2}.Debug|x64.ActiveCfg = Debug|x64
+{8A4872D7-A234-4B9B-8215-82C6BB15F3A2}.Debug|x64.Build.0 = Debug|x64
+{8A4872D7-A234-4B9B-8215-82C6BB15F3A2}.Release|x64.ActiveCfg = Release|x64
+{8A4872D7-A234-4B9B-8215-82C6BB15F3A2}.Release|x64.Build.0 = Release|x64
+{8C42CA7C-1543-4F1B-A55F-28CD419C7D35}.Debug|x64.ActiveCfg = Debug|x64
+{8C42CA7C-1543-4F1B-A55F-28CD419C7D35}.Debug|x64.Build.0 = Debug|x64
+{8C42CA7C-1543-4F1B-A55F-28CD419C7D35}.Release|x64.ActiveCfg = Release|x64
+{8C42CA7C-1543-4F1B-A55F-28CD419C7D35}.Release|x64.Build.0 = Release|x64
+{8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A}.Debug|x64.ActiveCfg = Debug|x64
+{8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A}.Debug|x64.Build.0 = Debug|x64
+{8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A}.Release|x64.ActiveCfg = Release|x64
+{8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A}.Release|x64.Build.0 = Release|x64
+{8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828}.Debug|x64.ActiveCfg = Debug|x64
+{8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828}.Debug|x64.Build.0 = Debug|x64
+{8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828}.Release|x64.ActiveCfg = Release|x64
+{8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828}.Release|x64.Build.0 = Release|x64
+{8E374371-30E1-4623-8755-2A2F3742170B}.Debug|x64.ActiveCfg = Debug|x64
+{8E374371-30E1-4623-8755-2A2F3742170B}.Debug|x64.Build.0 = Debug|x64
+{8E374371-30E1-4623-8755-2A2F3742170B}.Release|x64.ActiveCfg = Release|x64
+{8E374371-30E1-4623-8755-2A2F3742170B}.Release|x64.Build.0 = Release|x64
+{901F04DB-E1A5-4A41-8B81-9D31C19ACD59}.Debug|x64.ActiveCfg = Debug|x64
+{901F04DB-E1A5-4A41-8B81-9D31C19ACD59}.Debug|x64.Build.0 = Debug|x64
+{901F04DB-E1A5-4A41-8B81-9D31C19ACD59}.Release|x64.ActiveCfg = Release|x64
+{901F04DB-E1A5-4A41-8B81-9D31C19ACD59}.Release|x64.Build.0 = Release|x64
+{9186EAC4-2F34-4F17-B940-6585D7869BCD}.Debug|x64.ActiveCfg = Debug|x64
+{9186EAC4-2F34-4F17-B940-6585D7869BCD}.Debug|x64.Build.0 = Debug|x64
+{9186EAC4-2F34-4F17-B940-6585D7869BCD}.Release|x64.ActiveCfg = Release|x64
+{9186EAC4-2F34-4F17-B940-6585D7869BCD}.Release|x64.Build.0 = Release|x64
+{91E19AEB-7B75-43E0-B8B4-D2BB60D839EA}.Debug|x64.ActiveCfg = Debug|x64
+{91E19AEB-7B75-43E0-B8B4-D2BB60D839EA}.Debug|x64.Build.0 = Debug|x64
+{91E19AEB-7B75-43E0-B8B4-D2BB60D839EA}.Release|x64.ActiveCfg = Release|x64
+{91E19AEB-7B75-43E0-B8B4-D2BB60D839EA}.Release|x64.Build.0 = Release|x64
+{9233FC80-B51C-4A89-AF58-5AE86C068F6A}.Debug|x64.ActiveCfg = Debug|x64
+{9233FC80-B51C-4A89-AF58-5AE86C068F6A}.Debug|x64.Build.0 = Debug|x64
+{9233FC80-B51C-4A89-AF58-5AE86C068F6A}.Release|x64.ActiveCfg = Release|x64
+{9233FC80-B51C-4A89-AF58-5AE86C068F6A}.Release|x64.Build.0 = Release|x64
+{92388A20-50FC-45F8-89E3-71F1618EFABB}.Debug|x64.ActiveCfg = Debug|x64
+{92388A20-50FC-45F8-89E3-71F1618EFABB}.Debug|x64.Build.0 = Debug|x64
+{92388A20-50FC-45F8-89E3-71F1618EFABB}.Release|x64.ActiveCfg = Release|x64
+{92388A20-50FC-45F8-89E3-71F1618EFABB}.Release|x64.Build.0 = Release|x64
+{924B2937-0B53-4DC6-B7E1-5F3102728F89}.Debug|x64.ActiveCfg = Debug|x64
+{924B2937-0B53-4DC6-B7E1-5F3102728F89}.Debug|x64.Build.0 = Debug|x64
+{924B2937-0B53-4DC6-B7E1-5F3102728F89}.Release|x64.ActiveCfg = Release|x64
+{924B2937-0B53-4DC6-B7E1-5F3102728F89}.Release|x64.Build.0 = Release|x64
+{95B683BD-B9DC-400F-9BC0-8F1505F08BF5}.Debug|x64.ActiveCfg = Debug|x64
+{95B683BD-B9DC-400F-9BC0-8F1505F08BF5}.Debug|x64.Build.0 = Debug|x64
+{95B683BD-B9DC-400F-9BC0-8F1505F08BF5}.Release|x64.ActiveCfg = Release|x64
+{95B683BD-B9DC-400F-9BC0-8F1505F08BF5}.Release|x64.Build.0 = Release|x64
+{96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F}.Debug|x64.ActiveCfg = Debug|x64
+{96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F}.Debug|x64.Build.0 = Debug|x64
+{96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F}.Release|x64.ActiveCfg = Release|x64
+{96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F}.Release|x64.Build.0 = Release|x64
+{98ACBE5D-1A92-46F9-AA81-533412172952}.Debug|x64.ActiveCfg = Debug|x64
+{98ACBE5D-1A92-46F9-AA81-533412172952}.Debug|x64.Build.0 = Debug|x64
+{98ACBE5D-1A92-46F9-AA81-533412172952}.Release|x64.ActiveCfg = Release|x64
+{98ACBE5D-1A92-46F9-AA81-533412172952}.Release|x64.Build.0 = Release|x64
+{99F7F00F-1DE5-45EA-992B-64BA282FAC76}.Debug|x64.ActiveCfg = Debug|x64
+{99F7F00F-1DE5-45EA-992B-64BA282FAC76}.Debug|x64.Build.0 = Debug|x64
+{99F7F00F-1DE5-45EA-992B-64BA282FAC76}.Release|x64.ActiveCfg = Release|x64
+{99F7F00F-1DE5-45EA-992B-64BA282FAC76}.Release|x64.Build.0 = Release|x64
+{9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48}.Debug|x64.ActiveCfg = Debug|x64
+{9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48}.Debug|x64.Build.0 = Debug|x64
+{9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48}.Release|x64.ActiveCfg = Release|x64
+{9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48}.Release|x64.Build.0 = Release|x64
+{9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55}.Debug|x64.ActiveCfg = Debug|x64
+{9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55}.Debug|x64.Build.0 = Debug|x64
+{9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55}.Release|x64.ActiveCfg = Release|x64
+{9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55}.Release|x64.Build.0 = Release|x64
+{9D9E33EB-4C24-4646-A3FB-35DA17247917}.Debug|x64.ActiveCfg = Debug|x64
+{9D9E33EB-4C24-4646-A3FB-35DA17247917}.Debug|x64.Build.0 = Debug|x64
+{9D9E33EB-4C24-4646-A3FB-35DA17247917}.Release|x64.ActiveCfg = Release|x64
+{9D9E33EB-4C24-4646-A3FB-35DA17247917}.Release|x64.Build.0 = Release|x64
+{9E9E3D25-2139-4A5D-9200-18148DDEAD45}.Debug|x64.ActiveCfg = Debug|x64
+{9E9E3D25-2139-4A5D-9200-18148DDEAD45}.Debug|x64.Build.0 = Debug|x64
+{9E9E3D25-2139-4A5D-9200-18148DDEAD45}.Release|x64.ActiveCfg = Release|x64
+{9E9E3D25-2139-4A5D-9200-18148DDEAD45}.Release|x64.Build.0 = Release|x64
+{9FF51F3E-AF36-4F45-A797-C5F03A090298}.Debug|x64.ActiveCfg = Debug|x64
+{9FF51F3E-AF36-4F45-A797-C5F03A090298}.Debug|x64.Build.0 = Debug|x64
+{9FF51F3E-AF36-4F45-A797-C5F03A090298}.Release|x64.ActiveCfg = Release|x64
+{9FF51F3E-AF36-4F45-A797-C5F03A090298}.Release|x64.Build.0 = Release|x64
+{9FF62356-30B4-42A1-8DC7-45262A18DD44}.Debug|x64.ActiveCfg = Debug|x64
+{9FF62356-30B4-42A1-8DC7-45262A18DD44}.Debug|x64.Build.0 = Debug|x64
+{9FF62356-30B4-42A1-8DC7-45262A18DD44}.Release|x64.ActiveCfg = Release|x64
+{9FF62356-30B4-42A1-8DC7-45262A18DD44}.Release|x64.Build.0 = Release|x64
+{A18B076A-CE8C-49A6-8B80-F02843E4BF0A}.Debug|x64.ActiveCfg = Debug|x64
+{A18B076A-CE8C-49A6-8B80-F02843E4BF0A}.Debug|x64.Build.0 = Debug|x64
+{A18B076A-CE8C-49A6-8B80-F02843E4BF0A}.Release|x64.ActiveCfg = Release|x64
+{A18B076A-CE8C-49A6-8B80-F02843E4BF0A}.Release|x64.Build.0 = Release|x64
+{A216BF23-FC5C-4426-BF20-8568A2AA5FA0}.Debug|x64.ActiveCfg = Debug|x64
+{A216BF23-FC5C-4426-BF20-8568A2AA5FA0}.Debug|x64.Build.0 = Debug|x64
+{A216BF23-FC5C-4426-BF20-8568A2AA5FA0}.Release|x64.ActiveCfg = Release|x64
+{A216BF23-FC5C-4426-BF20-8568A2AA5FA0}.Release|x64.Build.0 = Release|x64
+{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D}.Debug|x64.ActiveCfg = Debug|x64
+{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D}.Debug|x64.Build.0 = Debug|x64
+{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D}.Release|x64.ActiveCfg = Release|x64
+{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D}.Release|x64.Build.0 = Release|x64
+{A38EFCDB-53D6-4474-97F3-0DDC6CE70D76}.Debug|x64.ActiveCfg = Debug|x64
+{A38EFCDB-53D6-4474-97F3-0DDC6CE70D76}.Debug|x64.Build.0 = Debug|x64
+{A38EFCDB-53D6-4474-97F3-0DDC6CE70D76}.Release|x64.ActiveCfg = Release|x64
+{A38EFCDB-53D6-4474-97F3-0DDC6CE70D76}.Release|x64.Build.0 = Release|x64
+{A39D1640-8DBA-450D-9103-2533C248991A}.Debug|x64.ActiveCfg = Debug|x64
+{A39D1640-8DBA-450D-9103-2533C248991A}.Debug|x64.Build.0 = Debug|x64
+{A39D1640-8DBA-450D-9103-2533C248991A}.Release|x64.ActiveCfg = Release|x64
+{A39D1640-8DBA-450D-9103-2533C248991A}.Release|x64.Build.0 = Release|x64
+{A57D9365-172E-4782-ADC6-82A594E30943}.Debug|x64.ActiveCfg = Debug|x64
+{A57D9365-172E-4782-ADC6-82A594E30943}.Debug|x64.Build.0 = Debug|x64
+{A57D9365-172E-4782-ADC6-82A594E30943}.Release|x64.ActiveCfg = Release|x64
+{A57D9365-172E-4782-ADC6-82A594E30943}.Release|x64.Build.0 = Release|x64
+{A79E3093-B157-4B09-BABD-29266EA16407}.Debug|x64.ActiveCfg = Debug|x64
+{A79E3093-B157-4B09-BABD-29266EA16407}.Debug|x64.Build.0 = Debug|x64
+{A79E3093-B157-4B09-BABD-29266EA16407}.Release|x64.ActiveCfg = Release|x64
+{A79E3093-B157-4B09-BABD-29266EA16407}.Release|x64.Build.0 = Release|x64
+{A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2}.Debug|x64.ActiveCfg = Debug|x64
+{A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2}.Debug|x64.Build.0 = Debug|x64
+{A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2}.Release|x64.ActiveCfg = Release|x64
+{A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2}.Release|x64.Build.0 = Release|x64
+{A9ADD224-1755-407F-906D-C13EC37FF7B0}.Debug|x64.ActiveCfg = Debug|x64
+{A9ADD224-1755-407F-906D-C13EC37FF7B0}.Debug|x64.Build.0 = Debug|x64
+{A9ADD224-1755-407F-906D-C13EC37FF7B0}.Release|x64.ActiveCfg = Release|x64
+{A9ADD224-1755-407F-906D-C13EC37FF7B0}.Release|x64.Build.0 = Release|x64
+{AB15A115-E429-4123-BEBF-206FBA4CF615}.Debug|x64.ActiveCfg = Debug|x64
+{AB15A115-E429-4123-BEBF-206FBA4CF615}.Debug|x64.Build.0 = Debug|x64
+{AB15A115-E429-4123-BEBF-206FBA4CF615}.Release|x64.ActiveCfg = Release|x64
+{AB15A115-E429-4123-BEBF-206FBA4CF615}.Release|x64.Build.0 = Release|x64
+{AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8}.Debug|x64.ActiveCfg = Debug|x64
+{AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8}.Debug|x64.Build.0 = Debug|x64
+{AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8}.Release|x64.ActiveCfg = Release|x64
+{AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8}.Release|x64.Build.0 = Release|x64
+{AE952763-5C84-43FC-B344-CACC950F056C}.Debug|x64.ActiveCfg = Debug|x64
+{AE952763-5C84-43FC-B344-CACC950F056C}.Debug|x64.Build.0 = Debug|x64
+{AE952763-5C84-43FC-B344-CACC950F056C}.Release|x64.ActiveCfg = Release|x64
+{AE952763-5C84-43FC-B344-CACC950F056C}.Release|x64.Build.0 = Release|x64
+{AE9E908D-BAEC-491F-9914-436B3CE35E94}.Debug|x64.ActiveCfg = Debug|x64
+{AE9E908D-BAEC-491F-9914-436B3CE35E94}.Debug|x64.Build.0 = Debug|x64
+{AE9E908D-BAEC-491F-9914-436B3CE35E94}.Release|x64.ActiveCfg = Release|x64
+{AE9E908D-BAEC-491F-9914-436B3CE35E94}.Release|x64.Build.0 = Release|x64
+{AEAA72CD-E060-417C-9CA1-49B4738384E0}.Debug|x64.ActiveCfg = Debug|x64
+{AEAA72CD-E060-417C-9CA1-49B4738384E0}.Debug|x64.Build.0 = Debug|x64
+{AEAA72CD-E060-417C-9CA1-49B4738384E0}.Release|x64.ActiveCfg = Release|x64
+{AEAA72CD-E060-417C-9CA1-49B4738384E0}.Release|x64.Build.0 = Release|x64
+{AF038868-2432-4159-A62F-941F11D12C5D}.Debug|x64.ActiveCfg = Debug|x64
+{AF038868-2432-4159-A62F-941F11D12C5D}.Debug|x64.Build.0 = Debug|x64
+{AF038868-2432-4159-A62F-941F11D12C5D}.Release|x64.ActiveCfg = Release|x64
+{AF038868-2432-4159-A62F-941F11D12C5D}.Release|x64.Build.0 = Release|x64
+{AF0B7480-EBE3-486B-B0C8-134910BC9324}.Debug|x64.ActiveCfg = Debug|x64
+{AF0B7480-EBE3-486B-B0C8-134910BC9324}.Debug|x64.Build.0 = Debug|x64
+{AF0B7480-EBE3-486B-B0C8-134910BC9324}.Release|x64.ActiveCfg = Release|x64
+{AF0B7480-EBE3-486B-B0C8-134910BC9324}.Release|x64.Build.0 = Release|x64
+{B30C6212-A160-405A-8FE7-340E721738A2}.Debug|x64.ActiveCfg = Debug|x64
+{B30C6212-A160-405A-8FE7-340E721738A2}.Debug|x64.Build.0 = Debug|x64
+{B30C6212-A160-405A-8FE7-340E721738A2}.Release|x64.ActiveCfg = Release|x64
+{B30C6212-A160-405A-8FE7-340E721738A2}.Release|x64.Build.0 = Release|x64
+{B35BFA09-DE68-483B-AB61-8790E8F060A8}.Debug|x64.ActiveCfg = Debug|x64
+{B35BFA09-DE68-483B-AB61-8790E8F060A8}.Debug|x64.Build.0 = Debug|x64
+{B35BFA09-DE68-483B-AB61-8790E8F060A8}.Release|x64.ActiveCfg = Release|x64
+{B35BFA09-DE68-483B-AB61-8790E8F060A8}.Release|x64.Build.0 = Release|x64
+{B36F115C-8139-4C35-A3E7-E6BF9F3DA793}.Debug|x64.ActiveCfg = Debug|x64
+{B36F115C-8139-4C35-A3E7-E6BF9F3DA793}.Debug|x64.Build.0 = Debug|x64
+{B36F115C-8139-4C35-A3E7-E6BF9F3DA793}.Release|x64.ActiveCfg = Release|x64
+{B36F115C-8139-4C35-A3E7-E6BF9F3DA793}.Release|x64.Build.0 = Release|x64
+{B379539C-E130-460D-AE82-4EBDD1A97845}.Debug|x64.ActiveCfg = Debug|x64
+{B379539C-E130-460D-AE82-4EBDD1A97845}.Debug|x64.Build.0 = Debug|x64
+{B379539C-E130-460D-AE82-4EBDD1A97845}.Release|x64.ActiveCfg = Release|x64
+{B379539C-E130-460D-AE82-4EBDD1A97845}.Release|x64.Build.0 = Release|x64
+{B3AF8A19-5802-4A34-9157-27BBE4E53C0A}.Debug|x64.ActiveCfg = Debug|x64
+{B3AF8A19-5802-4A34-9157-27BBE4E53C0A}.Debug|x64.Build.0 = Debug|x64
+{B3AF8A19-5802-4A34-9157-27BBE4E53C0A}.Release|x64.ActiveCfg = Release|x64
+{B3AF8A19-5802-4A34-9157-27BBE4E53C0A}.Release|x64.Build.0 = Release|x64
+{B440BB05-37A8-42EA-98D3-D83EB113E497}.Debug|x64.ActiveCfg = Debug|x64
+{B440BB05-37A8-42EA-98D3-D83EB113E497}.Debug|x64.Build.0 = Debug|x64
+{B440BB05-37A8-42EA-98D3-D83EB113E497}.Release|x64.ActiveCfg = Release|x64
+{B440BB05-37A8-42EA-98D3-D83EB113E497}.Release|x64.Build.0 = Release|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE}.Debug|x64.ActiveCfg = Debug|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE}.Debug|x64.Build.0 = Debug|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE}.Release|x64.ActiveCfg = Release|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE}.Release|x64.Build.0 = Release|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF}.Debug|x64.ActiveCfg = Debug|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF}.Debug|x64.Build.0 = Debug|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF}.Release|x64.ActiveCfg = Release|x64
+{B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF}.Release|x64.Build.0 = Release|x64
+{B6DA6617-D98F-4A4D-A7C4-A317212924BF}.Debug|x64.ActiveCfg = Debug|x64
+{B6DA6617-D98F-4A4D-A7C4-A317212924BF}.Debug|x64.Build.0 = Debug|x64
+{B6DA6617-D98F-4A4D-A7C4-A317212924BF}.Release|x64.ActiveCfg = Release|x64
+{B6DA6617-D98F-4A4D-A7C4-A317212924BF}.Release|x64.Build.0 = Release|x64
+{B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F}.Debug|x64.ActiveCfg = Debug|x64
+{B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F}.Debug|x64.Build.0 = Debug|x64
+{B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F}.Release|x64.ActiveCfg = Release|x64
+{B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F}.Release|x64.Build.0 = Release|x64
+{B775480C-5B32-4F64-B026-47367280EC56}.Debug|x64.ActiveCfg = Debug|x64
+{B775480C-5B32-4F64-B026-47367280EC56}.Debug|x64.Build.0 = Debug|x64
+{B775480C-5B32-4F64-B026-47367280EC56}.Release|x64.ActiveCfg = Release|x64
+{B775480C-5B32-4F64-B026-47367280EC56}.Release|x64.Build.0 = Release|x64
+{B887EA26-846C-4D6A-B0E4-432487506BC7}.Debug|x64.ActiveCfg = Debug|x64
+{B887EA26-846C-4D6A-B0E4-432487506BC7}.Debug|x64.Build.0 = Debug|x64
+{B887EA26-846C-4D6A-B0E4-432487506BC7}.Release|x64.ActiveCfg = Release|x64
+{B887EA26-846C-4D6A-B0E4-432487506BC7}.Release|x64.Build.0 = Release|x64
+{BA0EF7F5-BE6C-4B61-9D5F-1480462EE001}.Debug|x64.ActiveCfg = Debug|x64
+{BA0EF7F5-BE6C-4B61-9D5F-1480462EE001}.Debug|x64.Build.0 = Debug|x64
+{BA0EF7F5-BE6C-4B61-9D5F-1480462EE001}.Release|x64.ActiveCfg = Release|x64
+{BA0EF7F5-BE6C-4B61-9D5F-1480462EE001}.Release|x64.Build.0 = Release|x64
+{BABC6427-E533-4DCF-91E3-B5B2ED253F46}.Debug|x64.ActiveCfg = Debug|x64
+{BABC6427-E533-4DCF-91E3-B5B2ED253F46}.Debug|x64.Build.0 = Debug|x64
+{BABC6427-E533-4DCF-91E3-B5B2ED253F46}.Release|x64.ActiveCfg = Release|x64
+{BABC6427-E533-4DCF-91E3-B5B2ED253F46}.Release|x64.Build.0 = Release|x64
+{BAE107BA-7618-4972-8188-2D3CDAAE0453}.Debug|x64.ActiveCfg = Debug|x64
+{BAE107BA-7618-4972-8188-2D3CDAAE0453}.Debug|x64.Build.0 = Debug|x64
+{BAE107BA-7618-4972-8188-2D3CDAAE0453}.Release|x64.ActiveCfg = Release|x64
+{BAE107BA-7618-4972-8188-2D3CDAAE0453}.Release|x64.Build.0 = Release|x64
+{BB1120CF-B721-4EF9-8735-58F76AE51D2F}.Debug|x64.ActiveCfg = Debug|x64
+{BB1120CF-B721-4EF9-8735-58F76AE51D2F}.Debug|x64.Build.0 = Debug|x64
+{BB1120CF-B721-4EF9-8735-58F76AE51D2F}.Release|x64.ActiveCfg = Release|x64
+{BB1120CF-B721-4EF9-8735-58F76AE51D2F}.Release|x64.Build.0 = Release|x64
+{BB248BAC-6E1B-433C-A254-75140A273AB5}.Debug|x64.ActiveCfg = Debug|x64
+{BB248BAC-6E1B-433C-A254-75140A273AB5}.Debug|x64.Build.0 = Debug|x64
+{BB248BAC-6E1B-433C-A254-75140A273AB5}.Release|x64.ActiveCfg = Release|x64
+{BB248BAC-6E1B-433C-A254-75140A273AB5}.Release|x64.Build.0 = Release|x64
+{BE18F227-A9F0-4B38-B689-4E2F9F09CA5F}.Debug|x64.ActiveCfg = Debug|x64
+{BE18F227-A9F0-4B38-B689-4E2F9F09CA5F}.Debug|x64.Build.0 = Debug|x64
+{BE18F227-A9F0-4B38-B689-4E2F9F09CA5F}.Release|x64.ActiveCfg = Release|x64
+{BE18F227-A9F0-4B38-B689-4E2F9F09CA5F}.Release|x64.Build.0 = Release|x64
+{C0E811E0-8942-4CFD-A817-74D99E9E6577}.Debug|x64.ActiveCfg = Debug|x64
+{C0E811E0-8942-4CFD-A817-74D99E9E6577}.Debug|x64.Build.0 = Debug|x64
+{C0E811E0-8942-4CFD-A817-74D99E9E6577}.Release|x64.ActiveCfg = Release|x64
+{C0E811E0-8942-4CFD-A817-74D99E9E6577}.Release|x64.Build.0 = Release|x64
+{C2C36D03-26EE-4BD8-8FFC-86CFE16C1218}.Debug|x64.ActiveCfg = Debug|x64
+{C2C36D03-26EE-4BD8-8FFC-86CFE16C1218}.Debug|x64.Build.0 = Debug|x64
+{C2C36D03-26EE-4BD8-8FFC-86CFE16C1218}.Release|x64.ActiveCfg = Release|x64
+{C2C36D03-26EE-4BD8-8FFC-86CFE16C1218}.Release|x64.Build.0 = Release|x64
+{C2D5E690-748B-4138-B572-1774B99A8572}.Debug|x64.ActiveCfg = Debug|x64
+{C2D5E690-748B-4138-B572-1774B99A8572}.Debug|x64.Build.0 = Debug|x64
+{C2D5E690-748B-4138-B572-1774B99A8572}.Release|x64.ActiveCfg = Release|x64
+{C2D5E690-748B-4138-B572-1774B99A8572}.Release|x64.Build.0 = Release|x64
+{C2F94489-A483-4C44-B8A7-11A75F6AEC66}.Debug|x64.ActiveCfg = Debug|x64
+{C2F94489-A483-4C44-B8A7-11A75F6AEC66}.Debug|x64.Build.0 = Debug|x64
+{C2F94489-A483-4C44-B8A7-11A75F6AEC66}.Release|x64.ActiveCfg = Release|x64
+{C2F94489-A483-4C44-B8A7-11A75F6AEC66}.Release|x64.Build.0 = Release|x64
+{C35052AF-2383-4F9C-B18B-55A01829F2BF}.Debug|x64.ActiveCfg = Debug|x64
+{C35052AF-2383-4F9C-B18B-55A01829F2BF}.Debug|x64.Build.0 = Debug|x64
+{C35052AF-2383-4F9C-B18B-55A01829F2BF}.Release|x64.ActiveCfg = Release|x64
+{C35052AF-2383-4F9C-B18B-55A01829F2BF}.Release|x64.Build.0 = Release|x64
+{C3CEE34C-29E0-4A22-B258-3FBAF662AA19}.Debug|x64.ActiveCfg = Debug|x64
+{C3CEE34C-29E0-4A22-B258-3FBAF662AA19}.Debug|x64.Build.0 = Debug|x64
+{C3CEE34C-29E0-4A22-B258-3FBAF662AA19}.Release|x64.ActiveCfg = Release|x64
+{C3CEE34C-29E0-4A22-B258-3FBAF662AA19}.Release|x64.Build.0 = Release|x64
+{C5E8B8DB-2507-4904-847F-A52196B075F0}.Debug|x64.ActiveCfg = Debug|x64
+{C5E8B8DB-2507-4904-847F-A52196B075F0}.Debug|x64.Build.0 = Debug|x64
+{C5E8B8DB-2507-4904-847F-A52196B075F0}.Release|x64.ActiveCfg = Release|x64
+{C5E8B8DB-2507-4904-847F-A52196B075F0}.Release|x64.Build.0 = Release|x64
+{C7025EE1-57E5-44B9-A4F5-3CB059601FC3}.Debug|x64.ActiveCfg = Debug|x64
+{C7025EE1-57E5-44B9-A4F5-3CB059601FC3}.Debug|x64.Build.0 = Debug|x64
+{C7025EE1-57E5-44B9-A4F5-3CB059601FC3}.Release|x64.ActiveCfg = Release|x64
+{C7025EE1-57E5-44B9-A4F5-3CB059601FC3}.Release|x64.Build.0 = Release|x64
+{C71DAF3E-9361-4723-93E2-C475D1D0C0D0}.Debug|x64.ActiveCfg = Debug|x64
+{C71DAF3E-9361-4723-93E2-C475D1D0C0D0}.Debug|x64.Build.0 = Debug|x64
+{C71DAF3E-9361-4723-93E2-C475D1D0C0D0}.Release|x64.ActiveCfg = Release|x64
+{C71DAF3E-9361-4723-93E2-C475D1D0C0D0}.Release|x64.Build.0 = Release|x64
+{C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC}.Debug|x64.ActiveCfg = Debug|x64
+{C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC}.Debug|x64.Build.0 = Debug|x64
+{C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC}.Release|x64.ActiveCfg = Release|x64
+{C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC}.Release|x64.Build.0 = Release|x64
+{C973CD39-D63B-4F5C-BE1D-DED17388B5A4}.Debug|x64.ActiveCfg = Debug|x64
+{C973CD39-D63B-4F5C-BE1D-DED17388B5A4}.Debug|x64.Build.0 = Debug|x64
+{C973CD39-D63B-4F5C-BE1D-DED17388B5A4}.Release|x64.ActiveCfg = Release|x64
+{C973CD39-D63B-4F5C-BE1D-DED17388B5A4}.Release|x64.Build.0 = Release|x64
+{CA4BBB24-D33E-42E2-A495-F10D80DE8C1D}.Debug|x64.ActiveCfg = Debug|x64
+{CA4BBB24-D33E-42E2-A495-F10D80DE8C1D}.Debug|x64.Build.0 = Debug|x64
+{CA4BBB24-D33E-42E2-A495-F10D80DE8C1D}.Release|x64.ActiveCfg = Release|x64
+{CB906E89-1313-4929-AFF7-86FBF1CC301F}.Debug|x64.ActiveCfg = Debug|x64
+{CB906E89-1313-4929-AFF7-86FBF1CC301F}.Debug|x64.Build.0 = Debug|x64
+{CB906E89-1313-4929-AFF7-86FBF1CC301F}.Release|x64.ActiveCfg = Release|x64
+{CB906E89-1313-4929-AFF7-86FBF1CC301F}.Release|x64.Build.0 = Release|x64
+{CCA9B681-D10B-45E4-98CC-531503D2EDE8}.Debug|x64.ActiveCfg = Debug|x64
+{CCA9B681-D10B-45E4-98CC-531503D2EDE8}.Debug|x64.Build.0 = Debug|x64
+{CCA9B681-D10B-45E4-98CC-531503D2EDE8}.Release|x64.ActiveCfg = Release|x64
+{CCA9B681-D10B-45E4-98CC-531503D2EDE8}.Release|x64.Build.0 = Release|x64
+{CDD9DFC6-5C3D-42F7-B822-FE29A1C21752}.Debug|x64.ActiveCfg = Debug|x64
+{CDD9DFC6-5C3D-42F7-B822-FE29A1C21752}.Debug|x64.Build.0 = Debug|x64
+{CDD9DFC6-5C3D-42F7-B822-FE29A1C21752}.Release|x64.ActiveCfg = Release|x64
+{CDD9DFC6-5C3D-42F7-B822-FE29A1C21752}.Release|x64.Build.0 = Release|x64
+{CE3F2DFB-8470-4802-AD37-21CAF6CB2681}.Debug|x64.ActiveCfg = Debug|x64
+{CE3F2DFB-8470-4802-AD37-21CAF6CB2681}.Debug|x64.Build.0 = Debug|x64
+{CE3F2DFB-8470-4802-AD37-21CAF6CB2681}.Release|x64.ActiveCfg = Release|x64
+{CE3F2DFB-8470-4802-AD37-21CAF6CB2681}.Release|x64.Build.0 = Release|x64
+{CF9A0883-6334-44C7-AC29-349468C78E27}.Debug|x64.ActiveCfg = Debug|x64
+{CF9A0883-6334-44C7-AC29-349468C78E27}.Debug|x64.Build.0 = Debug|x64
+{CF9A0883-6334-44C7-AC29-349468C78E27}.Release|x64.ActiveCfg = Release|x64
+{CF9A0883-6334-44C7-AC29-349468C78E27}.Release|x64.Build.0 = Release|x64
+{CF9F4CEA-EC66-4E78-A086-107EB29E0637}.Debug|x64.ActiveCfg = Debug|x64
+{CF9F4CEA-EC66-4E78-A086-107EB29E0637}.Debug|x64.Build.0 = Debug|x64
+{CF9F4CEA-EC66-4E78-A086-107EB29E0637}.Release|x64.ActiveCfg = Release|x64
+{CF9F4CEA-EC66-4E78-A086-107EB29E0637}.Release|x64.Build.0 = Release|x64
+{D062166F-0EC7-4C13-A772-0C7157EEFE41}.Debug|x64.ActiveCfg = Debug|x64
+{D062166F-0EC7-4C13-A772-0C7157EEFE41}.Debug|x64.Build.0 = Debug|x64
+{D062166F-0EC7-4C13-A772-0C7157EEFE41}.Release|x64.ActiveCfg = Release|x64
+{D062166F-0EC7-4C13-A772-0C7157EEFE41}.Release|x64.Build.0 = Release|x64
+{D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1}.Debug|x64.ActiveCfg = Debug|x64
+{D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1}.Debug|x64.Build.0 = Debug|x64
+{D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1}.Release|x64.ActiveCfg = Release|x64
+{D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1}.Release|x64.Build.0 = Release|x64
+{D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3}.Debug|x64.ActiveCfg = Debug|x64
+{D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3}.Debug|x64.Build.0 = Debug|x64
+{D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3}.Release|x64.ActiveCfg = Release|x64
+{D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3}.Release|x64.Build.0 = Release|x64
+{D2964B88-EB05-4EBF-ACDA-44596FBFECB6}.Debug|x64.ActiveCfg = Debug|x64
+{D2964B88-EB05-4EBF-ACDA-44596FBFECB6}.Debug|x64.Build.0 = Debug|x64
+{D2964B88-EB05-4EBF-ACDA-44596FBFECB6}.Release|x64.ActiveCfg = Release|x64
+{D2964B88-EB05-4EBF-ACDA-44596FBFECB6}.Release|x64.Build.0 = Release|x64
+{D2C30C7E-A7D3-487A-956E-418CECAFFE8E}.Debug|x64.ActiveCfg = Debug|x64
+{D2C30C7E-A7D3-487A-956E-418CECAFFE8E}.Debug|x64.Build.0 = Debug|x64
+{D2C30C7E-A7D3-487A-956E-418CECAFFE8E}.Release|x64.ActiveCfg = Release|x64
+{D2C30C7E-A7D3-487A-956E-418CECAFFE8E}.Release|x64.Build.0 = Release|x64
+{D3A99F36-4B72-4766-ABCD-CCEDC26DD139}.Debug|x64.ActiveCfg = Debug|x64
+{D3A99F36-4B72-4766-ABCD-CCEDC26DD139}.Debug|x64.Build.0 = Debug|x64
+{D3A99F36-4B72-4766-ABCD-CCEDC26DD139}.Release|x64.ActiveCfg = Release|x64
+{D3A99F36-4B72-4766-ABCD-CCEDC26DD139}.Release|x64.Build.0 = Release|x64
+{D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7}.Debug|x64.ActiveCfg = Debug|x64
+{D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7}.Debug|x64.Build.0 = Debug|x64
+{D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7}.Release|x64.ActiveCfg = Release|x64
+{D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7}.Release|x64.Build.0 = Release|x64
+{D43FCFB6-97D2-44B2-8577-94B43B97D7CA}.Debug|x64.ActiveCfg = Debug|x64
+{D43FCFB6-97D2-44B2-8577-94B43B97D7CA}.Debug|x64.Build.0 = Debug|x64
+{D43FCFB6-97D2-44B2-8577-94B43B97D7CA}.Release|x64.ActiveCfg = Release|x64
+{D43FCFB6-97D2-44B2-8577-94B43B97D7CA}.Release|x64.Build.0 = Release|x64
+{D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D}.Debug|x64.ActiveCfg = Debug|x64
+{D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D}.Debug|x64.Build.0 = Debug|x64
+{D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D}.Release|x64.ActiveCfg = Release|x64
+{D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D}.Release|x64.Build.0 = Release|x64
+{D829DB63-E046-474D-8EA3-43A6659294D8}.Debug|x64.ActiveCfg = Debug|x64
+{D829DB63-E046-474D-8EA3-43A6659294D8}.Debug|x64.Build.0 = Debug|x64
+{D829DB63-E046-474D-8EA3-43A6659294D8}.Release|x64.ActiveCfg = Release|x64
+{D829DB63-E046-474D-8EA3-43A6659294D8}.Release|x64.Build.0 = Release|x64
+{D8317F1D-7A70-4A39-977A-EAB05A04A87B}.Debug|x64.ActiveCfg = Debug|x64
+{D8317F1D-7A70-4A39-977A-EAB05A04A87B}.Debug|x64.Build.0 = Debug|x64
+{D8317F1D-7A70-4A39-977A-EAB05A04A87B}.Release|x64.ActiveCfg = Release|x64
+{D8317F1D-7A70-4A39-977A-EAB05A04A87B}.Release|x64.Build.0 = Release|x64
+{D88187D2-1977-4C5F-B0CD-83C69BD6C1BC}.Debug|x64.ActiveCfg = Debug|x64
+{D88187D2-1977-4C5F-B0CD-83C69BD6C1BC}.Debug|x64.Build.0 = Debug|x64
+{D88187D2-1977-4C5F-B0CD-83C69BD6C1BC}.Release|x64.ActiveCfg = Release|x64
+{D88187D2-1977-4C5F-B0CD-83C69BD6C1BC}.Release|x64.Build.0 = Release|x64
+{D93A2683-6D99-4F18-B378-91195D23E007}.Debug|x64.ActiveCfg = Debug|x64
+{D93A2683-6D99-4F18-B378-91195D23E007}.Debug|x64.Build.0 = Debug|x64
+{D93A2683-6D99-4F18-B378-91195D23E007}.Release|x64.ActiveCfg = Release|x64
+{D93A2683-6D99-4F18-B378-91195D23E007}.Release|x64.Build.0 = Release|x64
+{D9A70E35-0C85-4A09-ACA8-B15B21B66F50}.Debug|x64.ActiveCfg = Debug|x64
+{D9A70E35-0C85-4A09-ACA8-B15B21B66F50}.Debug|x64.Build.0 = Debug|x64
+{D9A70E35-0C85-4A09-ACA8-B15B21B66F50}.Release|x64.ActiveCfg = Release|x64
+{D9A70E35-0C85-4A09-ACA8-B15B21B66F50}.Release|x64.Build.0 = Release|x64
+{DB68AB21-510B-4BA1-9E6F-E5731D8647BC}.Debug|x64.ActiveCfg = Debug|x64
+{DB68AB21-510B-4BA1-9E6F-E5731D8647BC}.Debug|x64.Build.0 = Debug|x64
+{DB68AB21-510B-4BA1-9E6F-E5731D8647BC}.Release|x64.ActiveCfg = Release|x64
+{DB68AB21-510B-4BA1-9E6F-E5731D8647BC}.Release|x64.Build.0 = Release|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE}.Debug|x64.ActiveCfg = Debug|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE}.Debug|x64.Build.0 = Debug|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE}.Release|x64.ActiveCfg = Release|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE}.Release|x64.Build.0 = Release|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF}.Debug|x64.ActiveCfg = Debug|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF}.Debug|x64.Build.0 = Debug|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF}.Release|x64.ActiveCfg = Release|x64
+{DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF}.Release|x64.Build.0 = Release|x64
+{DEA3CD0A-8781-4ABE-9A7D-00B91132FED0}.Debug|x64.ActiveCfg = Debug|x64
+{DEA3CD0A-8781-4ABE-9A7D-00B91132FED0}.Debug|x64.Build.0 = Debug|x64
+{DEA3CD0A-8781-4ABE-9A7D-00B91132FED0}.Release|x64.ActiveCfg = Release|x64
+{DEA3CD0A-8781-4ABE-9A7D-00B91132FED0}.Release|x64.Build.0 = Release|x64
+{E07C9A5F-B2E4-44FB-AA87-FBC885AC955D}.Debug|x64.ActiveCfg = Debug|x64
+{E07C9A5F-B2E4-44FB-AA87-FBC885AC955D}.Debug|x64.Build.0 = Debug|x64
+{E07C9A5F-B2E4-44FB-AA87-FBC885AC955D}.Release|x64.ActiveCfg = Release|x64
+{E07C9A5F-B2E4-44FB-AA87-FBC885AC955D}.Release|x64.Build.0 = Release|x64
+{E4E2EC33-7902-45D0-9C3C-ADBAFA46874A}.Debug|x64.ActiveCfg = Debug|x64
+{E4E2EC33-7902-45D0-9C3C-ADBAFA46874A}.Debug|x64.Build.0 = Debug|x64
+{E4E2EC33-7902-45D0-9C3C-ADBAFA46874A}.Release|x64.ActiveCfg = Release|x64
+{E4E2EC33-7902-45D0-9C3C-ADBAFA46874A}.Release|x64.Build.0 = Release|x64
+{E648732D-78FA-427A-928C-9A59222D37B7}.Debug|x64.ActiveCfg = Debug|x64
+{E648732D-78FA-427A-928C-9A59222D37B7}.Debug|x64.Build.0 = Debug|x64
+{E648732D-78FA-427A-928C-9A59222D37B7}.Release|x64.ActiveCfg = Release|x64
+{E648732D-78FA-427A-928C-9A59222D37B7}.Release|x64.Build.0 = Release|x64
+{E660218B-3B2D-4378-A2CD-78B865764CF1}.Debug|x64.ActiveCfg = Debug|x64
+{E660218B-3B2D-4378-A2CD-78B865764CF1}.Debug|x64.Build.0 = Debug|x64
+{E660218B-3B2D-4378-A2CD-78B865764CF1}.Release|x64.ActiveCfg = Release|x64
+{E660218B-3B2D-4378-A2CD-78B865764CF1}.Release|x64.Build.0 = Release|x64
+{E68DEB59-C709-4945-AF80-EEBCADDED944}.Debug|x64.ActiveCfg = Debug|x64
+{E68DEB59-C709-4945-AF80-EEBCADDED944}.Debug|x64.Build.0 = Debug|x64
+{E68DEB59-C709-4945-AF80-EEBCADDED944}.Release|x64.ActiveCfg = Release|x64
+{E68DEB59-C709-4945-AF80-EEBCADDED944}.Release|x64.Build.0 = Release|x64
+{E7691F81-86EF-467D-82E1-F5B9416386F9}.Debug|x64.ActiveCfg = Debug|x64
+{E7691F81-86EF-467D-82E1-F5B9416386F9}.Debug|x64.Build.0 = Debug|x64
+{E7691F81-86EF-467D-82E1-F5B9416386F9}.Release|x64.ActiveCfg = Release|x64
+{E7691F81-86EF-467D-82E1-F5B9416386F9}.Release|x64.Build.0 = Release|x64
+{E796AA20-D664-4D05-ABD9-C93A4FBE3E5C}.Debug|x64.ActiveCfg = Debug|x64
+{E796AA20-D664-4D05-ABD9-C93A4FBE3E5C}.Debug|x64.Build.0 = Debug|x64
+{E796AA20-D664-4D05-ABD9-C93A4FBE3E5C}.Release|x64.ActiveCfg = Release|x64
+{E796AA20-D664-4D05-ABD9-C93A4FBE3E5C}.Release|x64.Build.0 = Release|x64
+{E85E017F-04C0-4716-BF21-949C82C68912}.Debug|x64.ActiveCfg = Debug|x64
+{E85E017F-04C0-4716-BF21-949C82C68912}.Debug|x64.Build.0 = Debug|x64
+{E85E017F-04C0-4716-BF21-949C82C68912}.Release|x64.ActiveCfg = Release|x64
+{E85E017F-04C0-4716-BF21-949C82C68912}.Release|x64.Build.0 = Release|x64
+{E901B756-EA72-4B8D-967F-85F109D0D1DE}.Debug|x64.ActiveCfg = Debug|x64
+{E901B756-EA72-4B8D-967F-85F109D0D1DE}.Debug|x64.Build.0 = Debug|x64
+{E901B756-EA72-4B8D-967F-85F109D0D1DE}.Release|x64.ActiveCfg = Release|x64
+{E901B756-EA72-4B8D-967F-85F109D0D1DE}.Release|x64.Build.0 = Release|x64
+{E9E079D6-25BF-46E3-8075-7D733303DD59}.Debug|x64.ActiveCfg = Debug|x64
+{E9E079D6-25BF-46E3-8075-7D733303DD59}.Debug|x64.Build.0 = Debug|x64
+{E9E079D6-25BF-46E3-8075-7D733303DD59}.Release|x64.ActiveCfg = Release|x64
+{E9E079D6-25BF-46E3-8075-7D733303DD59}.Release|x64.Build.0 = Release|x64
+{ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6}.Debug|x64.ActiveCfg = Debug|x64
+{ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6}.Debug|x64.Build.0 = Debug|x64
+{ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6}.Release|x64.ActiveCfg = Release|x64
+{ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6}.Release|x64.Build.0 = Release|x64
+{EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB}.Debug|x64.ActiveCfg = Debug|x64
+{EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB}.Debug|x64.Build.0 = Debug|x64
+{EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB}.Release|x64.ActiveCfg = Release|x64
+{EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB}.Release|x64.Build.0 = Release|x64
+{EDD5FA29-69AF-445F-842A-132E65D3C92B}.Debug|x64.ActiveCfg = Debug|x64
+{EDD5FA29-69AF-445F-842A-132E65D3C92B}.Debug|x64.Build.0 = Debug|x64
+{EDD5FA29-69AF-445F-842A-132E65D3C92B}.Release|x64.ActiveCfg = Release|x64
+{EDD5FA29-69AF-445F-842A-132E65D3C92B}.Release|x64.Build.0 = Release|x64
+{EF951090-8938-4F7D-8674-7F6FB1F2C25E}.Debug|x64.ActiveCfg = Debug|x64
+{EF951090-8938-4F7D-8674-7F6FB1F2C25E}.Debug|x64.Build.0 = Debug|x64
+{EF951090-8938-4F7D-8674-7F6FB1F2C25E}.Release|x64.ActiveCfg = Release|x64
+{EF951090-8938-4F7D-8674-7F6FB1F2C25E}.Release|x64.Build.0 = Release|x64
+{F03DABEE-A03E-4437-BFD3-D012836F2D94}.Debug|x64.ActiveCfg = Debug|x64
+{F03DABEE-A03E-4437-BFD3-D012836F2D94}.Debug|x64.Build.0 = Debug|x64
+{F03DABEE-A03E-4437-BFD3-D012836F2D94}.Release|x64.ActiveCfg = Release|x64
+{F03DABEE-A03E-4437-BFD3-D012836F2D94}.Release|x64.Build.0 = Release|x64
+{F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0}.Debug|x64.ActiveCfg = Debug|x64
+{F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0}.Debug|x64.Build.0 = Debug|x64
+{F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0}.Release|x64.ActiveCfg = Release|x64
+{F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0}.Release|x64.Build.0 = Release|x64
+{F13108C4-4C86-4D56-A317-A4E5892A8AF7}.Debug|x64.ActiveCfg = Debug|x64
+{F13108C4-4C86-4D56-A317-A4E5892A8AF7}.Debug|x64.Build.0 = Debug|x64
{F13108C4-4C86-4D56-A317-A4E5892A8AF7}.Release|x64.ActiveCfg = Release|x64 + {F13108C4-4C86-4D56-A317-A4E5892A8AF7}.Release|x64.Build.0 = Release|x64 + {F3E5650D-834E-45E6-90C7-3FC2AA954929}.Debug|x64.ActiveCfg = Debug|x64 + {F3E5650D-834E-45E6-90C7-3FC2AA954929}.Debug|x64.Build.0 = Debug|x64 + {F3E5650D-834E-45E6-90C7-3FC2AA954929}.Release|x64.ActiveCfg = Release|x64 + {F3E5650D-834E-45E6-90C7-3FC2AA954929}.Release|x64.Build.0 = Release|x64 + {F596C36C-5C96-4F08-B420-8908AF500954}.Debug|x64.ActiveCfg = Debug|x64 + {F596C36C-5C96-4F08-B420-8908AF500954}.Debug|x64.Build.0 = Debug|x64 + {F596C36C-5C96-4F08-B420-8908AF500954}.Release|x64.ActiveCfg = Release|x64 + {F596C36C-5C96-4F08-B420-8908AF500954}.Release|x64.Build.0 = Release|x64 + {F5D850C9-D353-4B84-99BC-E336C231018C}.Debug|x64.ActiveCfg = Debug|x64 + {F5D850C9-D353-4B84-99BC-E336C231018C}.Debug|x64.Build.0 = Debug|x64 + {F5D850C9-D353-4B84-99BC-E336C231018C}.Release|x64.ActiveCfg = Release|x64 + {F5D850C9-D353-4B84-99BC-E336C231018C}.Release|x64.Build.0 = Release|x64 + {F5E2F6C4-19BA-497A-B754-232E4666E647}.Debug|x64.ActiveCfg = Debug|x64 + {F5E2F6C4-19BA-497A-B754-232E4666E647}.Debug|x64.Build.0 = Debug|x64 + {F5E2F6C4-19BA-497A-B754-232E4666E647}.Release|x64.ActiveCfg = Release|x64 + {F5E2F6C4-19BA-497A-B754-232E4666E647}.Release|x64.Build.0 = Release|x64 + {F5E2F6C4-19BA-497A-B754-232E469BE647}.Debug|x64.ActiveCfg = Debug|x64 + {F5E2F6C4-19BA-497A-B754-232E469BE647}.Debug|x64.Build.0 = Debug|x64 + {F5E2F6C4-19BA-497A-B754-232E469BE647}.Release|x64.ActiveCfg = Release|x64 + {F5E2F6C4-19BA-497A-B754-232E469BE647}.Release|x64.Build.0 = Release|x64 + {F63FB47F-1DCE-48E5-9CBD-F3E0A354472B}.Debug|x64.ActiveCfg = Debug|x64 + {F63FB47F-1DCE-48E5-9CBD-F3E0A354472B}.Debug|x64.Build.0 = Debug|x64 + {F63FB47F-1DCE-48E5-9CBD-F3E0A354472B}.Release|x64.ActiveCfg = Release|x64 + {F63FB47F-1DCE-48E5-9CBD-F3E0A354472B}.Release|x64.Build.0 = Release|x64 + {F7508935-C65A-4521-88E3-76AB24F2978D}.Debug|x64.ActiveCfg = Debug|x64 + {F7508935-C65A-4521-88E3-76AB24F2978D}.Debug|x64.Build.0 = Debug|x64 + {F7508935-C65A-4521-88E3-76AB24F2978D}.Release|x64.ActiveCfg = Release|x64 + {F7508935-C65A-4521-88E3-76AB24F2978D}.Release|x64.Build.0 = Release|x64 + {F7C6C6B6-4142-4C82-8699-4A9D8183181B}.Debug|x64.ActiveCfg = Debug|x64 + {F7C6C6B6-4142-4C82-8699-4A9D8183181B}.Debug|x64.Build.0 = Debug|x64 + {F7C6C6B6-4142-4C82-8699-4A9D8183181B}.Release|x64.ActiveCfg = Release|x64 + {F7C6C6B6-4142-4C82-8699-4A9D8183181B}.Release|x64.Build.0 = Release|x64 + {FB2D2B18-E616-4639-8593-0E1AF2DA01A8}.Debug|x64.ActiveCfg = Debug|x64 + {FB2D2B18-E616-4639-8593-0E1AF2DA01A8}.Debug|x64.Build.0 = Debug|x64 + {FB2D2B18-E616-4639-8593-0E1AF2DA01A8}.Release|x64.ActiveCfg = Release|x64 + {FB2D2B18-E616-4639-8593-0E1AF2DA01A8}.Release|x64.Build.0 = Release|x64 + {FBB77433-639E-42DC-9355-EA94CAE294D2}.Debug|x64.ActiveCfg = Debug|x64 + {FBB77433-639E-42DC-9355-EA94CAE294D2}.Debug|x64.Build.0 = Debug|x64 + {FBB77433-639E-42DC-9355-EA94CAE294D2}.Release|x64.ActiveCfg = Release|x64 + {FBB77433-639E-42DC-9355-EA94CAE294D2}.Release|x64.Build.0 = Release|x64 + {FC2248F5-3E9E-495B-9767-87F59614047C}.Debug|x64.ActiveCfg = Debug|x64 + {FC2248F5-3E9E-495B-9767-87F59614047C}.Debug|x64.Build.0 = Debug|x64 + {FC2248F5-3E9E-495B-9767-87F59614047C}.Release|x64.ActiveCfg = Release|x64 + {FC2248F5-3E9E-495B-9767-87F59614047C}.Release|x64.Build.0 = Release|x64 + {FC998FE5-C843-42BA-9731-F46DB02F1853}.Debug|x64.ActiveCfg = Debug|x64 + {FC998FE5-C843-42BA-9731-F46DB02F1853}.Debug|x64.Build.0 = Debug|x64 + 
{FC998FE5-C843-42BA-9731-F46DB02F1853}.Release|x64.ActiveCfg = Release|x64 + {FC998FE5-C843-42BA-9731-F46DB02F1853}.Release|x64.Build.0 = Release|x64 + {FCD0587A-4504-4F5E-8E9C-468CC03D250A}.Debug|x64.ActiveCfg = Debug|x64 + {FCD0587A-4504-4F5E-8E9C-468CC03D250A}.Debug|x64.Build.0 = Debug|x64 + {FCD0587A-4504-4F5E-8E9C-468CC03D250A}.Release|x64.ActiveCfg = Release|x64 + {FCD0587A-4504-4F5E-8E9C-468CC03D250A}.Release|x64.Build.0 = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888C}.Debug|x64.ActiveCfg = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888C}.Debug|x64.Build.0 = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888C}.Release|x64.ActiveCfg = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888C}.Release|x64.Build.0 = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888D}.Debug|x64.ActiveCfg = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888D}.Debug|x64.Build.0 = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888D}.Release|x64.ActiveCfg = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888D}.Release|x64.Build.0 = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888E}.Debug|x64.ActiveCfg = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888E}.Debug|x64.Build.0 = Debug|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888E}.Release|x64.ActiveCfg = Release|x64 + {FD726AA3-D4FA-4597-B435-08CC7752888E}.Release|x64.Build.0 = Release|x64 + {FEA09B48-34C2-4963-8A5A-F97BDA136D72}.Debug|x64.ActiveCfg = Debug|x64 + {FEA09B48-34C2-4963-8A5A-F97BDA136D72}.Debug|x64.Build.0 = Debug|x64 + {FEA09B48-34C2-4963-8A5A-F97BDA136D72}.Release|x64.ActiveCfg = Release|x64 + {FEA09B48-34C2-4963-8A5A-F97BDA136D72}.Release|x64.Build.0 = Release|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79}.Debug|x64.ActiveCfg = Debug|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79}.Debug|x64.Build.0 = Debug|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79}.Release|x64.ActiveCfg = Release|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79}.Release|x64.Build.0 = Release|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Debug|x64.ActiveCfg = Debug|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Debug|x64.Build.0 = Debug|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Release|x64.ActiveCfg = Release|x64 + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {0056B0B6-CB3E-4F0E-B6DC-48D59CB8E235} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {019F5586-5558-4C87-B319-85906D4AE407} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {025E7D51-41F2-4CBA-956E-C37A4443DB1B} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {0287C3DC-AE03-4714-AAFF-C52F062ECA6F} = {1434B17C-6165-4D42-BEA1-5A7730D5A6BB} + {02BC3B44-C7F1-4793-86C1-6F36CA8A7F53} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {03228F84-4F41-4BCC-8C2D-F329DC87B289} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {0388E945-A655-41A7-AF27-8981CEE0E49A} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {03B54A12-7793-4827-B820-C07491F7F45E} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {0529575C-F6E8-44FD-BB82-82A29948D0F2} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {063037B2-CA35-4520-811C-19D9C4ED891E} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {06877FED-15BA-421F-85C9-1A964FB97446} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {0703E813-9CC8-4DEA-AA33-42B099CD172D} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {07A153D9-DF17-4DE8-A3C2-EBF171B961AE} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {08B62E36-63D2-4FF1-A605-4BBABAEE73FB} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + 
{0A049EAD-652F-4E20-8026-90FD99AEE77A} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {0BFD78AA-FD94-4DB1-8495-8F5CC06D8F03} = {BFEDF709-A700-4769-9056-ACA934D828A8} + {0CC6D525-806E-433F-AB4A-6CFD546418B1} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {0CDCEB97-3270-4939-A290-EA2D3BE34B0C} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {0D4E38EF-A9D5-4797-8994-5DBB1125C9EA} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {0DF30DE0-7F7D-43D3-940A-809EC27D3061} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {0FB8F0FD-276C-413B-97A8-67ABE0C9043B} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {10469175-EEF7-44A0-9961-AC4E45EFD800} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {10B732EF-1783-4B61-B431-36BA5A2A3C9C} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {11D76FBC-DFAA-4B31-9DB0-206E171E3F94} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {11E158AE-C85A-4A6E-B66A-ED2994709276} = {F09A0864-9221-47AD-872F-D4538104D747} + {12A1A3EF-202C-4DD0-9B5A-F5126CAB078F} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {1434B17C-6165-4D42-BEA1-5A7730D5A6BB} = {0CC6D525-806E-433F-AB4A-6CFD546418B1} + {1464398A-100F-4518-BDB9-939A6362B6CF} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {179BEB5A-2C90-44F5-A734-FA756A5E668C} = {F09A0864-9221-47AD-872F-D4538104D747} + {17A4B817-68B1-4719-A9EF-BD8FAB747DE6} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {181A4234-282C-41F0-85C2-2B7697B3CB1A} = {F18C84B3-7898-4324-9D75-99A6048F442D} + {18E90E1A-F2E0-40DF-9900-A14E560C9EB4} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {1A36B57B-2E88-4D81-89C0-F575C9895E36} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {1B871BA2-3F70-4BC9-9DF4-725EB07F6628} = {F09A0864-9221-47AD-872F-D4538104D747} + {1B9B0D6D-E530-44A6-ADAE-09EA2BDC47DE} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {1BAA1617-93AE-4196-8A1A-BD492FB18AEF} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {1BFBAFED-A9CE-49AF-AB2C-84199E391EE6} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + {1C986F2C-9AF1-45E0-9E9B-8CABE9CAF437} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {1EB3DE5B-6357-498D-8CAC-EEC0209EA454} = {E3229AF7-1FA2-4632-BB0B-B74F709F1A33} + {1F2E1C51-2B14-4047-BE6D-52E00FC3C780} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {2498FCDA-E2CC-43EF-9A35-8CD63F253171} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {25758581-DD46-4AE4-99D9-11E736F72AD1} = {F09A0864-9221-47AD-872F-D4538104D747} + {26166DF1-3C94-44AF-9075-BA31DCD2F6BB} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {27FA11C6-431D-41D1-A417-FAB7C4F93DCA} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {296F3C5D-3951-423E-8E2F-FD4A37958C72} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {29D9376B-DC36-4940-83F1-A7CBE38A2103} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {2A1D6AF2-7336-4966-A4B3-0BE9A24BAE00} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {2B1A5104-A324-4D02-B5C7-D021FB8F880C} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {2B2DE575-1422-4FBF-97BE-35AEDA0AB465} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {2B7772E6-9DAA-4F38-B0BC-7B2399366325} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {2C24CC4F-B340-467D-908F-1BF2C69BC79F} = {F18C84B3-7898-4324-9D75-99A6048F442D} + {2CD7408E-2F60-43C3-ACEB-C7D58CDD8462} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {2DE6B085-3C19-49B1-894A-AD9376000E09} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {2ED26FDA-3C4E-4514-B387-5E77C302FF71} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {2EFFC590-BF5E-46A2-AF04-E67E1D571D2E} = {F09A0864-9221-47AD-872F-D4538104D747} + {2F543422-4B8A-4898-BE6B-590F52B4E9D1} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + 
{2FA3155B-6F26-4D15-AC03-9D82D48DBC42} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {3142CB13-CADA-48D3-9A25-E6ACB243760A} = {F09A0864-9221-47AD-872F-D4538104D747} + {34DB4951-DA08-45F1-938D-B08E5FF5AB46} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {34F31D9D-3D33-4C09-85A3-4749A8AB8EBB} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {3799BA67-3C4F-4AE0-85DC-5BAAEA01A180} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {3B23831B-E5DE-4A62-9D0B-27D0D9F293F4} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {3B44D717-EEDE-470A-B631-C9D6BFE4ADF2} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {3CF270CD-0F56-48E3-AD84-82F369C568BF} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {3EC20BDD-2E48-4291-A9EE-D0675AF77C7F} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + {3EC30D6A-BDA4-4971-879A-8814204EAE31} = {F09A0864-9221-47AD-872F-D4538104D747} + {3ECCB0F1-3ADF-486A-91C5-79DF0FC22F78} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {3ED56E55-84A6-422C-A8D4-A8439FB8F245} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {42B97D47-F800-4100-BFA2-B3AC357E8B6B} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {42CCEF95-5ADD-460C-967E-DD5B2C744943} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {433F7840-C597-4950-84C9-E4FF7DF6A298} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {45027FC5-4A32-47BD-AC5B-66CC7616B1D2} = {9A8482A7-BF0C-423D-8266-189456ED41F6} + {46629F21-089C-4205-B2F8-E01748ECE517} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {46B82069-10BE-432A-8D93-F4D995148555} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {4850F425-9128-4E91-973C-5AE7BD97395B} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {492BAA3D-0D5D-478E-9765-500463AE69AA} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {49A7CC5A-D5E7-4A07-917F-C6918B982BE8} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {4C291EEB-3874-4724-9CC2-1335D13FF0EE} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {4C429783-0B01-449F-A36F-C2019233890B} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {4C6E7F0A-7E6A-4713-B1D2-B7B4ADC992AF} = {F09A0864-9221-47AD-872F-D4538104D747} + {4E334022-7A71-4197-9E15-878F7EFC877E} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {4EE3C4D6-F707-4A05-8032-8FC2A44D29E8} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {4FB4FF90-4E92-4CFB-A01F-C73D6861CA03} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {50FD1E47-2131-48D2-9435-5CB28DF6B15A} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {513C4CFA-BD5B-4470-BA93-F6D43778A754} = {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} + {53115A01-460C-4339-A2C8-AE1323A6E7EA} = {F09A0864-9221-47AD-872F-D4538104D747} + {5580D11C-FDA6-4CF2-A0E8-1C2D3FBC11F1} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {5632B41F-19DD-4BA7-A6EB-74F9E8A7EF8A} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {581B3A58-F3F0-4765-91E5-D0C82816A528} = {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} + {58386481-30B7-40FC-96AF-0723A4A7B228} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {59AB6976-D16B-48D0-8D16-94360D3FE51D} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {59D7A9CD-9912-40E4-96E1-8A873F777F62} = {6D63CDF1-F62C-4614-AD8A-95B0A63AA070} + {59D9E21C-57D7-4D18-B792-24738BD26DE4} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {5A391A14-8E29-4788-93FC-EDADED31D32F} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {5AD07646-5E16-4CEF-B80A-BE5EE4D54FEF} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {5B2B9C0D-1B6D-4357-8307-6DE1EE0A41A3} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {5D362DB7-D2BD-4907-AAD8-4B8627E72282} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {5DB2E259-0D19-4A89-B8EC-B2912F39924D} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {5E005D50-1C73-4E52-B295-864BB9AF7AC6} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + 
{5E7305DB-93E6-448B-AE44-90EAF916A776} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {5EC35099-9777-45E8-9520-EB2EE75BDF88} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {5F2B687A-1B42-439C-AEEC-135DD22FB851} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {5F8A56F8-2C5B-48B6-9654-DD642D3E5F5C} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {60206D22-E132-4695-8486-10BECA32C5CC} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {60B463D4-8CD5-4BF6-A25B-01BE13B87590} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {60EF55C7-8399-4543-B5B2-3AE2C532C67E} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {628FADA9-7047-4DD9-BD17-9FE4B5A1ADB0} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {63B8184D-85E0-4E6A-9729-558C567D1D1D} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {643B82A1-D009-46A9-92A0-2883399B05C2} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {6516D6CF-8000-4341-9487-312BC83EE370} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {65D92D98-97E1-48F7-AEF6-75221CF48EA4} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {673277EC-D26B-414D-92E3-84EE873316A8} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {6770917C-5B8E-49F1-9297-163FAB76DAFB} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {67AC1343-98FD-4143-92C0-559C55F749F5} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {6851356E-A5D9-46A6-8262-A7E208729F18} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {6AE1B8BE-D46A-4E99-87A2-F160FB950DCA} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {6B492754-9F80-44B3-A2A7-1D98AF06F3B2} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {6BCEF2A5-0CEC-4CC6-9CB0-D3FBF871A408} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {6D63CDF1-F62C-4614-AD8A-95B0A63AA070} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {6D7C1169-3246-465F-B630-ECFEF4F3179A} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {6DBD8C02-0C75-4DB0-BFDA-CD053B1B2D89} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {6EC93484-AAF3-487E-84E4-5ABFBA0AFC53} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {6F06A19B-0921-4B71-A3A5-B350B5FFEADB} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {6F4953DA-FDC3-46CF-BF24-3752CCF2E1CB} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {6F776280-B383-4DCE-8F42-9670164D038D} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {70EE1D40-0C65-4985-8EFC-BD40EE3A89B2} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {715EADD7-0FFE-4F1F-94E7-49302968DF79} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {71D182E0-345A-4375-B0FA-3536821B0EE3} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {7264C8F6-73FB-4830-9306-1558D3EAC71B} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {729E3905-FF7D-49C5-9871-6D35D839183E} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {72C9DB46-C665-48AD-B805-BA885B40CA3E} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {7337E34A-97B0-44FC-988B-7E6AE7E0FBBF} = {6D63CDF1-F62C-4614-AD8A-95B0A63AA070} + {740ED97D-005F-4F58-98B2-4EF5EF5776E8} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {746BA101-5C93-42A5-AC7A-64DCEB186572} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {74D655D5-F661-4887-A1EB-5A6222AF5FCA} = {E3229AF7-1FA2-4632-BB0B-B74F709F1A33} + {7701627C-CFD9-48F6-942E-EAACC8D057FA} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {774627B7-6532-4464-AEE4-02F72CA44F95} = {9A8482A7-BF0C-423D-8266-189456ED41F6} + {7783BC49-A25B-468B-A6F8-AB6B39A91C65} = {F18C84B3-7898-4324-9D75-99A6048F442D} + {779425B1-2211-499B-A7CC-4F9EC6CB0D25} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {79D37FFE-FF76-44B3-BB27-3DCAEFF2EBE9} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {7ABF755C-821B-49CD-8EDE-83C16594FF7F} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + 
{7DC3B3DD-73ED-4602-9AF3-8D7053620DEA} = {877E7D1D-8150-4FE5-A139-B6FBCEAEC393} + {7DFEB4A5-8B04-4302-9D09-8144918FCF81} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {7F51CD29-3BCD-4DD8-B327-F384B5A616D1} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {8008010F-8718-4C5F-86B2-195AEBF73422} = {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} + {8010BBB0-C71B-4EFF-95EB-65C01E5EC197} = {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} + {80AF1B7D-B8CE-4AF0-AE3B-1DABED1B57E7} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {810DB909-6581-42D8-9616-906888F12149} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {85D4076B-896B-4EBB-8F3A-8B44C24CD452} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {85DBDA9B-AEF6-43E7-B8B5-05FF2BEC61A3} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {86EE22CC-6D3C-4F81-ADC8-394946F0DA81} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {877E7D1D-8150-4FE5-A139-B6FBCEAEC393} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {87A32959-E477-4CD5-8A1C-C85646D806B2} = {F18C84B3-7898-4324-9D75-99A6048F442D} + {88D239E4-EB7D-4E0A-BE3A-AD78B9F408FC} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {89F947CA-DDEF-4131-8AFB-584ABA4A1302} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {8A0FA780-068A-4534-AA2F-4FF4CF977AF2} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {8A4872D7-A234-4B9B-8215-82C6BB15F3A2} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {8C42CA7C-1543-4F1B-A55F-28CD419C7D35} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {8C6D73E0-0A6F-4487-A040-0EC78D7D6D9A} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {8D75FA1A-EC74-4F88-8AC1-CE3F98E4D828} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {8E374371-30E1-4623-8755-2A2F3742170B} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} = {95FAF291-03D1-42FC-9C10-424D551D475D} + {9186EAC4-2F34-4F17-B940-6585D7869BCD} = {95FAF291-03D1-42FC-9C10-424D551D475D} + {91C30620-70CA-46C7-AC71-71F3C602690E} = {0CC6D525-806E-433F-AB4A-6CFD546418B1} + {91E19AEB-7B75-43E0-B8B4-D2BB60D839EA} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {9233FC80-B51C-4A89-AF58-5AE86C068F6A} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {92388A20-50FC-45F8-89E3-71F1618EFABB} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {924B2937-0B53-4DC6-B7E1-5F3102728F89} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {95B683BD-B9DC-400F-9BC0-8F1505F08BF5} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {95FAF291-03D1-42FC-9C10-424D551D475D} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {96D00A19-5CEF-4CC5-BDE8-E33C68BCE90F} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {98ACBE5D-1A92-46F9-AA81-533412172952} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {99F7F00F-1DE5-45EA-992B-64BA282FAC76} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {9A4078F8-B8E4-4EC6-A6FF-4F29DAD9CE48} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {9A8482A7-BF0C-423D-8266-189456ED41F6} = {95FAF291-03D1-42FC-9C10-424D551D475D} + {9AE2DAF9-10C4-4EC3-AE52-AD5EE9C77C55} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {9C37B8CC-F810-4787-924D-65BC227091A3} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {9D9E33EB-4C24-4646-A3FB-35DA17247917} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {9E9E3D25-2139-4A5D-9200-18148DDEAD45} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {9FF51F3E-AF36-4F45-A797-C5F03A090298} = {91C30620-70CA-46C7-AC71-71F3C602690E} + {9FF62356-30B4-42A1-8DC7-45262A18DD44} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {A14A4556-9092-430D-B9CA-B2B1223D56CB} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {A18B076A-CE8C-49A6-8B80-F02843E4BF0A} = {F09A0864-9221-47AD-872F-D4538104D747} + {A216BF23-FC5C-4426-BF20-8568A2AA5FA0} = {F09A0864-9221-47AD-872F-D4538104D747} + 
{A2A0FAEA-2B7C-4FC3-B904-1DB4DEACF88D} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {A38EFCDB-53D6-4474-97F3-0DDC6CE70D76} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {A39D1640-8DBA-450D-9103-2533C248991A} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {A57D9365-172E-4782-ADC6-82A594E30943} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {A79E3093-B157-4B09-BABD-29266EA16407} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {A7CA7975-CEDB-48E6-9AEB-1209DCBD07F2} = {91C30620-70CA-46C7-AC71-71F3C602690E} + {A9ADD224-1755-407F-906D-C13EC37FF7B0} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + {AB15A115-E429-4123-BEBF-206FBA4CF615} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {AE1C32FB-9B52-4760-ABFC-0D2FA2C7A6C8} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {AE952763-5C84-43FC-B344-CACC950F056C} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {AE9E908D-BAEC-491F-9914-436B3CE35E94} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {AEAA72CD-E060-417C-9CA1-49B4738384E0} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {AF038868-2432-4159-A62F-941F11D12C5D} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {AF0B7480-EBE3-486B-B0C8-134910BC9324} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {B30C6212-A160-405A-8FE7-340E721738A2} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {B35BFA09-DE68-483B-AB61-8790E8F060A8} = {F09A0864-9221-47AD-872F-D4538104D747} + {B36F115C-8139-4C35-A3E7-E6BF9F3DA793} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {B379539C-E130-460D-AE82-4EBDD1A97845} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {B3AF8A19-5802-4A34-9157-27BBE4E53C0A} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {B440BB05-37A8-42EA-98D3-D83EB113E497} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {B6C0521B-EECA-47EF-BFA8-147F9C3F6DFE} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {B6C0521B-EECA-47EF-BFA8-147F9C3F6DFF} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {B6DA6617-D98F-4A4D-A7C4-A317212924BF} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {B6F4B85D-FE55-4A1B-AE97-D4A9ECFE195F} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {B775480C-5B32-4F64-B026-47367280EC56} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {B870D8A6-12CD-4DD0-B843-833695C2310A} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {B887EA26-846C-4D6A-B0E4-432487506BC7} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {B8A4320D-E9A3-4F89-A8AA-B16D746C158A} = {F18C84B3-7898-4324-9D75-99A6048F442D} + {BA0EF7F5-BE6C-4B61-9D5F-1480462EE001} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {BABC6427-E533-4DCF-91E3-B5B2ED253F46} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {BAE107BA-7618-4972-8188-2D3CDAAE0453} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {BB1120CF-B721-4EF9-8735-58F76AE51D2F} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {BB248BAC-6E1B-433C-A254-75140A273AB5} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {BE18F227-A9F0-4B38-B689-4E2F9F09CA5F} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} = {0CC6D525-806E-433F-AB4A-6CFD546418B1} + {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {BFEDF709-A700-4769-9056-ACA934D828A8} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {C0E811E0-8942-4CFD-A817-74D99E9E6577} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {C2C36D03-26EE-4BD8-8FFC-86CFE16C1218} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {C2D5E690-748B-4138-B572-1774B99A8572} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {C2F94489-A483-4C44-B8A7-11A75F6AEC66} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {C35052AF-2383-4F9C-B18B-55A01829F2BF} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + 
{C3CEE34C-29E0-4A22-B258-3FBAF662AA19} = {91C30620-70CA-46C7-AC71-71F3C602690E} + {C5E8B8DB-2507-4904-847F-A52196B075F0} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {C7025EE1-57E5-44B9-A4F5-3CB059601FC3} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {C71DAF3E-9361-4723-93E2-C475D1D0C0D0} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} = {0CC6D525-806E-433F-AB4A-6CFD546418B1} + {C7E42AE1-052F-4024-B8BA-DE5DCE6BBEEC} = {C721EFBD-45DC-479E-9B99-E62FCC1FC6E5} + {C973CD39-D63B-4F5C-BE1D-DED17388B5A4} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {CA4BBB24-D33E-42E2-A495-F10D80DE8C1D} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {CB906E89-1313-4929-AFF7-86FBF1CC301F} = {9C37B8CC-F810-4787-924D-65BC227091A3} + {CCA9B681-D10B-45E4-98CC-531503D2EDE8} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {CDD9DFC6-5C3D-42F7-B822-FE29A1C21752} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {CE3F2DFB-8470-4802-AD37-21CAF6CB2681} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {CF9A0883-6334-44C7-AC29-349468C78E27} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {CF9F4CEA-EC66-4E78-A086-107EB29E0637} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {D062166F-0EC7-4C13-A772-0C7157EEFE41} = {1434B17C-6165-4D42-BEA1-5A7730D5A6BB} + {D140560D-FDEC-4D3D-8F58-BF5FD5E4DAA1} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {D28F5FF6-8401-4E0D-94F9-3A1FD7ED64E3} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {D2964B88-EB05-4EBF-ACDA-44596FBFECB6} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + {D2C30C7E-A7D3-487A-956E-418CECAFFE8E} = {F09A0864-9221-47AD-872F-D4538104D747} + {D3A99F36-4B72-4766-ABCD-CCEDC26DD139} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {D4035736-1AD6-4100-9FA9-A8A0C1DAE0C7} = {59AB6976-D16B-48D0-8D16-94360D3FE51D} + {D43FCFB6-97D2-44B2-8577-94B43B97D7CA} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {D6A1F30D-C9E5-4F5C-9A16-50430AB1F26D} = {BEA6AC7C-831D-44EF-AD61-DA65A448CC9B} + {D829DB63-E046-474D-8EA3-43A6659294D8} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {D8317F1D-7A70-4A39-977A-EAB05A04A87B} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {D88187D2-1977-4C5F-B0CD-83C69BD6C1BC} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {D93A2683-6D99-4F18-B378-91195D23E007} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {D9A70E35-0C85-4A09-ACA8-B15B21B66F50} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {DB68AB21-510B-4BA1-9E6F-E5731D8647BC} = {BFBAB433-860E-4A28-96E3-A4B7AFE3B297} + {DE068BE1-A8E9-48A2-B216-92A7CE5EA4CE} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {DE068BE1-A8E9-48A2-B216-92A7CE5EA4CF} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {DEA3CD0A-8781-4ABE-9A7D-00B91132FED0} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {E07C9A5F-B2E4-44FB-AA87-FBC885AC955D} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {E23BB160-006E-44F2-8FB4-3A2240BBC20C} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {E3229AF7-1FA2-4632-BB0B-B74F709F1A33} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {E4E2EC33-7902-45D0-9C3C-ADBAFA46874A} = {F8373EDD-1B9E-462D-BF23-55638E23E98B} + {E648732D-78FA-427A-928C-9A59222D37B7} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {E660218B-3B2D-4378-A2CD-78B865764CF1} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {E68DEB59-C709-4945-AF80-EEBCADDED944} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {E7691F81-86EF-467D-82E1-F5B9416386F9} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {E796AA20-D664-4D05-ABD9-C93A4FBE3E5C} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {E85E017F-04C0-4716-BF21-949C82C68912} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {E901B756-EA72-4B8D-967F-85F109D0D1DE} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + 
{E9E079D6-25BF-46E3-8075-7D733303DD59} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {ED2A831F-4AAF-4CF7-A953-3C45B0EC1BE6} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {EDA88BAB-9FA7-4A2D-8974-EFCFA24B3FEB} = {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} + {EDD5FA29-69AF-445F-842A-132E65D3C92B} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {EF951090-8938-4F7D-8674-7F6FB1F2C25E} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {F03DABEE-A03E-4437-BFD3-D012836F2D94} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {F09A0864-9221-47AD-872F-D4538104D747} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {F0B613C4-1D9A-4259-BD0E-C1B9FF2AA3A0} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {F13108C4-4C86-4D56-A317-A4E5892A8AF7} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {F3E5650D-834E-45E6-90C7-3FC2AA954929} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {F42C09CD-ABA5-4DA9-8383-5EA40FA4D763} = {0CC6D525-806E-433F-AB4A-6CFD546418B1} + {F596C36C-5C96-4F08-B420-8908AF500954} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {F5D850C9-D353-4B84-99BC-E336C231018C} = {BFEDF709-A700-4769-9056-ACA934D828A8} + {F5E2F6C4-19BA-497A-B754-232E4666E647} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {F5E2F6C4-19BA-497A-B754-232E469BE647} = {BD6CC700-B36B-435B-BAF9-FC5AFCD766C9} + {F63FB47F-1DCE-48E5-9CBD-F3E0A354472B} = {E23BB160-006E-44F2-8FB4-3A2240BBC20C} + {F7508935-C65A-4521-88E3-76AB24F2978D} = {A14A4556-9092-430D-B9CA-B2B1223D56CB} + {F7C6C6B6-4142-4C82-8699-4A9D8183181B} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {F8373EDD-1B9E-462D-BF23-55638E23E98B} = {746BA101-5C93-42A5-AC7A-64DCEB186572} + {F8CCA5AE-2D75-4C79-BEAB-2588CD5956C8} = {853D45D8-980C-4991-B62A-DAC6FD245402} + {FB2D2B18-E616-4639-8593-0E1AF2DA01A8} = {2F543422-4B8A-4898-BE6B-590F52B4E9D1} + {FBB77433-639E-42DC-9355-EA94CAE294D2} = {1A36B57B-2E88-4D81-89C0-F575C9895E36} + {FC2248F5-3E9E-495B-9767-87F59614047C} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {FC998FE5-C843-42BA-9731-F46DB02F1853} = {F09A0864-9221-47AD-872F-D4538104D747} + {FCD0587A-4504-4F5E-8E9C-468CC03D250A} = {1434B17C-6165-4D42-BEA1-5A7730D5A6BB} + {FD726AA3-D4FA-4597-B435-08CC7752888C} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {FD726AA3-D4FA-4597-B435-08CC7752888D} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {FD726AA3-D4FA-4597-B435-08CC7752888E} = {4C291EEB-3874-4724-9CC2-1335D13FF0EE} + {FEA09B48-34C2-4963-8A5A-F97BDA136D72} = {B870D8A6-12CD-4DD0-B843-833695C2310A} + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA79} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + {FF6E5B0C-DC00-4C93-B9C2-63D1E858BA80} = {63C9B3F8-437D-4AD9-B32D-D04AE38C35B6} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {5E690324-2D48-486A-8D3C-DCB520D3F693} + EndGlobalSection +EndGlobal diff --git a/src/pmdk/src/README b/src/pmdk/src/README new file mode 100644 index 000000000..2a0ca4667 --- /dev/null +++ b/src/pmdk/src/README @@ -0,0 +1,16 @@ +Persistent Memory Development Kit + +This is src/README. + +This directory contains the source for the Persistent Memory Development Kit. + +The subdirectory "include" contains header files that get delivered +along with the libraries. Everything else is internal to the libraries +and lives in this directory. + +Two versions of the libraries are built, a debug version and a nondebug +version. The object files and the libraries themselves end up in the +subdirectories "debug" and "nondebug". + +See the top-level README for build, test, and installation instructions. +The basic "make" and "make test" targets also work from this directory. 
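For example, a build session from this directory might look like the following
(a hypothetical transcript; "make" and "make test" are the targets named
above, while "make check" is assumed here to be the usual way to run the
built tests):

	$ make		# builds the debug and nondebug library variants
	$ make test	# builds the unit tests
	$ make check	# runs the built tests (assumed target)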
diff --git a/src/pmdk/src/common.inc b/src/pmdk/src/common.inc new file mode 100644 index 000000000..8112512ae --- /dev/null +++ b/src/pmdk/src/common.inc @@ -0,0 +1,400 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation +# +# src/common.inc -- common Makefile rules for PMDK +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST))).. + +# import user variables +ifneq ($(wildcard $(TOP)/user.mk),) +include $(TOP)/user.mk +endif + +LN = ln +OBJCOPY ?= objcopy +MKDIR = mkdir +INSTALL = install +CP = cp +CSTYLE = $(TOP)/utils/cstyle +CSTYLEON ?= 0 +STYLE_CHECK = $(TOP)/utils/style_check.sh +CHECK_SHEBANG = $(TOP)/utils/check-shebang.sh +CHECK_OS = $(TOP)/utils/check-os.sh +OS_BANNED = $(TOP)/utils/os-banned +COVERAGE = 0 +FAULT_INJECTION ?= 0 + +PKG_CONFIG ?= pkg-config +HEADERS = $(wildcard *.h) $(wildcard *.hpp) + +ifeq ($(SRCVERSION),) +export SRCVERSION := $(shell $(TOP)/utils/version.sh $(TOP)) +else +export SRCVERSION +endif + +ifeq ($(SRCVERSION),) +$(error Cannot evaluate version) +endif + +ifeq ($(CLANG_FORMAT),) +ifeq ($(shell command -v clang-format-9 > /dev/null && echo y || echo n), y) +export CLANG_FORMAT ?= clang-format-9 +else +export CLANG_FORMAT ?= clang-format +endif +endif + +ifeq ($(FLAKE8),) +export FLAKE8 ?= flake8 +endif + +GCOV_CFLAGS=-fprofile-arcs -ftest-coverage --coverage +GCOV_LDFLAGS=-fprofile-arcs -ftest-coverage +GCOV_LIBS=-lgcov + +LIBS += $(EXTRA_LIBS) + +ifeq ($(OS_KERNEL_NAME),) +export OS_KERNEL_NAME := $(shell uname -s) +endif + +osdep = $(1)_$(shell echo $(OS_KERNEL_NAME) | tr "[:upper:]" "[:lower:]")$(2) + +get_arch = $(shell $(CC) -dumpmachine | awk -F'[/-]' '{print $$1}') +ifeq ($(ARCH),) +export ARCH := $(call get_arch) +endif +ifeq ($(ARCH),amd64) +override ARCH := x86_64 +endif +ifeq ($(ARCH),arm64) +override ARCH := aarch64 +endif +ifneq ($(filter $(ARCH), powerpc64 powerpc64le ppc64 ppc64le ppc64el powerpc),) +override ARCH := ppc64 +endif + +ifeq ($(PKG_CONFIG_CHECKED),) +ifeq ($(shell command -v $(PKG_CONFIG) && echo y || echo n), n) +$(error $(PKG_CONFIG) not found) +endif +endif +export PKG_CONFIG_CHECKED := y + +check_package = $(shell $(PKG_CONFIG) $(1) && echo y || echo n) + +check_flag = $(shell echo "int main(){return 0;}" |\ + $(CC) $(CFLAGS) -Werror $(1) -x c -o /dev/null - 2>/dev/null && echo y || echo n) + +check_compiler = $(shell $(CC) --version | grep $(1) && echo y || echo n) + +check_Wconversion = $(shell echo "long random(void); char test(void); char test(void){char a = 0; char b = 'a'; char ret = random() == 1 ? a : b; return ret;}" |\ + $(CC) -c $(CFLAGS) -Wconversion -x c -o /dev/null - 2>/dev/null && echo y || echo n) + +check_librt = $(shell echo "int main() { struct timespec t; return clock_gettime(CLOCK_MONOTONIC, &t); }" |\ + $(CC) $(CFLAGS) -x c -include time.h -o /dev/null - 2>/dev/null && echo n || echo y) + +# XXX: required by clock_gettime(), if glibc version < 2.17 +# The os_clock_gettime() function is now in OS abstraction layer, +# linked to all the libraries, unit tests and benchmarks. 
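+# Note: check_librt above compiles a one-line clock_gettime() program with
+# no extra libraries; success means librt is not needed ("echo n"), failure
+# means -lrt must be added ("echo y") -- hence the inverted echo order.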
+ifeq ($(LIBRT_NEEDED),) +export LIBRT_NEEDED := $(call check_librt) +else +export LIBRT_NEEDED +endif + +ifeq ($(IS_ICC),) +export IS_ICC := $(call check_compiler, icc) +else +export IS_ICC +endif + +ifeq ($(WCONVERSION_AVAILABLE),) +export WCONVERSION_AVAILABLE := $(call check_Wconversion) +else +export WCONVERSION_AVAILABLE +endif + +ifeq ($(WUNREACHABLE_CODE_RETURN_AVAILABLE),) +ifeq ($(IS_ICC), n) +export WUNREACHABLE_CODE_RETURN_AVAILABLE := $(call check_flag, -Wunreachable-code-return) +else +export WUNREACHABLE_CODE_RETURN_AVAILABLE := n +endif +else +export WUNREACHABLE_CODE_RETURN_AVAILABLE +endif + +ifeq ($(WMISSING_VARIABLE_DECLARATIONS_AVAILABLE),) +ifeq ($(IS_ICC), n) +export WMISSING_VARIABLE_DECLARATIONS_AVAILABLE := $(call check_flag, -Wmissing-variable-declarations) +else +export WMISSING_VARIABLE_DECLARATIONS_AVAILABLE := n +endif +else +export WMISSING_VARIABLE_DECLARATIONS_AVAILABLE +endif + +ifeq ($(WFLOAT_EQUAL_AVAILABLE),) +ifeq ($(IS_ICC), n) +export WFLOAT_EQUAL_AVAILABLE := $(call check_flag, -Wfloat-equal) +else +export WFLOAT_EQUAL_AVAILABLE := n +endif +else +export WFLOAT_EQUAL_AVAILABLE +endif + +ifeq ($(WSWITCH_DEFAULT_AVAILABLE),) +ifeq ($(IS_ICC), n) +export WSWITCH_DEFAULT_AVAILABLE := $(call check_flag, -Wswitch-default) +else +export WSWITCH_DEFAULT_AVAILABLE := n +endif +else +export WSWITCH_DEFAULT_AVAILABLE +endif + +ifeq ($(WCAST_FUNCTION_TYPE_AVAILABLE),) +ifeq ($(IS_ICC), n) +export WCAST_FUNCTION_TYPE_AVAILABLE := $(call check_flag, -Wcast-function-type) +else +export WCAST_FUNCTION_TYPE_AVAILABLE := n +endif +else +export WCAST_FUNCTION_TYPE_AVAILABLE +endif + +ifeq ($(WSTRINGOP_TRUNCATION_AVAILABLE),) +export WSTRINGOP_TRUNCATION_AVAILABLE := $(call check_flag, -Wstringop-truncation) +else +export WSTRINGOP_TRUNCATION_AVAILABLE +endif + +ifeq ($(OG_AVAILABLE),) +export OG_AVAILABLE := $(call check_flag, -Og) +else +export OG_AVAILABLE +endif + +install_recursive = $(shell cd $(1) && find . -type f -exec install -m $(2) -D {} $(3)/{} \;) + +install_recursive_filter = $(shell cd $(1) && find . 
-type f -name "$(2)" -exec install -m $(3) -D {} $(4)/{} \;) + +define create-deps + @cp $(objdir)/$*.d $(objdir)/.deps/$*.P; \ + sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' \ + -e '/^$$/ d' -e 's/$$/ :/' < $(objdir)/$*.d >> $(objdir)/.deps/$*.P; \ + $(RM) -f $(objdir)/$*.d +endef + +check_defined = \ + $(strip $(foreach 1,$1, \ + $(call __check_defined,$1,$(strip $(value 2))))) + +__check_defined = \ + $(if $(value $1),, \ + $(error Undefined $1$(if $2, ($2)))) + +export prefix = /usr/local +export exec_prefix := $(prefix) +export sysconfdir := $(prefix)/etc +export datarootdir := $(prefix)/share +export mandir := $(datarootdir)/man +export docdir := $(datarootdir)/doc +export man1dir := $(mandir)/man1 +export man3dir := $(mandir)/man3 +export man5dir := $(mandir)/man5 +export man7dir := $(mandir)/man7 +export cstyle_bin := $(CSTYLE) +export clang_format_bin := $(CLANG_FORMAT) +export flake8_bin := $(FLAKE8) + +ifneq ($(wildcard $(exec_prefix)/x86_64-linux-gnu),) +LIB_PREFIX ?= x86_64-linux-gnu/lib +endif + +ifneq ($(wildcard $(exec_prefix)/lib64),) +LIB_PREFIX ?= lib64 +endif + +LIB_PREFIX ?= lib + +all: + +cstyle-%: + $(STYLE_CHECK) $* $(wildcard *.[ch]) $(wildcard *.[ch]pp) $(wildcard *.py) + +cstyle: cstyle-check + +format: cstyle-format + +ifeq ($(CSTYLEON),1) +define check-cstyle + @$(STYLE_CHECK) check $1 && if [ "$2" != "" ]; then mkdir -p `dirname $2` && touch $2; fi +endef +else ifeq ($(CSTYLEON),2) +define check-cstyle + @$(STYLE_CHECK) check $1 && if [ "$2" != "" ]; then mkdir -p `dirname $2` && touch $2; fi || true +endef +else +define check-cstyle +endef +endif + +define check-os +$(CHECK_OS) $(OS_BANNED) $(1) $(2) +endef + +# XXX: to allow gcov tool to connect coverage with source code, we have to +# use absolute path to source files +ifeq ($(COVERAGE),1) +define coverage-path +`readlink -f $(1)` +endef +else +define coverage-path +$(1) +endef +endif + +define sub-target-foreach +$(1)-$(2): + $$(MAKE) -C $1 $2 +ifeq ($(3),y) +ifeq ($(custom_build),) + $$(MAKE) -C $1 $2 DEBUG=1 +endif +endif +endef + +define sub-target +$(foreach f, $(1), $(eval $(call sub-target-foreach, $f,$(2),$(3)))) +endef + +ifneq ($(wildcard $(prefix)/x86_64-linux-gnu),) +INC_PREFIX ?= x86_64-linux-gnu/include +endif + +INC_PREFIX ?= include + +test_build=$(addprefix "-b ", $(TEST_BUILD)) +test_type=$(addprefix " -t ", $(TEST_TYPE)) +test_fs=$(addprefix " -f ", $(TEST_FS)) +test_time=$(addprefix " -o ", $(TEST_TIME)) +test_memcheck=$(addprefix " -m ", $(MEMCHECK)) +test_pmemcheck=$(addprefix " -p ", $(PMEMCHECK)) +test_helgrind=$(addprefix " -e ", $(HELGRIND)) +test_drd=$(addprefix " -d ", $(DRD)) +test_providers=$(addprefix " -q ", $(TEST_PROVIDERS)) +test_pmethods=$(addprefix " -r ", $(TEST_PMETHODS)) + +ifeq ($(CHECK_POOL),y) +test_check_pool=" -c " +endif + +RUNTEST_OPTIONS := "$(test_build)$(test_type)$(test_fs)$(test_time)" +RUNTEST_OPTIONS += "$(test_memcheck)$(test_pmemcheck)$(test_helgrind)$(test_drd)" +RUNTEST_OPTIONS += "$(test_providers)$(test_pmethods)$(test_check_pool)" + +export libdir := $(exec_prefix)/$(LIB_PREFIX) +export includedir := $(prefix)/$(INC_PREFIX) +export pkgconfigdir := $(libdir)/pkgconfig +export bindir := $(exec_prefix)/bin +export bashcompdir := $(sysconfdir)/bash_completion.d + +LIBFABRIC_MIN_VERSION := 1.4.2 + +# Keep in sync with requirements in src/test/unittest/unittest.sh and +# utils/docker/images/install-libfabric.sh. 
+ifeq ($(BUILD_RPMEM),) +BUILD_RPMEM := $(call check_package, libfabric --atleast-version=$(LIBFABRIC_MIN_VERSION)) +endif +ifneq ($(BUILD_RPMEM),y) +export BUILD_RPMEM_INFO := libfabric (version >= $(LIBFABRIC_MIN_VERSION)) is missing -- \ +see src/librpmem/README for details +else +LIBFABRIC_CFLAGS := $(shell $(PKG_CONFIG) --cflags libfabric) +LIBFABRIC_LD_LIBRARY_PATHS := $(shell $(PKG_CONFIG) --variable=libdir libfabric) +LIBFABRIC_LIBS := $(shell $(PKG_CONFIG) --libs libfabric) +LIBFABRIC_PATH := $(shell $(PKG_CONFIG) --variable=exec_prefix libfabric)/bin +endif +export BUILD_RPMEM +export LIBFABRIC_CFLAGS +export LIBFABRIC_LD_LIBRARY_PATHS +export LIBFABRIC_LIBS +export LIBFABRIC_PATH + +# unsafe shutdown count and badblock access without root (depends on kernel 4.20) +NDCTL_MIN_VERSION := 63 + +sparse-c = $(shell for c in *.c; do sparse -Wsparse-all -Wno-declaration-after-statement $(CFLAGS) $(INCS) $$c || true; done) + +ifeq ($(USE_LIBUNWIND),) +export USE_LIBUNWIND := $(call check_package, libunwind) +ifeq ($(USE_LIBUNWIND),y) +export LIBUNWIND_LIBS := $(shell $(PKG_CONFIG) --libs libunwind) +endif +else +export USE_LIBUNWIND +export LIBUNWIND_LIBS +endif + +ifeq ($(OS_KERNEL_NAME),FreeBSD) + +GLIBC_CXXFLAGS=-D_GLIBCXX_USE_C99 +UNIX98_CFLAGS= +OS_INCS=-I$(TOP)/src/freebsd/include -I/usr/local/include +OS_LIBS=-L/usr/local/lib +LIBDL= +LIBUTIL=-lutil +LIBUUID=-luuid +LIBNDCTL= +OS_DIMM=none + +else + +GLIBC_CXXFLAGS= +UNIX98_CFLAGS=-D__USE_UNIX98 +OS_INCS= +OS_LIBS= +LIBDL=-ldl +LIBUTIL= +LIBUUID= + +NDCTL_ENABLE ?= y + +# Detect libndctl if not disabled. +ifeq ($(NDCTL_ENABLE),y) + ifeq ($(LIBNDCTL_LIBS),) + HAS_NDCTL := $(call check_package, libndctl --atleast-version $(NDCTL_MIN_VERSION)) + ifeq ($(HAS_NDCTL),y) + OS_DIMM_CFLAG=-DNDCTL_ENABLED=1 + else + $(error Please install libndctl-dev/libndctl-devel >= $(NDCTL_MIN_VERSION)) + endif + HAS_DAXCTL := $(call check_package, libdaxctl --atleast-version $(NDCTL_MIN_VERSION)) + ifeq ($(HAS_DAXCTL),n) + $(error Please install libdaxctl-dev/libdaxctl-devel >= $(NDCTL_MIN_VERSION)) + endif + LIBNDCTL_PKG_CONFIG_DEPS := libndctl libdaxctl + LIBNDCTL_PKG_CONFIG_DEPS_VAR := ,libndctl,libdaxctl + LIBNDCTL_CFLAGS := $(shell $(PKG_CONFIG) --cflags $(LIBNDCTL_PKG_CONFIG_DEPS)) + LIBNDCTL_LD_LIBRARY_PATHS := $(shell $(PKG_CONFIG) --variable=libdir $(LIBNDCTL_PKG_CONFIG_DEPS) | sed "s/ /:/") + LIBNDCTL_LIBS := $(shell $(PKG_CONFIG) --libs $(LIBNDCTL_PKG_CONFIG_DEPS)) + endif + OS_DIMM := ndctl +else + OS_DIMM := none +endif +export OS_DIMM +export LIBNDCTL_PKG_CONFIG_DEPS +export LIBNDCTL_PKG_CONFIG_DEPS_VAR +export LIBNDCTL_CFLAGS +export LIBNDCTL_LD_LIBRARY_PATHS +export LIBNDCTL_LIBS +export OS_DIMM_CFLAG + +endif diff --git a/src/pmdk/src/common/.cstyleignore b/src/pmdk/src/common/.cstyleignore new file mode 100644 index 000000000..ff1e08160 --- /dev/null +++ b/src/pmdk/src/common/.cstyleignore @@ -0,0 +1 @@ +queue.h diff --git a/src/pmdk/src/common/Makefile b/src/pmdk/src/common/Makefile new file mode 100644 index 000000000..535b1088e --- /dev/null +++ b/src/pmdk/src/common/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2019, Intel Corporation + +# +# src/common/Makefile -- Makefile for common +# + +LIBRARY_NAME = pmemcommon + +include pmemcommon.inc + +include ../Makefile.inc + +CFLAGS += $(LIBNDCTL_CFLAGS) +CFLAGS += -DUSE_LIBDL diff --git a/src/pmdk/src/common/bad_blocks.c b/src/pmdk/src/common/bad_blocks.c new file mode 100644 index 000000000..28bf2843a --- /dev/null +++ 
b/src/pmdk/src/common/bad_blocks.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * bad_blocks.c -- implementation of the bad block API using libpmem2 library + */ + +#include <fcntl.h> +#include <errno.h> + +#include "libpmem2.h" +#include "badblocks.h" +#include "out.h" +#include "vec.h" +#include "os.h" + +/* + * badblocks_count -- returns number of bad blocks in the file + * or -1 in case of an error + */ +long +badblocks_count(const char *file) +{ + LOG(3, "file %s", file); + + struct badblocks *bbs = badblocks_new(); + if (bbs == NULL) + return -1; + + int ret = badblocks_get(file, bbs); + + long count = (ret == 0) ? (long)bbs->bb_cnt : -1; + + badblocks_delete(bbs); + + return count; +} + +/* + * badblocks_get -- returns 0 and bad blocks in the 'bbs' array + * (that has to be pre-allocated) + * or -1 in case of an error + */ +int +badblocks_get(const char *file, struct badblocks *bbs) +{ + LOG(3, "file %s badblocks %p", file, bbs); + + ASSERTne(bbs, NULL); + + struct pmem2_source *src; + struct pmem2_badblock_context *bbctx; + struct pmem2_badblock bb; + int bb_found = -1; /* -1 means an error */ + int ret; + + VEC(bbsvec, struct bad_block) bbv = VEC_INITIALIZER; + memset(bbs, 0, sizeof(*bbs)); + + int fd = os_open(file, O_RDONLY); + if (fd == -1) { + ERR("!open %s", file); + return -1; + } + + ret = pmem2_source_from_fd(&src, fd); + if (ret) + goto exit_close; + + ret = pmem2_badblock_context_new(&bbctx, src); + if (ret) + goto exit_delete_source; + + bb_found = 0; + while ((pmem2_badblock_next(bbctx, &bb)) == 0) { + bb_found++; + /* + * Form a new bad block structure with offset and length + * expressed in bytes and offset relative + * to the beginning of the file. + */ + struct bad_block bbn; + bbn.offset = bb.offset; + bbn.length = bb.length; + /* unknown healthy replica */ + bbn.nhealthy = NO_HEALTHY_REPLICA; + + /* add the new bad block to the vector */ + if (VEC_PUSH_BACK(&bbv, bbn)) { + VEC_DELETE(&bbv); + bb_found = -1; + Free(bbs->bbv); + bbs->bbv = NULL; + bbs->bb_cnt = 0; + } + } + + if (bb_found > 0) { + bbs->bbv = VEC_ARR(&bbv); + bbs->bb_cnt = (unsigned)VEC_SIZE(&bbv); + + LOG(10, "number of bad blocks detected: %u", bbs->bb_cnt); + + /* sanity check */ + ASSERTeq((unsigned)bb_found, bbs->bb_cnt); + } + + pmem2_badblock_context_delete(&bbctx); + +exit_delete_source: + pmem2_source_delete(&src); + +exit_close: + if (fd != -1) + os_close(fd); + + if (ret && bb_found == -1) + errno = pmem2_err_to_errno(ret); + + return (bb_found >= 0) ? 
0 : -1; +} + +/* + * badblocks_clear -- clears the given bad blocks in a file + * (regular file or dax device) + */ +int +badblocks_clear(const char *file, struct badblocks *bbs) +{ + LOG(3, "file %s badblocks %p", file, bbs); + + ASSERTne(bbs, NULL); + + struct pmem2_source *src; + struct pmem2_badblock_context *bbctx; + struct pmem2_badblock bb; + int ret = -1; + + int fd = os_open(file, O_RDWR); + if (fd == -1) { + ERR("!open %s", file); + return -1; + } + + ret = pmem2_source_from_fd(&src, fd); + if (ret) + goto exit_close; + + ret = pmem2_badblock_context_new(&bbctx, src); + if (ret) { + LOG(1, "pmem2_badblock_context_new failed -- %s", file); + goto exit_delete_source; + } + + for (unsigned b = 0; b < bbs->bb_cnt; b++) { + bb.offset = bbs->bbv[b].offset; + bb.length = bbs->bbv[b].length; + ret = pmem2_badblock_clear(bbctx, &bb); + if (ret) { + LOG(1, "pmem2_badblock_clear -- %s", file); + goto exit_delete_ctx; + } + } + +exit_delete_ctx: + pmem2_badblock_context_delete(&bbctx); + +exit_delete_source: + pmem2_source_delete(&src); + +exit_close: + if (fd != -1) + os_close(fd); + + if (ret) { + errno = pmem2_err_to_errno(ret); + ret = -1; + } + + return ret; +} + +/* + * badblocks_clear_all -- clears all bad blocks in a file + * (regular file or dax device) + */ +int +badblocks_clear_all(const char *file) +{ + LOG(3, "file %s", file); + + struct pmem2_source *src; + struct pmem2_badblock_context *bbctx; + struct pmem2_badblock bb; + int ret = -1; + + int fd = os_open(file, O_RDWR); + if (fd == -1) { + ERR("!open %s", file); + return -1; + } + + ret = pmem2_source_from_fd(&src, fd); + if (ret) + goto exit_close; + + ret = pmem2_badblock_context_new(&bbctx, src); + if (ret) { + LOG(1, "pmem2_badblock_context_new failed -- %s", file); + goto exit_delete_source; + } + + while ((pmem2_badblock_next(bbctx, &bb)) == 0) { + ret = pmem2_badblock_clear(bbctx, &bb); + if (ret) { + LOG(1, "pmem2_badblock_clear -- %s", file); + goto exit_delete_ctx; + } + }; + +exit_delete_ctx: + pmem2_badblock_context_delete(&bbctx); + +exit_delete_source: + pmem2_source_delete(&src); + +exit_close: + if (fd != -1) + os_close(fd); + + if (ret) { + errno = pmem2_err_to_errno(ret); + ret = -1; + } + + return ret; +} + +/* + * badblocks_check_file -- check if the file contains bad blocks + * + * Return value: + * -1 : an error + * 0 : no bad blocks + * 1 : bad blocks detected + */ +int +badblocks_check_file(const char *file) +{ + LOG(3, "file %s", file); + + long bbsc = badblocks_count(file); + if (bbsc < 0) { + LOG(1, "counting bad blocks failed -- '%s'", file); + return -1; + } + + if (bbsc > 0) { + LOG(1, "pool file '%s' contains %li bad block(s)", file, bbsc); + return 1; + } + + return 0; +} diff --git a/src/pmdk/src/common/badblocks.h b/src/pmdk/src/common/badblocks.h new file mode 100644 index 000000000..d06e81adb --- /dev/null +++ b/src/pmdk/src/common/badblocks.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * badblocks.h -- bad blocks API based on the libpmem2 library + */ + +#ifndef PMDK_BADBLOCKS_H +#define PMDK_BADBLOCKS_H 1 + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define B2SEC(n) ((n) >> 9) /* convert bytes to sectors */ +#define SEC2B(n) ((n) << 9) /* convert sectors to bytes */ + +#define NO_HEALTHY_REPLICA ((int)(-1)) + +#define BB_NOT_SUPP \ + "checking bad blocks is not supported on this OS, please switch off the CHECK_BAD_BLOCKS compat feature using 'pmempool-feature'" + +/* + * 'struct 
badblock' is already defined in ndctl/libndctl.h, + * so we cannot use this name. + * + * libndctl returns offset relative to the beginning of the region, + * but in this structure we save offset relative to the beginning of: + * - namespace (before badblocks_get()) + * and + * - file (before sync_recalc_badblocks()) + * and + * - pool (after sync_recalc_badblocks()) + */ +struct bad_block { + /* + * offset in bytes relative to the beginning of + * - namespace (before badblocks_get()) + * and + * - file (before sync_recalc_badblocks()) + * and + * - pool (after sync_recalc_badblocks()) + */ + size_t offset; + + /* length in bytes */ + size_t length; + + /* number of healthy replica to fix this bad block */ + int nhealthy; +}; + +struct badblocks { + unsigned bb_cnt; /* number of bad blocks */ + struct bad_block *bbv; /* array of bad blocks */ +}; + +struct badblocks *badblocks_new(void); +void badblocks_delete(struct badblocks *bbs); + +long badblocks_count(const char *path); +int badblocks_get(const char *file, struct badblocks *bbs); + +int badblocks_clear(const char *path, struct badblocks *bbs); +int badblocks_clear_all(const char *file); + +int badblocks_check_file(const char *path); + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_BADBLOCKS_H */ diff --git a/src/pmdk/src/common/common.rc b/src/pmdk/src/common/common.rc new file mode 100644 index 000000000..96d0e0975 --- /dev/null +++ b/src/pmdk/src/common/common.rc @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2017, Intel Corporation */ + +/* + * common.rc -- common part of PMDK rc files + */ + +#include +#include "srcversion.h" + +#define VERSION(major, minor, build, revision) major, minor, build, revision + +#ifdef _DEBUG +#define VERSION_DEBUG VS_FF_DEBUG +#else +#define VERSION_DEBUG 0 +#endif + +#ifdef PRERELEASE +#define VERSION_PRERELEASE VS_FF_PRERELEASE +#else +#define VERSION_PRERELEASE 0 +#endif + +#ifdef BUGFIX +#define VERSION_PATCHED VS_FF_PATCHED +#else +#define VERSION_PATCHED 0 +#endif + +#ifdef PRIVATE +#define VERSION_PRIVATE VS_FF_PRIVATE +#else +#define VERSION_PRIVATE 0 +#endif + +#ifdef CUSTOM +#define VERSION_SPECIAL VS_FF_SPECIALBUILD +#else +#define VERSION_SPECIAL 0 +#endif + +#define VERSION_PRIVATEBUILD VS_FF_PRIVATEBUILD +#define VER_PATCHED VS_FF_PATCHED + +VS_VERSION_INFO VERSIONINFO +FILEVERSION VERSION(MAJOR, MINOR, BUILD, REVISION) +PRODUCTVERSION VERSION(MAJOR, MINOR, BUILD, REVISION) +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEFLAGS (VERSION_PRIVATEBUILD | VERSION_PRERELEASE | VERSION_DEBUG | VERSION_SPECIAL | VERSION_PATCHED) +FILEOS VOS__WINDOWS32 +FILETYPE TYPE +FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "CompanyName", "Intel" + VALUE "FileDescription", DESCRIPTION + VALUE "FileVersion", SRCVERSION + VALUE "InternalName", "PMDK" + VALUE "LegalCopyright", "Copyright 2014-2017, Intel Corporation" + VALUE "OriginalFilename", FILE_NAME + VALUE "ProductName", "Persistent Memory Development Kit" + VALUE "ProductVersion", SRCVERSION +#if VERSION_SPECIAL == VS_FF_SPECIALBUILD + VALUE "SpecialBuild", VERSION_CUSTOM_MSG +#endif +#if VERSION_PRIVATEBUILD == VS_FF_SPECIALBUILD + VALUE "PrivateBuild", "Not a release build" +#endif + END + END + BLOCK "VarFileInfo" + BEGIN + /* XXX: Update to UNICODE */ + VALUE "Translation", 0x409, 0 + END +END diff --git a/src/pmdk/src/common/ctl.c b/src/pmdk/src/common/ctl.c new file mode 100644 index 000000000..d6afba88e --- /dev/null +++ b/src/pmdk/src/common/ctl.c @@ -0,0 +1,578 @@ 
+// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl.c -- implementation of the interface for examination and modification of + * the library's internal state + */ +#include "ctl.h" +#include "os.h" +#include "alloc.h" + +#define CTL_MAX_ENTRIES 100 + +#define MAX_CONFIG_FILE_LEN (1 << 20) /* 1 megabyte */ + +#define CTL_STRING_QUERY_SEPARATOR ";" +#define CTL_NAME_VALUE_SEPARATOR "=" +#define CTL_QUERY_NODE_SEPARATOR "." +#define CTL_VALUE_ARG_SEPARATOR "," + +static int ctl_global_first_free = 0; +static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; + +/* + * This is the top level node of the ctl tree structure. Each node can contain + * children and leaf nodes. + * + * Internal nodes simply create a new path in the tree whereas child nodes are + * the ones providing the read/write functionality by the means of callbacks. + * + * Each tree node must be NULL-terminated, CTL_NODE_END macro is provided for + * convenience. + */ +struct ctl { + struct ctl_node root[CTL_MAX_ENTRIES]; + int first_free; +}; + +/* + * ctl_find_node -- (internal) searches for a matching entry point in the + * provided nodes + * + * The caller is responsible for freeing all of the allocated indexes, + * regardless of the return value. + */ +static const struct ctl_node * +ctl_find_node(const struct ctl_node *nodes, const char *name, + struct ctl_indexes *indexes) +{ + LOG(3, "nodes %p name %s indexes %p", nodes, name, indexes); + + const struct ctl_node *n = NULL; + char *sptr = NULL; + char *parse_str = Strdup(name); + if (parse_str == NULL) + return NULL; + + char *node_name = strtok_r(parse_str, CTL_QUERY_NODE_SEPARATOR, &sptr); + + /* + * Go through the string and separate tokens that correspond to nodes + * in the main ctl tree. + */ + while (node_name != NULL) { + char *endptr; + /* + * Ignore errno from strtol: FreeBSD returns EINVAL if no + * conversion is performed. Linux does not, but endptr + * check is valid in both cases. 
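+	 * (If endptr still equals node_name after the call, no digits were
+	 * consumed, so the token is treated as a node name rather than a
+	 * numeric index.)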
+ */ + int tmp_errno = errno; + long index_value = strtol(node_name, &endptr, 0); + errno = tmp_errno; + struct ctl_index *index_entry = NULL; + if (endptr != node_name) { /* a valid index */ + index_entry = Malloc(sizeof(*index_entry)); + if (index_entry == NULL) + goto error; + index_entry->value = index_value; + PMDK_SLIST_INSERT_HEAD(indexes, index_entry, entry); + } + + for (n = &nodes[0]; n->name != NULL; ++n) { + if (index_entry && n->type == CTL_NODE_INDEXED) + break; + else if (strcmp(n->name, node_name) == 0) + break; + } + if (n->name == NULL) + goto error; + + if (index_entry) + index_entry->name = n->name; + + nodes = n->children; + node_name = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); + } + + Free(parse_str); + return n; + +error: + Free(parse_str); + return NULL; +} + +/* + * ctl_delete_indexes -- + * (internal) removes and frees all entries on the index list + */ +static void +ctl_delete_indexes(struct ctl_indexes *indexes) +{ + while (!PMDK_SLIST_EMPTY(indexes)) { + struct ctl_index *index = PMDK_SLIST_FIRST(indexes); + PMDK_SLIST_REMOVE_HEAD(indexes, entry); + Free(index); + } +} + +/* + * ctl_parse_args -- (internal) parses a string argument based on the node + * structure + */ +static void * +ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) +{ + ASSERTne(arg, NULL); + + char *dest_arg = Malloc(arg_proto->dest_size); + if (dest_arg == NULL) { + ERR("!Malloc"); + return NULL; + } + + char *sptr = NULL; + char *arg_sep = strtok_r(arg, CTL_VALUE_ARG_SEPARATOR, &sptr); + for (const struct ctl_argument_parser *p = arg_proto->parsers; + p->parser != NULL; ++p) { + ASSERT(p->dest_offset + p->dest_size <= arg_proto->dest_size); + if (arg_sep == NULL) { + ERR("!strtok_r"); + goto error_parsing; + } + + if (p->parser(arg_sep, dest_arg + p->dest_offset, + p->dest_size) != 0) + goto error_parsing; + + arg_sep = strtok_r(NULL, CTL_VALUE_ARG_SEPARATOR, &sptr); + } + + return dest_arg; + +error_parsing: + Free(dest_arg); + return NULL; +} + +/* + * ctl_query_get_real_args -- (internal) returns a pointer with actual argument + * structure as required by the node callback + */ +static void * +ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, + enum ctl_query_source source) +{ + void *real_arg = NULL; + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + real_arg = ctl_parse_args(n->arg, write_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + real_arg = write_arg; + break; + default: + ASSERT(0); + break; + } + + return real_arg; +} + +/* + * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument + * structures allocated as a result of the get_real_args call + */ +static void +ctl_query_cleanup_real_args(const struct ctl_node *n, void *real_arg, + enum ctl_query_source source) +{ + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + Free(real_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + break; + default: + ASSERT(0); + break; + } +} + +/* + * ctl_exec_query_read -- (internal) calls the read callback of a node + */ +static int +ctl_exec_query_read(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + if (arg == NULL) { + ERR("read queries require non-NULL argument"); + errno = EINVAL; + return -1; + } + + return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes); +} + +/* + * ctl_exec_query_write -- (internal) calls the write callback of a node + */ +static int +ctl_exec_query_write(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, struct ctl_indexes 
*indexes) +{ + if (arg == NULL) { + ERR("write queries require non-NULL argument"); + errno = EINVAL; + return -1; + } + + void *real_arg = ctl_query_get_real_args(n, arg, source); + if (real_arg == NULL) { + LOG(1, "Invalid arguments"); + return -1; + } + + int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes); + ctl_query_cleanup_real_args(n, real_arg, source); + + return ret; +} + +/* + * ctl_exec_query_runnable -- (internal) calls the run callback of a node + */ +static int +ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); +} + +static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])(void *ctx, + const struct ctl_node *n, enum ctl_query_source source, void *arg, + struct ctl_indexes *indexes) = { + ctl_exec_query_read, + ctl_exec_query_write, + ctl_exec_query_runnable, +}; + +/* + * ctl_query -- (internal) parses the name and calls the appropriate methods + * from the ctl tree + */ +int +ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg) +{ + LOG(3, "ctl %p ctx %p source %d name %s type %d arg %p", + ctl, ctx, source, name, type, arg); + + if (name == NULL) { + ERR("invalid query"); + errno = EINVAL; + return -1; + } + + /* + * All of the indexes are put on this list so that the handlers can + * easily retrieve the index values. The list is cleared once the ctl + * query has been handled. + */ + struct ctl_indexes indexes; + PMDK_SLIST_INIT(&indexes); + + int ret = -1; + + const struct ctl_node *n = ctl_find_node(CTL_NODE(global), + name, &indexes); + + if (n == NULL && ctl) { + ctl_delete_indexes(&indexes); + n = ctl_find_node(ctl->root, name, &indexes); + } + + if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { + ERR("invalid query entry point %s", name); + errno = EINVAL; + goto out; + } + + ret = ctl_exec_query[type](ctx, n, source, arg, &indexes); + +out: + ctl_delete_indexes(&indexes); + + return ret; +} + +/* + * ctl_register_module_node -- adds a new node to the CTL tree root. + */ +void +ctl_register_module_node(struct ctl *c, const char *name, struct ctl_node *n) +{ + struct ctl_node *nnode = c == NULL ? 
+ &CTL_NODE(global)[ctl_global_first_free++] : + &c->root[c->first_free++]; + + nnode->children = n; + nnode->type = CTL_NODE_NAMED; + nnode->name = name; +} + +/* + * ctl_parse_query -- (internal) splits an entire query string + * into name and value + */ +static int +ctl_parse_query(char *qbuf, char **name, char **value) +{ + if (qbuf == NULL) + return -1; + + char *sptr; + *name = strtok_r(qbuf, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*name == NULL) + return -1; + + *value = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*value == NULL) + return -1; + + /* the value itself mustn't include CTL_NAME_VALUE_SEPARATOR */ + char *extra = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (extra != NULL) + return -1; + + return 0; +} + +/* + * ctl_load_config -- executes the entire query collection from a provider + */ +static int +ctl_load_config(struct ctl *ctl, void *ctx, char *buf) +{ + int r = 0; + char *sptr = NULL; /* for internal use of strtok */ + char *name; + char *value; + + ASSERTne(buf, NULL); + + char *qbuf = strtok_r(buf, CTL_STRING_QUERY_SEPARATOR, &sptr); + while (qbuf != NULL) { + r = ctl_parse_query(qbuf, &name, &value); + if (r != 0) { + ERR("failed to parse query %s", qbuf); + return -1; + } + + r = ctl_query(ctl, ctx, CTL_QUERY_CONFIG_INPUT, + name, CTL_QUERY_WRITE, value); + + if (r < 0 && ctx != NULL) + return -1; + + qbuf = strtok_r(NULL, CTL_STRING_QUERY_SEPARATOR, &sptr); + } + + return 0; +} + +/* + * ctl_load_config_from_string -- loads obj configuration from string + */ +int +ctl_load_config_from_string(struct ctl *ctl, void *ctx, const char *cfg_string) +{ + LOG(3, "ctl %p ctx %p cfg_string \"%s\"", ctl, ctx, cfg_string); + + char *buf = Strdup(cfg_string); + if (buf == NULL) { + ERR("!Strdup"); + return -1; + } + + int ret = ctl_load_config(ctl, ctx, buf); + + Free(buf); + return ret; +} + +/* + * ctl_load_config_from_file -- loads obj configuration from file + * + * This function opens up the config file, allocates a buffer of size equal to + * the size of the file, reads its content and sanitizes it for ctl_load_config. 
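+ *
+ * For example (entry names illustrative, matching the modules registered
+ * in ctl_global.h), a file containing:
+ *
+ *	# prefault both on open and on create
+ *	prefault.at_open=1;
+ *	prefault.at_create=1;
+ *
+ * is reduced to "prefault.at_open=1;prefault.at_create=1;" before being
+ * passed to ctl_load_config.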
+ */ +int +ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file) +{ + LOG(3, "ctl %p ctx %p cfg_file \"%s\"", ctl, ctx, cfg_file); + + int ret = -1; + + FILE *fp = os_fopen(cfg_file, "r"); + if (fp == NULL) + return ret; + + int err; + if ((err = fseek(fp, 0, SEEK_END)) != 0) + goto error_file_parse; + + long fsize = ftell(fp); + if (fsize == -1) + goto error_file_parse; + + if (fsize > MAX_CONFIG_FILE_LEN) { + ERR("Config file too large"); + goto error_file_parse; + } + + if ((err = fseek(fp, 0, SEEK_SET)) != 0) + goto error_file_parse; + + char *buf = Zalloc((size_t)fsize + 1); /* +1 for NULL-termination */ + if (buf == NULL) { + ERR("!Zalloc"); + goto error_file_parse; + } + + size_t bufpos = 0; + + int c; + int is_comment_section = 0; + while ((c = fgetc(fp)) != EOF) { + if (c == '#') + is_comment_section = 1; + else if (c == '\n') + is_comment_section = 0; + else if (!is_comment_section && !isspace(c)) + buf[bufpos++] = (char)c; + } + + ret = ctl_load_config(ctl, ctx, buf); + + Free(buf); + +error_file_parse: + (void) fclose(fp); + return ret; +} + +/* + * ctl_new -- allocates and initializes ctl data structures + */ +struct ctl * +ctl_new(void) +{ + struct ctl *c = Zalloc(sizeof(struct ctl)); + if (c == NULL) { + ERR("!Zalloc"); + return NULL; + } + + c->first_free = 0; + return c; +} + +/* + * ctl_delete -- deletes ctl + */ +void +ctl_delete(struct ctl *c) +{ + Free(c); +} + +/* + * ctl_parse_ll -- (internal) parses and returns a long long signed integer + */ +static long long +ctl_parse_ll(const char *str) +{ + char *endptr; + int olderrno = errno; + errno = 0; + long long val = strtoll(str, &endptr, 0); + if (endptr == str || errno != 0) + return LLONG_MIN; + errno = olderrno; + + return val; +} + +/* + * ctl_arg_boolean -- checks whether the provided argument contains + * either a 1 or y or Y. 
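+ *
+ * Similarly, a 0, n or N is parsed as false; only the first character of
+ * the argument is examined, with letters accepted in either case thanks to
+ * tolower(). Any other input is rejected with -1.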
+ */ +int +ctl_arg_boolean(const void *arg, void *dest, size_t dest_size) +{ + int *intp = dest; + char in = ((char *)arg)[0]; + + if (tolower(in) == 'y' || in == '1') { + *intp = 1; + return 0; + } else if (tolower(in) == 'n' || in == '0') { + *intp = 0; + return 0; + } + + return -1; +} + +/* + * ctl_arg_integer -- parses signed integer argument + */ +int +ctl_arg_integer(const void *arg, void *dest, size_t dest_size) +{ + long long val = ctl_parse_ll(arg); + if (val == LLONG_MIN) + return -1; + + switch (dest_size) { + case sizeof(int): + if (val > INT_MAX || val < INT_MIN) + return -1; + *(int *)dest = (int)val; + break; + case sizeof(long long): + *(long long *)dest = val; + break; + case sizeof(uint8_t): + if (val > UINT8_MAX || val < 0) + return -1; + *(uint8_t *)dest = (uint8_t)val; + break; + default: + ERR("invalid destination size %zu", dest_size); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * ctl_arg_string -- verifies length and copies a string argument into a zeroed + * buffer + */ +int +ctl_arg_string(const void *arg, void *dest, size_t dest_size) +{ + /* check if the incoming string is longer or equal to dest_size */ + if (strnlen(arg, dest_size) == dest_size) + return -1; + + strncpy(dest, arg, dest_size); + + return 0; +} diff --git a/src/pmdk/src/common/ctl.h b/src/pmdk/src/common/ctl.h new file mode 100644 index 000000000..f70322a6f --- /dev/null +++ b/src/pmdk/src/common/ctl.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl.h -- internal declaration of statistics and control related structures + */ + +#ifndef PMDK_CTL_H +#define PMDK_CTL_H 1 + +#include "queue.h" +#include "errno.h" +#include "out.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct ctl; + +struct ctl_index { + const char *name; + long value; + PMDK_SLIST_ENTRY(ctl_index) entry; +}; + +PMDK_SLIST_HEAD(ctl_indexes, ctl_index); + +enum ctl_query_source { + CTL_UNKNOWN_QUERY_SOURCE, + /* query executed directly from the program */ + CTL_QUERY_PROGRAMMATIC, + /* query executed from the config file */ + CTL_QUERY_CONFIG_INPUT, + + MAX_CTL_QUERY_SOURCE +}; + +enum ctl_query_type { + CTL_QUERY_READ, + CTL_QUERY_WRITE, + CTL_QUERY_RUNNABLE, + + MAX_CTL_QUERY_TYPE +}; + +typedef int (*node_callback)(void *ctx, enum ctl_query_source type, + void *arg, struct ctl_indexes *indexes); + +enum ctl_node_type { + CTL_NODE_UNKNOWN, + CTL_NODE_NAMED, + CTL_NODE_LEAF, + CTL_NODE_INDEXED, + + MAX_CTL_NODE +}; + +typedef int (*ctl_arg_parser)(const void *arg, void *dest, size_t dest_size); + +struct ctl_argument_parser { + size_t dest_offset; /* offset of the field inside of the argument */ + size_t dest_size; /* size of the field inside of the argument */ + ctl_arg_parser parser; +}; + +struct ctl_argument { + size_t dest_size; /* sizeof the entire argument */ + struct ctl_argument_parser parsers[]; /* array of 'fields' in arg */ +}; + +#define sizeof_member(t, m) sizeof(((t *)0)->m) + +#define CTL_ARG_PARSER(t, p)\ +{0, sizeof(t), p} + +#define CTL_ARG_PARSER_STRUCT(t, m, p)\ +{offsetof(t, m), sizeof_member(t, m), p} + +#define CTL_ARG_PARSER_END {0, 0, NULL} + +/* + * CTL Tree node structure, do not use directly. All the necessary functionality + * is provided by the included macros. 
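+ *
+ * A module declares a NULL-terminated array of these nodes, e.g. (this is
+ * the actual declaration from ctl_cow.c):
+ *
+ *	static const struct ctl_node CTL_NODE(copy_on_write)[] = {
+ *		CTL_LEAF_RW(at_open),
+ *
+ *		CTL_NODE_END
+ *	};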
+ */ +struct ctl_node { + const char *name; + enum ctl_node_type type; + + node_callback cb[MAX_CTL_QUERY_TYPE]; + const struct ctl_argument *arg; + + const struct ctl_node *children; +}; + +struct ctl *ctl_new(void); +void ctl_delete(struct ctl *stats); + +int ctl_load_config_from_string(struct ctl *ctl, void *ctx, + const char *cfg_string); +int ctl_load_config_from_file(struct ctl *ctl, void *ctx, + const char *cfg_file); + +/* Use through CTL_REGISTER_MODULE, never directly */ +void ctl_register_module_node(struct ctl *c, + const char *name, struct ctl_node *n); + +int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_BOOLEAN {sizeof(int),\ + {{0, sizeof(int), ctl_arg_boolean},\ + CTL_ARG_PARSER_END}}; + +int ctl_arg_integer(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_INT {sizeof(int),\ + {{0, sizeof(int), ctl_arg_integer},\ + CTL_ARG_PARSER_END}}; + +#define CTL_ARG_LONG_LONG {sizeof(long long),\ + {{0, sizeof(long long), ctl_arg_integer},\ + CTL_ARG_PARSER_END}}; + +int ctl_arg_string(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_STRING(len) {len,\ + {{0, len, ctl_arg_string},\ + CTL_ARG_PARSER_END}}; + +#define CTL_STR(name) #name + +#define CTL_NODE_END {NULL, CTL_NODE_UNKNOWN, {NULL, NULL, NULL}, NULL, NULL} + +#define CTL_NODE(name, ...)\ +ctl_node_##__VA_ARGS__##_##name + +int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg); + +/* Declaration of a new child node */ +#define CTL_CHILD(name, ...)\ +{CTL_STR(name), CTL_NODE_NAMED, {NULL, NULL, NULL}, NULL,\ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__)} + +/* Declaration of a new indexed node */ +#define CTL_INDEXED(name, ...)\ +{CTL_STR(name), CTL_NODE_INDEXED, {NULL, NULL, NULL}, NULL,\ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__)} + +#define CTL_READ_HANDLER(name, ...)\ +ctl_##__VA_ARGS__##_##name##_read + +#define CTL_WRITE_HANDLER(name, ...)\ +ctl_##__VA_ARGS__##_##name##_write + +#define CTL_RUNNABLE_HANDLER(name, ...)\ +ctl_##__VA_ARGS__##_##name##_runnable + +#define CTL_ARG(name)\ +ctl_arg_##name + +/* + * Declaration of a new read-only leaf. If used the corresponding read function + * must be declared by CTL_READ_HANDLER macro. + */ +#define CTL_LEAF_RO(name, ...)\ +{CTL_STR(name), CTL_NODE_LEAF, \ + {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL}, NULL, NULL} + +/* + * Declaration of a new write-only leaf. If used the corresponding write + * function must be declared by CTL_WRITE_HANDLER macro. + */ +#define CTL_LEAF_WO(name, ...)\ +{CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL},\ + &CTL_ARG(name), NULL} + +/* + * Declaration of a new runnable leaf. If used the corresponding run + * function must be declared by CTL_RUNNABLE_HANDLER macro. + */ +#define CTL_LEAF_RUNNABLE(name, ...)\ +{CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__)},\ + NULL, NULL} + +/* + * Declaration of a new read-write leaf. If used both read and write function + * must be declared by CTL_READ_HANDLER and CTL_WRITE_HANDLER macros. 
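+ *
+ * Note that the entry below takes the address of CTL_ARG(name), so a
+ * matching argument descriptor (e.g. CTL_ARG_BOOLEAN) must be defined
+ * alongside the handlers.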
+ */ +#define CTL_LEAF_RW(name)\ +{CTL_STR(name), CTL_NODE_LEAF,\ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL},\ + &CTL_ARG(name), NULL} + +#define CTL_REGISTER_MODULE(_ctl, name)\ +ctl_register_module_node((_ctl), CTL_STR(name),\ +(struct ctl_node *)CTL_NODE(name)) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/ctl_cow.c b/src/pmdk/src/common/ctl_cow.c new file mode 100644 index 000000000..1ce38e548 --- /dev/null +++ b/src/pmdk/src/common/ctl_cow.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019, Intel Corporation */ + +/* + * ctl_cow.c -- implementation of the CTL copy on write namespace + */ + +#include "ctl.h" +#include "set.h" +#include "out.h" +#include "ctl_global.h" + +/* + * CTL_READ_HANDLER(at_open) -- returns at_open field + */ +static int +CTL_READ_HANDLER(at_open)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + int *arg_out = arg; + *arg_out = COW_at_open; + return 0; +} +/* + * CTL_WRITE_HANDLER(at_open) -- sets the at_open field in copy_on_write + */ +static int +CTL_WRITE_HANDLER(at_open)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + int arg_in = *(int *)arg; + COW_at_open = arg_in; + return 0; +} + +static struct ctl_argument CTL_ARG(at_open) = CTL_ARG_BOOLEAN; + +static const struct ctl_node CTL_NODE(copy_on_write)[] = { + CTL_LEAF_RW(at_open), + + CTL_NODE_END +}; + +/* + * cow_ctl_register -- registers ctl nodes for "copy_on_write" module + */ +void +ctl_cow_register(void) +{ + CTL_REGISTER_MODULE(NULL, copy_on_write); +} diff --git a/src/pmdk/src/common/ctl_fallocate.c b/src/pmdk/src/common/ctl_fallocate.c new file mode 100644 index 000000000..ac0af6ed2 --- /dev/null +++ b/src/pmdk/src/common/ctl_fallocate.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * ctl_fallocate.c -- implementation of the fallocate CTL namespace + */ + +#include "ctl.h" +#include "set.h" +#include "out.h" +#include "ctl_global.h" +#include "file.h" + +static int +CTL_READ_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int *arg_out = arg; + *arg_out = Fallocate_at_create; + + return 0; +} + +static int +CTL_WRITE_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int arg_in = *(int *)arg; + Fallocate_at_create = arg_in; + + return 0; +} + +static struct ctl_argument CTL_ARG(at_create) = CTL_ARG_BOOLEAN; + +static const struct ctl_node CTL_NODE(fallocate)[] = { + CTL_LEAF_RW(at_create), + + CTL_NODE_END +}; + +void +ctl_fallocate_register(void) +{ + CTL_REGISTER_MODULE(NULL, fallocate); +} diff --git a/src/pmdk/src/common/ctl_global.h b/src/pmdk/src/common/ctl_global.h new file mode 100644 index 000000000..b70859a8b --- /dev/null +++ b/src/pmdk/src/common/ctl_global.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl_global.h -- definitions for the global CTL namespace + */ + +#ifndef PMDK_CTL_GLOBAL_H +#define PMDK_CTL_GLOBAL_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +extern void ctl_prefault_register(void); +extern void ctl_sds_register(void); +extern void ctl_fallocate_register(void); +extern void ctl_cow_register(void); + +static inline void +ctl_global_register(void) +{ + ctl_prefault_register(); + ctl_sds_register(); + ctl_fallocate_register(); + ctl_cow_register(); +} + +#ifdef 
__cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/ctl_prefault.c b/src/pmdk/src/common/ctl_prefault.c new file mode 100644 index 000000000..bfb9abe62 --- /dev/null +++ b/src/pmdk/src/common/ctl_prefault.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * ctl_prefault.c -- implementation of the prefault CTL namespace + */ + +#include "ctl.h" +#include "set.h" +#include "out.h" +#include "ctl_global.h" + +static int +CTL_READ_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int *arg_out = arg; + *arg_out = Prefault_at_create; + + return 0; +} + +static int +CTL_WRITE_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int arg_in = *(int *)arg; + + Prefault_at_create = arg_in; + + return 0; +} + +static int +CTL_READ_HANDLER(at_open)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int *arg_out = arg; + *arg_out = Prefault_at_open; + + return 0; +} + +static int +CTL_WRITE_HANDLER(at_open)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int arg_in = *(int *)arg; + + Prefault_at_open = arg_in; + + return 0; +} + +static const struct ctl_argument CTL_ARG(at_create) = CTL_ARG_BOOLEAN; +static const struct ctl_argument CTL_ARG(at_open) = CTL_ARG_BOOLEAN; + +static const struct ctl_node CTL_NODE(prefault)[] = { + CTL_LEAF_RW(at_create), + CTL_LEAF_RW(at_open), + + CTL_NODE_END +}; + +void +ctl_prefault_register(void) +{ + CTL_REGISTER_MODULE(NULL, prefault); +} diff --git a/src/pmdk/src/common/ctl_sds.c b/src/pmdk/src/common/ctl_sds.c new file mode 100644 index 000000000..255c5b790 --- /dev/null +++ b/src/pmdk/src/common/ctl_sds.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * ctl_sds.c -- implementation of the sds CTL namespace + */ + +#include "ctl.h" +#include "set.h" +#include "out.h" +#include "ctl_global.h" + +static int +CTL_READ_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int *arg_out = arg; + *arg_out = SDS_at_create; + + return 0; +} + +static int +CTL_WRITE_HANDLER(at_create)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + int arg_in = *(int *)arg; + + SDS_at_create = arg_in; + + return 0; +} + +static const struct ctl_argument CTL_ARG(at_create) = CTL_ARG_BOOLEAN; + +static const struct ctl_node CTL_NODE(sds)[] = { + CTL_LEAF_RW(at_create), + + CTL_NODE_END +}; + +void +ctl_sds_register(void) +{ + CTL_REGISTER_MODULE(NULL, sds); +} diff --git a/src/pmdk/src/common/dlsym.h b/src/pmdk/src/common/dlsym.h new file mode 100644 index 000000000..dd4f58bf3 --- /dev/null +++ b/src/pmdk/src/common/dlsym.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * dlsym.h -- dynamic linking utilities with library-specific implementation + */ + +#ifndef PMDK_DLSYM_H +#define PMDK_DLSYM_H 1 + +#include "out.h" + +#if defined(USE_LIBDL) && !defined(_WIN32) + +#include + +/* + * util_dlopen -- calls real dlopen() + */ +static inline void * +util_dlopen(const char *filename) +{ + LOG(3, "filename %s", filename); + + return dlopen(filename, RTLD_NOW); +} + +/* + * util_dlerror -- calls real dlerror() + */ +static inline char * +util_dlerror(void) +{ + return dlerror(); +} + +/* + * util_dlsym -- calls real 
dlsym() + */ +static inline void * +util_dlsym(void *handle, const char *symbol) +{ + LOG(3, "handle %p symbol %s", handle, symbol); + + return dlsym(handle, symbol); +} + +/* + * util_dlclose -- calls real dlclose() + */ +static inline int +util_dlclose(void *handle) +{ + LOG(3, "handle %p", handle); + + return dlclose(handle); +} + +#else /* empty functions */ + +/* + * util_dlopen -- empty function + */ +static inline void * +util_dlopen(const char *filename) +{ + errno = ENOSYS; + return NULL; +} + +/* + * util_dlerror -- empty function + */ +static inline char * +util_dlerror(void) +{ + errno = ENOSYS; + return NULL; +} + +/* + * util_dlsym -- empty function + */ +static inline void * +util_dlsym(void *handle, const char *symbol) +{ + errno = ENOSYS; + return NULL; +} + +/* + * util_dlclose -- empty function + */ +static inline int +util_dlclose(void *handle) +{ + errno = ENOSYS; + return 0; +} + +#endif + +#endif diff --git a/src/pmdk/src/common/file.c b/src/pmdk/src/common/file.c new file mode 100644 index 000000000..e64e97b69 --- /dev/null +++ b/src/pmdk/src/common/file.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * file.c -- file utilities + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(_WIN32) && !defined(__FreeBSD__) +#include +#endif + +#include "../libpmem2/config.h" +#include "../libpmem2/pmem2_utils.h" +#include "file.h" +#include "os.h" +#include "out.h" +#include "mmap.h" + +#define DEVICE_DAX_ZERO_LEN (2 * MEGABYTE) + +/* + * util_file_exists -- checks whether file exists + */ +int +util_file_exists(const char *path) +{ + LOG(3, "path \"%s\"", path); + + if (os_access(path, F_OK) == 0) + return 1; + + if (errno != ENOENT) { + ERR("!os_access \"%s\"", path); + return -1; + } + + /* + * ENOENT means that some component of a pathname does not exists. + * + * XXX - we should also call os_access on parent directory and + * if this also results in ENOENT -1 should be returned. + * + * The problem is that we would need to use realpath, which fails + * if file does not exist. 
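+	 * Consequently, a missing file and a missing parent directory are
+	 * currently indistinguishable here: both make this function
+	 * return 0.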
+ */ + + return 0; +} + +/* + * util_stat_get_type -- checks whether stat structure describes + * device dax or a normal file + */ +enum file_type +util_stat_get_type(const os_stat_t *st) +{ + enum pmem2_file_type type; + + int ret = pmem2_get_type_from_stat(st, &type); + if (ret) { + errno = pmem2_err_to_errno(ret); + return OTHER_ERROR; + } + + if (type == PMEM2_FTYPE_REG || type == PMEM2_FTYPE_DIR) + return TYPE_NORMAL; + + if (type == PMEM2_FTYPE_DEVDAX) + return TYPE_DEVDAX; + + ASSERTinfo(0, "unhandled file type in util_stat_get_type"); + return OTHER_ERROR; +} + +/* + * util_fd_get_type -- checks whether a file descriptor is associated + * with a device dax or a normal file + */ +enum file_type +util_fd_get_type(int fd) +{ + LOG(3, "fd %d", fd); + +#ifdef _WIN32 + return TYPE_NORMAL; +#else + os_stat_t st; + + if (os_fstat(fd, &st) < 0) { + ERR("!fstat"); + return OTHER_ERROR; + } + + return util_stat_get_type(&st); +#endif +} + +/* + * util_file_get_type -- checks whether the path points to a device dax, + * normal file or non-existent file + */ +enum file_type +util_file_get_type(const char *path) +{ + LOG(3, "path \"%s\"", path); + + if (path == NULL) { + ERR("invalid (NULL) path"); + errno = EINVAL; + return OTHER_ERROR; + } + + int exists = util_file_exists(path); + if (exists < 0) + return OTHER_ERROR; + + if (!exists) + return NOT_EXISTS; + +#ifdef _WIN32 + return TYPE_NORMAL; +#else + os_stat_t st; + + if (os_stat(path, &st) < 0) { + ERR("!stat"); + return OTHER_ERROR; + } + + return util_stat_get_type(&st); +#endif +} + +/* + * util_file_get_size -- returns size of a file + */ +ssize_t +util_file_get_size(const char *path) +{ + LOG(3, "path \"%s\"", path); + + int fd = os_open(path, O_RDONLY); + if (fd < 0) { + ERR("!open"); + return -1; + } + + ssize_t size = util_fd_get_size(fd); + (void) close(fd); + + return size; +} + +/* + * util_fd_get_size -- returns size of a file behind a given file descriptor + */ +ssize_t +util_fd_get_size(int fd) +{ + LOG(3, "fd %d", fd); + + struct pmem2_source *src; + size_t size; + int ret; + + if ((ret = pmem2_source_from_fd(&src, fd)) != 0) { + errno = pmem2_err_to_errno(ret); + return -1; + } + + ret = pmem2_source_size(src, &size); + + pmem2_source_delete(&src); + + if (ret) { + errno = pmem2_err_to_errno(ret); + return -1; + } + + /* size is unsigned, this function returns signed */ + if (size >= INT64_MAX) { + errno = ERANGE; + ERR( + "file size (%ld) too big to be represented in 64-bit signed integer", + size); + return -1; + } + + LOG(4, "file length %zu", size); + return (ssize_t)size; +} + +/* + * util_file_map_whole -- maps the entire file into memory + */ +void * +util_file_map_whole(const char *path) +{ + LOG(3, "path \"%s\"", path); + + int fd; + int olderrno; + void *addr = NULL; + int flags = O_RDWR; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + + if ((fd = os_open(path, flags)) < 0) { + ERR("!open \"%s\"", path); + return NULL; + } + + ssize_t size = util_fd_get_size(fd); + if (size < 0) { + LOG(2, "cannot determine file length \"%s\"", path); + goto out; + } + + addr = util_map(fd, 0, (size_t)size, MAP_SHARED, 0, 0, NULL); + if (addr == NULL) { + LOG(2, "failed to map entire file \"%s\"", path); + goto out; + } + +out: + olderrno = errno; + (void) os_close(fd); + errno = olderrno; + + return addr; +} + +/* + * util_file_zero -- zeroes the specified region of the file + */ +int +util_file_zero(const char *path, os_off_t off, size_t len) +{ + LOG(3, "path \"%s\" off %ju len %zu", path, off, len); + + int fd; + int olderrno; + 
int ret = 0; + int flags = O_RDWR; +#ifdef _WIN32 + flags |= O_BINARY; +#endif + + if ((fd = os_open(path, flags)) < 0) { + ERR("!open \"%s\"", path); + return -1; + } + + ssize_t size = util_fd_get_size(fd); + if (size < 0) { + LOG(2, "cannot determine file length \"%s\"", path); + ret = -1; + goto out; + } + + if (off > size) { + LOG(2, "offset beyond file length, %ju > %ju", off, size); + ret = -1; + goto out; + } + + if ((size_t)off + len > (size_t)size) { + LOG(2, "requested size of write goes beyond the file length, " + "%zu > %zu", (size_t)off + len, size); + LOG(4, "adjusting len to %zu", size - off); + len = (size_t)(size - off); + } + + void *addr = util_map(fd, 0, (size_t)size, MAP_SHARED, 0, 0, NULL); + if (addr == NULL) { + LOG(2, "failed to map entire file \"%s\"", path); + ret = -1; + goto out; + } + + /* zero initialize the specified region */ + memset((char *)addr + off, 0, len); + + util_unmap(addr, (size_t)size); + +out: + olderrno = errno; + (void) os_close(fd); + errno = olderrno; + + return ret; +} + +/* + * util_file_pwrite -- writes to a file with an offset + */ +ssize_t +util_file_pwrite(const char *path, const void *buffer, size_t size, + os_off_t offset) +{ + LOG(3, "path \"%s\" buffer %p size %zu offset %ju", + path, buffer, size, offset); + + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + if (type == TYPE_NORMAL) { + int fd = util_file_open(path, NULL, 0, O_RDWR); + if (fd < 0) { + LOG(2, "failed to open file \"%s\"", path); + return -1; + } + + ssize_t write_len = pwrite(fd, buffer, size, offset); + int olderrno = errno; + (void) os_close(fd); + errno = olderrno; + return write_len; + } + + ssize_t file_size = util_file_get_size(path); + if (file_size < 0) { + LOG(2, "cannot determine file length \"%s\"", path); + return -1; + } + + size_t max_size = (size_t)(file_size - offset); + if (size > max_size) { + LOG(2, "requested size of write goes beyond the file length, " + "%zu > %zu", size, max_size); + LOG(4, "adjusting size to %zu", max_size); + size = max_size; + } + + void *addr = util_file_map_whole(path); + if (addr == NULL) { + LOG(2, "failed to map entire file \"%s\"", path); + return -1; + } + + memcpy(ADDR_SUM(addr, offset), buffer, size); + util_unmap(addr, (size_t)file_size); + return (ssize_t)size; +} + +/* + * util_file_pread -- reads from a file with an offset + */ +ssize_t +util_file_pread(const char *path, void *buffer, size_t size, + os_off_t offset) +{ + LOG(3, "path \"%s\" buffer %p size %zu offset %ju", + path, buffer, size, offset); + + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + if (type == TYPE_NORMAL) { + int fd = util_file_open(path, NULL, 0, O_RDONLY); + if (fd < 0) { + LOG(2, "failed to open file \"%s\"", path); + return -1; + } + + ssize_t read_len = pread(fd, buffer, size, offset); + int olderrno = errno; + (void) os_close(fd); + errno = olderrno; + return read_len; + } + + ssize_t file_size = util_file_get_size(path); + if (file_size < 0) { + LOG(2, "cannot determine file length \"%s\"", path); + return -1; + } + + size_t max_size = (size_t)(file_size - offset); + if (size > max_size) { + LOG(2, "requested size of read goes beyond the file length, " + "%zu > %zu", size, max_size); + LOG(4, "adjusting size to %zu", max_size); + size = max_size; + } + + void *addr = util_file_map_whole(path); + if (addr == NULL) { + LOG(2, "failed to map entire file \"%s\"", path); + return -1; + } + + memcpy(buffer, ADDR_SUM(addr, offset), size); + util_unmap(addr, 
(size_t)file_size); + return (ssize_t)size; +} + +/* + * util_file_create -- create a new memory pool file + */ +int +util_file_create(const char *path, size_t size, size_t minsize) +{ + LOG(3, "path \"%s\" size %zu minsize %zu", path, size, minsize); + + ASSERTne(size, 0); + + if (size < minsize) { + ERR("size %zu smaller than %zu", size, minsize); + errno = EINVAL; + return -1; + } + + if (((os_off_t)size) < 0) { + ERR("invalid size (%zu) for os_off_t", size); + errno = EFBIG; + return -1; + } + + int fd; + int mode; + int flags = O_RDWR | O_CREAT | O_EXCL; +#ifndef _WIN32 + mode = 0; +#else + mode = S_IWRITE | S_IREAD; + flags |= O_BINARY; +#endif + + /* + * Create file without any permission. It will be granted once + * initialization completes. + */ + if ((fd = os_open(path, flags, mode)) < 0) { + ERR("!open \"%s\"", path); + return -1; + } + + if ((errno = os_posix_fallocate(fd, 0, (os_off_t)size)) != 0) { + ERR("!posix_fallocate \"%s\", %zu", path, size); + goto err; + } + + /* for windows we can't flock until after we fallocate */ + if (os_flock(fd, OS_LOCK_EX | OS_LOCK_NB) < 0) { + ERR("!flock \"%s\"", path); + goto err; + } + + return fd; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + if (fd != -1) + (void) os_close(fd); + os_unlink(path); + errno = oerrno; + return -1; +} + +/* + * util_file_open -- open a memory pool file + */ +int +util_file_open(const char *path, size_t *size, size_t minsize, int flags) +{ + LOG(3, "path \"%s\" size %p minsize %zu flags %d", path, size, minsize, + flags); + + int oerrno; + int fd; + +#ifdef _WIN32 + flags |= O_BINARY; +#endif + + if ((fd = os_open(path, flags)) < 0) { + ERR("!open \"%s\"", path); + return -1; + } + + if (os_flock(fd, OS_LOCK_EX | OS_LOCK_NB) < 0) { + ERR("!flock \"%s\"", path); + (void) os_close(fd); + return -1; + } + + if (size || minsize) { + if (size) + ASSERTeq(*size, 0); + + ssize_t actual_size = util_fd_get_size(fd); + if (actual_size < 0) { + ERR("stat \"%s\": negative size", path); + errno = EINVAL; + goto err; + } + + if ((size_t)actual_size < minsize) { + ERR("size %zu smaller than %zu", + (size_t)actual_size, minsize); + errno = EINVAL; + goto err; + } + + if (size) { + *size = (size_t)actual_size; + LOG(4, "actual file size %zu", *size); + } + } + + return fd; +err: + oerrno = errno; + if (os_flock(fd, OS_LOCK_UN)) + ERR("!flock unlock"); + (void) os_close(fd); + errno = oerrno; + return -1; +} + +/* + * util_unlink -- unlinks a file or zeroes a device dax + */ +int +util_unlink(const char *path) +{ + LOG(3, "path \"%s\"", path); + + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + if (type == TYPE_DEVDAX) { + return util_file_zero(path, 0, DEVICE_DAX_ZERO_LEN); + } else { +#ifdef _WIN32 + /* on Windows we can not unlink Read-Only files */ + if (os_chmod(path, S_IREAD | S_IWRITE) == -1) { + ERR("!chmod \"%s\"", path); + return -1; + } +#endif + return os_unlink(path); + } +} + +/* + * util_unlink_flock -- flocks the file and unlinks it + * + * The unlink(2) call on a file which is opened and locked using flock(2) + * by different process works on linux. Thus in order to forbid removing a + * pool when in use by different process we need to flock(2) the pool files + * first before unlinking. + */ +int +util_unlink_flock(const char *path) +{ + LOG(3, "path \"%s\"", path); + +#ifdef WIN32 + /* + * On Windows it is not possible to unlink the + * file if it is flocked. 
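+	 * The lock step is therefore skipped and the file is removed
+	 * directly.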
+ */ + return util_unlink(path); +#else + int fd = util_file_open(path, NULL, 0, O_RDONLY); + if (fd < 0) { + LOG(2, "failed to open file \"%s\"", path); + return -1; + } + + int ret = util_unlink(path); + + (void) os_close(fd); + + return ret; +#endif +} + +/* + * util_write_all -- a wrapper for util_write + * + * writes exactly count bytes from buf to file referred to by fd + * returns -1 on error, 0 otherwise + */ +int +util_write_all(int fd, const char *buf, size_t count) +{ + ssize_t n_wrote = 0; + size_t total = 0; + + while (count > total) { + n_wrote = util_write(fd, buf, count - total); + if (n_wrote <= 0) + return -1; + + buf += (size_t)n_wrote; + total += (size_t)n_wrote; + } + + return 0; +} diff --git a/src/pmdk/src/common/file.h b/src/pmdk/src/common/file.h new file mode 100644 index 000000000..a54144ee2 --- /dev/null +++ b/src/pmdk/src/common/file.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * file.h -- internal definitions for file module + */ + +#ifndef PMDK_FILE_H +#define PMDK_FILE_H 1 + +#include +#include +#include +#include +#include +#include "os.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +#define NAME_MAX _MAX_FNAME +#endif + +struct file_info { + char filename[NAME_MAX + 1]; + int is_dir; +}; + +struct dir_handle { + const char *path; +#ifdef _WIN32 + HANDLE handle; + char *_file; +#else + DIR *dirp; +#endif +}; + +enum file_type { + OTHER_ERROR = -2, + NOT_EXISTS = -1, + TYPE_NORMAL = 1, + TYPE_DEVDAX = 2 +}; + +int util_file_dir_open(struct dir_handle *a, const char *path); +int util_file_dir_next(struct dir_handle *a, struct file_info *info); +int util_file_dir_close(struct dir_handle *a); +int util_file_dir_remove(const char *path); +int util_file_exists(const char *path); +enum file_type util_stat_get_type(const os_stat_t *st); +enum file_type util_fd_get_type(int fd); +enum file_type util_file_get_type(const char *path); +int util_ddax_region_find(const char *path, unsigned *region_id); +ssize_t util_file_get_size(const char *path); +ssize_t util_fd_get_size(int fd); +size_t util_file_device_dax_alignment(const char *path); +void *util_file_map_whole(const char *path); +int util_file_zero(const char *path, os_off_t off, size_t len); +ssize_t util_file_pread(const char *path, void *buffer, size_t size, + os_off_t offset); +ssize_t util_file_pwrite(const char *path, const void *buffer, size_t size, + os_off_t offset); + +int util_tmpfile(const char *dir, const char *templ, int flags); +int util_is_absolute_path(const char *path); + +int util_file_create(const char *path, size_t size, size_t minsize); +int util_file_open(const char *path, size_t *size, size_t minsize, int flags); +int util_unlink(const char *path); +int util_unlink_flock(const char *path); +int util_file_mkdir(const char *path, mode_t mode); + +int util_write_all(int fd, const char *buf, size_t count); + +#ifndef _WIN32 +#define util_read read +#define util_write write +#else +static inline ssize_t +util_read(int fd, void *buf, size_t count) +{ + /* + * Simulate short read, because Windows' _read uses "unsigned" as + * a type of the last argument and "int" as a return type. + * We have to limit "count" to what _read can return as a success, + * not what it can accept. 
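+	 * Callers that need to transfer more than INT_MAX bytes must loop,
+	 * the way util_write_all() in file.c does for writes.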
+ */ + if (count > INT_MAX) + count = INT_MAX; + return _read(fd, buf, (unsigned)count); +} + +static inline ssize_t +util_write(int fd, const void *buf, size_t count) +{ + /* + * Simulate short write, because Windows' _write uses "unsigned" as + * a type of the last argument and "int" as a return type. + * We have to limit "count" to what _write can return as a success, + * not what it can accept. + */ + if (count > INT_MAX) + count = INT_MAX; + return _write(fd, buf, (unsigned)count); +} +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/pmdk/src/common/file_posix.c b/src/pmdk/src/common/file_posix.c new file mode 100644 index 000000000..f887244ef --- /dev/null +++ b/src/pmdk/src/common/file_posix.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * file_posix.c -- Posix versions of file APIs + */ + +/* for O_TMPFILE */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "os.h" +#include "file.h" +#include "out.h" +#include "libpmem2.h" +#include "../libpmem2/pmem2_utils.h" +#include "../libpmem2/region_namespace.h" + +/* + * util_tmpfile_mkstemp -- (internal) create temporary file + * if O_TMPFILE not supported + */ +static int +util_tmpfile_mkstemp(const char *dir, const char *templ) +{ + /* the templ must start with a path separator */ + ASSERTeq(templ[0], '/'); + + int oerrno; + int fd = -1; + + char *fullname = alloca(strlen(dir) + strlen(templ) + 1); + + (void) strcpy(fullname, dir); + (void) strcat(fullname, templ); + + sigset_t set, oldset; + sigfillset(&set); + (void) sigprocmask(SIG_BLOCK, &set, &oldset); + + mode_t prev_umask = umask(S_IRWXG | S_IRWXO); + + fd = os_mkstemp(fullname); + + umask(prev_umask); + + if (fd < 0) { + ERR("!mkstemp"); + goto err; + } + + (void) os_unlink(fullname); + (void) sigprocmask(SIG_SETMASK, &oldset, NULL); + LOG(3, "unlinked file is \"%s\"", fullname); + + return fd; + +err: + oerrno = errno; + (void) sigprocmask(SIG_SETMASK, &oldset, NULL); + if (fd != -1) + (void) os_close(fd); + errno = oerrno; + return -1; +} + +/* + * util_tmpfile -- create temporary file + */ +int +util_tmpfile(const char *dir, const char *templ, int flags) +{ + LOG(3, "dir \"%s\" template \"%s\" flags %x", dir, templ, flags); + + /* only O_EXCL is allowed here */ + ASSERT(flags == 0 || flags == O_EXCL); + +#ifdef O_TMPFILE + int fd = os_open(dir, O_TMPFILE | O_RDWR | flags, S_IRUSR | S_IWUSR); + /* + * Open can fail if underlying file system does not support O_TMPFILE + * flag. 
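+	 * The fallback to the mkstemp()-based variant above is taken only
+	 * when errno is EOPNOTSUPP; any other failure is reported.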
+ */ + if (fd >= 0) + return fd; + if (errno != EOPNOTSUPP) { + ERR("!open"); + return -1; + } +#endif + + return util_tmpfile_mkstemp(dir, templ); +} + +/* + * util_is_absolute_path -- check if the path is an absolute one + */ +int +util_is_absolute_path(const char *path) +{ + LOG(3, "path: %s", path); + + if (path[0] == OS_DIR_SEPARATOR) + return 1; + else + return 0; +} + +/* + * util_create_mkdir -- creates new dir + */ +int +util_file_mkdir(const char *path, mode_t mode) +{ + LOG(3, "path: %s mode: %o", path, mode); + return mkdir(path, mode); +} + +/* + * util_file_dir_open -- open a directory + */ +int +util_file_dir_open(struct dir_handle *handle, const char *path) +{ + LOG(3, "handle: %p path: %s", handle, path); + handle->dirp = opendir(path); + return handle->dirp == NULL; +} + +/* + * util_file_dir_next -- read next file in directory + */ +int +util_file_dir_next(struct dir_handle *handle, struct file_info *info) +{ + LOG(3, "handle: %p info: %p", handle, info); + struct dirent *d = readdir(handle->dirp); + if (d == NULL) + return 1; /* break */ + info->filename[NAME_MAX] = '\0'; + strncpy(info->filename, d->d_name, NAME_MAX + 1); + if (info->filename[NAME_MAX] != '\0') + return -1; /* filename truncated */ + info->is_dir = d->d_type == DT_DIR; + return 0; /* continue */ +} + +/* + * util_file_dir_close -- close a directory + */ +int +util_file_dir_close(struct dir_handle *handle) +{ + LOG(3, "path: %p", handle); + return closedir(handle->dirp); +} + +/* + * util_file_dir_remove -- remove directory + */ +int +util_file_dir_remove(const char *path) +{ + LOG(3, "path: %s", path); + return rmdir(path); +} + +/* + * device_dax_alignment -- (internal) checks the alignment of given Device DAX + */ +static size_t +device_dax_alignment(const char *path) +{ + size_t size = 0; + + LOG(3, "path \"%s\"", path); + + struct pmem2_source *src; + + int fd = os_open(path, O_RDONLY); + if (fd == -1) { + LOG(1, "Cannot open file %s", path); + return size; + } + + int ret = pmem2_source_from_fd(&src, fd); + if (ret) + goto end; + + ret = pmem2_device_dax_alignment(src, &size); + if (ret) { + size = 0; + goto end; + } + +end: + pmem2_source_delete(&src); + os_close(fd); + return size; +} + +/* + * util_file_device_dax_alignment -- returns internal Device DAX alignment + */ +size_t +util_file_device_dax_alignment(const char *path) +{ + LOG(3, "path \"%s\"", path); + + return device_dax_alignment(path); +} + +/* + * util_ddax_region_find -- returns Device DAX region id + */ +int +util_ddax_region_find(const char *path, unsigned *region_id) +{ + LOG(3, "path \"%s\"", path); + + os_stat_t st; + int ret; + + if (os_stat(path, &st) < 0) { + ERR("!stat \"%s\"", path); + return -1; + } + + enum pmem2_file_type ftype; + if ((ret = pmem2_get_type_from_stat(&st, &ftype)) < 0) { + errno = pmem2_err_to_errno(ret); + return -1; + } + + /* + * XXX: this is a hack to workaround the fact that common is using + * non-public APIs of libpmem2, and there's often no way to properly + * create the required structures... + * This needs to go away together with refactoring that untangles + * these internal dependencies. 
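+	 * Until then, the pmem2_source structure is simply filled in by
+	 * hand below.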
+ */ + struct pmem2_source src; + src.type = PMEM2_SOURCE_FD; + src.value.ftype = ftype; + src.value.st_rdev = st.st_rdev; + src.value.st_dev = st.st_dev; + + ret = pmem2_get_region_id(&src, region_id); + if (ret < 0) { + errno = pmem2_err_to_errno(ret); + return -1; + } + + return ret; +} diff --git a/src/pmdk/src/common/file_windows.c b/src/pmdk/src/common/file_windows.c new file mode 100644 index 000000000..f67c11abf --- /dev/null +++ b/src/pmdk/src/common/file_windows.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * file_windows.c -- Windows emulation of Linux-specific system calls + */ + +/* + * XXX - The initial approach to PMDK for Windows port was to minimize the + * amount of changes required in the core part of the library, and to avoid + * preprocessor conditionals, if possible. For that reason, some of the + * Linux system calls that have no equivalents on Windows have been emulated + * using Windows API. + * Note that it was not a goal to fully emulate POSIX-compliant behavior + * of mentioned functions. They are used only internally, so current + * implementation is just good enough to satisfy PMDK needs and to make it + * work on Windows. + */ + +#include +#include +#include + +#include "alloc.h" +#include "file.h" +#include "out.h" +#include "os.h" + +/* + * util_tmpfile -- create a temporary file + */ +int +util_tmpfile(const char *dir, const char *templ, int flags) +{ + LOG(3, "dir \"%s\" template \"%s\" flags %x", dir, templ, flags); + + /* only O_EXCL is allowed here */ + ASSERT(flags == 0 || flags == O_EXCL); + + int oerrno; + int fd = -1; + + size_t len = strlen(dir) + strlen(templ) + 1; + char *fullname = Malloc(sizeof(*fullname) * len); + if (fullname == NULL) { + ERR("!Malloc"); + return -1; + } + + int ret = _snprintf(fullname, len, "%s%s", dir, templ); + if (ret < 0 || ret >= len) { + ERR("snprintf: %d", ret); + goto err; + } + + LOG(4, "fullname \"%s\"", fullname); + + /* + * XXX - block signals and modify file creation mask for the time + * of mkstmep() execution. Restore previous settings once the file + * is created. + */ + + fd = os_mkstemp(fullname); + if (fd < 0) { + ERR("!os_mkstemp"); + goto err; + } + + /* + * There is no point to use unlink() here. First, because it does not + * work on open files. Second, because the file is created with + * O_TEMPORARY flag, and it looks like such temp files cannot be open + * from another process, even though they are visible on + * the filesystem. + */ + + Free(fullname); + return fd; + +err: + Free(fullname); + oerrno = errno; + if (fd != -1) + (void) os_close(fd); + errno = oerrno; + return -1; +} + +/* + * util_is_absolute_path -- check if the path is absolute + */ +int +util_is_absolute_path(const char *path) +{ + LOG(3, "path \"%s\"", path); + + if (path == NULL || path[0] == '\0') + return 0; + + if (path[0] == '\\' || path[1] == ':') + return 1; + + return 0; +} + +/* + * util_file_mkdir -- creates new dir + */ +int +util_file_mkdir(const char *path, mode_t mode) +{ + /* + * On windows we cannot create read only dir so mode + * parameter is useless. 
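+	 * (The underlying _mkdir() takes no mode argument at all.)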
+	 */
+	UNREFERENCED_PARAMETER(mode);
+	LOG(3, "path: %s mode: %d", path, mode);
+	return _mkdir(path);
+}
+
+/*
+ * util_file_dir_open -- open a directory
+ */
+int
+util_file_dir_open(struct dir_handle *handle, const char *path)
+{
+	/* init handle */
+	handle->handle = NULL;
+	handle->path = path;
+	return 0;
+}
+
+/*
+ * util_file_dir_next -- read next file in directory
+ */
+int
+util_file_dir_next(struct dir_handle *handle, struct file_info *info)
+{
+	WIN32_FIND_DATAA data;
+	if (handle->handle == NULL) {
+		handle->handle = FindFirstFileA(handle->path, &data);
+		if (handle->handle == NULL)
+			return 1;
+	} else {
+		if (FindNextFileA(handle->handle, &data) == 0)
+			return 1;
+	}
+	info->filename[NAME_MAX] = '\0';
+	strncpy(info->filename, data.cFileName, NAME_MAX + 1);
+	if (info->filename[NAME_MAX] != '\0')
+		return -1; /* filename truncated */
+	info->is_dir = data.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY;
+
+	return 0;
+}
+
+/*
+ * util_file_dir_close -- close a directory
+ */
+int
+util_file_dir_close(struct dir_handle *handle)
+{
+	return FindClose(handle->handle);
+}
+
+/*
+ * util_file_dir_remove -- remove directory
+ */
+int
+util_file_dir_remove(const char *path)
+{
+	return RemoveDirectoryA(path) == 0 ? -1 : 0;
+}
+
+/*
+ * util_file_device_dax_alignment -- returns internal Device DAX alignment
+ */
+size_t
+util_file_device_dax_alignment(const char *path)
+{
+	LOG(3, "path \"%s\"", path);
+
+	return 0;
+}
+
+/*
+ * util_ddax_region_find -- returns DEV dax region id that contains file
+ */
+int
+util_ddax_region_find(const char *path, unsigned *region_id)
+{
+	LOG(3, "path \"%s\"", path);
+
+	return -1;
+}
diff --git a/src/pmdk/src/common/libpmemcommon.vcxproj b/src/pmdk/src/common/libpmemcommon.vcxproj
new file mode 100644
index 000000000..1fc1b7a96
--- /dev/null
+++ b/src/pmdk/src/common/libpmemcommon.vcxproj
@@ -0,0 +1,163 @@
[163 lines of Visual Studio project XML; the markup was stripped in extraction,
leaving only stray values (Debug/Release x64 configurations, v140 toolset,
StaticLibrary output, include paths and preprocessor definitions), so the
body is omitted here.]
diff --git a/src/pmdk/src/common/libpmemcommon.vcxproj.filters b/src/pmdk/src/common/libpmemcommon.vcxproj.filters
new file mode 100644
index 000000000..4b2ec8511
--- /dev/null
+++ b/src/pmdk/src/common/libpmemcommon.vcxproj.filters
@@ -0,0 +1,149 @@
[149 lines of Visual Studio filters XML; only the "Source Files" and
"Header Files" filter names survived extraction, so the body is omitted here.]
diff --git a/src/pmdk/src/common/mmap.c b/src/pmdk/src/common/mmap.c
new file mode 100644
index 000000000..b118b083a
--- /dev/null
+++ b/src/pmdk/src/common/mmap.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * mmap.c -- mmap utilities
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "file.h"
+#include "queue.h"
+#include "mmap.h"
+#include "sys_util.h"
+#include "os.h"
+#include "alloc.h"
+#include "libpmem2.h"
+
+int Mmap_no_random;
+void *Mmap_hint;
+static os_rwlock_t Mmap_list_lock;
+
+static PMDK_SORTEDQ_HEAD(map_list_head, map_tracker) Mmap_list =
+	PMDK_SORTEDQ_HEAD_INITIALIZER(Mmap_list);
+
+/*
+ * util_mmap_init -- initialize the mmap utils
+ *
+ * This is called from the library initialization code.
+ */
+void
+util_mmap_init(void)
+{
+	LOG(3, NULL);
+
+	util_rwlock_init(&Mmap_list_lock);
+
+	/*
+	 * For testing, allow overriding the default mmap() hint address.
+	 * If hint address is defined, it also disables address randomization.
+	 */
+	char *e = os_getenv("PMEM_MMAP_HINT");
+	if (e) {
+		char *endp;
+		errno = 0;
+		unsigned long long val = strtoull(e, &endp, 16);
+
+		if (errno || endp == e) {
+			LOG(2, "Invalid PMEM_MMAP_HINT");
+		} else if (os_access(OS_MAPFILE, R_OK)) {
+			LOG(2, "No /proc, PMEM_MMAP_HINT ignored");
+		} else {
+			Mmap_hint = (void *)val;
+			Mmap_no_random = 1;
+			LOG(3, "PMEM_MMAP_HINT set to %p", Mmap_hint);
+		}
+	}
+}
+
+/*
+ * util_mmap_fini -- clean up the mmap utils
+ *
+ * This is called before process stop.
+ */
+void
+util_mmap_fini(void)
+{
+	LOG(3, NULL);
+
+	util_rwlock_destroy(&Mmap_list_lock);
+}
+
+/*
+ * util_map -- memory map a file
+ *
+ * This is just a convenience function that calls mmap() with the
+ * appropriate arguments and includes our trace points.
+ */
+void *
+util_map(int fd, os_off_t off, size_t len, int flags, int rdonly,
+	size_t req_align, int *map_sync)
+{
+	LOG(3, "fd %d len %zu flags %d rdonly %d req_align %zu map_sync %p",
+		fd, len, flags, rdonly, req_align, map_sync);
+
+	void *base;
+	void *addr = util_map_hint(len, req_align);
+	if (addr == MAP_FAILED) {
+		LOG(1, "cannot find a contiguous region of given size");
+		return NULL;
+	}
+
+	if (req_align)
+		ASSERTeq((uintptr_t)addr % req_align, 0);
+
+	int proto = rdonly ?
PROT_READ : PROT_READ|PROT_WRITE; + base = util_map_sync(addr, len, proto, flags, fd, off, map_sync); + if (base == MAP_FAILED) { + ERR("!mmap %zu bytes", len); + return NULL; + } + + LOG(3, "mapped at %p", base); + + return base; +} + +/* + * util_unmap -- unmap a file + * + * This is just a convenience function that calls munmap() with the + * appropriate arguments and includes our trace points. + */ +int +util_unmap(void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + +/* + * XXX Workaround for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=169608 + */ +#ifdef __FreeBSD__ + if (!IS_PAGE_ALIGNED((uintptr_t)addr)) { + errno = EINVAL; + ERR("!munmap"); + return -1; + } +#endif + int retval = munmap(addr, len); + if (retval < 0) + ERR("!munmap"); + + return retval; +} + +/* + * util_range_ro -- set a memory range read-only + */ +int +util_range_ro(void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + uintptr_t uptr; + int retval; + + /* + * mprotect requires addr to be a multiple of pagesize, so + * adjust addr and len to represent the full 4k chunks + * covering the given range. + */ + + /* increase len by the amount we gain when we round addr down */ + len += (uintptr_t)addr & (Pagesize - 1); + + /* round addr down to page boundary */ + uptr = (uintptr_t)addr & ~(Pagesize - 1); + + if ((retval = mprotect((void *)uptr, len, PROT_READ)) < 0) + ERR("!mprotect: PROT_READ"); + + return retval; +} + +/* + * util_range_rw -- set a memory range read-write + */ +int +util_range_rw(void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + uintptr_t uptr; + int retval; + + /* + * mprotect requires addr to be a multiple of pagesize, so + * adjust addr and len to represent the full 4k chunks + * covering the given range. + */ + + /* increase len by the amount we gain when we round addr down */ + len += (uintptr_t)addr & (Pagesize - 1); + + /* round addr down to page boundary */ + uptr = (uintptr_t)addr & ~(Pagesize - 1); + + if ((retval = mprotect((void *)uptr, len, PROT_READ|PROT_WRITE)) < 0) + ERR("!mprotect: PROT_READ|PROT_WRITE"); + + return retval; +} + +/* + * util_range_none -- set a memory range for no access allowed + */ +int +util_range_none(void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + uintptr_t uptr; + int retval; + + /* + * mprotect requires addr to be a multiple of pagesize, so + * adjust addr and len to represent the full 4k chunks + * covering the given range. + */ + + /* increase len by the amount we gain when we round addr down */ + len += (uintptr_t)addr & (Pagesize - 1); + + /* round addr down to page boundary */ + uptr = (uintptr_t)addr & ~(Pagesize - 1); + + if ((retval = mprotect((void *)uptr, len, PROT_NONE)) < 0) + ERR("!mprotect: PROT_NONE"); + + return retval; +} + +/* + * util_range_comparer -- (internal) compares the two mapping trackers + */ +static intptr_t +util_range_comparer(struct map_tracker *a, struct map_tracker *b) +{ + return ((intptr_t)a->base_addr - (intptr_t)b->base_addr); +} + +/* + * util_range_find_unlocked -- (internal) find the map tracker + * for given address range + * + * Returns the first entry at least partially overlapping given range. + * It's up to the caller to check whether the entry exactly matches the range, + * or if the range spans multiple entries. 
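+ *
+ * For example (addresses illustrative), with trackers [0x1000, 0x3000) and
+ * [0x3000, 0x5000) on the list, a query for [0x2000, 0x4000) returns only
+ * the first tracker; util_range_unregister() below keeps re-running the
+ * search and splitting until no overlap remains.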
+ */ +static struct map_tracker * +util_range_find_unlocked(uintptr_t addr, size_t len) +{ + LOG(10, "addr 0x%016" PRIxPTR " len %zu", addr, len); + + uintptr_t end = addr + len; + + struct map_tracker *mt; + + PMDK_SORTEDQ_FOREACH(mt, &Mmap_list, entry) { + if (addr < mt->end_addr && + (addr >= mt->base_addr || end > mt->base_addr)) + goto out; + + /* break if there is no chance to find matching entry */ + if (addr < mt->base_addr) + break; + } + mt = NULL; + +out: + return mt; +} + +/* + * util_range_find -- find the map tracker for given address range + * the same as util_range_find_unlocked but locked + */ +struct map_tracker * +util_range_find(uintptr_t addr, size_t len) +{ + LOG(10, "addr 0x%016" PRIxPTR " len %zu", addr, len); + + util_rwlock_rdlock(&Mmap_list_lock); + + struct map_tracker *mt = util_range_find_unlocked(addr, len); + + util_rwlock_unlock(&Mmap_list_lock); + return mt; +} + +/* + * util_range_register -- add a memory range into a map tracking list + */ +int +util_range_register(const void *addr, size_t len, const char *path, + enum pmem_map_type type) +{ + LOG(3, "addr %p len %zu path %s type %d", addr, len, path, type); + + /* check if not tracked already */ + if (util_range_find((uintptr_t)addr, len) != NULL) { + ERR( + "duplicated persistent memory range; presumably unmapped with munmap() instead of pmem_unmap(): addr %p len %zu", + addr, len); + errno = ENOMEM; + return -1; + } + + struct map_tracker *mt; + mt = Malloc(sizeof(struct map_tracker)); + if (mt == NULL) { + ERR("!Malloc"); + return -1; + } + + mt->base_addr = (uintptr_t)addr; + mt->end_addr = mt->base_addr + len; + mt->type = type; + if (type == PMEM_DEV_DAX) { + unsigned region_id; + int ret = util_ddax_region_find(path, ®ion_id); + if (ret < 0) { + ERR("Cannot find DAX device region id"); + return -1; + } + mt->region_id = region_id; + } + + util_rwlock_wrlock(&Mmap_list_lock); + + PMDK_SORTEDQ_INSERT(&Mmap_list, mt, entry, struct map_tracker, + util_range_comparer); + + util_rwlock_unlock(&Mmap_list_lock); + + return 0; +} + +/* + * util_range_split -- (internal) remove or split a map tracking entry + */ +static int +util_range_split(struct map_tracker *mt, const void *addrp, const void *endp) +{ + LOG(3, "begin %p end %p", addrp, endp); + + uintptr_t addr = (uintptr_t)addrp; + uintptr_t end = (uintptr_t)endp; + ASSERTne(mt, NULL); + if (addr == end || addr % Mmap_align != 0 || end % Mmap_align != 0) { + ERR( + "invalid munmap length, must be non-zero and page aligned"); + return -1; + } + + struct map_tracker *mtb = NULL; + struct map_tracker *mte = NULL; + + /* + * 1) b e b e + * xxxxxxxxxxxxx => xxx.......xxxx - mtb+mte + * 2) b e b e + * xxxxxxxxxxxxx => xxxxxxx....... - mtb + * 3) b e b e + * xxxxxxxxxxxxx => ........xxxxxx - mte + * 4) b e b e + * xxxxxxxxxxxxx => .............. 
+	 */
+
+	if (addr > mt->base_addr) {
+		/* case #1/2 */
+		/* new mapping at the beginning */
+		mtb = Malloc(sizeof(struct map_tracker));
+		if (mtb == NULL) {
+			ERR("!Malloc");
+			goto err;
+		}
+
+		mtb->base_addr = mt->base_addr;
+		mtb->end_addr = addr;
+		mtb->region_id = mt->region_id;
+		mtb->type = mt->type;
+	}
+
+	if (end < mt->end_addr) {
+		/* case #1/3 */
+		/* new mapping at the end */
+		mte = Malloc(sizeof(struct map_tracker));
+		if (mte == NULL) {
+			ERR("!Malloc");
+			goto err;
+		}
+
+		mte->base_addr = end;
+		mte->end_addr = mt->end_addr;
+		mte->region_id = mt->region_id;
+		mte->type = mt->type;
+	}
+
+	PMDK_SORTEDQ_REMOVE(&Mmap_list, mt, entry);
+
+	if (mtb) {
+		PMDK_SORTEDQ_INSERT(&Mmap_list, mtb, entry,
+				struct map_tracker, util_range_comparer);
+	}
+
+	if (mte) {
+		PMDK_SORTEDQ_INSERT(&Mmap_list, mte, entry,
+				struct map_tracker, util_range_comparer);
+	}
+
+	/* free entry for the original mapping */
+	Free(mt);
+	return 0;
+
+err:
+	Free(mtb);
+	Free(mte);
+	return -1;
+}
+
+/*
+ * util_range_unregister -- remove a memory range
+ * from the map tracking list
+ *
+ * Remove the region between [begin,end]. If it falls in the middle of an
+ * existing mapping, the split results in two new map trackers.
+ */
+int
+util_range_unregister(const void *addr, size_t len)
+{
+	LOG(3, "addr %p len %zu", addr, len);
+
+	int ret = 0;
+
+	util_rwlock_wrlock(&Mmap_list_lock);
+
+	/*
+	 * Changes in the map tracker list must match the underlying behavior.
+	 *
+	 * $ man 2 mmap:
+	 * The address addr must be a multiple of the page size (but length
+	 * need not be). All pages containing a part of the indicated range
+	 * are unmapped.
+	 *
+	 * This means that we must align the length to the page size.
+	 */
+	len = PAGE_ALIGNED_UP_SIZE(len);
+
+	void *end = (char *)addr + len;
+
+	/* XXX optimize the loop */
+	struct map_tracker *mt;
+	while ((mt = util_range_find_unlocked((uintptr_t)addr, len)) != NULL) {
+		if (util_range_split(mt, addr, end) != 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	util_rwlock_unlock(&Mmap_list_lock);
+	return ret;
+}
+
+/*
+ * util_range_is_pmem -- return true if entire range
+ * is persistent memory
+ */
+int
+util_range_is_pmem(const void *addrp, size_t len)
+{
+	LOG(10, "addr %p len %zu", addrp, len);
+
+	uintptr_t addr = (uintptr_t)addrp;
+	int retval = 1;
+
+	util_rwlock_rdlock(&Mmap_list_lock);
+
+	do {
+		struct map_tracker *mt = util_range_find(addr, len);
+		if (mt == NULL) {
+			LOG(4, "address not found 0x%016" PRIxPTR, addr);
+			retval = 0;
+			break;
+		}
+
+		LOG(10, "range found - begin 0x%016" PRIxPTR
+				" end 0x%016" PRIxPTR,
+				mt->base_addr, mt->end_addr);
+
+		if (mt->base_addr > addr) {
+			LOG(10, "base address doesn't match: "
+				"0x%" PRIxPTR " > 0x%" PRIxPTR,
+				mt->base_addr, addr);
+			retval = 0;
+			break;
+		}
+
+		uintptr_t map_len = mt->end_addr - addr;
+		if (map_len > len)
+			map_len = len;
+		len -= map_len;
+		addr += map_len;
+	} while (len > 0);
+
+	util_rwlock_unlock(&Mmap_list_lock);
+
+	return retval;
+}
diff --git a/src/pmdk/src/common/mmap.h b/src/pmdk/src/common/mmap.h
new file mode 100644
index 000000000..5fff60ea8
--- /dev/null
+++ b/src/pmdk/src/common/mmap.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * mmap.h -- internal definitions for mmap module
+ */
+
+#ifndef PMDK_MMAP_H
+#define PMDK_MMAP_H 1
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "out.h"
+#include "queue.h"
+#include "os.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int Mmap_no_random;
+extern void *Mmap_hint;
+extern char *Mmap_mapfile;
+
+void *util_map_sync(void *addr, size_t len, int proto, int flags, int fd,
+	os_off_t offset, int *map_sync);
+void *util_map(int fd, os_off_t off, size_t len, int flags, int rdonly,
+	size_t req_align, int *map_sync);
+int util_unmap(void *addr, size_t len);
+
+#ifdef __FreeBSD__
+#define MAP_NORESERVE 0
+#define OS_MAPFILE "/proc/curproc/map"
+#else
+#define OS_MAPFILE "/proc/self/maps"
+#endif
+
+#ifndef MAP_SYNC
+#define MAP_SYNC 0x80000
+#endif
+
+#ifndef MAP_SHARED_VALIDATE
+#define MAP_SHARED_VALIDATE 0x03
+#endif
+
+/*
+ * macros for micromanaging range protections for the debug version
+ */
+#ifdef DEBUG
+
+#define RANGE(addr, len, is_dev_dax, type) do {\
+	if (!is_dev_dax) ASSERT(util_range_##type(addr, len) >= 0);\
+} while (0)
+
+#else
+
+#define RANGE(addr, len, is_dev_dax, type) do {} while (0)
+
+#endif
+
+#define RANGE_RO(addr, len, is_dev_dax) RANGE(addr, len, is_dev_dax, ro)
+#define RANGE_RW(addr, len, is_dev_dax) RANGE(addr, len, is_dev_dax, rw)
+#define RANGE_NONE(addr, len, is_dev_dax) RANGE(addr, len, is_dev_dax, none)
+
+/* pmem mapping type */
+enum pmem_map_type {
+	PMEM_DEV_DAX,	/* device dax */
+	PMEM_MAP_SYNC,	/* mapping with MAP_SYNC flag on dax fs */
+
+	MAX_PMEM_TYPE
+};
+
+/*
+ * this structure tracks the file mappings outstanding per file handle
+ */
+struct map_tracker {
+	PMDK_SORTEDQ_ENTRY(map_tracker) entry;
+	uintptr_t base_addr;
+	uintptr_t end_addr;
+	unsigned region_id;
+	enum pmem_map_type type;
+#ifdef _WIN32
+	/* Windows-specific data */
+	HANDLE FileHandle;
+	HANDLE FileMappingHandle;
+	DWORD Access;
+	os_off_t Offset;
+	size_t FileLen;
+#endif
+};
+
+void util_mmap_init(void);
+void util_mmap_fini(void);
+
+int util_range_ro(void *addr, size_t len);
+int util_range_rw(void *addr, size_t len);
+int util_range_none(void *addr, size_t len);
+
+char *util_map_hint_unused(void *minaddr, size_t len, size_t align);
+char *util_map_hint(size_t len, size_t req_align);
+
+#define KILOBYTE ((uintptr_t)1 << 10)
+#define MEGABYTE ((uintptr_t)1 << 20)
+#define GIGABYTE ((uintptr_t)1 << 30)
+
+/*
+ * util_map_hint_align -- choose the desired mapping alignment
+ *
+ * The smallest supported alignment is 2 megabytes because of the object
+ * alignment requirements. Changing this value to 4 kilobytes constitutes a
+ * layout change.
+ *
+ * Use 1GB page alignment only if the mapping length is at least
+ * twice as big as the page size.
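+ *
+ * Worked examples (illustrative only, not part of the original
+ * comment; the values follow directly from the code below):
+ *	util_map_hint_align(len, 0) == 2 * MEGABYTE	for len < 2 * GIGABYTE
+ *	util_map_hint_align(4 * GIGABYTE, 0) == GIGABYTE
+ *	util_map_hint_align(len, 4096) == 4096		(req_align always wins)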
+ */
+static inline size_t
+util_map_hint_align(size_t len, size_t req_align)
+{
+	size_t align = 2 * MEGABYTE;
+	if (req_align)
+		align = req_align;
+	else if (len >= 2 * GIGABYTE)
+		align = GIGABYTE;
+
+	return align;
+}
+
+int util_range_register(const void *addr, size_t len, const char *path,
+	enum pmem_map_type type);
+int util_range_unregister(const void *addr, size_t len);
+struct map_tracker *util_range_find(uintptr_t addr, size_t len);
+int util_range_is_pmem(const void *addr, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/pmdk/src/common/mmap_posix.c b/src/pmdk/src/common/mmap_posix.c
new file mode 100644
index 000000000..94c875c02
--- /dev/null
+++ b/src/pmdk/src/common/mmap_posix.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2014-2018, Intel Corporation */
+
+/*
+ * mmap_posix.c -- memory-mapped files for Posix
+ */
+
+#include
+#include
+#include
+#include "mmap.h"
+#include "out.h"
+#include "os.h"
+
+#define PROCMAXLEN 2048 /* maximum expected line length in /proc files */
+
+char *Mmap_mapfile = OS_MAPFILE; /* Should be modified only for testing */
+
+#ifdef __FreeBSD__
+static const char * const sscanf_os = "%p %p";
+#else
+static const char * const sscanf_os = "%p-%p";
+#endif
+
+/*
+ * util_map_hint_unused -- use /proc to determine a hint address for mmap()
+ *
+ * This is a helper function for util_map_hint().
+ * It opens up /proc/self/maps and looks for the first unused address
+ * in the process address space that is:
+ * - greater than or equal to the 'minaddr' argument,
+ * - large enough to hold a range of the given length,
+ * - aligned to the specified unit.
+ *
+ * Asking for an aligned address like this allows the DAX code to use large
+ * mappings. It is not an error if mmap() ignores the hint and chooses a
+ * different address.
+ */
+char *
+util_map_hint_unused(void *minaddr, size_t len, size_t align)
+{
+	LOG(3, "minaddr %p len %zu align %zu", minaddr, len, align);
+	ASSERT(align > 0);
+
+	FILE *fp;
+	if ((fp = os_fopen(Mmap_mapfile, "r")) == NULL) {
+		ERR("!%s", Mmap_mapfile);
+		return MAP_FAILED;
+	}
+
+	char line[PROCMAXLEN];	/* for fgets() */
+	char *lo = NULL;	/* beginning of current range in maps file */
+	char *hi = NULL;	/* end of current range in maps file */
+	char *raddr = minaddr;	/* ignore regions below 'minaddr' */
+
+	if (raddr == NULL)
+		raddr += Pagesize;
+
+	raddr = (char *)roundup((uintptr_t)raddr, align);
+
+	while (fgets(line, PROCMAXLEN, fp) != NULL) {
+		/* check for range line */
+		if (sscanf(line, sscanf_os, &lo, &hi) == 2) {
+			LOG(4, "%p-%p", lo, hi);
+			if (lo > raddr) {
+				if ((uintptr_t)(lo - raddr) >= len) {
+					LOG(4, "unused region of size %zu "
+							"found at %p",
+							lo - raddr, raddr);
+					break;
+				} else {
+					LOG(4, "region is too small: %zu < %zu",
+							lo - raddr, len);
+				}
+			}
+
+			if (hi > raddr) {
+				raddr = (char *)roundup((uintptr_t)hi, align);
+				LOG(4, "nearest aligned addr %p", raddr);
+			}
+
+			if (raddr == NULL) {
+				LOG(4, "end of address space reached");
+				break;
+			}
+		}
+	}
+
+	/*
+	 * Check for a case when this is the last unused range in the address
+	 * space, but is not large enough. (very unlikely)
+	 */
+	if ((raddr != NULL) && (UINTPTR_MAX - (uintptr_t)raddr < len)) {
+		ERR("end of address space reached");
+		raddr = MAP_FAILED;
+	}
+
+	fclose(fp);
+
+	LOG(3, "returning %p", raddr);
+	return raddr;
+}
+
+/*
+ * util_map_hint -- determine hint address for mmap()
+ *
+ * If the PMEM_MMAP_HINT environment variable is not set, we let the system
+ * pick a randomized mapping address.
+ * Otherwise, a user-defined hint address is used.
+ *
+ * ASLR in the 64-bit Linux kernel uses 28 bits of randomness for mmap
+ * (bit positions 12-39), which means the base mapping address is randomized
+ * within the [0..1024GB] range, with 4KB granularity. Assuming an additional
+ * 1GB alignment, this results in 1024 possible locations.
+ *
+ * Configuring the hint address via the PMEM_MMAP_HINT environment variable
+ * disables address randomization. In that case, the function will search for
+ * the first unused, properly aligned region of the given size, above the
+ * specified address.
+ */
+char *
+util_map_hint(size_t len, size_t req_align)
+{
+	LOG(3, "len %zu req_align %zu", len, req_align);
+
+	char *hint_addr = MAP_FAILED;
+
+	/* choose the desired alignment based on the requested length */
+	size_t align = util_map_hint_align(len, req_align);
+
+	if (Mmap_no_random) {
+		LOG(4, "user-defined hint %p", Mmap_hint);
+		hint_addr = util_map_hint_unused(Mmap_hint, len, align);
+	} else {
+		/*
+		 * Create dummy mapping to find an unused region of given size.
+		 * Request an increased size for later address alignment.
+		 * Use MAP_PRIVATE with read-only access to simulate
+		 * zero cost for overcommit accounting. Note: MAP_NORESERVE
+		 * flag is ignored if overcommit is disabled (mode 2).
+		 */
+		char *addr = mmap(NULL, len + align, PROT_READ,
+					MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		if (addr == MAP_FAILED) {
+			ERR("!mmap MAP_ANONYMOUS");
+		} else {
+			LOG(4, "system choice %p", addr);
+			hint_addr = (char *)roundup((uintptr_t)addr, align);
+			munmap(addr, len + align);
+		}
+	}
+	LOG(4, "hint %p", hint_addr);
+
+	return hint_addr;
+}
+
+/*
+ * util_map_sync -- memory map the given file into memory; if the MAP_SHARED
+ * flag is provided, it attempts to use the MAP_SYNC flag. Otherwise it falls
+ * back to plain mmap(2).
+ */
+void *
+util_map_sync(void *addr, size_t len, int proto, int flags, int fd,
+	os_off_t offset, int *map_sync)
+{
+	LOG(15, "addr %p len %zu proto %x flags %x fd %d offset %ld "
+		"map_sync %p", addr, len, proto, flags, fd, offset, map_sync);
+
+	if (map_sync)
+		*map_sync = 0;
+
+	/* if map_sync is NULL do not even try to mmap with MAP_SYNC flag */
+	if (!map_sync || flags & MAP_PRIVATE)
+		return mmap(addr, len, proto, flags, fd, offset);
+
+	/* MAP_SHARED */
+	void *ret = mmap(addr, len, proto,
+			flags | MAP_SHARED_VALIDATE | MAP_SYNC,
+			fd, offset);
+	if (ret != MAP_FAILED) {
+		LOG(4, "mmap with MAP_SYNC succeeded");
+		*map_sync = 1;
+		return ret;
+	}
+
+	if (errno == EINVAL || errno == ENOTSUP) {
+		LOG(4, "mmap with MAP_SYNC not supported");
+		return mmap(addr, len, proto, flags, fd, offset);
+	}
+
+	/* other error */
+	return MAP_FAILED;
+}
diff --git a/src/pmdk/src/common/mmap_windows.c b/src/pmdk/src/common/mmap_windows.c
new file mode 100644
index 000000000..f88a2e178
--- /dev/null
+++ b/src/pmdk/src/common/mmap_windows.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2015-2018, Intel Corporation */
+/*
+ * Copyright (c) 2015-2017, Microsoft Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * mmap_windows.c -- memory-mapped files for Windows + */ + +#include +#include "mmap.h" +#include "out.h" + +/* + * util_map_hint_unused -- use VirtualQuery to determine hint address + * + * This is a helper function for util_map_hint(). + * It iterates through memory regions and looks for the first unused address + * in the process address space that is: + * - greater or equal 'minaddr' argument, + * - large enough to hold range of given length, + * - aligned to the specified unit. + */ +char * +util_map_hint_unused(void *minaddr, size_t len, size_t align) +{ + LOG(3, "minaddr %p len %zu align %zu", minaddr, len, align); + + ASSERT(align > 0); + + MEMORY_BASIC_INFORMATION mi; + char *lo = NULL; /* beginning of current range in maps file */ + char *hi = NULL; /* end of current range in maps file */ + char *raddr = minaddr; /* ignore regions below 'minaddr' */ + + if (raddr == NULL) + raddr += Pagesize; + + raddr = (char *)roundup((uintptr_t)raddr, align); + + while ((uintptr_t)raddr < UINTPTR_MAX - len) { + size_t ret = VirtualQuery(raddr, &mi, sizeof(mi)); + if (ret == 0) { + ERR("VirtualQuery %p", raddr); + return MAP_FAILED; + } + LOG(4, "addr %p len %zu state %d", + mi.BaseAddress, mi.RegionSize, mi.State); + + if ((mi.State != MEM_FREE) || (mi.RegionSize < len)) { + raddr = (char *)mi.BaseAddress + mi.RegionSize; + raddr = (char *)roundup((uintptr_t)raddr, align); + LOG(4, "nearest aligned addr %p", raddr); + } else { + LOG(4, "unused region of size %zu found at %p", + mi.RegionSize, mi.BaseAddress); + return mi.BaseAddress; + } + } + + LOG(4, "end of address space reached"); + return MAP_FAILED; +} + +/* + * util_map_hint -- determine hint address for mmap() + * + * XXX - Windows doesn't support large DAX pages yet, so there is + * no point in aligning for the same. + */ +char * +util_map_hint(size_t len, size_t req_align) +{ + LOG(3, "len %zu req_align %zu", len, req_align); + + char *hint_addr = MAP_FAILED; + + /* choose the desired alignment based on the requested length */ + size_t align = util_map_hint_align(len, req_align); + + if (Mmap_no_random) { + LOG(4, "user-defined hint %p", Mmap_hint); + hint_addr = util_map_hint_unused(Mmap_hint, len, align); + } else { + /* + * Create dummy mapping to find an unused region of given size. 
+		 * Request an increased size for later address alignment.
+		 *
+		 * Use MAP_NORESERVE flag to only reserve the range of pages
+		 * rather than commit. We don't want the pages to be actually
+		 * backed by the operating system paging file, as the swap
+		 * file is usually too small to handle terabyte pools.
+		 */
+		char *addr = mmap(NULL, len + align, PROT_READ,
+				MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
+		if (addr != MAP_FAILED) {
+			LOG(4, "system choice %p", addr);
+			hint_addr = (char *)roundup((uintptr_t)addr, align);
+			munmap(addr, len + align);
+		}
+	}
+
+	LOG(4, "hint %p", hint_addr);
+	return hint_addr;
+}
+
+/*
+ * util_map_sync -- memory map the given file into memory
+ */
+void *
+util_map_sync(void *addr, size_t len, int proto, int flags, int fd,
+	os_off_t offset, int *map_sync)
+{
+	LOG(15, "addr %p len %zu proto %x flags %x fd %d offset %ld",
+		addr, len, proto, flags, fd, offset);
+
+	if (map_sync)
+		*map_sync = 0;
+
+	return mmap(addr, len, proto, flags, fd, offset);
+}
diff --git a/src/pmdk/src/common/os_deep.h b/src/pmdk/src/common/os_deep.h
new file mode 100644
index 000000000..31f4965fa
--- /dev/null
+++ b/src/pmdk/src/common/os_deep.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * os_deep.h -- abstraction layer for common usage of deep_* functions
+ */
+
+#ifndef PMDK_OS_DEEP_PERSIST_H
+#define PMDK_OS_DEEP_PERSIST_H 1
+
+#include
+#include
+#include "set.h"
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+int os_range_deep_common(uintptr_t addr, size_t len);
+int os_part_deep_common(struct pool_replica *rep, unsigned partidx, void *addr,
+	size_t len, int flush);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/common/os_deep_linux.c b/src/pmdk/src/common/os_deep_linux.c
new file mode 100644
index 000000000..03599e50e
--- /dev/null
+++ b/src/pmdk/src/common/os_deep_linux.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * os_deep_linux.c -- Linux abstraction layer
+ */
+
+#define _GNU_SOURCE
+
+#include
+#include
+#include
+#include "out.h"
+#include "os.h"
+#include "mmap.h"
+#include "file.h"
+#include "libpmem.h"
+#include "os_deep.h"
+#include "../libpmem2/deep_flush.h"
+
+/*
+ * os_deep_type -- (internal) perform deep operation based on a pmem
+ * mapping type
+ */
+static int
+os_deep_type(const struct map_tracker *mt, void *addr, size_t len)
+{
+	LOG(15, "mt %p addr %p len %zu", mt, addr, len);
+
+	switch (mt->type) {
+	case PMEM_DEV_DAX:
+		pmem_drain();
+
+		int ret = pmem2_deep_flush_write(mt->region_id);
+		if (ret < 0) {
+			if (ret == PMEM2_E_NOSUPP) {
+				errno = ENOTSUP;
+				LOG(1, "!deep_flush not supported");
+			} else {
+				errno = pmem2_err_to_errno(ret);
+				LOG(2, "cannot write to deep_flush "
+					"in region %u", mt->region_id);
+			}
+			return -1;
+		}
+		return 0;
+	case PMEM_MAP_SYNC:
+		return pmem_msync(addr, len);
+	default:
+		ASSERT(0);
+		return -1;
+	}
+}
+
+/*
+ * os_range_deep_common -- perform deep action on a given address range
+ */
+int
+os_range_deep_common(uintptr_t addr, size_t len)
+{
+	LOG(3, "addr 0x%016" PRIxPTR " len %zu", addr, len);
+
+	while (len != 0) {
+		const struct map_tracker *mt = util_range_find(addr, len);
+
+		/* no more overlapping track regions or NOT a device DAX */
+		if (mt == NULL) {
+			LOG(15, "pmem_msync addr %p, len %lu",
+				(void *)addr, len);
+			return pmem_msync((void *)addr, len);
+		}
+		/*
+		 * For the part of the range that intersects the found
+		 * mapping, write to the (device DAX) deep_flush file.
+		 * Call msync for the non-intersecting part.
+		 */
+		if (mt->base_addr > addr) {
+			size_t curr_len = mt->base_addr - addr;
+			if (curr_len > len)
+				curr_len = len;
+			if (pmem_msync((void *)addr, curr_len) != 0)
+				return -1;
+			len -= curr_len;
+			if (len == 0)
+				return 0;
+			addr = mt->base_addr;
+		}
+		size_t mt_in_len = mt->end_addr - addr;
+		size_t persist_len = MIN(len, mt_in_len);
+
+		if (os_deep_type(mt, (void *)addr, persist_len))
+			return -1;
+
+		if (mt->end_addr >= addr + len)
+			return 0;
+
+		len -= mt_in_len;
+		addr = mt->end_addr;
+	}
+	return 0;
+}
+
+/*
+ * os_part_deep_common -- common function to handle both
+ * deep_persist and deep_drain part flush cases.
+ */
+int
+os_part_deep_common(struct pool_replica *rep, unsigned partidx, void *addr,
+	size_t len, int flush)
+{
+	LOG(3, "part %p part %d addr %p len %lu flush %d",
+		rep, partidx, addr, len, flush);
+
+	if (!rep->is_pmem) {
+		/*
+		 * For a part on non-pmem, call msync on the range to
+		 * deep-flush the data. Deep drain is a no-op, as all
+		 * data is msynced to persistence.
+		 */
+
+		if (!flush)
+			return 0;
+
+		if (pmem_msync(addr, len)) {
+			LOG(1, "pmem_msync(%p, %lu)", addr, len);
+			return -1;
+		}
+		return 0;
+	}
+	struct pool_set_part part = rep->part[partidx];
+	/* Call deep flush if it was requested */
+	if (flush) {
+		LOG(15, "pmem_deep_flush addr %p, len %lu", addr, len);
+		pmem_deep_flush(addr, len);
+	}
+	/*
+	 * Before deep drain call normal drain to ensure that data
+	 * is at least in WPQ.
+	 */
+	pmem_drain();
+
+	if (part.is_dev_dax) {
+		/*
+		 * During deep_drain for a part on device DAX, search for
+		 * the device region id and perform a WPQ flush on the
+		 * found device DAX region.
+		 */
+		unsigned region_id;
+		int ret = util_ddax_region_find(part.path, &region_id);
+
+		if (ret < 0) {
+			if (errno == ENOENT) {
+				errno = ENOTSUP;
+				LOG(1, "!deep_flush not supported");
+			} else {
+				LOG(1, "invalid dax_region id %u", region_id);
+			}
+			return -1;
+		}
+
+		if (pmem2_deep_flush_write(region_id)) {
+			LOG(1, "pmem2_deep_flush_write(%u)",
+				region_id);
+			return -1;
+		}
+	} else {
+		/*
+		 * For deep_drain on normal pmem it is enough to
+		 * call msync on one page.
+		 */
+		if (pmem_msync(addr, MIN(Pagesize, len))) {
+			LOG(1, "pmem_msync(%p, %lu)", addr, len);
+			return -1;
+		}
+	}
+	return 0;
+}
diff --git a/src/pmdk/src/common/os_deep_windows.c b/src/pmdk/src/common/os_deep_windows.c
new file mode 100644
index 000000000..90994e862
--- /dev/null
+++ b/src/pmdk/src/common/os_deep_windows.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2018, Intel Corporation */
+
+/*
+ * os_deep_windows.c -- Windows abstraction layer for deep_* functions
+ */
+
+#include
+#include "out.h"
+#include "os.h"
+#include "set.h"
+#include "libpmem.h"
+
+/*
+ * os_range_deep_common -- call msync for non-device-DAX ranges
+ */
+int
+os_range_deep_common(uintptr_t addr, size_t len)
+{
+	LOG(3, "os_range_deep_common addr %p len %lu", addr, len);
+
+	if (len == 0)
+		return 0;
+	return pmem_msync((void *)addr, len);
+}
+
+/*
+ * os_part_deep_common -- common function to handle both
+ * deep_persist and deep_drain part flush cases.
+ */
+int
+os_part_deep_common(struct pool_replica *rep, unsigned partidx, void *addr,
+	size_t len, int flush)
+{
+	LOG(3, "part %p part %d addr %p len %lu flush %d",
+		rep, partidx, addr, len, flush);
+
+	if (!rep->is_pmem) {
+		/*
+		 * For a part on non-pmem, call msync on the range to
+		 * deep-flush the data. Deep drain is a no-op, as all
+		 * data is msynced to persistence.
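+		 *
+		 * Illustrative only (hypothetical caller, not part of
+		 * this file) -- deep-persisting such a part boils down to:
+		 *
+		 *	os_part_deep_common(rep, idx, addr, len, 1);
+		 *	// == pmem_msync(addr, len) here; with flush == 0
+		 *	// it is a no-op, since there is no WPQ to flush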
+ */ + + if (!flush) + return 0; + + if (pmem_msync(addr, len)) { + LOG(1, "pmem_msync(%p, %lu)", addr, len); + return -1; + } + return 0; + } + + /* Call deep flush if it was requested */ + if (flush) { + LOG(15, "pmem_deep_flush addr %p, len %lu", addr, len); + pmem_deep_flush(addr, len); + } + /* + * Before deep drain call normal drain to ensure that data + * is at least in WPQ. + */ + pmem_drain(); + + /* + * For deep_drain on normal pmem it is enough to + * call msync on one page. + */ + if (pmem_msync(addr, MIN(Pagesize, len))) { + LOG(1, "pmem_msync(%p, %lu)", addr, len); + return -1; + } + return 0; +} diff --git a/src/pmdk/src/common/page_size.h b/src/pmdk/src/common/page_size.h new file mode 100644 index 000000000..978eb33c6 --- /dev/null +++ b/src/pmdk/src/common/page_size.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ +/* Copyright 2019, IBM Corporation */ + +#ifndef PMDK_PAGE_SIZE_H +#define PMDK_PAGE_SIZE_H + +#if defined(__x86_64) || defined(_M_X64) || defined(__aarch64__) + +#define PMEM_PAGESIZE 4096 + +#elif defined(__PPC64__) + +#define PMEM_PAGESIZE 65536 + +#else + +#error unable to recognize ISA at compile time + +#endif + +#endif /* PMDK_PAGE_SIZE_H */ diff --git a/src/pmdk/src/common/pmemcommon.h b/src/pmdk/src/common/pmemcommon.h new file mode 100644 index 000000000..864736347 --- /dev/null +++ b/src/pmdk/src/common/pmemcommon.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * pmemcommon.h -- definitions for "common" module + */ + +#ifndef PMEMCOMMON_H +#define PMEMCOMMON_H 1 + +#include "mmap.h" +#include "pmemcore.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +common_init(const char *log_prefix, const char *log_level_var, + const char *log_file_var, int major_version, + int minor_version) +{ + core_init(log_prefix, log_level_var, log_file_var, major_version, + minor_version); + util_mmap_init(); +} + +static inline void +common_fini(void) +{ + util_mmap_fini(); + core_fini(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/pmemcommon.inc b/src/pmdk/src/common/pmemcommon.inc new file mode 100644 index 000000000..7b2d4335c --- /dev/null +++ b/src/pmdk/src/common/pmemcommon.inc @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017-2019, Intel Corporation +# +# src/pmemcommon.inc -- common SOURCE definitions for PMDK libraries +# + +SOURCE +=\ + $(COMMON)/bad_blocks.c\ + $(COMMON)/set_badblocks.c\ + $(COMMON)/ctl.c\ + $(COMMON)/ctl_prefault.c\ + $(COMMON)/ctl_sds.c\ + $(COMMON)/ctl_fallocate.c\ + $(COMMON)/ctl_cow.c\ + $(COMMON)/file.c\ + $(COMMON)/file_posix.c\ + $(COMMON)/mmap.c\ + $(COMMON)/mmap_posix.c\ + $(COMMON)/os_deep_linux.c\ + $(COMMON)/pool_hdr.c\ + $(COMMON)/rand.c\ + $(COMMON)/ravl.c\ + $(COMMON)/set.c\ + $(COMMON)/shutdown_state.c\ + $(COMMON)/uuid.c\ + $(call osdep, $(COMMON)/uuid,.c)\ + $(PMEM2)/pmem2_utils.c\ + $(PMEM2)/config.c\ + $(PMEM2)/persist_posix.c\ + $(PMEM2)/badblocks.c\ + $(PMEM2)/badblocks_$(OS_DIMM).c\ + $(PMEM2)/usc_$(OS_DIMM).c\ + $(PMEM2)/source.c\ + $(PMEM2)/source_posix.c + +ifeq ($(OS_KERNEL_NAME),Linux) +SOURCE +=\ + $(PMEM2)/auto_flush_linux.c\ + $(PMEM2)/deep_flush_linux.c\ + $(PMEM2)/extent_linux.c\ + $(PMEM2)/pmem2_utils_linux.c\ + $(PMEM2)/pmem2_utils_$(OS_DIMM).c +else +SOURCE +=\ + $(PMEM2)/auto_flush_none.c\ + $(PMEM2)/deep_flush_other.c\ + $(PMEM2)/extent_none.c\ + $(PMEM2)/pmem2_utils_other.c +endif + +ifeq 
($(OS_DIMM),ndctl)
+SOURCE += $(PMEM2)/region_namespace_ndctl.c
+else
+SOURCE += $(PMEM2)/region_namespace_none.c
+endif
diff --git a/src/pmdk/src/common/pool_hdr.c b/src/pmdk/src/common/pool_hdr.c
new file mode 100644
index 000000000..dc6985f09
--- /dev/null
+++ b/src/pmdk/src/common/pool_hdr.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2014-2019, Intel Corporation */
+
+/*
+ * pool_hdr.c -- pool header utilities
+ */
+
+#include
+#include
+#include
+#include
+
+#include "out.h"
+#include "pool_hdr.h"
+
+/* Determine ISA for which PMDK is currently compiled */
+#if defined(__x86_64) || defined(_M_X64)
+/* x86 -- 64 bit */
+#define PMDK_MACHINE PMDK_MACHINE_X86_64
+#define PMDK_MACHINE_CLASS PMDK_MACHINE_CLASS_64
+
+#elif defined(__aarch64__)
+/* 64 bit ARM not supported yet */
+#define PMDK_MACHINE PMDK_MACHINE_AARCH64
+#define PMDK_MACHINE_CLASS PMDK_MACHINE_CLASS_64
+
+#elif defined(__PPC64__)
+#define PMDK_MACHINE PMDK_MACHINE_PPC64
+#define PMDK_MACHINE_CLASS PMDK_MACHINE_CLASS_64
+
+#else
+/* add appropriate definitions here when porting PMDK to another ISA */
+#error unable to recognize ISA at compile time
+
+#endif
+
+/*
+ * arch_data -- (internal) determine endianness
+ */
+static uint8_t
+arch_data(void)
+{
+	uint16_t word = (PMDK_DATA_BE << 8) + PMDK_DATA_LE;
+	return ((uint8_t *)&word)[0];
+}
+
+/*
+ * util_get_arch_flags -- get architecture identification flags
+ */
+void
+util_get_arch_flags(struct arch_flags *arch_flags)
+{
+	memset(arch_flags, 0, sizeof(*arch_flags));
+	arch_flags->machine = PMDK_MACHINE;
+	arch_flags->machine_class = PMDK_MACHINE_CLASS;
+	arch_flags->data = arch_data();
+	arch_flags->alignment_desc = alignment_desc();
+}
+
+/*
+ * util_convert2le_hdr -- convert pool_hdr into little-endian byte order
+ */
+void
+util_convert2le_hdr(struct pool_hdr *hdrp)
+{
+	hdrp->major = htole32(hdrp->major);
+	hdrp->features.compat = htole32(hdrp->features.compat);
+	hdrp->features.incompat = htole32(hdrp->features.incompat);
+	hdrp->features.ro_compat = htole32(hdrp->features.ro_compat);
+	hdrp->arch_flags.alignment_desc =
+		htole64(hdrp->arch_flags.alignment_desc);
+	hdrp->arch_flags.machine = htole16(hdrp->arch_flags.machine);
+	hdrp->crtime = htole64(hdrp->crtime);
+	hdrp->checksum = htole64(hdrp->checksum);
+}
+
+/*
+ * util_convert2h_hdr_nocheck -- convert pool_hdr into host byte order
+ */
+void
+util_convert2h_hdr_nocheck(struct pool_hdr *hdrp)
+{
+	hdrp->major = le32toh(hdrp->major);
+	hdrp->features.compat = le32toh(hdrp->features.compat);
+	hdrp->features.incompat = le32toh(hdrp->features.incompat);
+	hdrp->features.ro_compat = le32toh(hdrp->features.ro_compat);
+	hdrp->crtime = le64toh(hdrp->crtime);
+	hdrp->arch_flags.machine = le16toh(hdrp->arch_flags.machine);
+	hdrp->arch_flags.alignment_desc =
+		le64toh(hdrp->arch_flags.alignment_desc);
+	hdrp->checksum = le64toh(hdrp->checksum);
+}
+
+/*
+ * util_check_arch_flags -- validates arch_flags
+ */
+int
+util_check_arch_flags(const struct arch_flags *arch_flags)
+{
+	struct arch_flags cur_af;
+	int ret = 0;
+
+	util_get_arch_flags(&cur_af);
+
+	if (!util_is_zeroed(&arch_flags->reserved,
+				sizeof(arch_flags->reserved))) {
+		ERR("invalid reserved values");
+		ret = -1;
+	}
+
+	if (arch_flags->machine != cur_af.machine) {
+		ERR("invalid machine value");
+		ret = -1;
+	}
+
+	if (arch_flags->data != cur_af.data) {
+		ERR("invalid data value");
+		ret = -1;
+	}
+
+	if (arch_flags->machine_class != cur_af.machine_class) {
+		ERR("invalid machine_class value");
+		ret = -1;
+	}
+
+	if
(arch_flags->alignment_desc != cur_af.alignment_desc) { + ERR("invalid alignment_desc value"); + ret = -1; + } + + return ret; +} + +/* + * util_get_unknown_features -- filter out unknown features flags + */ +features_t +util_get_unknown_features(features_t features, features_t known) +{ + features_t unknown; + unknown.compat = util_get_not_masked_bits( + features.compat, known.compat); + unknown.incompat = util_get_not_masked_bits( + features.incompat, known.incompat); + unknown.ro_compat = util_get_not_masked_bits( + features.ro_compat, known.ro_compat); + return unknown; +} + +/* + * util_feature_check -- check features masks + */ +int +util_feature_check(struct pool_hdr *hdrp, features_t known) +{ + LOG(3, "hdrp %p features {incompat %#x ro_compat %#x compat %#x}", + hdrp, + known.incompat, known.ro_compat, known.compat); + + features_t unknown = util_get_unknown_features(hdrp->features, known); + + /* check incompatible ("must support") features */ + if (unknown.incompat) { + ERR("unsafe to continue due to unknown incompat "\ + "features: %#x", unknown.incompat); + errno = EINVAL; + return -1; + } + + /* check RO-compatible features (force RO if unsupported) */ + if (unknown.ro_compat) { + ERR("switching to read-only mode due to unknown ro_compat "\ + "features: %#x", unknown.ro_compat); + return 0; + } + + /* check compatible ("may") features */ + if (unknown.compat) { + LOG(3, "ignoring unknown compat features: %#x", unknown.compat); + } + + return 1; +} + +/* + * util_feature_cmp -- compares features with reference + * + * returns 1 if features and reference match and 0 otherwise + */ +int +util_feature_cmp(features_t features, features_t ref) +{ + LOG(3, "features {incompat %#x ro_compat %#x compat %#x} " + "ref {incompat %#x ro_compat %#x compat %#x}", + features.incompat, features.ro_compat, features.compat, + ref.incompat, ref.ro_compat, ref.compat); + + return features.compat == ref.compat && + features.incompat == ref.incompat && + features.ro_compat == ref.ro_compat; +} + +/* + * util_feature_is_zero -- check if features flags are zeroed + * + * returns 1 if features is zeroed and 0 otherwise + */ +int +util_feature_is_zero(features_t features) +{ + const uint32_t bits = + features.compat | features.incompat | + features.ro_compat; + return bits ? 0 : 1; +} + +/* + * util_feature_is_set -- check if feature flag is set in features + * + * returns 1 if feature flag is set and 0 otherwise + */ +int +util_feature_is_set(features_t features, features_t flag) +{ + uint32_t bits = 0; + bits |= features.compat & flag.compat; + bits |= features.incompat & flag.incompat; + bits |= features.ro_compat & flag.ro_compat; + return bits ? 
1 : 0; +} + +/* + * util_feature_enable -- enable feature + */ +void +util_feature_enable(features_t *features, features_t new_feature) +{ +#define FEATURE_ENABLE(flags, X) \ + (flags) |= (X) + + FEATURE_ENABLE(features->compat, new_feature.compat); + FEATURE_ENABLE(features->incompat, new_feature.incompat); + FEATURE_ENABLE(features->ro_compat, new_feature.ro_compat); + +#undef FEATURE_ENABLE +} + +/* + * util_feature_disable -- (internal) disable feature + */ +void +util_feature_disable(features_t *features, features_t old_feature) +{ +#define FEATURE_DISABLE(flags, X) \ + (flags) &= ~(X) + + FEATURE_DISABLE(features->compat, old_feature.compat); + FEATURE_DISABLE(features->incompat, old_feature.incompat); + FEATURE_DISABLE(features->ro_compat, old_feature.ro_compat); + +#undef FEATURE_DISABLE +} + +static const features_t feature_2_pmempool_feature_map[] = { + FEAT_INCOMPAT(SINGLEHDR), /* PMEMPOOL_FEAT_SINGLEHDR */ + FEAT_INCOMPAT(CKSUM_2K), /* PMEMPOOL_FEAT_CKSUM_2K */ + FEAT_INCOMPAT(SDS), /* PMEMPOOL_FEAT_SHUTDOWN_STATE */ + FEAT_COMPAT(CHECK_BAD_BLOCKS), /* PMEMPOOL_FEAT_CHECK_BAD_BLOCKS */ +}; + +#define FEAT_2_PMEMPOOL_FEATURE_MAP_SIZE \ + ARRAY_SIZE(feature_2_pmempool_feature_map) + +static const char *str_2_pmempool_feature_map[] = { + "SINGLEHDR", + "CKSUM_2K", + "SHUTDOWN_STATE", + "CHECK_BAD_BLOCKS", +}; + +#define PMEMPOOL_FEATURE_2_STR_MAP_SIZE ARRAY_SIZE(str_2_pmempool_feature_map) + +/* + * util_str2feature -- convert string to feat_flags value + */ +features_t +util_str2feature(const char *str) +{ + /* all features have to be named in incompat_features_str array */ + COMPILE_ERROR_ON(FEAT_2_PMEMPOOL_FEATURE_MAP_SIZE != + PMEMPOOL_FEATURE_2_STR_MAP_SIZE); + + for (uint32_t f = 0; f < PMEMPOOL_FEATURE_2_STR_MAP_SIZE; ++f) { + if (strcmp(str, str_2_pmempool_feature_map[f]) == 0) { + return feature_2_pmempool_feature_map[f]; + } + } + return features_zero; +} + +/* + * util_feature2pmempool_feature -- convert feature to pmempool_feature + */ +uint32_t +util_feature2pmempool_feature(features_t feat) +{ + for (uint32_t pf = 0; pf < FEAT_2_PMEMPOOL_FEATURE_MAP_SIZE; ++pf) { + const features_t *record = + &feature_2_pmempool_feature_map[pf]; + if (util_feature_cmp(feat, *record)) { + return pf; + } + } + return UINT32_MAX; +} + +/* + * util_str2pmempool_feature -- convert string to uint32_t enum pmempool_feature + * equivalent + */ +uint32_t +util_str2pmempool_feature(const char *str) +{ + features_t fval = util_str2feature(str); + if (util_feature_is_zero(fval)) + return UINT32_MAX; + return util_feature2pmempool_feature(fval); +} + +/* + * util_feature2str -- convert uint32_t feature to string + */ +const char * +util_feature2str(features_t features, features_t *found) +{ + for (uint32_t i = 0; i < FEAT_2_PMEMPOOL_FEATURE_MAP_SIZE; ++i) { + const features_t *record = &feature_2_pmempool_feature_map[i]; + if (util_feature_is_set(features, *record)) { + if (found) + memcpy(found, record, sizeof(features_t)); + return str_2_pmempool_feature_map[i]; + } + } + return NULL; +} diff --git a/src/pmdk/src/common/pool_hdr.h b/src/pmdk/src/common/pool_hdr.h new file mode 100644 index 000000000..eb7e90753 --- /dev/null +++ b/src/pmdk/src/common/pool_hdr.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * pool_hdr.h -- internal definitions for pool header module + */ + +#ifndef PMDK_POOL_HDR_H +#define PMDK_POOL_HDR_H 1 + +#include +#include +#include +#include "uuid.h" +#include "shutdown_state.h" +#include "util.h" 
+#include "page_size.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Number of bits per type in alignment descriptor + */ +#define ALIGNMENT_DESC_BITS 4 + +/* + * architecture identification flags + * + * These flags allow to unambiguously determine the architecture + * on which the pool was created. + * + * The alignment_desc field contains information about alignment + * of the following basic types: + * - char + * - short + * - int + * - long + * - long long + * - size_t + * - os_off_t + * - float + * - double + * - long double + * - void * + * + * The alignment of each type is computed as an offset of field + * of specific type in the following structure: + * struct { + * char byte; + * type field; + * }; + * + * The value is decremented by 1 and masked by 4 bits. + * Multiple alignments are stored on consecutive 4 bits of each + * type in the order specified above. + * + * The values used in the machine, and machine_class fields are in + * principle independent of operating systems, and object formats. + * In practice they happen to match constants used in ELF object headers. + */ +struct arch_flags { + uint64_t alignment_desc; /* alignment descriptor */ + uint8_t machine_class; /* address size -- 64 bit or 32 bit */ + uint8_t data; /* data encoding -- LE or BE */ + uint8_t reserved[4]; + uint16_t machine; /* required architecture */ +}; + +#define POOL_HDR_ARCH_LEN sizeof(struct arch_flags) + +/* possible values of the machine class field in the above struct */ +#define PMDK_MACHINE_CLASS_64 2 /* 64 bit pointers, 64 bit size_t */ + +/* possible values of the machine field in the above struct */ +#define PMDK_MACHINE_X86_64 62 +#define PMDK_MACHINE_AARCH64 183 +#define PMDK_MACHINE_PPC64 21 + +/* possible values of the data field in the above struct */ +#define PMDK_DATA_LE 1 /* 2's complement, little endian */ +#define PMDK_DATA_BE 2 /* 2's complement, big endian */ + +/* + * features flags + */ +typedef struct { + uint32_t compat; /* mask: compatible "may" features */ + uint32_t incompat; /* mask: "must support" features */ + uint32_t ro_compat; /* mask: force RO if unsupported */ +} features_t; + +/* + * header used at the beginning of all types of memory pools + * + * for pools build on persistent memory, the integer types + * below are stored in little-endian byte order. 
+ */
+#define POOL_HDR_SIG_LEN 8
+#define POOL_HDR_UNUSED_SIZE 1904
+#define POOL_HDR_UNUSED2_SIZE 1976
+#define POOL_HDR_ALIGN_PAD (PMEM_PAGESIZE - 4096)
+struct pool_hdr {
+	char signature[POOL_HDR_SIG_LEN];
+	uint32_t major;			/* format major version number */
+	features_t features;		/* feature flags */
+	uuid_t poolset_uuid;		/* pool set UUID */
+	uuid_t uuid;			/* UUID of this file */
+	uuid_t prev_part_uuid;		/* prev part */
+	uuid_t next_part_uuid;		/* next part */
+	uuid_t prev_repl_uuid;		/* prev replica */
+	uuid_t next_repl_uuid;		/* next replica */
+	uint64_t crtime;		/* when created (seconds since epoch) */
+	struct arch_flags arch_flags;	/* architecture identification flags */
+	unsigned char unused[POOL_HDR_UNUSED_SIZE];	/* must be zero */
+	/* not checksummed */
+	unsigned char unused2[POOL_HDR_UNUSED2_SIZE];	/* must be zero */
+	struct shutdown_state sds;	/* shutdown status */
+	uint64_t checksum;		/* checksum of above fields */
+
+#if PMEM_PAGESIZE > 4096 /* prevent zero size array */
+	unsigned char align_pad[POOL_HDR_ALIGN_PAD];	/* alignment pad */
+#endif
+};
+
+#define POOL_HDR_SIZE (sizeof(struct pool_hdr))
+
+#define POOL_DESC_SIZE PMEM_PAGESIZE
+
+void util_convert2le_hdr(struct pool_hdr *hdrp);
+void util_convert2h_hdr_nocheck(struct pool_hdr *hdrp);
+
+void util_get_arch_flags(struct arch_flags *arch_flags);
+int util_check_arch_flags(const struct arch_flags *arch_flags);
+
+features_t util_get_unknown_features(features_t features, features_t known);
+int util_feature_check(struct pool_hdr *hdrp, features_t features);
+int util_feature_cmp(features_t features, features_t ref);
+int util_feature_is_zero(features_t features);
+int util_feature_is_set(features_t features, features_t flag);
+void util_feature_enable(features_t *features, features_t new_feature);
+void util_feature_disable(features_t *features, features_t new_feature);
+
+const char *util_feature2str(features_t feature, features_t *found);
+features_t util_str2feature(const char *str);
+uint32_t util_str2pmempool_feature(const char *str);
+uint32_t util_feature2pmempool_feature(features_t feat);
+
+/*
+ * set of macros for determining the alignment descriptor
+ */
+#define DESC_MASK		((1 << ALIGNMENT_DESC_BITS) - 1)
+#define alignment_of(t)		offsetof(struct { char c; t x; }, x)
+#define alignment_desc_of(t)	(((uint64_t)alignment_of(t) - 1) & DESC_MASK)
+#define alignment_desc()\
+(alignment_desc_of(char)	<<  0 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(short)	<<  1 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(int)		<<  2 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(long)	<<  3 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(long long)	<<  4 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(size_t)	<<  5 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(off_t)	<<  6 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(float)	<<  7 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(double)	<<  8 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(long double)	<<  9 * ALIGNMENT_DESC_BITS) |\
+(alignment_desc_of(void *)	<< 10 * ALIGNMENT_DESC_BITS)
+
+#define POOL_FEAT_ZERO	0x0000U
+
+static const features_t features_zero =
+	{POOL_FEAT_ZERO, POOL_FEAT_ZERO, POOL_FEAT_ZERO};
+
+/*
+ * compat features
+ */
+#define POOL_FEAT_CHECK_BAD_BLOCKS	0x0001U	/* check bad blocks in a pool */
+
+#define POOL_FEAT_COMPAT_ALL \
+	(POOL_FEAT_CHECK_BAD_BLOCKS)
+
+#define FEAT_COMPAT(X) \
+	{POOL_FEAT_##X, POOL_FEAT_ZERO, POOL_FEAT_ZERO}
+
+/*
+ * incompat features
+ */
+#define POOL_FEAT_SINGLEHDR	0x0001U	/* pool header only in the first part */
+#define
POOL_FEAT_CKSUM_2K 0x0002U /* only first 2K of hdr checksummed */ +#define POOL_FEAT_SDS 0x0004U /* check shutdown state */ + +#define POOL_FEAT_INCOMPAT_ALL \ + (POOL_FEAT_SINGLEHDR | POOL_FEAT_CKSUM_2K | POOL_FEAT_SDS) + +/* + * incompat features effective values (if applicable) + */ +#ifdef SDS_ENABLED +#define POOL_E_FEAT_SDS POOL_FEAT_SDS +#else +#define POOL_E_FEAT_SDS 0x0000U /* empty */ +#endif + +#define POOL_FEAT_COMPAT_VALID \ + (POOL_FEAT_CHECK_BAD_BLOCKS) + +#define POOL_FEAT_INCOMPAT_VALID \ + (POOL_FEAT_SINGLEHDR | POOL_FEAT_CKSUM_2K | POOL_E_FEAT_SDS) + +#if defined(_WIN32) || NDCTL_ENABLED +#define POOL_FEAT_INCOMPAT_DEFAULT \ + (POOL_FEAT_CKSUM_2K | POOL_E_FEAT_SDS) +#else +/* + * shutdown state support on Linux requires root access on kernel < 4.20 with + * ndctl < 63 so it is disabled by default + */ +#define POOL_FEAT_INCOMPAT_DEFAULT \ + (POOL_FEAT_CKSUM_2K) +#endif + +#if NDCTL_ENABLED +#define POOL_FEAT_COMPAT_DEFAULT \ + (POOL_FEAT_CHECK_BAD_BLOCKS) +#else +#define POOL_FEAT_COMPAT_DEFAULT \ + (POOL_FEAT_ZERO) +#endif + +#define FEAT_INCOMPAT(X) \ + {POOL_FEAT_ZERO, POOL_FEAT_##X, POOL_FEAT_ZERO} + +#define POOL_FEAT_VALID \ + {POOL_FEAT_COMPAT_VALID, POOL_FEAT_INCOMPAT_VALID, POOL_FEAT_ZERO} + +/* + * defines the first not checksummed field - all fields after this will be + * ignored during checksum calculations. + */ +#define POOL_HDR_CSUM_2K_END_OFF offsetof(struct pool_hdr, unused2) +#define POOL_HDR_CSUM_4K_END_OFF offsetof(struct pool_hdr, checksum) + +/* + * pick the first not checksummed field. 2K variant is used if + * POOL_FEAT_CKSUM_2K incompat feature is set. + */ +#define POOL_HDR_CSUM_END_OFF(hdrp) \ + ((hdrp)->features.incompat & POOL_FEAT_CKSUM_2K) \ + ? POOL_HDR_CSUM_2K_END_OFF : POOL_HDR_CSUM_4K_END_OFF + +/* ignore shutdown state if incompat feature is disabled */ +#define IGNORE_SDS(hdrp) \ + (((hdrp) != NULL) && (((hdrp)->features.incompat & POOL_FEAT_SDS) == 0)) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/queue.h b/src/pmdk/src/common/queue.h new file mode 100644 index 000000000..ea874e347 --- /dev/null +++ b/src/pmdk/src/common/queue.h @@ -0,0 +1,634 @@ +/* + * Source: glibc 2.24 (git://sourceware.org/glibc.git /misc/sys/queue.h) + * + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +#ifndef _PMDK_QUEUE_H_ +#define _PMDK_QUEUE_H_ + +/* + * This file defines five types of data structures: singly-linked lists, + * lists, simple queues, tail queues, and circular queues. + * + * A singly-linked list is headed by a single forward pointer. The + * elements are singly linked for minimum space and pointer manipulation + * overhead at the expense of O(n) removal for arbitrary elements. New + * elements can be added to the list after an existing element or at the + * head of the list. Elements being removed from the head of the list + * should use the explicit macro for this purpose for optimum + * efficiency. A singly-linked list may only be traversed in the forward + * direction. Singly-linked lists are ideal for applications with large + * datasets and few or no removals or for implementing a LIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A simple queue is headed by a pair of pointers, one the head of the + * list and the other to the tail of the list. The elements are singly + * linked to save space, so elements can only be removed from the + * head of the list. New elements can be added to the list after + * an existing element, at the head of the list, or at the end of the + * list. A simple queue may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * A circle queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or after + * an existing element, at the head of the list, or at the end of the list. + * A circle queue may be traversed in either direction, but has a more + * complex end of list detection. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +/* + * XXX This is a workaround for a bug in the llvm's static analyzer. For more + * info see https://github.com/pmem/issues/issues/309. + */ +#ifdef __clang_analyzer__ + +static void custom_assert(void) +{ + abort(); +} + +#define ANALYZER_ASSERT(x) (__builtin_expect(!(x), 0) ? 
custom_assert() : (void)0)
+#else
+#define ANALYZER_ASSERT(x) do {} while (0)
+#endif
+
+/*
+ * List definitions.
+ */
+#define PMDK_LIST_HEAD(name, type) \
+struct name { \
+	struct type *lh_first;	/* first element */ \
+}
+
+#define PMDK_LIST_HEAD_INITIALIZER(head) \
+	{ NULL }
+
+#ifdef __cplusplus
+	#define PMDK__CAST_AND_ASSIGN(x, y) x = (__typeof__(x))y;
+#else
+	#define PMDK__CAST_AND_ASSIGN(x, y) x = (void *)(y);
+#endif
+
+#define PMDK_LIST_ENTRY(type) \
+struct { \
+	struct type *le_next;	/* next element */ \
+	struct type **le_prev;	/* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define PMDK_LIST_INIT(head) do { \
+	(head)->lh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define PMDK_LIST_INSERT_AFTER(listelm, elm, field) do { \
+	if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+		(listelm)->field.le_next->field.le_prev = \
+		    &(elm)->field.le_next; \
+	(listelm)->field.le_next = (elm); \
+	(elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define PMDK_LIST_INSERT_BEFORE(listelm, elm, field) do { \
+	(elm)->field.le_prev = (listelm)->field.le_prev; \
+	(elm)->field.le_next = (listelm); \
+	*(listelm)->field.le_prev = (elm); \
+	(listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define PMDK_LIST_INSERT_HEAD(head, elm, field) do { \
+	if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+		(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+	(head)->lh_first = (elm); \
+	(elm)->field.le_prev = &(head)->lh_first; \
+} while (/*CONSTCOND*/0)
+
+#define PMDK_LIST_REMOVE(elm, field) do { \
+	ANALYZER_ASSERT((elm) != NULL); \
+	if ((elm)->field.le_next != NULL) \
+		(elm)->field.le_next->field.le_prev = \
+		    (elm)->field.le_prev; \
+	*(elm)->field.le_prev = (elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define PMDK_LIST_FOREACH(var, head, field) \
+	for ((var) = ((head)->lh_first); \
+	    (var); \
+	    (var) = ((var)->field.le_next))
+
+/*
+ * List access methods.
+ */
+#define PMDK_LIST_EMPTY(head)		((head)->lh_first == NULL)
+#define PMDK_LIST_FIRST(head)		((head)->lh_first)
+#define PMDK_LIST_NEXT(elm, field)	((elm)->field.le_next)
+
+
+/*
+ * Singly-linked List definitions.
+ */
+#define PMDK_SLIST_HEAD(name, type) \
+struct name { \
+	struct type *slh_first;	/* first element */ \
+}
+
+#define PMDK_SLIST_HEAD_INITIALIZER(head) \
+	{ NULL }
+
+#define PMDK_SLIST_ENTRY(type) \
+struct { \
+	struct type *sle_next;	/* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
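+ *
+ * Minimal usage sketch (illustrative only; the struct, head and field
+ * names are hypothetical, not part of this header):
+ *
+ *	struct item { int v; PMDK_SLIST_ENTRY(item) next; };
+ *	PMDK_SLIST_HEAD(itemhead, item) head =
+ *	    PMDK_SLIST_HEAD_INITIALIZER(head);
+ *	PMDK_SLIST_INSERT_HEAD(&head, new_item, next);
+ *	struct item *it;
+ *	PMDK_SLIST_FOREACH(it, &head, next)
+ *		consume(it->v);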
+ */ +#define PMDK_SLIST_INIT(head) do { \ + (head)->slh_first = NULL; \ +} while (/*CONSTCOND*/0) + +#define PMDK_SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + (elm)->field.sle_next = (slistelm)->field.sle_next; \ + (slistelm)->field.sle_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_SLIST_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.sle_next = (head)->slh_first; \ + (head)->slh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_SLIST_REMOVE_HEAD(head, field) do { \ + (head)->slh_first = (head)->slh_first->field.sle_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_SLIST_REMOVE(head, elm, type, field) do { \ + if ((head)->slh_first == (elm)) { \ + PMDK_SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = (head)->slh_first; \ + while(curelm->field.sle_next != (elm)) \ + curelm = curelm->field.sle_next; \ + curelm->field.sle_next = \ + curelm->field.sle_next->field.sle_next; \ + } \ +} while (/*CONSTCOND*/0) + +#define PMDK_SLIST_FOREACH(var, head, field) \ + for((var) = (head)->slh_first; (var); (var) = (var)->field.sle_next) + +/* + * Singly-linked List access methods. + */ +#define PMDK_SLIST_EMPTY(head) ((head)->slh_first == NULL) +#define PMDK_SLIST_FIRST(head) ((head)->slh_first) +#define PMDK_SLIST_NEXT(elm, field) ((elm)->field.sle_next) + + +/* + * Singly-linked Tail queue declarations. + */ +#define PMDK_STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first; /* first element */ \ + struct type **stqh_last; /* addr of last next element */ \ +} + +#define PMDK_STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define PMDK_STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define PMDK_STAILQ_INIT(head) do { \ + (head)->stqh_first = NULL; \ + (head)->stqh_last = &(head)->stqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.stqe_next = (head)->stqh_first) == NULL) \ + (head)->stqh_last = &(elm)->field.stqe_next; \ + (head)->stqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.stqe_next = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &(elm)->field.stqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\ + (head)->stqh_last = &(elm)->field.stqe_next; \ + (listelm)->field.stqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_REMOVE_HEAD(head, field) do { \ + if (((head)->stqh_first = (head)->stqh_first->field.stqe_next) == NULL) \ + (head)->stqh_last = &(head)->stqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_REMOVE(head, elm, type, field) do { \ + if ((head)->stqh_first == (elm)) { \ + PMDK_STAILQ_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->stqh_first; \ + while (curelm->field.stqe_next != (elm)) \ + curelm = curelm->field.stqe_next; \ + if ((curelm->field.stqe_next = \ + curelm->field.stqe_next->field.stqe_next) == NULL) \ + (head)->stqh_last = &(curelm)->field.stqe_next; \ + } \ +} while (/*CONSTCOND*/0) + +#define PMDK_STAILQ_FOREACH(var, head, field) \ + for ((var) = ((head)->stqh_first); \ + (var); \ + (var) = ((var)->field.stqe_next)) + +#define PMDK_STAILQ_CONCAT(head1, head2) do { \ + if (!PMDK_STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + 
(head1)->stqh_last = (head2)->stqh_last; \ + PMDK_STAILQ_INIT((head2)); \ + } \ +} while (/*CONSTCOND*/0) + +/* + * Singly-linked Tail queue access methods. + */ +#define PMDK_STAILQ_EMPTY(head) ((head)->stqh_first == NULL) +#define PMDK_STAILQ_FIRST(head) ((head)->stqh_first) +#define PMDK_STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + + +/* + * Simple queue definitions. + */ +#define PMDK_SIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqh_first; /* first element */ \ + struct type **sqh_last; /* addr of last next element */ \ +} + +#define PMDK_SIMPLEQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).sqh_first } + +#define PMDK_SIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqe_next; /* next element */ \ +} + +/* + * Simple queue functions. + */ +#define PMDK_SIMPLEQ_INIT(head) do { \ + (head)->sqh_first = NULL; \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (head)->sqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.sqe_next = NULL; \ + *(head)->sqh_last = (elm); \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (listelm)->field.sqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_REMOVE_HEAD(head, field) do { \ + if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_REMOVE(head, elm, type, field) do { \ + if ((head)->sqh_first == (elm)) { \ + PMDK_SIMPLEQ_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->sqh_first; \ + while (curelm->field.sqe_next != (elm)) \ + curelm = curelm->field.sqe_next; \ + if ((curelm->field.sqe_next = \ + curelm->field.sqe_next->field.sqe_next) == NULL) \ + (head)->sqh_last = &(curelm)->field.sqe_next; \ + } \ +} while (/*CONSTCOND*/0) + +#define PMDK_SIMPLEQ_FOREACH(var, head, field) \ + for ((var) = ((head)->sqh_first); \ + (var); \ + (var) = ((var)->field.sqe_next)) + +/* + * Simple queue access methods. + */ +#define PMDK_SIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL) +#define PMDK_SIMPLEQ_FIRST(head) ((head)->sqh_first) +#define PMDK_SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) + + +/* + * Tail queue definitions. + */ +#define PMDK__TAILQ_HEAD(name, type, qual) \ +struct name { \ + qual type *tqh_first; /* first element */ \ + qual type *qual *tqh_last; /* addr of last next element */ \ +} +#define PMDK_TAILQ_HEAD(name, type) PMDK__TAILQ_HEAD(name, struct type,) + +#define PMDK_TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define PMDK__TAILQ_ENTRY(type, qual) \ +struct { \ + qual type *tqe_next; /* next element */ \ + qual type *qual *tqe_prev; /* address of previous next element */\ +} +#define PMDK_TAILQ_ENTRY(type) PMDK__TAILQ_ENTRY(struct type,) + +/* + * Tail queue functions. 
+ */ +#define PMDK_TAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_last = &(head)->tqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &(elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + (elm)->field.tqe_next = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_REMOVE(head, elm, field) do { \ + ANALYZER_ASSERT((elm) != NULL); \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_TAILQ_FOREACH(var, head, field) \ + for ((var) = ((head)->tqh_first); \ + (var); \ + (var) = ((var)->field.tqe_next)) + +#define PMDK_TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \ + (var); \ + (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last))) + +#define PMDK_TAILQ_CONCAT(head1, head2, field) do { \ + if (!PMDK_TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + PMDK_TAILQ_INIT((head2)); \ + } \ +} while (/*CONSTCOND*/0) + +/* + * Tail queue access methods. + */ +#define PMDK_TAILQ_EMPTY(head) ((head)->tqh_first == NULL) +#define PMDK_TAILQ_FIRST(head) ((head)->tqh_first) +#define PMDK_TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define PMDK_TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +#define PMDK_TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + + +/* + * Circular queue definitions. + */ +#define PMDK_CIRCLEQ_HEAD(name, type) \ +struct name { \ + struct type *cqh_first; /* first element */ \ + struct type *cqh_last; /* last element */ \ +} + +#define PMDK_CIRCLEQ_HEAD_INITIALIZER(head) \ + { (void *)&(head), (void *)&(head) } + +#define PMDK_CIRCLEQ_ENTRY(type) \ +struct { \ + struct type *cqe_next; /* next element */ \ + struct type *cqe_prev; /* previous element */ \ +} + +/* + * Circular queue functions. 
+ */ +#define PMDK_CIRCLEQ_INIT(head) do { \ + PMDK__CAST_AND_ASSIGN((head)->cqh_first, (head)); \ + PMDK__CAST_AND_ASSIGN((head)->cqh_last, (head)); \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm)->field.cqe_next; \ + (elm)->field.cqe_prev = (listelm); \ + if ((listelm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (listelm)->field.cqe_next->field.cqe_prev = (elm); \ + (listelm)->field.cqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm); \ + (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ + if ((listelm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (listelm)->field.cqe_prev->field.cqe_next = (elm); \ + (listelm)->field.cqe_prev = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.cqe_next = (head)->cqh_first; \ + (elm)->field.cqe_prev = (void *)(head); \ + if ((head)->cqh_last == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (head)->cqh_first->field.cqe_prev = (elm); \ + (head)->cqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_INSERT_TAIL(head, elm, field) do { \ + PMDK__CAST_AND_ASSIGN((elm)->field.cqe_next, (head)); \ + (elm)->field.cqe_prev = (head)->cqh_last; \ + if ((head)->cqh_first == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (head)->cqh_last->field.cqe_next = (elm); \ + (head)->cqh_last = (elm); \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_REMOVE(head, elm, field) do { \ + if ((elm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm)->field.cqe_prev; \ + else \ + (elm)->field.cqe_next->field.cqe_prev = \ + (elm)->field.cqe_prev; \ + if ((elm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm)->field.cqe_next; \ + else \ + (elm)->field.cqe_prev->field.cqe_next = \ + (elm)->field.cqe_next; \ +} while (/*CONSTCOND*/0) + +#define PMDK_CIRCLEQ_FOREACH(var, head, field) \ + for ((var) = ((head)->cqh_first); \ + (var) != (const void *)(head); \ + (var) = ((var)->field.cqe_next)) + +#define PMDK_CIRCLEQ_FOREACH_REVERSE(var, head, field) \ + for ((var) = ((head)->cqh_last); \ + (var) != (const void *)(head); \ + (var) = ((var)->field.cqe_prev)) + +/* + * Circular queue access methods. + */ +#define PMDK_CIRCLEQ_EMPTY(head) ((head)->cqh_first == (void *)(head)) +#define PMDK_CIRCLEQ_FIRST(head) ((head)->cqh_first) +#define PMDK_CIRCLEQ_LAST(head) ((head)->cqh_last) +#define PMDK_CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next) +#define PMDK_CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev) + +#define PMDK_CIRCLEQ_LOOP_NEXT(head, elm, field) \ + (((elm)->field.cqe_next == (void *)(head)) \ + ? ((head)->cqh_first) \ + : ((elm)->field.cqe_next)) +#define PMDK_CIRCLEQ_LOOP_PREV(head, elm, field) \ + (((elm)->field.cqe_prev == (void *)(head)) \ + ? ((head)->cqh_last) \ + : ((elm)->field.cqe_prev)) + +/* + * Sorted queue functions. 
+ */
+#define PMDK_SORTEDQ_HEAD(name, type) PMDK_CIRCLEQ_HEAD(name, type)
+#define PMDK_SORTEDQ_HEAD_INITIALIZER(head) PMDK_CIRCLEQ_HEAD_INITIALIZER(head)
+#define PMDK_SORTEDQ_ENTRY(type) PMDK_CIRCLEQ_ENTRY(type)
+#define PMDK_SORTEDQ_INIT(head) PMDK_CIRCLEQ_INIT(head)
+#define PMDK_SORTEDQ_INSERT(head, elm, field, type, comparer) { \
+	type *_elm_it; \
+	for (_elm_it = (head)->cqh_first; \
+	    ((_elm_it != (void *)(head)) && \
+	    (comparer(_elm_it, (elm)) < 0)); \
+	    _elm_it = _elm_it->field.cqe_next) \
+		/*NOTHING*/; \
+	if (_elm_it == (void *)(head)) \
+		PMDK_CIRCLEQ_INSERT_TAIL(head, elm, field); \
+	else \
+		PMDK_CIRCLEQ_INSERT_BEFORE(head, _elm_it, elm, field); \
+}
+#define PMDK_SORTEDQ_REMOVE(head, elm, field) PMDK_CIRCLEQ_REMOVE(head, elm, field)
+#define PMDK_SORTEDQ_FOREACH(var, head, field) PMDK_CIRCLEQ_FOREACH(var, head, field)
+#define PMDK_SORTEDQ_FOREACH_REVERSE(var, head, field) \
+	PMDK_CIRCLEQ_FOREACH_REVERSE(var, head, field)
+
+/*
+ * Sorted queue access methods.
+ */
+#define PMDK_SORTEDQ_EMPTY(head) PMDK_CIRCLEQ_EMPTY(head)
+#define PMDK_SORTEDQ_FIRST(head) PMDK_CIRCLEQ_FIRST(head)
+#define PMDK_SORTEDQ_LAST(head) PMDK_CIRCLEQ_LAST(head)
+#define PMDK_SORTEDQ_NEXT(elm, field) PMDK_CIRCLEQ_NEXT(elm, field)
+#define PMDK_SORTEDQ_PREV(elm, field) PMDK_CIRCLEQ_PREV(elm, field)
+
+#endif	/* sys/queue.h */
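The queue macro families above are intrusive: the link fields live inside the
caller's own structures, so no separate node allocations are needed. A minimal
usage sketch for the singly-linked list flavor follows (hypothetical struct and
field names; it assumes the PMDK_SLIST_HEAD/PMDK_SLIST_ENTRY declarations from
earlier in this header). The tail-queue, simple-queue and circular-queue
families follow the same pattern:

	#include <assert.h>
	#include <stdio.h>
	#include "queue.h"	/* assumed include path for this header */

	struct item {
		int value;
		PMDK_SLIST_ENTRY(item) link;	/* embedded link field */
	};

	PMDK_SLIST_HEAD(item_head, item);

	int
	main(void)
	{
		struct item_head head;
		struct item a = { 1, { NULL } };
		struct item b = { 2, { NULL } };

		PMDK_SLIST_INIT(&head);
		PMDK_SLIST_INSERT_HEAD(&head, &a, link);
		PMDK_SLIST_INSERT_AFTER(&a, &b, link);	/* list: a, b */

		struct item *it;
		PMDK_SLIST_FOREACH(it, &head, link)
			printf("%d\n", it->value);	/* prints 1, then 2 */

		PMDK_SLIST_REMOVE(&head, &b, item, link);
		assert(PMDK_SLIST_NEXT(&a, link) == NULL);

		return 0;
	}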
diff --git a/src/pmdk/src/common/rand.c b/src/pmdk/src/common/rand.c
new file mode 100644
index 000000000..e7fa5eb0c
--- /dev/null
+++ b/src/pmdk/src/common/rand.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2019, Intel Corporation */
+
+/*
+ * rand.c -- random utils
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "rand.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#include <bcrypt.h>
+#else
+#include <sys/syscall.h>
+#endif
+
+/*
+ * hash64 -- a u64 -> u64 hash
+ */
+uint64_t
+hash64(uint64_t x)
+{
+	x += 0x9e3779b97f4a7c15;
+	x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
+	x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
+	return x ^ (x >> 31);
+}
+
+/*
+ * xoshiro256** random generator
+ *
+ * Fastest available good PRNG as of 2018 (sub-nanosecond per entry), produces
+ * much better output than old stuff like rand() or the Mersenne Twister.
+ *
+ * By David Blackman and Sebastiano Vigna; PD/CC0 2018.
+ *
+ * It has a period of 2²⁵⁶-1, excluding all-zero state; it must always get
+ * initialized to avoid that zero.
+ */
+
+static inline uint64_t rotl(const uint64_t x, int k)
+{
+	/* optimized to a single instruction on x86 */
+	return (x << k) | (x >> (64 - k));
+}
+
+/*
+ * rnd64_r -- return 64-bits of randomness
+ */
+uint64_t
+rnd64_r(rng_t *state)
+{
+	uint64_t *s = (void *)state;
+
+	const uint64_t result = rotl(s[1] * 5, 7) * 9;
+	const uint64_t t = s[1] << 17;
+
+	s[2] ^= s[0];
+	s[3] ^= s[1];
+	s[1] ^= s[2];
+	s[0] ^= s[3];
+
+	s[2] ^= t;
+
+	s[3] = rotl(s[3], 45);
+
+	return result;
+}
+
+/*
+ * randomize_r -- initialize random generator
+ *
+ * Seed of 0 means random.
+ */
+void
+randomize_r(rng_t *state, uint64_t seed)
+{
+	if (!seed) {
+#ifdef SYS_getrandom
+		/* We want getentropy() but ancient Red Hat lacks it. */
+		if (syscall(SYS_getrandom, state, sizeof(rng_t), 0) != -1)
+			return; /* nofail, but ENOSYS on kernel < 3.16 */
+#elif _WIN32
+#pragma comment(lib, "Bcrypt.lib")
+		if (BCryptGenRandom(NULL, (PUCHAR)state, sizeof(rng_t),
+				BCRYPT_USE_SYSTEM_PREFERRED_RNG)) {
+			return;
+		}
+#endif
+		seed = (uint64_t)getpid();
+	}
+
+	uint64_t *s = (void *)state;
+	s[0] = hash64(seed);
+	s[1] = hash64(s[0]);
+	s[2] = hash64(s[1]);
+	s[3] = hash64(s[2]);
+}
+
+static rng_t global_rng;
+
+/*
+ * rnd64 -- global-state version of rnd64_r
+ */
+uint64_t
+rnd64(void)
+{
+	return rnd64_r(&global_rng);
+}
+
+/*
+ * randomize -- initialize the global RNG
+ */
+void
+randomize(uint64_t seed)
+{
+	randomize_r(&global_rng, seed);
+}
diff --git a/src/pmdk/src/common/rand.h b/src/pmdk/src/common/rand.h
new file mode 100644
index 000000000..8009c6674
--- /dev/null
+++ b/src/pmdk/src/common/rand.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2019-2020, Intel Corporation */
+
+/*
+ * rand.h -- random utils
+ */
+
+#ifndef RAND_H
+#define RAND_H 1
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t rng_t[4];
+
+uint64_t hash64(uint64_t x);
+uint64_t rnd64_r(rng_t *rng);
+void randomize_r(rng_t *rng, uint64_t seed);
+uint64_t rnd64(void);
+void randomize(uint64_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
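A short usage sketch for the rand.h API above (illustrative, assuming the
header is reachable on the include path). A nonzero seed produces a fully
reproducible sequence; a zero seed asks the OS for entropy, falling back to
the PID:

	#include <inttypes.h>
	#include <stdio.h>
	#include "rand.h"

	int
	main(void)
	{
		rng_t rng;

		/* fixed seed: the three values below never change */
		randomize_r(&rng, 42);
		for (int i = 0; i < 3; i++)
			printf("%016" PRIx64 "\n", rnd64_r(&rng));

		/* seed 0: OS entropy (or the PID as a last resort) */
		randomize(0);
		printf("%016" PRIx64 "\n", rnd64());

		return 0;
	}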
diff --git a/src/pmdk/src/common/ravl.c b/src/pmdk/src/common/ravl.c
new file mode 100644
index 000000000..d00ee7bd4
--- /dev/null
+++ b/src/pmdk/src/common/ravl.c
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2018-2020, Intel Corporation */
+
+/*
+ * ravl.c -- implementation of a RAVL tree
+ * https://sidsen.azurewebsites.net//papers/ravl-trees-journal.pdf
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "out.h"
+#include "ravl.h"
+#include "alloc.h"
+
+#define RAVL_DEFAULT_DATA_SIZE (sizeof(void *))
+
+enum ravl_slot_type {
+	RAVL_LEFT,
+	RAVL_RIGHT,
+
+	MAX_SLOTS,
+
+	RAVL_ROOT
+};
+
+struct ravl_node {
+	struct ravl_node *parent;
+	struct ravl_node *slots[MAX_SLOTS];
+	int32_t rank; /* cannot be greater than height of the subtree */
+	int32_t pointer_based;
+	char data[];
+};
+
+struct ravl {
+	struct ravl_node *root;
+	ravl_compare *compare;
+	size_t data_size;
+};
+
+/*
+ * ravl_new_sized -- creates a new ravl tree instance
+ */
+struct ravl *
+ravl_new_sized(ravl_compare *compare, size_t data_size)
+{
+	struct ravl *r = Malloc(sizeof(*r));
+	if (r == NULL) {
+		ERR("!Malloc");
+		return r;
+	}
+
+	r->compare = compare;
+	r->root = NULL;
+	r->data_size = data_size;
+
+	return r;
+}
+
+/*
+ * ravl_new -- creates a new tree that stores data pointers
+ */
+struct ravl *
+ravl_new(ravl_compare *compare)
+{
+	return ravl_new_sized(compare, RAVL_DEFAULT_DATA_SIZE);
+}
+
+/*
+ * ravl_foreach_node -- (internal) recursively traverses the given subtree,
+ * calls the callback in an in-order fashion. Optionally frees the given node.
+ */
+static void
+ravl_foreach_node(struct ravl_node *n, ravl_cb cb, void *arg, int free_node)
+{
+	if (n == NULL)
+		return;
+
+	ravl_foreach_node(n->slots[RAVL_LEFT], cb, arg, free_node);
+	if (cb)
+		cb((void *)n->data, arg);
+	ravl_foreach_node(n->slots[RAVL_RIGHT], cb, arg, free_node);
+
+	if (free_node)
+		Free(n);
+}
+
+/*
+ * ravl_clear -- clears the entire tree, starting from the root
+ */
+void
+ravl_clear(struct ravl *ravl)
+{
+	ravl_foreach_node(ravl->root, NULL, NULL, 1);
+	ravl->root = NULL;
+}
+
+/*
+ * ravl_delete_cb -- clears and deletes the given ravl instance, calls callback
+ */
+void
+ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg)
+{
+	ravl_foreach_node(ravl->root, cb, arg, 1);
+	Free(ravl);
+}
+
+/*
+ * ravl_delete -- clears and deletes the given ravl instance
+ */
+void
+ravl_delete(struct ravl *ravl)
+{
+	ravl_delete_cb(ravl, NULL, NULL);
+}
+
+/*
+ * ravl_foreach -- traverses the entire tree, calling callback for every node
+ */
+void
+ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg)
+{
+	ravl_foreach_node(ravl->root, cb, arg, 0);
+}
+
+/*
+ * ravl_empty -- checks whether the given tree is empty
+ */
+int
+ravl_empty(struct ravl *ravl)
+{
+	return ravl->root == NULL;
+}
+
+/*
+ * ravl_node_insert_constructor -- node data constructor for ravl_insert
+ */
+static void
+ravl_node_insert_constructor(void *data, size_t data_size, const void *arg)
+{
+	/* copy only the 'arg' pointer */
+	memcpy(data, &arg, sizeof(arg));
+}
+
+/*
+ * ravl_node_copy_constructor -- node data constructor for ravl_emplace_copy
+ */
+static void
+ravl_node_copy_constructor(void *data, size_t data_size, const void *arg)
+{
+	memcpy(data, arg, data_size);
+}
+
+/*
+ * ravl_new_node -- (internal) allocates and initializes a new node
+ */
+static struct ravl_node *
+ravl_new_node(struct ravl *ravl, ravl_constr constr, const void *arg)
+{
+	struct ravl_node *n = Malloc(sizeof(*n) + ravl->data_size);
+	if (n == NULL) {
+		ERR("!Malloc");
+		return n;
+	}
+
+	n->parent = NULL;
+	n->slots[RAVL_LEFT] = NULL;
+	n->slots[RAVL_RIGHT] = NULL;
+	n->rank = 0;
+	n->pointer_based = constr == ravl_node_insert_constructor;
+	constr(n->data, ravl->data_size, arg);
+
+	return n;
+}
+
+/*
+ * ravl_slot_opposite -- (internal) returns the opposite slot type, cannot be
+ * called for root type
+ */
+static enum ravl_slot_type
+ravl_slot_opposite(enum ravl_slot_type t)
+{
+	ASSERTne(t, RAVL_ROOT);
+
+	return t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT;
+}
+
+/*
+ * ravl_node_slot_type -- (internal) returns the type of the given node:
+ * left child, right child or root
+ */
+static enum ravl_slot_type
+ravl_node_slot_type(struct ravl_node *n)
+{
+	if (n->parent == NULL)
+		return RAVL_ROOT;
+
+	return n->parent->slots[RAVL_LEFT] == n ? RAVL_LEFT : RAVL_RIGHT;
+}
+
+/*
+ * ravl_node_sibling -- (internal) returns the sibling of the given node,
+ * NULL if the node is root (has no parent)
+ */
+static struct ravl_node *
+ravl_node_sibling(struct ravl_node *n)
+{
+	enum ravl_slot_type t = ravl_node_slot_type(n);
+	if (t == RAVL_ROOT)
+		return NULL;
+
+	return n->parent->slots[t == RAVL_LEFT ? RAVL_RIGHT : RAVL_LEFT];
+}
+
+/*
+ * ravl_node_ref -- (internal) returns the pointer to the memory location in
+ * which the given node resides
+ */
+static struct ravl_node **
+ravl_node_ref(struct ravl *ravl, struct ravl_node *n)
+{
+	enum ravl_slot_type t = ravl_node_slot_type(n);
+
+	return t == RAVL_ROOT ? &ravl->root : &n->parent->slots[t];
+}
+
+/*
+ * ravl_rotate -- (internal) performs a rotation around a given node
+ *
+ * The node n swaps place with its parent. If n is right child, parent becomes
+ * the left child of n, otherwise parent becomes right child of n.
+ */
+static void
+ravl_rotate(struct ravl *ravl, struct ravl_node *n)
+{
+	ASSERTne(n->parent, NULL);
+	struct ravl_node *p = n->parent;
+	struct ravl_node **pref = ravl_node_ref(ravl, p);
+
+	enum ravl_slot_type t = ravl_node_slot_type(n);
+	enum ravl_slot_type t_opposite = ravl_slot_opposite(t);
+
+	n->parent = p->parent;
+	p->parent = n;
+	*pref = n;
+
+	if ((p->slots[t] = n->slots[t_opposite]) != NULL)
+		p->slots[t]->parent = p;
+	n->slots[t_opposite] = p;
+}
+
+/*
+ * ravl_node_rank -- (internal) returns the rank of the node
+ *
+ * For the purpose of balancing, NULL nodes have rank -1.
+ */
+static int
+ravl_node_rank(struct ravl_node *n)
+{
+	return n == NULL ? -1 : n->rank;
+}
+
+/*
+ * ravl_node_rank_difference_parent -- (internal) returns the rank difference
+ * between parent node p and its child n
+ *
+ * Every rank difference must be positive.
+ *
+ * Either of these can be NULL.
+ */
+static int
+ravl_node_rank_difference_parent(struct ravl_node *p, struct ravl_node *n)
+{
+	return ravl_node_rank(p) - ravl_node_rank(n);
+}
+
+/*
+ * ravl_node_rank_difference -- (internal) returns the rank difference between
+ * parent and its child
+ *
+ * Can be used to check if a given node is an i-child.
+ */
+static int
+ravl_node_rank_difference(struct ravl_node *n)
+{
+	return ravl_node_rank_difference_parent(n->parent, n);
+}
+
+/*
+ * ravl_node_is_i_j -- (internal) checks if a given node is strictly i,j-node
+ */
+static int
+ravl_node_is_i_j(struct ravl_node *n, int i, int j)
+{
+	return (ravl_node_rank_difference_parent(n, n->slots[RAVL_LEFT]) == i &&
+	    ravl_node_rank_difference_parent(n, n->slots[RAVL_RIGHT]) == j);
+}
+
+/*
+ * ravl_node_is -- (internal) checks if a given node is i,j-node or j,i-node
+ */
+static int
+ravl_node_is(struct ravl_node *n, int i, int j)
+{
+	return ravl_node_is_i_j(n, i, j) || ravl_node_is_i_j(n, j, i);
+}
+
+/*
+ * ravl_node_promote -- promotes a given node by increasing its rank
+ */
+static void
+ravl_node_promote(struct ravl_node *n)
+{
+	n->rank += 1;
+}
+
+/*
+ * ravl_node_demote -- demotes a given node by decreasing its rank
+ */
+static void
+ravl_node_demote(struct ravl_node *n)
+{
+	ASSERT(n->rank > 0);
+	n->rank -= 1;
+}
+
+/*
+ * ravl_balance -- balances the tree after insert
+ *
+ * This function must restore the invariant that every rank
+ * difference is positive.
+ */
+static void
+ravl_balance(struct ravl *ravl, struct ravl_node *n)
+{
+	/* walk up the tree, promoting nodes */
+	while (n->parent && ravl_node_is(n->parent, 0, 1)) {
+		ravl_node_promote(n->parent);
+		n = n->parent;
+	}
+
+	/*
+	 * Either the rank rule holds or n is a 0-child whose sibling is an
+	 * i-child with i > 1.
+	 */
+	struct ravl_node *s = ravl_node_sibling(n);
+	if (!(ravl_node_rank_difference(n) == 0 &&
+	    ravl_node_rank_difference_parent(n->parent, s) > 1))
+		return;
+
+	struct ravl_node *y = n->parent;
+	/* if n is a left child, let z be n's right child and vice versa */
+	enum ravl_slot_type t = ravl_slot_opposite(ravl_node_slot_type(n));
+	struct ravl_node *z = n->slots[t];
+
+	if (z == NULL || ravl_node_rank_difference(z) == 2) {
+		ravl_rotate(ravl, n);
+		ravl_node_demote(y);
+	} else if (ravl_node_rank_difference(z) == 1) {
+		ravl_rotate(ravl, z);
+		ravl_rotate(ravl, z);
+		ravl_node_promote(z);
+		ravl_node_demote(n);
+		ravl_node_demote(y);
+	}
+}
+
+/*
+ * ravl_insert -- insert data into the tree
+ */
+int
+ravl_insert(struct ravl *ravl, const void *data)
+{
+	return ravl_emplace(ravl, ravl_node_insert_constructor, data);
+}
+
+/*
+ * ravl_emplace_copy -- copy construct data inside of a new tree node
+ */
+int
+ravl_emplace_copy(struct ravl *ravl, const void *data)
+{
+	return ravl_emplace(ravl, ravl_node_copy_constructor, data);
+}
+
+/*
+ * ravl_emplace -- construct data inside of a new tree node
+ */
+int
+ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg)
+{
+	LOG(6, NULL);
+
+	struct ravl_node *n = ravl_new_node(ravl, constr, arg);
+	if (n == NULL)
+		return -1;
+
+	/* walk down the tree and insert the new node into a missing slot */
+	struct ravl_node **dstp = &ravl->root;
+	struct ravl_node *dst = NULL;
+	while (*dstp != NULL) {
+		dst = (*dstp);
+		int cmp_result = ravl->compare(ravl_data(n), ravl_data(dst));
+		if (cmp_result == 0)
+			goto error_duplicate;
+
+		dstp = &dst->slots[cmp_result > 0];
+	}
+	n->parent = dst;
+	*dstp = n;
+
+	ravl_balance(ravl, n);
+
+	return 0;
+
+error_duplicate:
+	errno = EEXIST;
+	Free(n);
+	return -1;
+}
+
+/*
+ * ravl_node_type_most -- (internal) returns left-most or right-most node in
+ * the subtree
+ */
+static struct ravl_node *
+ravl_node_type_most(struct ravl_node *n, enum ravl_slot_type t)
+{
+	while (n->slots[t] != NULL)
+		n = n->slots[t];
+
+	return n;
+}
+
+/*
+ * ravl_node_cessor -- (internal) returns the successor or predecessor of the
+ * node
+ */
+static struct ravl_node *
+ravl_node_cessor(struct ravl_node *n, enum ravl_slot_type t)
+{
+	/*
+	 * If t child is present, we are looking for t-opposite-most node
+	 * in t child subtree
+	 */
+	if (n->slots[t])
+		return ravl_node_type_most(n->slots[t], ravl_slot_opposite(t));
+
+	/* otherwise get the first parent on the t path */
+	while (n->parent != NULL && n == n->parent->slots[t])
+		n = n->parent;
+
+	return n->parent;
+}
+
+/*
+ * ravl_node_successor -- (internal) returns node's successor
+ *
+ * It's the first node larger than n.
+ */
+static struct ravl_node *
+ravl_node_successor(struct ravl_node *n)
+{
+	return ravl_node_cessor(n, RAVL_RIGHT);
+}
+
+/*
+ * ravl_node_predecessor -- (internal) returns node's predecessor
+ *
+ * It's the first node smaller than n.
+ */
+static struct ravl_node *
+ravl_node_predecessor(struct ravl_node *n)
+{
+	return ravl_node_cessor(n, RAVL_LEFT);
+}
+
+/*
+ * ravl_predicate_holds -- (internal) verifies the given predicate for
+ * the current node in the search path
+ *
+ * If the predicate holds for the given node or a node that can be directly
+ * derived from it, returns 1. Otherwise returns 0.
+ */
+static int
+ravl_predicate_holds(struct ravl *ravl, int result, struct ravl_node **ret,
+	struct ravl_node *n, const void *data, enum ravl_predicate flags)
+{
+	if (flags & RAVL_PREDICATE_EQUAL) {
+		if (result == 0) {
+			*ret = n;
+			return 1;
+		}
+	}
+	if (flags & RAVL_PREDICATE_GREATER) {
+		if (result < 0) { /* data < n->data */
+			*ret = n;
+			return 0;
+		} else if (result == 0) {
+			*ret = ravl_node_successor(n);
+			return 1;
+		}
+	}
+	if (flags & RAVL_PREDICATE_LESS) {
+		if (result > 0) { /* data > n->data */
+			*ret = n;
+			return 0;
+		} else if (result == 0) {
+			*ret = ravl_node_predecessor(n);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * ravl_find -- searches for the node in the tree
+ */
+struct ravl_node *
+ravl_find(struct ravl *ravl, const void *data, enum ravl_predicate flags)
+{
+	LOG(6, NULL);
+
+	struct ravl_node *r = NULL;
+	struct ravl_node *n = ravl->root;
+	while (n) {
+		int result = ravl->compare(data, ravl_data(n));
+		if (ravl_predicate_holds(ravl, result, &r, n, data, flags))
+			return r;
+
+		n = n->slots[result > 0];
+	}
+
+	return r;
+}
+
+/*
+ * ravl_remove -- removes the given node from the tree
+ */
+void
+ravl_remove(struct ravl *ravl, struct ravl_node *n)
+{
+	LOG(6, NULL);
+
+	if (n->slots[RAVL_LEFT] != NULL && n->slots[RAVL_RIGHT] != NULL) {
+		/* if both children are present, remove the successor instead */
+		struct ravl_node *s = ravl_node_successor(n);
+		memcpy(n->data, s->data, ravl->data_size);
+
+		ravl_remove(ravl, s);
+	} else {
+		/* swap n with the child that may exist */
+		struct ravl_node *r = n->slots[RAVL_LEFT] ?
+			n->slots[RAVL_LEFT] : n->slots[RAVL_RIGHT];
+		if (r != NULL)
+			r->parent = n->parent;
+
+		*ravl_node_ref(ravl, n) = r;
+		Free(n);
+	}
+}
+
+/*
+ * ravl_data -- returns the data contained within the node
+ */
+void *
+ravl_data(struct ravl_node *node)
+{
+	if (node->pointer_based) {
+		void *data;
+		memcpy(&data, node->data, sizeof(void *));
+		return data;
+	} else {
+		return (void *)node->data;
+	}
+}
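A minimal usage sketch for the RAVL API above (hypothetical comparator and
keys). In the default pointer-based mode created by ravl_new(), the data
pointer itself is stored in the node, so small integer keys can be passed
through casts:

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>
	#include "ravl.h"

	/* order nodes by the integer value carried in the data pointer */
	static int
	cmp_int(const void *lhs, const void *rhs)
	{
		intptr_t a = (intptr_t)lhs;
		intptr_t b = (intptr_t)rhs;

		return (a > b) - (a < b);
	}

	int
	main(void)
	{
		struct ravl *tree = ravl_new(cmp_int);
		if (tree == NULL)
			return 1;

		/* inserting an already present key fails with EEXIST */
		if (ravl_insert(tree, (void *)(intptr_t)10) ||
		    ravl_insert(tree, (void *)(intptr_t)20))
			return 1;

		/* find the smallest element >= 15, i.e. 20 */
		struct ravl_node *n = ravl_find(tree, (void *)(intptr_t)15,
				RAVL_PREDICATE_GREATER_EQUAL);
		if (n != NULL) {
			printf("%" PRIdPTR "\n", (intptr_t)ravl_data(n));
			ravl_remove(tree, n);
		}

		ravl_delete(tree);
		return 0;
	}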
diff --git a/src/pmdk/src/common/ravl.h b/src/pmdk/src/common/ravl.h
new file mode 100644
index 000000000..a008a6605
--- /dev/null
+++ b/src/pmdk/src/common/ravl.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2018-2020, Intel Corporation */
+
+/*
+ * ravl.h -- internal definitions for ravl tree
+ */
+
+#ifndef LIBPMEMOBJ_RAVL_H
+#define LIBPMEMOBJ_RAVL_H 1
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ravl;
+struct ravl_node;
+
+enum ravl_predicate {
+	RAVL_PREDICATE_EQUAL = 1 << 0,
+	RAVL_PREDICATE_GREATER = 1 << 1,
+	RAVL_PREDICATE_LESS = 1 << 2,
+	RAVL_PREDICATE_LESS_EQUAL =
+		RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_LESS,
+	RAVL_PREDICATE_GREATER_EQUAL =
+		RAVL_PREDICATE_EQUAL | RAVL_PREDICATE_GREATER,
+};
+
+typedef int ravl_compare(const void *lhs, const void *rhs);
+typedef void ravl_cb(void *data, void *arg);
+typedef void ravl_constr(void *data, size_t data_size, const void *arg);
+
+struct ravl *ravl_new(ravl_compare *compare);
+struct ravl *ravl_new_sized(ravl_compare *compare, size_t data_size);
+void ravl_delete(struct ravl *ravl);
+void ravl_delete_cb(struct ravl *ravl, ravl_cb cb, void *arg);
+void ravl_foreach(struct ravl *ravl, ravl_cb cb, void *arg);
+int ravl_empty(struct ravl *ravl);
+void ravl_clear(struct ravl *ravl);
+int ravl_insert(struct ravl *ravl, const void *data);
+int ravl_emplace(struct ravl *ravl, ravl_constr constr, const void *arg);
+int ravl_emplace_copy(struct ravl *ravl, const void *data);
+
+struct ravl_node *ravl_find(struct ravl *ravl, const void *data,
+	enum ravl_predicate predicate_flags);
+void *ravl_data(struct ravl_node *node);
+void ravl_remove(struct ravl *ravl, struct ravl_node *node);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBPMEMOBJ_RAVL_H */
diff --git a/src/pmdk/src/common/set.c b/src/pmdk/src/common/set.c
new file mode 100644
index 000000000..e3f8b6f63
--- /dev/null
+++ b/src/pmdk/src/common/set.c
@@ -0,0 +1,4439 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2015-2020, Intel Corporation */
+/*
+ * Copyright (c) 2016, Microsoft Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *     * Neither the name of the copyright holder nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +/* + * set.c -- pool set utilities + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libpmem.h" +#include "librpmem.h" +#include "set.h" +#include "file.h" +#include "os.h" +#include "mmap.h" +#include "util.h" +#include "out.h" +#include "dlsym.h" +#include "valgrind_internal.h" +#include "sys_util.h" +#include "util_pmem.h" +#include "fs.h" +#include "os_deep.h" +#include "set_badblocks.h" + +#define LIBRARY_REMOTE "librpmem.so.1" +#define SIZE_AUTODETECT_STR "AUTO" + +#define PMEM_EXT ".pmem" +#define PMEM_EXT_LEN sizeof(PMEM_EXT) +#define PMEM_FILE_PADDING 6 +#define PMEM_FILE_NAME_MAX_LEN 20 +#define PMEM_FILE_MAX_LEN (PMEM_FILE_NAME_MAX_LEN + PMEM_FILE_PADDING) + +static RPMEMpool *(*Rpmem_create)(const char *target, const char *pool_set_name, + void *pool_addr, size_t pool_size, unsigned *nlanes, + const struct rpmem_pool_attr *rpmem_attr); +static RPMEMpool *(*Rpmem_open)(const char *target, const char *pool_set_name, + void *pool_addr, size_t pool_size, unsigned *nlanes, + struct rpmem_pool_attr *rpmem_attr); +int (*Rpmem_close)(RPMEMpool *rpp); +int (*Rpmem_persist)(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane, unsigned flags); +int (*Rpmem_deep_persist)(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane); +int (*Rpmem_read)(RPMEMpool *rpp, void *buff, size_t offset, + size_t length, unsigned lane); +int (*Rpmem_remove)(const char *target, const char *pool_set_name, int flags); +int (*Rpmem_set_attr)(RPMEMpool *rpp, const struct rpmem_pool_attr *rattr); + +static int Remote_replication_available; +static os_mutex_t Remote_lock; +static void *Rpmem_handle_remote; + +int Prefault_at_open = 0; +int Prefault_at_create = 0; +int SDS_at_create = POOL_FEAT_INCOMPAT_DEFAULT & POOL_E_FEAT_SDS ? 1 : 0; +int Fallocate_at_create = 1; +int COW_at_open = 0; + +/* list of pool set option names and flags */ +static const struct pool_set_option Options[] = { + { "SINGLEHDR", OPTION_SINGLEHDR }, +#ifndef _WIN32 + { "NOHDRS", OPTION_NOHDRS }, +#endif + { NULL, OPTION_UNKNOWN } +}; + +/* + * util_remote_init -- initialize remote replication + */ +void +util_remote_init(void) +{ + LOG(3, NULL); + + /* XXX Is duplicate initialization really okay? */ + if (!Remote_replication_available) { + util_mutex_init(&Remote_lock); + Remote_replication_available = 1; + } +} + +/* + * util_remote_fini -- finalize remote replication + */ +void +util_remote_fini(void) +{ + LOG(3, NULL); + + util_remote_unload(); + + /* XXX Okay to be here if not initialized? 
*/ + if (Remote_replication_available) { + Remote_replication_available = 0; + util_mutex_destroy(&Remote_lock); + } +} + +/* + * util_dl_check_error -- check libdl error + */ +static int +util_dl_check_error(void *handle, const char *func) +{ + LOG(15, "handle %p func %s", handle, func); + + if (handle == NULL) { + char *errstr = util_dlerror(); + if (errstr) + ERR("%s(): %s", func, errstr); + errno = ELIBACC; + return -1; + } + return 0; +} + +/* + * util_remote_unload_core -- (internal) unload remote library (core function) + */ +static void +util_remote_unload_core(void) +{ + if (Rpmem_handle_remote != NULL) { + util_dlclose(Rpmem_handle_remote); + Rpmem_handle_remote = NULL; + } + Rpmem_create = NULL; + Rpmem_open = NULL; + Rpmem_close = NULL; + Rpmem_persist = NULL; + Rpmem_deep_persist = NULL; + Rpmem_read = NULL; + Rpmem_remove = NULL; + Rpmem_set_attr = NULL; +} + +/* + * util_remote_unload -- unload remote library + */ +void +util_remote_unload(void) +{ + LOG(3, NULL); + + if (!Remote_replication_available) + return; + + util_mutex_lock(&Remote_lock); + + util_remote_unload_core(); + + util_mutex_unlock(&Remote_lock); +} + +/* + * util_remote_load -- load remote library + */ +int +util_remote_load(void) +{ + LOG(3, NULL); + + if (!Remote_replication_available) { + ERR("remote replication is not available"); + return -1; + } + + CHECK_FUNC_COMPATIBLE(rpmem_create, *Rpmem_create); + CHECK_FUNC_COMPATIBLE(rpmem_open, *Rpmem_open); + CHECK_FUNC_COMPATIBLE(rpmem_close, *Rpmem_close); + CHECK_FUNC_COMPATIBLE(rpmem_persist, *Rpmem_persist); + CHECK_FUNC_COMPATIBLE(rpmem_deep_persist, *Rpmem_deep_persist); + CHECK_FUNC_COMPATIBLE(rpmem_read, *Rpmem_read); + CHECK_FUNC_COMPATIBLE(rpmem_remove, *Rpmem_remove); + + util_mutex_lock(&Remote_lock); + + if (Rpmem_handle_remote) + goto end; + + Rpmem_handle_remote = util_dlopen(LIBRARY_REMOTE); + if (util_dl_check_error(Rpmem_handle_remote, "dlopen")) { + ERR("the pool set requires a remote replica, " + "but the '%s' library cannot be loaded", + LIBRARY_REMOTE); + goto err; + } + + Rpmem_create = util_dlsym(Rpmem_handle_remote, "rpmem_create"); + if (util_dl_check_error(Rpmem_create, "dlsym")) { + ERR("symbol 'rpmem_create' not found"); + goto err; + } + + Rpmem_open = util_dlsym(Rpmem_handle_remote, "rpmem_open"); + if (util_dl_check_error(Rpmem_open, "dlsym")) { + ERR("symbol 'rpmem_open' not found"); + goto err; + } + + Rpmem_close = util_dlsym(Rpmem_handle_remote, "rpmem_close"); + if (util_dl_check_error(Rpmem_close, "dlsym")) { + ERR("symbol 'rpmem_close' not found"); + goto err; + } + + Rpmem_persist = util_dlsym(Rpmem_handle_remote, "rpmem_persist"); + if (util_dl_check_error(Rpmem_persist, "dlsym")) { + ERR("symbol 'rpmem_persist' not found"); + goto err; + } + + Rpmem_deep_persist = util_dlsym(Rpmem_handle_remote, + "rpmem_deep_persist"); + if (util_dl_check_error(Rpmem_deep_persist, "dlsym")) { + ERR("symbol 'rpmem_deep_persist' not found"); + goto err; + } + + Rpmem_read = util_dlsym(Rpmem_handle_remote, "rpmem_read"); + if (util_dl_check_error(Rpmem_read, "dlsym")) { + ERR("symbol 'rpmem_read' not found"); + goto err; + } + + Rpmem_remove = util_dlsym(Rpmem_handle_remote, "rpmem_remove"); + if (util_dl_check_error(Rpmem_remove, "dlsym")) { + ERR("symbol 'rpmem_remove' not found"); + goto err; + } + + Rpmem_set_attr = util_dlsym(Rpmem_handle_remote, "rpmem_set_attr"); + if (util_dl_check_error(Rpmem_set_attr, "dlsym")) { + ERR("symbol 'rpmem_set_attr' not found"); + goto err; + } + +end: + util_mutex_unlock(&Remote_lock); + return 0; 
+
+err:
+	LOG(4, "error clean up");
+	util_remote_unload_core();
+	util_mutex_unlock(&Remote_lock);
+	return -1;
+}
+
+/* reserve space for size, path and some whitespace and/or comment */
+
+enum parser_codes {
+	PARSER_CONTINUE = 0,
+	PARSER_PMEMPOOLSET,
+	PARSER_REPLICA,
+	PARSER_INVALID_TOKEN,
+	PARSER_REMOTE_REPLICA_EXPECTED,
+	PARSER_WRONG_SIZE,
+	PARSER_CANNOT_READ_SIZE,
+	PARSER_ABSOLUTE_PATH_EXPECTED,
+	PARSER_RELATIVE_PATH_EXPECTED,
+	PARSER_SET_NO_PARTS,
+	PARSER_REP_NO_PARTS,
+	PARSER_REMOTE_REP_UNEXPECTED_PARTS,
+	PARSER_SIZE_MISMATCH,
+	PARSER_OUT_OF_MEMORY,
+	PARSER_OPTION_UNKNOWN,
+	PARSER_OPTION_EXPECTED,
+	PARSER_FORMAT_OK,
+	PARSER_MAX_CODE
+};
+
+static const char *parser_errstr[PARSER_MAX_CODE] = {
+	"", /* parsing */
+	"the first line must be exactly 'PMEMPOOLSET'",
+	"exactly 'REPLICA' expected",
+	"invalid token found in the current line",
+	"address of remote node and descriptor of remote pool set expected",
+	"incorrect format of size",
+	"cannot determine size of a part",
+	"incorrect path (must be an absolute one)",
+	"incorrect descriptor (must be a relative path)",
+	"no pool set parts",
+	"no replica parts",
+	"unexpected parts for remote replica",
+	"sizes of pool set and replica mismatch",
+	"allocating memory failed",
+	"unknown option",
+	"missing option name",
+	"" /* format correct */
+};
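+
+/*
+ * For reference, a poolset file accepted by this parser looks like this
+ * (illustrative sizes and paths, not taken from a real setup):
+ *
+ *	PMEMPOOLSET
+ *	OPTION SINGLEHDR
+ *	100G /mountpoint0/myfile.part0
+ *	200G /mountpoint1/myfile.part1
+ *
+ *	REPLICA
+ *	500G /mountpoint2/myfile.rep
+ *
+ * The first line must be the 'PMEMPOOLSET' signature, each part line is
+ * '<size> <absolute path>' (the size may be 'AUTO' for Device DAX), a bare
+ * 'REPLICA' line starts another local replica, and a 'REPLICA <address>
+ * <relative descriptor>' line declares a remote one.
+ */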
+
+/*
+ * util_replica_force_page_allocation -- (internal) forces page allocation for
+ * replica
+ */
+static void
+util_replica_force_page_allocation(struct pool_replica *rep)
+{
+	volatile char *cur_addr = rep->part[0].addr;
+	char *addr_end = (char *)cur_addr + rep->resvsize;
+	for (; cur_addr < addr_end; cur_addr += Pagesize) {
+		*cur_addr = *cur_addr;
+		VALGRIND_SET_CLEAN(cur_addr, 1);
+	}
+}
+
+/*
+ * util_map_hdr -- map a header of a pool set
+ */
+int
+util_map_hdr(struct pool_set_part *part, int flags, int rdonly)
+{
+	LOG(3, "part %p flags %d", part, flags);
+
+	COMPILE_ERROR_ON(POOL_HDR_SIZE == 0);
+	ASSERTeq(POOL_HDR_SIZE % Pagesize, 0);
+
+	/*
+	 * Workaround for Device DAX not allowing to map a portion
+	 * of the device if offset/length are not aligned to the internal
+	 * device alignment (page size). I.e. if the device alignment
+	 * is 2M, we cannot map the 4K header, but need to align the mapping
+	 * length to 2M.
+	 *
+	 * According to mmap(2), system should automatically align mapping
+	 * length to be a multiple of the underlying page size, but it's
+	 * not true for Device DAX.
+	 */
+	size_t hdrsize = part->alignment > POOL_HDR_SIZE
+			? part->alignment : POOL_HDR_SIZE;
+
+	void *addr = NULL;
+
+#if VG_MEMCHECK_ENABLED
+	if (On_valgrind) {
+		/* this is required only for Device DAX & memcheck */
+		addr = util_map_hint(hdrsize, hdrsize);
+		if (addr == MAP_FAILED) {
+			LOG(1, "cannot find a contiguous region of given size");
+			/* there's nothing we can do */
+			return -1;
+		}
+	}
+#endif
+
+	int prot = rdonly ? PROT_READ : PROT_READ|PROT_WRITE;
+	void *hdrp = util_map_sync(addr, hdrsize, prot, flags,
+			part->fd, 0, &part->hdr_map_sync);
+	if (hdrp == MAP_FAILED) {
+		ERR("!mmap: %s", part->path);
+		return -1;
+	}
+
+	part->hdrsize = hdrsize;
+	part->hdr = hdrp;
+
+	VALGRIND_REGISTER_PMEM_MAPPING(part->hdr, part->hdrsize);
+	VALGRIND_REGISTER_PMEM_FILE(part->fd, part->hdr, part->hdrsize, 0);
+
+	return 0;
+}
+
+/*
+ * util_unmap_hdr -- unmap pool set part header
+ */
+void
+util_unmap_hdr(struct pool_set_part *part)
+{
+	if (part->hdr == NULL || part->hdrsize == 0)
+		return;
+
+	LOG(4, "munmap: addr %p size %zu", part->hdr, part->hdrsize);
+	VALGRIND_REMOVE_PMEM_MAPPING(part->hdr, part->hdrsize);
+	if (munmap(part->hdr, part->hdrsize) != 0)
+		/* this means there's a bug on the caller side */
+		FATAL("!munmap: %s", part->path);
+	part->hdr = NULL;
+	part->hdrsize = 0;
+}
+
+/*
+ * util_map_part -- map a part of a pool set
+ */
+int
+util_map_part(struct pool_set_part *part, void *addr, size_t size,
+	size_t offset, int flags, int rdonly)
+{
+	LOG(3, "part %p addr %p size %zu offset %zu flags %d",
+		part, addr, size, offset, flags);
+
+	ASSERTeq((uintptr_t)addr % Mmap_align, 0);
+	ASSERTeq(offset % Mmap_align, 0);
+	ASSERTeq(size % Mmap_align, 0);
+	ASSERT(((os_off_t)offset) >= 0);
+	ASSERTeq(offset % part->alignment, 0);
+	ASSERT(offset < part->filesize);
+
+	if (!size)
+		size = (part->filesize - offset) & ~(part->alignment - 1);
+	else
+		size = roundup(size, part->alignment);
+
+	int prot = rdonly ? PROT_READ : PROT_READ | PROT_WRITE;
+	void *addrp = util_map_sync(addr, size, prot, flags, part->fd,
+		(os_off_t)offset, &part->map_sync);
+	if (addrp == MAP_FAILED) {
+		ERR("!mmap: %s", part->path);
+		return -1;
+	}
+
+	if (addr != NULL && (flags & MAP_FIXED) && addrp != addr) {
+		ERR("unable to map at requested address %p", addr);
+		munmap(addrp, size);
+		return -1;
+	}
+
+	part->addr = addrp;
+	part->size = size;
+
+	VALGRIND_REGISTER_PMEM_MAPPING(part->addr, part->size);
+	VALGRIND_REGISTER_PMEM_FILE(part->fd, part->addr, part->size, offset);
+
+	return 0;
+}
+
+/*
+ * util_unmap_part -- unmap a part of a pool set
+ */
+int
+util_unmap_part(struct pool_set_part *part)
+{
+	LOG(3, "part %p", part);
+
+	if (part->addr != NULL && part->size != 0) {
+		LOG(4, "munmap: addr %p size %zu", part->addr, part->size);
+		VALGRIND_REMOVE_PMEM_MAPPING(part->addr, part->size);
+		if (munmap(part->addr, part->size) != 0) {
+			ERR("!munmap: %s", part->path);
+		}
+
+		part->addr = NULL;
+		part->size = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * util_unmap_parts -- unmap parts from start_index to the end_index
+ */
+int
+util_unmap_parts(struct pool_replica *rep, unsigned start_index,
+	unsigned end_index)
+{
+	LOG(3, "rep: %p, start_index: %u, end_index: %u", rep, start_index,
+		end_index);
+
+	for (unsigned p = start_index; p <= end_index; p++)
+		util_unmap_part(&rep->part[p]);
+
+	return 0;
+}
+
+/*
+ * util_poolset_free -- free pool set info
+ */
+void
+util_poolset_free(struct pool_set *set)
+{
+	LOG(3, "set %p", set);
+
+	for (unsigned r = 0; r < set->nreplicas; r++) {
+		struct pool_replica *rep = set->replica[r];
+		if (rep->remote == NULL) {
+			/* only local replicas have paths */
+			for (unsigned p = 0; p < rep->nallocated; p++) {
+				Free((void *)(rep->part[p].path));
+			}
+		} else {
+			/* remote replica */
+			ASSERTeq(rep->nparts, 1);
+			Free(rep->remote->node_addr);
+			Free(rep->remote->pool_desc);
+			Free(rep->remote);
+		}
+		struct pool_set_directory *d;
+		VEC_FOREACH_BY_PTR(d, &rep->directory) {
+			Free((void *)d->path);
+		}
+		
VEC_DELETE(&rep->directory); + Free(set->replica[r]); + } + Free(set->path); + Free(set); +} + +/* + * util_poolset_open -- open all replicas from a poolset + */ +int +util_poolset_open(struct pool_set *set) +{ + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (util_replica_open(set, r, MAP_SHARED)) { + LOG(2, "replica open failed: replica %u", r); + errno = EINVAL; + return -1; + } + } + + return 0; +} + +/* + * util_replica_close_local -- close local replica, optionally delete the + * replica's parts + */ +int +util_replica_close_local(struct pool_replica *rep, unsigned repn, + enum del_parts_mode del) +{ + for (unsigned p = 0; p < rep->nparts; p++) { + if (rep->part[p].fd != -1) + (void) os_close(rep->part[p].fd); + + if ((del == DELETE_CREATED_PARTS && rep->part[p].created) || + del == DELETE_ALL_PARTS) { + LOG(4, "unlink %s", rep->part[p].path); + int olderrno = errno; + if (util_unlink(rep->part[p].path) && errno != ENOENT) { + ERR("!unlink %s failed (part %u, replica %u)", + rep->part[p].path, p, repn); + return -1; + } + errno = olderrno; + } + } + return 0; +} + +/* + * util_replica_close_remote -- close remote replica, optionally delete the + * replica + */ +int +util_replica_close_remote(struct pool_replica *rep, unsigned repn, + enum del_parts_mode del) +{ + if (!rep->remote) + return 0; + + if (rep->remote->rpp) { + LOG(4, "closing remote replica #%u", repn); + Rpmem_close(rep->remote->rpp); + rep->remote->rpp = NULL; + } + + if ((del == DELETE_CREATED_PARTS && rep->part[0].created) || + del == DELETE_ALL_PARTS) { + LOG(4, "removing remote replica #%u", repn); + int ret = Rpmem_remove(rep->remote->node_addr, + rep->remote->pool_desc, 0); + if (ret) { + LOG(1, "!removing remote replica #%u failed", repn); + return -1; + } + } + return 0; +} + +/* + * util_poolset_close -- unmap and close all the parts of the pool set, + * optionally delete parts + */ +void +util_poolset_close(struct pool_set *set, enum del_parts_mode del) +{ + LOG(3, "set %p del %d", set, del); + + int oerrno = errno; + + for (unsigned r = 0; r < set->nreplicas; r++) { + util_replica_close(set, r); + + struct pool_replica *rep = set->replica[r]; + if (!rep->remote) + (void) util_replica_close_local(rep, r, del); + else + (void) util_replica_close_remote(rep, r, del); + } + + /* + * XXX On FreeBSD, mmap()ing a file does not increment the flock() + * reference count, so we had to keep the files open until now. 
+ */ +#ifdef __FreeBSD__ + util_poolset_fdclose_always(set); +#endif + util_poolset_free(set); + + errno = oerrno; +} + +/* + * util_poolset_chmod -- change mode for all created files related to pool set + */ +int +util_poolset_chmod(struct pool_set *set, mode_t mode) +{ + LOG(3, "set %p mode %o", set, mode); + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + + /* skip remote replicas */ + if (rep->remote != NULL) + continue; + + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + + /* skip not created or closed parts */ + if (!part->created || part->fd == -1) + continue; + + os_stat_t stbuf; + if (os_fstat(part->fd, &stbuf) != 0) { + ERR("!fstat %d %s", part->fd, part->path); + return -1; + } + + if (stbuf.st_mode & ~(unsigned)S_IFMT) { + LOG(1, "file permissions changed during pool " + "initialization, file: %s (%o)", + part->path, + stbuf.st_mode & ~(unsigned)S_IFMT); + } + + if (os_chmod(part->path, mode)) { + ERR("!chmod %u/%u/%s", r, p, part->path); + return -1; + } + } + } + + return 0; +} + +/* + * util_poolset_fdclose_always -- close file descriptors related to pool set + */ +void +util_poolset_fdclose_always(struct pool_set *set) +{ + LOG(3, "set %p", set); + + for (unsigned r = 0; r < set->nreplicas; r++) + util_replica_fdclose(set->replica[r]); +} + +/* + * util_poolset_fdclose -- close pool set file descriptors if not FreeBSD + * + * XXX On FreeBSD, mmap()ing a file does not increment the flock() + * reference count, so we need to keep the files open. + */ +void +util_poolset_fdclose(struct pool_set *set) +{ +#ifdef __FreeBSD__ + LOG(3, "set %p: holding open", set); +#else + util_poolset_fdclose_always(set); +#endif +} + +/* + * util_autodetect_size -- (internal) retrieves size of an existing file + */ +static ssize_t +util_autodetect_size(const char *path) +{ + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + if (type == TYPE_NORMAL) { + ERR("size autodetection is supported only for device dax"); + return -1; + } + + return util_file_get_size(path); +} + +/* + * parser_read_line -- (internal) read line and validate size and path + * from a pool set file + */ +static enum parser_codes +parser_read_line(char *line, size_t *size, char **path) +{ + int ret; + char *size_str; + char *path_str; + char *rest_str; + char *saveptr = NULL; /* must be NULL initialized on Windows */ + + size_str = strtok_r(line, " \t", &saveptr); + path_str = strtok_r(NULL, " \t", &saveptr); + rest_str = strtok_r(NULL, " \t", &saveptr); + + if (!size_str || !path_str || rest_str) + return PARSER_INVALID_TOKEN; + + LOG(10, "size '%s' path '%s'", size_str, path_str); + + /* + * A format of the size is checked in detail. As regards the path, + * it is checked only if the read path is an absolute path. + * The rest should be checked during creating/opening the file. + */ + + /* check if the read path is an absolute path */ + if (!util_is_absolute_path(path_str)) + return PARSER_ABSOLUTE_PATH_EXPECTED; + + *path = Strdup(path_str); + if (!(*path)) { + ERR("!Strdup"); + return PARSER_OUT_OF_MEMORY; + } + + if (strcmp(SIZE_AUTODETECT_STR, size_str) == 0) { + /* + * XXX: this should be done after the parsing completes, but + * currently this operation is performed in simply too many + * places in the code to move this someplace else. 
+ */ + ssize_t s = util_autodetect_size(path_str); + if (s < 0) { + Free(*path); + *path = NULL; + return PARSER_CANNOT_READ_SIZE; + } + + *size = (size_t)s; + + return PARSER_CONTINUE; + } + + ret = util_parse_size(size_str, size); + if (ret != 0 || *size == 0) { + Free(*path); + *path = NULL; + return PARSER_WRONG_SIZE; + } + + return PARSER_CONTINUE; +} + +/* + * parser_read_replica -- (internal) read line and validate remote replica + * from a pool set file + */ +static enum parser_codes +parser_read_replica(char *line, char **node_addr, char **pool_desc) +{ + char *addr_str; + char *desc_str; + char *rest_str; + char *saveptr = NULL; /* must be NULL initialized on Windows */ + + addr_str = strtok_r(line, " \t", &saveptr); + desc_str = strtok_r(NULL, " \t", &saveptr); + rest_str = strtok_r(NULL, " \t", &saveptr); + + if (!addr_str || !desc_str) + return PARSER_REMOTE_REPLICA_EXPECTED; + + if (rest_str) + return PARSER_INVALID_TOKEN; + + LOG(10, "node address '%s' pool set descriptor '%s'", + addr_str, desc_str); + + /* check if the descriptor is a relative path */ + if (util_is_absolute_path(desc_str)) + return PARSER_RELATIVE_PATH_EXPECTED; + + *node_addr = Strdup(addr_str); + *pool_desc = Strdup(desc_str); + + if (!(*node_addr) || !(*pool_desc)) { + ERR("!Strdup"); + if (*node_addr) + Free(*node_addr); + if (*pool_desc) + Free(*pool_desc); + return PARSER_OUT_OF_MEMORY; + } + + return PARSER_CONTINUE; +} + +/* + * parser_read_options -- (internal) read line and validate options + */ +static enum parser_codes +parser_read_options(char *line, unsigned *options) +{ + LOG(3, "line '%s'", line); + + int opt_cnt = 0; + char *saveptr = NULL; /* must be NULL initialized on Windows */ + + char *opt_str = strtok_r(line, " \t", &saveptr); + while (opt_str != NULL) { + LOG(4, "option '%s'", opt_str); + + int i = 0; + while (Options[i].name && strcmp(opt_str, Options[i].name) != 0) + i++; + + if (Options[i].name == NULL) { + LOG(4, "unknown option '%s'", opt_str); + return PARSER_OPTION_UNKNOWN; + } + + if (*options & Options[i].flag) + LOG(4, "duplicated option '%s'", opt_str); + + *options |= Options[i].flag; + + opt_cnt++; + opt_str = strtok_r(NULL, " \t", &saveptr); + } + + if (opt_cnt == 0) + return PARSER_OPTION_EXPECTED; + + return PARSER_CONTINUE; +} + +/* + * util_replica_reserve -- reserves part slots capacity in a replica + */ +static int +util_replica_reserve(struct pool_replica **repp, unsigned n) +{ + LOG(3, "replica %p n %u", *repp, n); + + struct pool_replica *rep = *repp; + if (rep->nallocated >= n) + return 0; + + rep = Realloc(rep, sizeof(struct pool_replica) + + (n) * sizeof(struct pool_set_part)); + if (rep == NULL) { + ERR("!Realloc"); + return -1; + } + + size_t nsize = sizeof(struct pool_set_part) * (n - rep->nallocated); + memset(rep->part + rep->nallocated, 0, nsize); + + rep->nallocated = n; + *repp = rep; + + return 0; +} + +/* + * util_replica_add_part_by_idx -- (internal) allocates, initializes and adds a + * part structure at the provided location in the replica info + */ +static int +util_replica_add_part_by_idx(struct pool_replica **repp, + const char *path, size_t filesize, unsigned p) +{ + LOG(3, "replica %p path %s filesize %zu", *repp, path, filesize); + + if (util_replica_reserve(repp, p + 1) != 0) + return -1; + + struct pool_replica *rep = *repp; + ASSERTne(rep, NULL); + + int is_dev_dax = 0; + + if (path != NULL) { + enum file_type type = util_file_get_type(path); + if (type == OTHER_ERROR) + return -1; + + is_dev_dax = type == TYPE_DEVDAX; + } + + 
rep->part[p].path = path; + rep->part[p].filesize = filesize; + rep->part[p].fd = -1; + rep->part[p].is_dev_dax = is_dev_dax; + rep->part[p].created = 0; + rep->part[p].hdr = NULL; + rep->part[p].addr = NULL; + rep->part[p].remote_hdr = NULL; + rep->part[p].has_bad_blocks = 0; + + if (is_dev_dax) + rep->part[p].alignment = util_file_device_dax_alignment(path); + else + rep->part[p].alignment = Mmap_align; + + ASSERTne(rep->part[p].alignment, 0); + + rep->nparts += 1; + + return 0; +} + +/* + * util_replica_add_part -- adds a next part in replica info + */ +static int +util_replica_add_part(struct pool_replica **repp, + const char *path, size_t filesize) +{ + LOG(3, "replica %p path \"%s\" filesize %zu", *repp, path, filesize); + + return util_replica_add_part_by_idx(repp, path, + filesize, (*repp)->nparts); +} + +/* + * util_parse_add_part -- (internal) add a new part file to the replica info + */ +static int +util_parse_add_part(struct pool_set *set, const char *path, size_t filesize) +{ + LOG(3, "set %p path %s filesize %zu", set, path, filesize); + + ASSERTne(set, NULL); + + if (set->directory_based) { + ERR("cannot mix directories and files in a set"); + errno = EINVAL; + return -1; + } + + return util_replica_add_part(&set->replica[set->nreplicas - 1], + path, filesize); +} + +/* + * util_parse_add_directory -- + * (internal) add a new directory to the replica info + */ +static int +util_parse_add_directory(struct pool_set *set, const char *path, + size_t filesize) +{ + LOG(3, "set %p path %s filesize %zu", set, path, filesize); + + ASSERTne(set, NULL); + + struct pool_replica *rep = set->replica[set->nreplicas - 1]; + ASSERTne(rep, NULL); + + if (set->directory_based == 0) { + if (rep->nparts > 0 || set->nreplicas > 1) { + ERR("cannot mix directories and files in a set"); + errno = EINVAL; + return -1; + } + set->directory_based = 1; + } + + char *rpath = util_part_realpath(path); + if (rpath == NULL) { + ERR("cannot resolve realpath of new directory"); + return -1; + } + + for (unsigned i = 0; i < set->nreplicas; ++i) { + struct pool_replica *r = set->replica[i]; + struct pool_set_directory *dir; + char *dpath = NULL; + VEC_FOREACH_BY_PTR(dir, &r->directory) { + dpath = util_part_realpath(dir->path); + ASSERTne(dpath, NULL); /* must have been resolved */ + if (strcmp(rpath, dpath) == 0) { + ERR("cannot use the same directory twice"); + errno = EEXIST; + free(dpath); + free(rpath); + return -1; + } + free(dpath); + } + } + free(rpath); + + struct pool_set_directory d; + d.path = path; + d.resvsize = filesize; + + if (VEC_PUSH_BACK(&rep->directory, d) != 0) + return -1; + + rep->resvsize += filesize; + + return 0; +} + +/* + * util_parse_add_element -- + * (internal) add a new element to the replica info + */ +static int +util_parse_add_element(struct pool_set *set, const char *path, size_t filesize) +{ + LOG(3, "set %p path %s filesize %zu", set, path, filesize); + + os_stat_t stat; + + int olderrno = errno; + + if (os_stat(path, &stat) == 0 && S_ISDIR(stat.st_mode)) + return util_parse_add_directory(set, path, filesize); + + errno = olderrno; + + return util_parse_add_part(set, path, filesize); +} + +/* + * util_parse_add_replica -- (internal) add a new replica to the pool set info + */ +static int +util_parse_add_replica(struct pool_set **setp) +{ + LOG(3, "setp %p", setp); + + ASSERTne(setp, NULL); + + struct pool_set *set = *setp; + ASSERTne(set, NULL); + + set = Realloc(set, sizeof(struct pool_set) + + (set->nreplicas + 1) * sizeof(struct pool_replica *)); + if (set == NULL) { + 
ERR("!Realloc"); + return -1; + } + *setp = set; + + struct pool_replica *rep; + rep = Zalloc(sizeof(struct pool_replica)); + if (rep == NULL) { + ERR("!Zalloc"); + return -1; + } + + VEC_INIT(&rep->directory); + + unsigned r = set->nreplicas++; + + set->replica[r] = rep; + + return 0; +} + +/* + * util_replica_check_map_sync -- (internal) check MAP_SYNC restrictions + */ +static int +util_replica_check_map_sync(struct pool_set *set, unsigned repidx, + int check_hdr) +{ + LOG(3, "set %p repidx %u", set, repidx); + + struct pool_replica *rep = set->replica[repidx]; + + int map_sync = rep->part[0].map_sync; + + for (unsigned p = 1; p < rep->nparts; p++) { + if (map_sync != rep->part[p].map_sync) { + ERR("replica #%u part %u %smapped with MAP_SYNC", + repidx, p, rep->part[p].map_sync ? "" : "not"); + return -1; + } + } + + if (check_hdr) { + for (unsigned p = 0; p < rep->nhdrs; p++) { + if (map_sync != rep->part[p].hdr_map_sync) { + ERR("replica #%u part %u header %smapped " + "with MAP_SYNC", repidx, p, + rep->part[p].hdr_map_sync ? + "" : "not"); + return -1; + } + } + } + + return 0; +} + +/* + * util_poolset_check_devdax -- (internal) check Device DAX restrictions + */ +static int +util_poolset_check_devdax(struct pool_set *set) +{ + LOG(3, "set %p", set); + + if (set->directory_based) + return 0; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + int is_dev_dax = rep->part[0].is_dev_dax; + + for (unsigned p = 0; p < rep->nparts; p++) { + if (rep->part[p].is_dev_dax != is_dev_dax) { + ERR( + "either all the parts must be Device DAX or none"); + return -1; + } + + if (is_dev_dax && rep->nparts > 1 && + (set->options & (OPTION_SINGLEHDR | + OPTION_NOHDRS)) == 0 && + util_file_device_dax_alignment(rep->part[p].path) + != Pagesize) { + ERR( + "Multiple DAX devices with alignment other than 4KB. Use the SINGLEHDR poolset option."); + return -1; + } + } + } + return 0; +} + +/* + * util_poolset_check_options -- (internal) check if poolset options are + * admissible + */ +static int +util_poolset_check_options(struct pool_set *set) +{ + LOG(3, "set %p", set); + if ((set->options & OPTION_SINGLEHDR) && + (set->options & OPTION_NOHDRS)) { + ERR( + "both SINGLEHDR and NOHDR poolset options used at the same time"); + return -1; + } + return 0; +} + +/* + * util_poolset_set_size -- (internal) calculate pool size + */ +static void +util_poolset_set_size(struct pool_set *set) +{ + LOG(3, "set %p", set); + + set->poolsize = SIZE_MAX; + set->resvsize = SIZE_MAX; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + + if (set->options & OPTION_SINGLEHDR) + rep->nhdrs = 1; + else if (set->options & OPTION_NOHDRS) + rep->nhdrs = 0; + else + rep->nhdrs = rep->nparts; + + rep->repsize = 0; + for (unsigned p = 0; p < rep->nparts; p++) { + rep->repsize += + (rep->part[p].filesize & ~(Mmap_align - 1)); + } + if (rep->nhdrs > 0) + rep->repsize -= (rep->nhdrs - 1) * Mmap_align; + + if (rep->resvsize == 0) + rep->resvsize = rep->repsize; + + /* + * Calculate pool size - choose the smallest replica size. + * Ignore remote replicas. 
+ */ + if (rep->remote == NULL && rep->repsize < set->poolsize) + set->poolsize = rep->repsize; + if (rep->remote == NULL && rep->resvsize < set->resvsize) + set->resvsize = rep->resvsize; + } + + LOG(3, "pool size set to %zu", set->poolsize); +} + +/* + * util_parse_add_remote_replica -- (internal) add a new remote replica + * to the pool set info + */ +static int +util_parse_add_remote_replica(struct pool_set **setp, char *node_addr, + char *pool_desc) +{ + LOG(3, "setp %p node_addr %s pool_desc %s", setp, node_addr, pool_desc); + + ASSERTne(setp, NULL); + ASSERTne(node_addr, NULL); + ASSERTne(pool_desc, NULL); + + int ret = util_parse_add_replica(setp); + if (ret != 0) + return ret; + + /* + * A remote replica has one fake part of size equal twice pool header + * size for storing pool header and pool descriptor. + */ + ret = util_parse_add_part(*setp, NULL, 2 * POOL_HDR_SIZE); + if (ret != 0) + return ret; + + struct pool_set *set = *setp; + struct pool_replica *rep = set->replica[set->nreplicas - 1]; + ASSERTne(rep, NULL); + + rep->remote = Zalloc(sizeof(struct remote_replica)); + if (rep->remote == NULL) { + ERR("!Malloc"); + return -1; + } + rep->remote->node_addr = node_addr; + rep->remote->pool_desc = pool_desc; + set->remote = 1; + + return 0; +} + +/* + * util_part_idx_by_file_name -- (internal) retrieves the part index from a + * name of the file that is an element of a directory poolset + */ +static long +util_part_idx_by_file_name(const char *filename) +{ + LOG(3, "filename \"%s\"", filename); + + int olderrno = errno; + errno = 0; + long part_idx = strtol(filename, NULL, 10); + if (errno != 0) + return -1; + + errno = olderrno; + + return part_idx; +} + +/* + * util_poolset_directory_load -- (internal) loads and initializes all + * existing parts in a single directory + */ +static int +util_poolset_directory_load(struct pool_replica **repp, const char *directory) +{ + LOG(3, "rep %p dir \"%s\"", *repp, directory); + + struct fs *f = fs_new(directory); + if (f == NULL) { + ERR("!fs_new: \"%s\"", directory); + return -1; + } + + int nparts = 0; + char *path = NULL; + + struct fs_entry *entry; + while ((entry = fs_read(f)) != NULL) { + if (entry->level != 1) + continue; + if (entry->type != FS_ENTRY_FILE) + continue; + if (entry->namelen < PMEM_EXT_LEN) + continue; + const char *ext = entry->path + entry->pathlen - + PMEM_EXT_LEN + 1; + if (strcmp(PMEM_EXT, ext) != 0) + continue; + + long part_idx = util_part_idx_by_file_name(entry->name); + if (part_idx < 0) + continue; + + ssize_t size = util_file_get_size(entry->path); + if (size < 0) { + LOG(2, + "cannot read size of file (%s) in a poolset directory", + entry->path); + goto err; + } + + if ((path = Strdup(entry->path)) == NULL) { + ERR("!Strdup"); + goto err; + } + + if (util_replica_add_part_by_idx(repp, path, + (size_t)size, (unsigned)part_idx) != 0) { + ERR("unable to load part %s", entry->path); + goto err; + } + nparts++; + } + + fs_delete(f); + return nparts; + +err: + fs_delete(f); + return -1; +} + +/* + * util_poolset_directories_load -- (internal) loads and initializes all + * existing parts in the poolset directories + */ +static int +util_poolset_directories_load(struct pool_set *set) +{ + LOG(3, "set %p", set); + + if (!set->directory_based) + return 0; + + unsigned next_part_id = 0; + unsigned max_parts_rep = 0; + for (unsigned r = 0; r < set->nreplicas; r++) { + next_part_id = 0; + + struct pool_set_directory *d; + int nparts = 0; + int prev_nparts = 0; + VEC_FOREACH_BY_PTR(d, &set->replica[r]->directory) { + 
prev_nparts = nparts; + nparts = util_poolset_directory_load(&set->replica[r], + d->path); + if (nparts < 0) { + ERR("failed to load parts from directory %s", + d->path); + return -1; + } + + next_part_id += (unsigned)nparts; + + /* always try to evenly spread files across dirs */ + if (r == 0 && prev_nparts > nparts) + set->next_directory_id++; + } + + if (next_part_id > set->replica[max_parts_rep]->nparts) + max_parts_rep = r; + + if (r == 0) + set->next_id = next_part_id; + } + + /* + * In order to maintain the same semantics of poolset parsing for + * regular poolsets and directory poolsets, we need to speculatively + * recreate the information regarding any missing parts in replicas. + */ + struct pool_replica *rep; + struct pool_replica *mrep = set->replica[max_parts_rep]; + + for (unsigned r = 0; r < set->nreplicas; r++) { + if (set->replica[r]->nparts == mrep->nparts) + continue; + + if (VEC_SIZE(&set->replica[r]->directory) == 0) { + errno = ENOENT; + ERR("!no directories in replica"); + return -1; + } + + if (util_replica_reserve(&set->replica[r], mrep->nparts) != 0) + return -1; + + rep = set->replica[r]; + + struct pool_set_directory *d = VEC_GET(&rep->directory, 0); + + for (unsigned pidx = 0; pidx < rep->nallocated; ++pidx) { + struct pool_set_part *p = &rep->part[pidx]; + *p = mrep->part[pidx]; + + size_t path_len = strlen(d->path) + PMEM_FILE_MAX_LEN; + if ((p->path = Malloc(path_len)) == NULL) { + ERR("!Malloc"); + return -1; + } + + snprintf((char *)p->path, path_len, + "%s" OS_DIR_SEP_STR "%0*u%s", + d->path, PMEM_FILE_PADDING, + pidx, PMEM_EXT); + } + rep->nparts = mrep->nparts; + } + + return 0; +} + +/* + * util_poolset_parse -- parse pool set config file + * + * Returns 0 if the file is a valid poolset config file, + * and -1 in case of any error. 
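+ *
+ * For illustration only, a minimal poolset file in the format this parser
+ * accepts (the sizes and paths below are made-up examples):
+ *
+ *	PMEMPOOLSET
+ *	OPTION SINGLEHDR
+ *	100G /mountpoint0/myfile.part0
+ *	200G /mountpoint1/myfile.part1
+ *	REPLICA
+ *	500G /mountpoint2/myfile.part2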
+ * + * XXX: use memory mapped file + */ +int +util_poolset_parse(struct pool_set **setp, const char *path, int fd) +{ + LOG(3, "setp %p path %s fd %d", setp, path, fd); + + struct pool_set *set = NULL; + enum parser_codes result; + char *line; + char *ppath; + char *pool_desc; + char *node_addr; + char *cp; + size_t psize; + FILE *fs; + int oerrno; + + if (os_lseek(fd, 0, SEEK_SET) != 0) { + ERR("!lseek %d", fd); + return -1; + } + + fd = dup(fd); + if (fd < 0) { + ERR("!dup"); + return -1; + } + + /* associate a stream with the file descriptor */ + if ((fs = os_fdopen(fd, "r")) == NULL) { + ERR("!fdopen %d", fd); + os_close(fd); + return -1; + } + + unsigned nlines = 0; + unsigned nparts = 0; /* number of parts in current replica */ + + /* read the first line */ + line = util_readline(fs); + if (line == NULL) { + ERR("!Reading poolset file"); + goto err; + } + nlines++; + + set = Zalloc(sizeof(struct pool_set)); + if (set == NULL) { + ERR("!Malloc for pool set"); + goto err; + } + + set->path = Strdup(path); + if (set->path == NULL) { + ERR("!Strdup"); + goto err; + } + + /* check also if the last character is '\n' */ + if (strncmp(line, POOLSET_HDR_SIG, POOLSET_HDR_SIG_LEN) == 0 && + line[POOLSET_HDR_SIG_LEN] == '\n') { + /* 'PMEMPOOLSET' signature detected */ + LOG(10, "PMEMPOOLSET"); + + int ret = util_parse_add_replica(&set); + if (ret != 0) + goto err; + + nparts = 0; + result = PARSER_CONTINUE; + } else { + result = PARSER_PMEMPOOLSET; + } + + while (result == PARSER_CONTINUE) { + Free(line); + /* read next line */ + line = util_readline(fs); + nlines++; + + if (line) { + /* chop off newline and comments */ + if ((cp = strchr(line, '\n')) != NULL) + *cp = '\0'; + if (cp != line && (cp = strchr(line, '#')) != NULL) + *cp = '\0'; + + /* skip comments and blank lines */ + if (cp == line) + continue; + } + + if (!line) { + if (nparts >= 1) { + result = PARSER_FORMAT_OK; + } else { + if (set->nreplicas == 1) + result = PARSER_SET_NO_PARTS; + else + result = PARSER_REP_NO_PARTS; + } + } else if (strncmp(line, POOLSET_OPTION_SIG, + POOLSET_OPTION_SIG_LEN) == 0) { + result = parser_read_options( + line + POOLSET_OPTION_SIG_LEN, + &set->options); + if (result == PARSER_CONTINUE) { + LOG(10, "OPTIONS: %x", set->options); + } + } else if (strncmp(line, POOLSET_REPLICA_SIG, + POOLSET_REPLICA_SIG_LEN) == 0) { + if (line[POOLSET_REPLICA_SIG_LEN] != '\0') { + /* something more than 'REPLICA' */ + char c = line[POOLSET_REPLICA_SIG_LEN]; + if (!isblank((unsigned char)c)) { + result = PARSER_REPLICA; + continue; + } + /* check if it is a remote replica */ + result = parser_read_replica( + line + POOLSET_REPLICA_SIG_LEN, + &node_addr, &pool_desc); + if (result == PARSER_CONTINUE) { + /* remote REPLICA */ + LOG(10, "REMOTE REPLICA " + "node address '%s' " + "pool set descriptor '%s'", + node_addr, pool_desc); + if (util_parse_add_remote_replica(&set, + node_addr, pool_desc)) + goto err; + } + } else if (nparts >= 1) { + /* 'REPLICA' signature detected */ + LOG(10, "REPLICA"); + + int ret = util_parse_add_replica(&set); + if (ret != 0) + goto err; + + nparts = 0; + result = PARSER_CONTINUE; + } else { + if (set->nreplicas == 1) + result = PARSER_SET_NO_PARTS; + else + result = PARSER_REP_NO_PARTS; + } + } else { + /* there could be no parts for remote replicas */ + if (set->replica[set->nreplicas - 1]->remote) { + result = PARSER_REMOTE_REP_UNEXPECTED_PARTS; + continue; + } + + /* read size and path */ + result = parser_read_line(line, &psize, &ppath); + if (result == PARSER_CONTINUE) { + /* add a new 
pool's part to the list */ + int ret = util_parse_add_element(set, + ppath, psize); + if (ret != 0) { + Free(ppath); + goto err; + } + nparts++; + } + } + } + + if (result != PARSER_FORMAT_OK) { + ERR("%s [%s:%d]", path, parser_errstr[result], nlines); + switch (result) { + case PARSER_CANNOT_READ_SIZE: + case PARSER_OUT_OF_MEMORY: + /* do not overwrite errno */ + break; + default: + errno = EINVAL; + } + goto err; + } + + if (util_poolset_check_devdax(set) != 0) { + errno = EINVAL; + goto err; + } + + if (util_poolset_directories_load(set) != 0) { + ERR("cannot load part files from directories"); + goto err; + } + + LOG(4, "set file format correct (%s)", path); + (void) os_fclose(fs); + Free(line); + util_poolset_check_options(set); + util_poolset_set_size(set); + *setp = set; + return 0; + +err: + oerrno = errno; + Free(line); + (void) os_fclose(fs); + if (set) + util_poolset_free(set); + errno = oerrno; + return -1; +} + +/* + * util_poolset_single -- (internal) create a one-part pool set + * + * On success returns a pointer to a newly allocated and initialized + * pool set structure. Otherwise, NULL is returned. + */ +static struct pool_set * +util_poolset_single(const char *path, size_t filesize, int create, + int ignore_sds) +{ + LOG(3, "path %s filesize %zu create %d", + path, filesize, create); + + enum file_type type = util_file_get_type(path); + if (type == OTHER_ERROR) + return NULL; + + struct pool_set *set; + set = Zalloc(sizeof(struct pool_set) + + sizeof(struct pool_replica *)); + if (set == NULL) { + ERR("!Malloc for pool set"); + return NULL; + } + + set->path = Strdup(path); + if (set->path == NULL) { + ERR("!Strdup"); + Free(set); + return NULL; + } + + struct pool_replica *rep; + rep = Zalloc(sizeof(struct pool_replica) + + sizeof(struct pool_set_part)); + if (rep == NULL) { + ERR("!Malloc for pool set replica"); + Free(set->path); + Free(set); + return NULL; + } + + VEC_INIT(&rep->directory); + + set->replica[0] = rep; + + rep->part[0].filesize = filesize; + rep->part[0].path = Strdup(path); + rep->part[0].fd = -1; /* will be filled out by util_poolset_file() */ + rep->part[0].is_dev_dax = type == TYPE_DEVDAX; + rep->part[0].created = create; + rep->part[0].hdr = NULL; + rep->part[0].addr = NULL; + rep->part[0].has_bad_blocks = 0; + + if (rep->part[0].is_dev_dax) + rep->part[0].alignment = util_file_device_dax_alignment(path); + else + rep->part[0].alignment = Mmap_align; + + ASSERTne(rep->part[0].alignment, 0); + + rep->nallocated = 1; + rep->nparts = 1; + rep->nhdrs = 1; + + /* it does not have a remote replica */ + rep->remote = NULL; + set->remote = 0; + + /* round down to the nearest mapping alignment boundary */ + rep->repsize = rep->part[0].filesize & ~(rep->part[0].alignment - 1); + rep->resvsize = rep->repsize; + + set->poolsize = rep->repsize; + set->resvsize = rep->resvsize; + + set->nreplicas = 1; + set->ignore_sds = ignore_sds || (set->options & OPTION_NOHDRS); + + return set; +} + +/* + * util_part_open -- open or create a single part file + */ +int +util_part_open(struct pool_set_part *part, size_t minsize, int create_part) +{ + LOG(3, "part %p minsize %zu create %d", part, minsize, create_part); + + int exists = util_file_exists(part->path); + if (exists < 0) + return -1; + + int create_file = create_part; + + if (exists) + create_file = 0; + + part->created = 0; + if (create_file) { + part->fd = util_file_create(part->path, part->filesize, + minsize); + if (part->fd == -1) { + LOG(2, "failed to create file: %s", part->path); + return -1; + } + 
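+		/*
+		 * The created flag is what lets callers (see
+		 * util_replica_map_local()) assume that freshly created
+		 * parts are zero-filled (set->zeroed).
+		 */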
part->created = 1; + } else { + size_t size = 0; + int flags = O_RDWR; + part->fd = util_file_open(part->path, &size, minsize, flags); + if (part->fd == -1) { + LOG(2, "failed to open file: %s", part->path); + return -1; + } + + if (Fallocate_at_create && create_part && !part->is_dev_dax) { + int ret = os_posix_fallocate(part->fd, 0, + (os_off_t)size); + if (ret != 0) { + errno = ret; + ERR("!posix_fallocate \"%s\", %zu", part->path, + size); + return -1; + } + } + + /* check if filesize matches */ + if (part->filesize != size) { + ERR("file size does not match config: %s, %zu != %zu", + part->path, size, part->filesize); + errno = EINVAL; + return -1; + } + } + + return 0; +} + +/* + * util_part_fdclose -- close part file + */ +void +util_part_fdclose(struct pool_set_part *part) +{ + LOG(3, "part %p", part); + + if (part->fd != -1) { + (void) os_close(part->fd); + part->fd = -1; + } +} + +/* + * util_set_rpmem_attr -- (internal) overwrite existing pool attributes + * + * does not set uuid, next_part_uuid, prev_part_uuid + */ +static void +util_set_rpmem_attr(struct pool_hdr *hdrp, const struct rpmem_pool_attr *rattr) +{ + LOG(5, "hdrp %p rattr %p", hdrp, rattr); + memcpy(hdrp->signature, rattr->signature, POOL_HDR_SIG_LEN); + hdrp->major = rattr->major; + hdrp->features.compat = rattr->compat_features; + hdrp->features.incompat = rattr->incompat_features; + hdrp->features.ro_compat = rattr->ro_compat_features; + memcpy(hdrp->poolset_uuid, rattr->poolset_uuid, POOL_HDR_UUID_LEN); + memcpy(hdrp->next_repl_uuid, rattr->next_uuid, POOL_HDR_UUID_LEN); + memcpy(hdrp->prev_repl_uuid, rattr->prev_uuid, POOL_HDR_UUID_LEN); + memcpy(&hdrp->arch_flags, rattr->user_flags, sizeof(struct arch_flags)); +} + +/* + * util_get_rpmem_attr -- (internal) get attributes for remote replica header + */ +static void +util_get_rpmem_attr(struct rpmem_pool_attr *rattr, const struct pool_hdr *hdrp) +{ + LOG(5, "rpmem_attr %p hdrp %p", rattr, hdrp); + ASSERTne(rattr, NULL); + memcpy(rattr->signature, hdrp->signature, POOL_HDR_SIG_LEN); + rattr->major = hdrp->major; + rattr->compat_features = hdrp->features.compat; + rattr->incompat_features = hdrp->features.incompat; + rattr->ro_compat_features = hdrp->features.ro_compat; + memcpy(rattr->poolset_uuid, hdrp->poolset_uuid, POOL_HDR_UUID_LEN); + memcpy(rattr->uuid, hdrp->uuid, POOL_HDR_UUID_LEN); + memcpy(rattr->next_uuid, hdrp->next_repl_uuid, POOL_HDR_UUID_LEN); + memcpy(rattr->prev_uuid, hdrp->prev_repl_uuid, POOL_HDR_UUID_LEN); + memcpy(rattr->user_flags, &hdrp->arch_flags, sizeof(struct arch_flags)); +} + +/* + * util_remote_store_attr -- (internal) store attributes read from remote + * replica in the local volatile pool header + */ +static void +util_remote_store_attr(struct pool_hdr *hdrp, + const struct rpmem_pool_attr *rattr) +{ + LOG(4, "hdrp %p rpmem_attr %p", hdrp, rattr); + + util_set_rpmem_attr(hdrp, rattr); + memcpy(hdrp->uuid, rattr->uuid, POOL_HDR_UUID_LEN); + memcpy(hdrp->next_part_uuid, rattr->uuid, POOL_HDR_UUID_LEN); + memcpy(hdrp->prev_part_uuid, rattr->uuid, POOL_HDR_UUID_LEN); +} + +/* + * util_update_remote_header -- update attributes of a remote replica; + * the remote replica must be open + */ +int +util_update_remote_header(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + + ASSERTne(REP(set, repn)->remote, NULL); + ASSERTne(REP(set, repn)->remote->rpp, NULL); + + struct pool_replica *rep = REP(set, repn); + struct pool_hdr *hdr = HDR(rep, 0); + + /* get attributes from the local pool header */ + struct 
	rpmem_pool_attr attributes;
+	util_get_rpmem_attr(&attributes, hdr);
+
+	/* push the attributes to the remote replica */
+	RPMEMpool *rpp = rep->remote->rpp;
+	int ret = Rpmem_set_attr(rpp, &attributes);
+	if (ret) {
+		ERR("!Rpmem_set_attr");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * util_pool_close_remote -- close a remote replica
+ */
+int
+util_pool_close_remote(RPMEMpool *rpp)
+{
+	LOG(3, "rpp %p", rpp);
+
+	return Rpmem_close(rpp);
+}
+
+/*
+ * util_poolset_remote_open -- open or create a remote replica
+ */
+int
+util_poolset_remote_open(struct pool_replica *rep, unsigned repidx,
+	size_t minsize, int create, void *pool_addr,
+	size_t pool_size, unsigned *nlanes)
+{
+	LOG(3, "rep %p repidx %u minsize %zu create %d "
+		"pool_addr %p pool_size %zu nlanes %p",
+		rep, repidx, minsize, create,
+		pool_addr, pool_size, nlanes);
+
+	ASSERTne(nlanes, NULL);
+
+	if (!Rpmem_handle_remote) {
+		return -1;
+	}
+
+	unsigned remote_nlanes = *nlanes;
+
+	if (create) {
+		struct rpmem_pool_attr rpmem_attr_create;
+		util_get_rpmem_attr(&rpmem_attr_create, rep->part[0].hdr);
+
+		rep->remote->rpp = Rpmem_create(rep->remote->node_addr,
+				rep->remote->pool_desc,
+				pool_addr,
+				pool_size,
+				&remote_nlanes,
+				&rpmem_attr_create);
+		if (rep->remote->rpp == NULL) {
+			ERR("creating remote replica #%u failed", repidx);
+			return -1;
+		}
+		rep->part[0].created = 1;
+	} else { /* open */
+		struct rpmem_pool_attr rpmem_attr_open;
+
+		rep->remote->rpp = Rpmem_open(rep->remote->node_addr,
+				rep->remote->pool_desc,
+				pool_addr,
+				pool_size,
+				&remote_nlanes,
+				&rpmem_attr_open);
+		if (rep->remote->rpp == NULL) {
+			ERR("opening remote replica #%u failed", repidx);
+			return -1;
+		}
+
+		util_remote_store_attr(rep->part[0].hdr, &rpmem_attr_open);
+	}
+
+	if (remote_nlanes < *nlanes)
+		*nlanes = remote_nlanes;
+
+	return 0;
+}
+
+/*
+ * util_poolset_files_local -- (internal) open or create all the local
+ * part files of a pool set and replica sets
+ */
+static int
+util_poolset_files_local(struct pool_set *set, size_t minpartsize, int create)
+{
+	LOG(3, "set %p minpartsize %zu create %d", set, minpartsize, create);
+
+	for (unsigned r = 0; r < set->nreplicas; r++) {
+		struct pool_replica *rep = set->replica[r];
+		if (!rep->remote) {
+			for (unsigned p = 0; p < rep->nparts; p++) {
+				if (util_part_open(&rep->part[p], minpartsize,
+						create))
+					return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * util_poolset_remote_replica_open -- open remote replica
+ */
+int
+util_poolset_remote_replica_open(struct pool_set *set, unsigned repidx,
+	size_t minsize, int create, unsigned *nlanes)
+{
+#ifndef _WIN32
+	/*
+	 * This is a workaround for an issue with using device dax with
+	 * libibverbs. To handle fork() calls correctly, libfabric uses
+	 * ibv_fork_init(3), which causes all registered memory to be
+	 * madvised with the MADV_DONTFORK flag. In libpmemobj the remote
+	 * replication is performed without the pool header (the first 4k).
+	 * In such a case the address passed to madvise(2) is aligned to 4k,
+	 * but device dax can require a different alignment (2MB by default).
+	 * This workaround madvises the entire memory region before it is
+	 * registered by fi_mr_reg(3).
+	 *
+	 * The librpmem client requires fork() support to work correctly.
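+	 *
+	 * Illustrative example (made-up numbers): with a device dax mapping
+	 * that needs 2MB alignment, replicating from a 4k-aligned address
+	 * inside it would hand madvise(2) a misaligned range and could fail;
+	 * madvising the whole region from its properly aligned base first
+	 * sidesteps that.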
+ */ + if (set->replica[0]->part[0].is_dev_dax) { + int ret = os_madvise(set->replica[0]->part[0].addr, + set->replica[0]->part[0].filesize, + MADV_DONTFORK); + if (ret) { + ERR("!madvise"); + return ret; + } + } +#endif + + void *pool_addr = (void *)((uintptr_t)set->replica[0]->part[0].addr); + + return util_poolset_remote_open(set->replica[repidx], repidx, minsize, + create, pool_addr, set->poolsize, nlanes); +} + +/* + * util_poolset_files_remote -- (internal) open or create all the remote + * part files of a pool set and replica sets + */ +static int +util_poolset_files_remote(struct pool_set *set, size_t minsize, + unsigned *nlanes, int create) +{ + LOG(3, "set %p minsize %zu nlanes %p create %d", + set, minsize, nlanes, create); + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + if (rep->remote) { + if (util_poolset_remote_replica_open(set, r, + minsize, create, nlanes)) + return -1; + } + } + + return 0; +} + +/* + * util_poolset_read -- read memory pool set file + * + * On success returns 0 and a pointer to a newly allocated structure + * containing the info of all the parts of the pool set and replicas. + */ +int +util_poolset_read(struct pool_set **setp, const char *path) +{ + LOG(3, "setp %p path %s", setp, path); + + int oerrno; + int ret = 0; + int fd; + + if ((fd = os_open(path, O_RDONLY)) < 0) { + ERR("!open: path \"%s\"", path); + return -1; + } + + ret = util_poolset_parse(setp, path, fd); + + oerrno = errno; + (void) os_close(fd); + errno = oerrno; + return ret; +} + +/* + * util_poolset_create_set -- create a new pool set structure + * + * On success returns 0 and a pointer to a newly allocated structure + * containing the info of all the parts of the pool set and replicas. + */ +int +util_poolset_create_set(struct pool_set **setp, const char *path, + size_t poolsize, size_t minsize, int ignore_sds) +{ + LOG(3, "setp %p path %s poolsize %zu minsize %zu", + setp, path, poolsize, minsize); + + int oerrno; + int ret = 0; + int fd; + size_t size = 0; + + enum file_type type = util_file_get_type(path); + if (type == OTHER_ERROR) + return -1; + + if (poolsize != 0) { + if (type == TYPE_DEVDAX) { + ERR("size must be zero for device dax"); + return -1; + } + *setp = util_poolset_single(path, poolsize, 1, ignore_sds); + if (*setp == NULL) + return -1; + + return 0; + } + + /* do not check minsize */ + if ((fd = util_file_open(path, &size, 0, O_RDONLY)) == -1) + return -1; + + char signature[POOLSET_HDR_SIG_LEN]; + if (type == TYPE_NORMAL) { + /* + * read returns ssize_t, but we know it will return value + * between -1 and POOLSET_HDR_SIG_LEN (11), so we can safely + * cast it to int + */ + ret = (int)read(fd, signature, POOLSET_HDR_SIG_LEN); + if (ret < 0) { + ERR("!read %d", fd); + goto err; + } + } + + if (type == TYPE_DEVDAX || ret < POOLSET_HDR_SIG_LEN || + strncmp(signature, POOLSET_HDR_SIG, POOLSET_HDR_SIG_LEN)) { + LOG(4, "not a pool set header"); + (void) os_close(fd); + + if (size < minsize) { + ERR("file is not a poolset file and its size (%zu)" + " is smaller than %zu", size, minsize); + errno = EINVAL; + return -1; + } + *setp = util_poolset_single(path, size, 0, ignore_sds); + if (*setp == NULL) + return -1; + + return 0; + } + + ret = util_poolset_parse(setp, path, fd); + if (ret) + goto err; + + (*setp)->ignore_sds = ignore_sds || ((*setp)->options & OPTION_NOHDRS); +#ifdef _WIN32 + /* remote replication is not supported on Windows */ + if ((*setp)->remote) { + util_poolset_free(*setp); + ERR("remote replication is 
not supported on Windows"); + errno = ENOTSUP; + ret = -1; + goto err; + } +#endif /* _WIN32 */ + +err: + oerrno = errno; + (void) os_close(fd); + errno = oerrno; + return ret; +} + +/* + * util_poolset_check_header_options -- (internal) check if poolset options + * match given flags + */ +static int +util_poolset_check_header_options(struct pool_set *set, uint32_t incompat) +{ + LOG(3, "set %p, incompat %#x", set, incompat); + + if (((set->options & OPTION_SINGLEHDR) == 0) != + ((incompat & POOL_FEAT_SINGLEHDR) == 0)) { + ERR( + "poolset file options (%u) do not match incompat feature flags (%#x)", + set->options, incompat); + errno = EINVAL; + return -1; + } + return 0; +} + +/* + * util_header_create -- create header of a single pool set file + */ +int +util_header_create(struct pool_set *set, unsigned repidx, unsigned partidx, + const struct pool_attr *attr, int overwrite) +{ + LOG(3, "set %p repidx %u partidx %u attr %p overwrite %d", set, repidx, + partidx, attr, overwrite); + + ASSERTne(attr, NULL); + + struct pool_replica *rep = set->replica[repidx]; + + /* opaque info lives at the beginning of mapped memory pool */ + struct pool_hdr *hdrp = rep->part[partidx].hdr; + + /* check if the pool header is all zeros */ + if (!util_is_zeroed(hdrp, sizeof(*hdrp)) && !overwrite) { + ERR("Non-empty file detected"); + errno = EEXIST; + return -1; + } + + /* create pool's header */ + util_pool_attr2hdr(hdrp, attr); + + if (set->options & OPTION_SINGLEHDR) + hdrp->features.incompat |= POOL_FEAT_SINGLEHDR; + + memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN); + memcpy(hdrp->uuid, PART(rep, partidx)->uuid, POOL_HDR_UUID_LEN); + + /* link parts */ + if (set->options & OPTION_SINGLEHDR) { + /* next/prev part point to part #0 */ + ASSERTeq(partidx, 0); + memcpy(hdrp->prev_part_uuid, PART(rep, 0)->uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->next_part_uuid, PART(rep, 0)->uuid, + POOL_HDR_UUID_LEN); + } else { + memcpy(hdrp->prev_part_uuid, PARTP(rep, partidx)->uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->next_part_uuid, PARTN(rep, partidx)->uuid, + POOL_HDR_UUID_LEN); + } + + /* link replicas */ + if (!util_is_zeroed(attr->prev_repl_uuid, POOL_HDR_UUID_LEN)) { + memcpy(hdrp->prev_repl_uuid, attr->prev_repl_uuid, + POOL_HDR_UUID_LEN); + } else { + memcpy(hdrp->prev_repl_uuid, PART(REPP(set, repidx), 0)->uuid, + POOL_HDR_UUID_LEN); + } + if (!util_is_zeroed(attr->next_repl_uuid, POOL_HDR_UUID_LEN)) { + memcpy(hdrp->next_repl_uuid, attr->next_repl_uuid, + POOL_HDR_UUID_LEN); + } else { + memcpy(hdrp->next_repl_uuid, PART(REPN(set, repidx), 0)->uuid, + POOL_HDR_UUID_LEN); + } + + if (!rep->remote) { + os_stat_t stbuf; + + if (os_fstat(rep->part[partidx].fd, &stbuf) != 0) { + ERR("!fstat"); + return -1; + } + ASSERT(stbuf.st_ctime); + hdrp->crtime = (uint64_t)stbuf.st_ctime; + } + + int arch_is_zeroed = util_is_zeroed(attr->arch_flags, + POOL_HDR_ARCH_LEN); + if (arch_is_zeroed) + util_get_arch_flags(&hdrp->arch_flags); + + util_convert2le_hdr(hdrp); + + if (!arch_is_zeroed) { + memcpy(&hdrp->arch_flags, attr->arch_flags, POOL_HDR_ARCH_LEN); + } + + if (!set->ignore_sds && partidx == 0 && !rep->remote) { + shutdown_state_init(&hdrp->sds, rep); + for (unsigned p = 0; p < rep->nparts; p++) { + if (shutdown_state_add_part(&hdrp->sds, + PART(rep, p)->fd, rep)) + return -1; + } + shutdown_state_set_dirty(&hdrp->sds, rep); + } + + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, + 1, POOL_HDR_CSUM_END_OFF(hdrp)); + + /* store pool's header */ + util_persist_auto(rep->is_pmem, hdrp, sizeof(*hdrp)); + + 
return 0; +} + +/* + * util_header_check -- (internal) validate header of a single pool set file + */ +static int +util_header_check(struct pool_set *set, unsigned repidx, unsigned partidx, + const struct pool_attr *attr) +{ + LOG(3, "set %p repidx %u partidx %u attr %p", set, repidx, partidx, + attr); + + ASSERTne(attr, NULL); + + struct pool_replica *rep = set->replica[repidx]; + + /* opaque info lives at the beginning of mapped memory pool */ + struct pool_hdr *hdrp = rep->part[partidx].hdr; + struct pool_hdr hdr; + + memcpy(&hdr, hdrp, sizeof(hdr)); + + /* local copy of a remote header does not need to be converted */ + if (rep->remote == NULL) + util_convert2h_hdr_nocheck(&hdr); + + /* to be valid, a header must have a major version of at least 1 */ + if (hdr.major == 0) { + ERR("invalid major version (0)"); + errno = EINVAL; + return -1; + } + + /* check signature */ + if (memcmp(hdr.signature, attr->signature, POOL_HDR_SIG_LEN)) { + ERR("wrong pool type: \"%.8s\"", hdr.signature); + errno = EINVAL; + return -1; + } + + /* check format version number */ + if (hdr.major != attr->major) { + ERR("pool version %d (library expects %d)", hdr.major, + attr->major); + if (hdr.major < attr->major) + ERR( + "Please run the pmdk-convert utility to upgrade the pool."); + errno = EINVAL; + return -1; + } + + rep->part[partidx].rdonly = 0; + + int retval = util_feature_check(&hdr, attr->features); + if (retval < 0) + return -1; + + if (retval == 0) + rep->part[partidx].rdonly = 1; + + if (rep->remote == NULL) { + /* + * and to be valid, the fields must checksum correctly + * + * NOTE: checksum validation is performed after format version + * and feature check, because if POOL_FEAT_CKSUM_2K flag is set, + * we want to report it as incompatible feature, rather than + * invalid checksum. 
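+		 *
+		 * (As used elsewhere in this file, POOL_HDR_CSUM_END_OFF()
+		 * picks the checksum end offset based on that flag, so a
+		 * header written with the other checksum range would
+		 * otherwise be reported as corrupted rather than merely
+		 * incompatible.)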
+ */ + if (!util_checksum(&hdr, sizeof(hdr), &hdr.checksum, + 0, POOL_HDR_CSUM_END_OFF(&hdr))) { + ERR("invalid checksum of pool header"); + errno = EINVAL; + return -1; + } + + LOG(3, "valid header, signature \"%.8s\"", hdr.signature); + } + + if (util_check_arch_flags(&hdr.arch_flags)) { + ERR("wrong architecture flags"); + errno = EINVAL; + return -1; + } + + /* check pool set UUID */ + if (memcmp(HDR(REP(set, 0), 0)->poolset_uuid, hdr.poolset_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong pool set UUID"); + errno = EINVAL; + return -1; + } + + /* check pool set linkage */ + if (memcmp(HDRP(rep, partidx)->uuid, hdr.prev_part_uuid, + POOL_HDR_UUID_LEN) || + memcmp(HDRN(rep, partidx)->uuid, hdr.next_part_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong part UUID"); + errno = EINVAL; + return -1; + } + + /* check format version */ + if (HDR(rep, 0)->major != hdrp->major) { + ERR("incompatible pool format"); + errno = EINVAL; + return -1; + } + + /* check compatibility features */ + if (HDR(rep, 0)->features.compat != hdrp->features.compat || + HDR(rep, 0)->features.incompat != hdrp->features.incompat || + HDR(rep, 0)->features.ro_compat != hdrp->features.ro_compat) { + ERR("incompatible feature flags"); + errno = EINVAL; + return -1; + } + + /* check poolset options */ + if (util_poolset_check_header_options(set, + HDR(rep, 0)->features.incompat)) + return -1; + + return 0; +} + +/* + * util_header_check_remote -- (internal) validate header of a remote + * pool set file + */ +static int +util_header_check_remote(struct pool_set *set, unsigned partidx) +{ + LOG(3, "set %p partidx %u ", set, partidx); + + /* there is only one replica in remote poolset */ + struct pool_replica *rep = set->replica[0]; + /* opaque info lives at the beginning of mapped memory pool */ + struct pool_hdr *hdrp = rep->part[partidx].hdr; + struct pool_hdr hdr; + + if (util_is_zeroed(hdrp, sizeof(*hdrp))) { + ERR("pool header zeroed"); + errno = EINVAL; + return -1; + } + + memcpy(&hdr, hdrp, sizeof(hdr)); + + util_convert2h_hdr_nocheck(&hdr); + + /* valid header found */ + if (memcmp(HDR(rep, 0)->signature, hdrp->signature, POOL_HDR_SIG_LEN)) { + ERR("pool signature mismatch in part %d", partidx); + errno = EINVAL; + return -1; + } + + /* check format version */ + if (HDR(rep, 0)->major != hdrp->major) { + ERR("pool version mismatch in part %d", partidx); + errno = EINVAL; + return -1; + } + + /* check compatibility features */ + if (HDR(rep, 0)->features.compat != hdrp->features.compat) { + ERR("'may have' compatibility flags mismatch in part %d", + partidx); + errno = EINVAL; + return -1; + } + if (HDR(rep, 0)->features.incompat != hdrp->features.incompat) { + ERR("'must support' compatibility flags mismatch in part %d", + partidx); + errno = EINVAL; + return -1; + } + if (HDR(rep, 0)->features.ro_compat != hdrp->features.ro_compat) { + ERR("'force read-only' compatibility flags mismatch in part %d", + partidx); + errno = EINVAL; + return -1; + } + + /* + * and to be valid, the fields must checksum correctly + * + * NOTE: checksum validation is performed after format version and + * feature check, because if POOL_FEAT_CKSUM_2K flag is set, + * we want to report it as incompatible feature, rather than invalid + * checksum. 
+ */ + if (!util_checksum(&hdr, sizeof(hdr), &hdr.checksum, + 0, POOL_HDR_CSUM_END_OFF(&hdr))) { + ERR("invalid checksum of pool header"); + return -1; + } + + LOG(3, "valid header, signature \"%.8s\"", hdr.signature); + + /* check pool set UUID */ + if (memcmp(HDR(rep, 0)->poolset_uuid, hdrp->poolset_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong pool set UUID in part %d", partidx); + errno = EINVAL; + return -1; + } + + /* check previous replica UUID */ + if (memcmp(HDR(rep, 0)->prev_repl_uuid, hdrp->prev_repl_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong previous replica UUID in part %d", partidx); + errno = EINVAL; + return -1; + } + + /* check next replica UUID */ + if (memcmp(HDR(rep, 0)->next_repl_uuid, hdrp->next_repl_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong next replica UUID in part %d", partidx); + errno = EINVAL; + return -1; + } + + if (memcmp(&HDR(rep, 0)->arch_flags, &hdrp->arch_flags, + sizeof(hdrp->arch_flags))) { + ERR("wrong architecture flags"); + errno = EINVAL; + return -1; + } + + /* check pool set linkage */ + if (memcmp(HDRP(rep, partidx)->uuid, hdrp->prev_part_uuid, + POOL_HDR_UUID_LEN) || + memcmp(HDRN(rep, partidx)->uuid, hdrp->next_part_uuid, + POOL_HDR_UUID_LEN)) { + ERR("wrong part UUID in part %d", partidx); + errno = EINVAL; + return -1; + } + + /* read shutdown state toggle from header */ + set->ignore_sds |= IGNORE_SDS(HDR(rep, 0)); + + if (!set->ignore_sds && partidx == 0) { + struct shutdown_state sds; + shutdown_state_init(&sds, NULL); + for (unsigned p = 0; p < rep->nparts; p++) { + if (shutdown_state_add_part(&sds, + PART(rep, p)->fd, NULL)) + return -1; + } + + if (shutdown_state_check(&sds, &hdrp->sds, rep)) { + errno = EINVAL; + return -1; + } + + shutdown_state_set_dirty(&hdrp->sds, rep); + } + + rep->part[partidx].rdonly = 0; + + return 0; +} + +/* + * util_replica_set_is_pmem -- sets per-replica is_pmem flag + * + * The replica is PMEM if: + * - all parts are on device dax, or + * - all parts are mapped with MAP_SYNC. + * + * It's enough to check only first part because it's already verified + * that either all or none parts are device dax or mapped with MAP_SYNC. + */ +static inline void +util_replica_set_is_pmem(struct pool_replica *rep) +{ + rep->is_pmem = rep->part[0].is_dev_dax || rep->part[0].map_sync || + pmem_is_pmem(rep->part[0].addr, rep->resvsize); +} + +/* + * util_replica_map_local -- (internal) map memory pool for local replica + */ +static int +util_replica_map_local(struct pool_set *set, unsigned repidx, int flags) +{ + LOG(3, "set %p repidx %u flags %d", set, repidx, flags); + + /* + * XXX: Like we reserve space for all parts in this replica when we map + * the first part, we need to reserve the space for all replicas + * upfront. It is not necessary that the replicas are contiguous but + * that way we would not fragment the memory much. I think we should + * leave this to MM, but let's have a note as per our collective minds. + */ + +#ifndef _WIN32 + int remaining_retries = 0; +#else + int remaining_retries = 10; +#endif + int retry_for_contiguous_addr; + size_t mapsize; + /* header size for all headers but the first one */ + size_t hdrsize = (set->options & (OPTION_SINGLEHDR | OPTION_NOHDRS)) ? 
+ 0 : Mmap_align; + void *addr; + struct pool_replica *rep = set->replica[repidx]; + + ASSERTeq(rep->remote, NULL); + ASSERTne(rep->part, NULL); + + do { + retry_for_contiguous_addr = 0; + mapsize = rep->part[0].filesize & ~(Mmap_align - 1); + + /* determine a hint address for mmap() */ + addr = util_map_hint(rep->resvsize, 0); + if (addr == MAP_FAILED) { + LOG(1, "cannot find a contiguous region of given size"); + return -1; + } + + /* map the first part and reserve space for remaining parts */ + if (util_map_part(&rep->part[0], addr, rep->resvsize, 0, + flags, 0) != 0) { + LOG(2, "pool mapping failed - replica #%u part #0", + repidx); + return -1; + } + + VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, + rep->part[0].size); + VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd, + rep->part[0].addr, rep->part[0].size, 0); + + set->zeroed &= rep->part[0].created; + + addr = (char *)rep->part[0].addr + mapsize; + + /* + * map the remaining parts of the usable pool space + * (aligned to memory mapping granularity) + */ + for (unsigned p = 1; p < rep->nparts; p++) { + /* map data part */ + if (util_map_part(&rep->part[p], addr, 0, hdrsize, + flags | MAP_FIXED, 0) != 0) { + /* + * if we can't map the part at the address we + * asked for, unmap all the parts that are + * mapped and remap at a different address. + */ + if ((errno == EINVAL) && + (remaining_retries > 0)) { + LOG(2, "usable space mapping failed - " + "part #%d - retrying", p); + retry_for_contiguous_addr = 1; + remaining_retries--; + + util_unmap_parts(rep, 0, p - 1); + + /* release rest of the VA reserved */ + ASSERTne(addr, NULL); + ASSERTne(addr, MAP_FAILED); + munmap(addr, rep->resvsize - mapsize); + break; + } + LOG(2, "usable space mapping failed - part #%d", + p); + goto err; + } + + VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd, + rep->part[p].addr, rep->part[p].size, + hdrsize); + + mapsize += rep->part[p].size; + set->zeroed &= rep->part[p].created; + addr = (char *)addr + rep->part[p].size; + } + } while (retry_for_contiguous_addr); + + /* + * Initially part[0].size is the size of address space + * reservation for all parts from given replica. After + * mapping that space we need to overwrite part[0].size + * with its actual size to be consistent - size for each + * part should be the actual mapping size of this part + * only - it simplifies future calculations. 
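+	 *
+	 * For example (made-up sizes, header details ignored): with a 32MB
+	 * reservation backed by two 16MB parts, part[0].size is 32MB right
+	 * after the first mapping above and is rewritten to 16MB below.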
+ */ + rep->part[0].size = rep->part[0].filesize & ~(Mmap_align - 1); + + if (util_replica_check_map_sync(set, repidx, 0)) + goto err; + + util_replica_set_is_pmem(rep); + + if (Prefault_at_create) + util_replica_force_page_allocation(rep); + + ASSERTeq(mapsize, rep->repsize); + + LOG(3, "replica #%u addr %p", repidx, rep->part[0].addr); + + return 0; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + if (mapsize < rep->repsize) { + ASSERTne(rep->part[0].addr, NULL); + ASSERTne(rep->part[0].addr, MAP_FAILED); + munmap(rep->part[0].addr, rep->resvsize - mapsize); + } + for (unsigned p = 0; p < rep->nparts; p++) { + util_unmap_part(&rep->part[p]); + } + errno = oerrno; + return -1; +} + +/* + * util_replica_init_headers_local -- (internal) initialize pool headers + */ +static int +util_replica_init_headers_local(struct pool_set *set, unsigned repidx, + int flags, const struct pool_attr *attr) +{ + LOG(3, "set %p repidx %u flags %d attr %p", set, repidx, flags, attr); + + struct pool_replica *rep = set->replica[repidx]; + + /* map all headers - don't care about the address */ + for (unsigned p = 0; p < rep->nhdrs; p++) { + if (util_map_hdr(&rep->part[p], flags, 0) != 0) { + LOG(2, "header mapping failed - part #%d", p); + goto err; + } + } + + /* create headers, set UUID's */ + for (unsigned p = 0; p < rep->nhdrs; p++) { + if (util_header_create(set, repidx, p, attr, 0) != 0) { + LOG(2, "header creation failed - part #%d", p); + goto err; + } + } + + /* unmap all headers */ + for (unsigned p = 0; p < rep->nhdrs; p++) + util_unmap_hdr(&rep->part[p]); + + return 0; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + for (unsigned p = 0; p < rep->nhdrs; p++) { + util_unmap_hdr(&rep->part[p]); + } + errno = oerrno; + return -1; +} + +/* + * util_replica_create_local -- (internal) create a new memory pool for local + * replica + */ +static int +util_replica_create_local(struct pool_set *set, unsigned repidx, int flags, + const struct pool_attr *attr) +{ + LOG(3, "set %p repidx %u flags %d attr %p", set, repidx, flags, attr); + + /* + * the first replica has to be mapped prior to remote ones so if + * a replica is already mapped skip mapping creation + */ + if (PART(REP(set, repidx), 0)->addr == NULL) { + if (util_replica_map_local(set, repidx, flags) != 0) { + LOG(2, "replica #%u map failed", repidx); + return -1; + } + } + + if (attr == NULL) + return 0; + + if (util_replica_init_headers_local(set, repidx, flags, attr) != 0) { + LOG(2, "replica #%u headers initialization failed", repidx); + return -1; + } + return 0; +} + +/* + * util_replica_create_remote -- (internal) create a new memory pool + * for remote replica + */ +static int +util_replica_create_remote(struct pool_set *set, unsigned repidx, int flags, + const struct pool_attr *attr) +{ + LOG(3, "set %p repidx %u flags %d attr %p", set, repidx, flags, attr); + + struct pool_replica *rep = set->replica[repidx]; + + ASSERTne(rep->remote, NULL); + ASSERTne(rep->part, NULL); + ASSERTeq(rep->nparts, 1); + ASSERTeq(rep->nhdrs, 1); + ASSERTne(attr, NULL); + + struct pool_set_part *part = rep->part; + + /* + * A remote replica has one fake part of size equal twice pool header + * size for storing pool header and pool descriptor. 
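+	 * (Concretely: the fake part was added with size 2 * POOL_HDR_SIZE
+	 * by util_parse_add_remote_replica(), so part->size below spans one
+	 * header-sized region for each of the two.)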
+	 */
+	part->size = rep->repsize;
+	ASSERT(IS_PAGE_ALIGNED(part->size));
+	part->remote_hdr = Zalloc(part->size + Pagesize);
+	if (!part->remote_hdr) {
+		ERR("!Zalloc");
+		return -1;
+	}
+
+	part->hdr = PAGE_ALIGN_UP(part->remote_hdr);
+	part->addr = PAGE_ALIGN_UP(part->remote_hdr);
+	part->hdrsize = POOL_HDR_SIZE;
+
+	/* create header, set UUID's */
+	if (util_header_create(set, repidx, 0, attr, 0) != 0) {
+		LOG(2, "header creation failed - part #0");
+		Free(part->remote_hdr);
+		return -1;
+	}
+
+	LOG(3, "replica #%u addr %p", repidx, rep->part[0].addr);
+
+	return 0;
+}
+
+/*
+ * util_replica_close -- close a memory pool replica
+ *
+ * This function unmaps all mapped memory regions.
+ */
+int
+util_replica_close(struct pool_set *set, unsigned repidx)
+{
+	LOG(3, "set %p repidx %u", set, repidx);
+	struct pool_replica *rep = set->replica[repidx];
+
+	if (rep->remote == NULL) {
+		struct pool_set_part *part = PART(rep, 0);
+		if (!set->ignore_sds && part->addr != NULL &&
+				part->size != 0) {
+			struct pool_hdr *hdr = part->addr;
+			RANGE_RW(hdr, sizeof(*hdr), part->is_dev_dax);
+			/*
+			 * Deep drain will call msync on one page in each
+			 * part of the replica to trigger a WPQ flush.
+			 * These pages may have been marked as
+			 * undefined/inaccessible, but msyncing such memory
+			 * is not a bug, so as a workaround temporarily
+			 * disable error reporting.
+			 */
+			VALGRIND_DO_DISABLE_ERROR_REPORTING;
+			util_replica_deep_drain(part->addr, rep->repsize,
+				set, repidx);
+			VALGRIND_DO_ENABLE_ERROR_REPORTING;
+			shutdown_state_clear_dirty(&hdr->sds, rep);
+		}
+		for (unsigned p = 0; p < rep->nhdrs; p++)
+			util_unmap_hdr(&rep->part[p]);
+
+		rep->part[0].size = rep->resvsize;
+		util_unmap_part(&rep->part[0]);
+	} else {
+		LOG(4, "freeing volatile header of remote replica #%u", repidx);
+		Free(rep->part[0].remote_hdr);
+		rep->part[0].remote_hdr = NULL;
+		rep->part[0].hdr = NULL;
+		rep->part[0].hdrsize = 0;
+		rep->part[0].addr = NULL;
+		rep->part[0].size = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * util_poolset_append_new_part -- (internal) creates a new part in each
+ * replica of the poolset
+ */
+static int
+util_poolset_append_new_part(struct pool_set *set, size_t size)
+{
+	LOG(3, "set %p size %zu", set, size);
+
+	if (!set->directory_based)
+		return -1;
+
+	struct pool_set_directory *d;
+	size_t directory_id;
+	char *path;
+	size_t path_len;
+
+	unsigned r;
+	for (r = 0; r < set->nreplicas; ++r) {
+		struct pool_replica *rep = set->replica[r];
+
+		directory_id = set->next_directory_id %
+			VEC_SIZE(&rep->directory);
+		d = VEC_GET(&rep->directory, directory_id);
+
+		path_len = strlen(d->path) + PMEM_FILE_MAX_LEN;
+		if ((path = Malloc(path_len)) == NULL) {
+			ERR("!Malloc");
+			goto err_part_init;
+		}
+
+		snprintf(path, path_len, "%s" OS_DIR_SEP_STR "%0*u%s",
+			d->path, PMEM_FILE_PADDING, set->next_id, PMEM_EXT);
+
+		if (util_replica_add_part(&set->replica[r], path, size) != 0)
+			FATAL("cannot add a new part to the replica info");
+	}
+
+	set->next_directory_id += 1;
+	set->next_id += 1;
+
+	util_poolset_set_size(set);
+
+	return 0;
+
+err_part_init:
+	/* for each replica 0..r-1 remove the last part */
+	for (unsigned rn = 0; rn < r; ++rn) {
+		struct pool_replica *rep = set->replica[rn];
+		unsigned pidx = rep->nparts - 1;
+		Free((void *)(rep->part[pidx].path));
+		rep->part[pidx].path = NULL;
+		rep->nparts--;
+	}
+
+	return -1;
+}
+
+/*
+ * util_pool_extend -- extends the poolset by the provided size
+ */
+void *
+util_pool_extend(struct pool_set *set, size_t *size, size_t minpartsize)
+{
+	LOG(3, "set %p size %zu minpartsize %zu", set, *size, minpartsize);
+
+	if (*size == 0) {
+		ERR("cannot extend pool by 0 bytes");
+		return NULL;
+	}
+
+	if ((set->options & OPTION_SINGLEHDR) == 0) {
+		ERR(
+			"extending the pool by appending parts with headers is not supported!");
+		return NULL;
+	}
+
+	if (set->poolsize + *size > set->resvsize) {
+		*size = set->resvsize - set->poolsize;
+		if (*size < minpartsize) {
+			ERR("exceeded reservation size");
+			return NULL;
+		}
+		LOG(4, "extend size adjusted to not exceed reservation size");
+	}
+
+	size_t old_poolsize = set->poolsize;
+
+	if (util_poolset_append_new_part(set, *size) != 0) {
+		ERR("unable to append a new part to the pool");
+		return NULL;
+	}
+
+	size_t hdrsize = (set->options & OPTION_SINGLEHDR) ? 0 : Mmap_align;
+	void *addr = NULL;
+	void *addr_base = NULL;
+
+	unsigned r;
+	for (r = 0; r < set->nreplicas; r++) {
+		struct pool_replica *rep = set->replica[r];
+		unsigned pidx = rep->nparts - 1;
+		struct pool_set_part *p = &rep->part[pidx];
+
+		if (util_part_open(p, 0, 1 /* create */) != 0) {
+			ERR("cannot open the new part");
+			goto err;
+		}
+
+		addr = (char *)rep->part[0].addr + old_poolsize;
+		if (addr_base == NULL)
+			addr_base = addr;
+
+		if (util_map_part(p, addr, 0, hdrsize,
+				MAP_SHARED | MAP_FIXED, 0) != 0) {
+			ERR("cannot map the new part");
+			goto err;
+		}
+
+		/*
+		 * the new part must be mapped the same way as all the rest
+		 * within a replica
+		 */
+		if (p->map_sync != rep->part[0].map_sync) {
+			if (p->map_sync)
+				ERR("new part cannot be mapped with MAP_SYNC");
+			else
+				ERR("new part mapped with MAP_SYNC");
+			goto err;
+		}
+	}
+
+	/* XXX: mode should be the same as for pmemxxx_create() */
+	if (util_poolset_chmod(set, S_IWUSR | S_IRUSR))
+		goto err;
+
+	util_poolset_fdclose(set);
+
+	return addr_base;
+
+err:
+	/*
+	 * Remove the appended part from each replica processed so far.
+	 * (Index with rn, not r, and stay within nreplicas -- r may equal
+	 * nreplicas when the loop above completed before the failure.)
+	 */
+	for (unsigned rn = 0; rn <= r && rn < set->nreplicas; ++rn) {
+		struct pool_replica *rep = set->replica[rn];
+		unsigned pidx = rep->nparts - 1;
+		struct pool_set_part *p = &rep->part[pidx];
+		rep->nparts--;
+
+		if (p->fd != 0)
+			(void) os_close(p->fd);
+		if (p->created)
+			os_unlink(p->path);
+		Free((void *)p->path);
+		p->path = NULL;
+	}
+	util_poolset_set_size(set);
+
+	return NULL;
+}
+
+/*
+ * util_print_bad_files_cb -- (internal) callback printing names of pool files
+ * containing bad blocks
+ */
+static int
+util_print_bad_files_cb(struct part_file *pf, void *arg)
+{
+	if (!pf->is_remote && pf->part && pf->part->has_bad_blocks)
+		ERR("file contains bad blocks -- '%s'", pf->part->path);
+
+	return 0;
+}
+
+/*
+ * util_pool_create_uuids -- create a new memory pool (set or a single file)
+ * with given uuids
+ *
+ * On success returns 0 and a pointer to a newly allocated structure
+ * containing the info of all the parts of the pool set and replicas.
+ */ +int +util_pool_create_uuids(struct pool_set **setp, const char *path, + size_t poolsize, size_t minsize, size_t minpartsize, + const struct pool_attr *attr, unsigned *nlanes, int can_have_rep, + int remote) +{ + LOG(3, "setp %p path %s poolsize %zu minsize %zu minpartsize %zu " + "pattr %p nlanes %p can_have_rep %i remote %i", setp, path, + poolsize, minsize, minpartsize, attr, nlanes, can_have_rep, + remote); + + /* attributes cannot be NULL for local replicas */ + ASSERT(remote || attr != NULL); + + int flags = MAP_SHARED; + int oerrno; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + /* check if file exists */ + if (poolsize > 0 && exists) { + ERR("file %s already exists", path); + errno = EEXIST; + return -1; + } + + int ret = util_poolset_create_set(setp, path, poolsize, minsize, + IGNORE_SDS(attr)); + if (ret < 0) { + LOG(2, "cannot create pool set -- '%s'", path); + return -1; + } + + struct pool_set *set = *setp; + + ASSERT(set->nreplicas > 0); + + if (!remote && (set->options & OPTION_NOHDRS)) { + ERR( + "the NOHDRS poolset option is not supported for local poolsets"); + errno = EINVAL; + goto err_poolset_free; + } + + if ((attr == NULL) != ((set->options & OPTION_NOHDRS) != 0)) { + ERR( + "pool attributes are not supported for poolsets without headers (with the NOHDRS option)"); + errno = EINVAL; + goto err_poolset_free; + } + + if (set->directory_based && ((set->options & OPTION_SINGLEHDR) == 0)) { + ERR( + "directory based pools are not supported for poolsets with headers (without SINGLEHDR option)"); + errno = EINVAL; + goto err_poolset_free; + } + + if (set->resvsize < minsize) { + ERR("reservation pool size %zu smaller than %zu", + set->resvsize, minsize); + errno = EINVAL; + goto err_poolset_free; + } + + if (set->directory_based && set->poolsize == 0 && + util_poolset_append_new_part(set, minsize) != 0) { + ERR("cannot create a new part in provided directories"); + goto err_poolset_free; + } + + if (attr != NULL && + (attr->features.compat & POOL_FEAT_CHECK_BAD_BLOCKS)) { + int bbs = badblocks_check_poolset(set, 1 /* create */); + if (bbs < 0) { + LOG(1, + "failed to check pool set for bad blocks -- '%s'", + path); + goto err_poolset_free; + } + + if (bbs > 0) { + util_poolset_foreach_part_struct(set, + util_print_bad_files_cb, + NULL); + ERR( + "pool set contains bad blocks and cannot be created, run 'pmempool create --clear-bad-blocks' utility to clear bad blocks and create a pool"); + errno = EIO; + goto err_poolset_free; + } + } + + if (set->poolsize < minsize) { + ERR("net pool size %zu smaller than %zu", + set->poolsize, minsize); + errno = EINVAL; + goto err_poolset_free; + } + + if (remote) { + /* it is a remote replica - it cannot have replicas */ + if (set->nreplicas > 1) { + LOG(2, "remote pool set cannot have replicas"); + errno = EINVAL; + goto err_poolset_free; + } + + /* check if poolset options match remote pool attributes */ + if (attr != NULL && + ((set->options & OPTION_SINGLEHDR) == 0) != + ((attr->features.incompat & + POOL_FEAT_SINGLEHDR) == 0)) { + ERR( + "pool incompat feature flags and remote poolset options do not match"); + errno = EINVAL; + goto err_poolset_free; + } + } + + if (!can_have_rep && set->nreplicas > 1) { + ERR("replication not supported"); + errno = ENOTSUP; + goto err_poolset_free; + } + + if (set->remote && util_remote_load()) { + ERR( + "the pool set requires a remote replica, but the '%s' library cannot be loaded", + LIBRARY_REMOTE); + goto err_poolset_free; + } + + set->zeroed = 1; + + if (attr 
!= NULL) { + if (!util_is_zeroed(attr->poolset_uuid, POOL_HDR_UUID_LEN)) { + memcpy(set->uuid, attr->poolset_uuid, + POOL_HDR_UUID_LEN); + } else { + /* generate pool set UUID */ + ret = util_uuid_generate(set->uuid); + if (ret < 0) { + LOG(2, "cannot generate pool set UUID"); + goto err_poolset; + } + } + + /* generate UUID's for all the parts */ + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + for (unsigned i = 0; i < rep->nhdrs; i++) { + ret = util_uuid_generate(rep->part[i].uuid); + if (ret < 0) { + LOG(2, + "cannot generate pool set part UUID"); + goto err_poolset; + } + } + } + + /* overwrite UUID of the first part if given */ + if (!util_is_zeroed(attr->first_part_uuid, POOL_HDR_UUID_LEN)) { + memcpy(set->replica[0]->part[0].uuid, + attr->first_part_uuid, POOL_HDR_UUID_LEN); + } + } + + ret = util_poolset_files_local(set, minpartsize, 1); + if (ret != 0) + goto err_poolset; + + /* map first local replica - it has to exist prior to remote ones */ + ret = util_replica_map_local(set, 0, flags); + if (ret != 0) + goto err_poolset; + + /* prepare remote replicas first */ + if (set->remote) { + for (unsigned r = 0; r < set->nreplicas; r++) { + if (REP(set, r)->remote == NULL) { + continue; + } + if (util_replica_create_remote(set, r, flags, attr) != + 0) { + LOG(2, "replica #%u creation failed", r); + goto err_create; + } + } + + ret = util_poolset_files_remote(set, minsize, nlanes, + 1 /* create */); + if (ret != 0) + goto err_create; + } + + /* prepare local replicas */ + if (remote) { + if (util_replica_create_local(set, 0, flags, attr) != 0) { + LOG(2, "replica #0 creation failed"); + goto err_create; + } + } else { + for (unsigned r = 0; r < set->nreplicas; r++) { + if (REP(set, r)->remote != NULL) { + continue; + } + if (util_replica_create_local(set, r, flags, attr) != + 0) { + LOG(2, "replica #%u creation failed", r); + goto err_create; + } + } + } + + return 0; + +err_create: + oerrno = errno; + for (unsigned r = 0; r < set->nreplicas; r++) + util_replica_close(set, r); + errno = oerrno; +err_poolset: + oerrno = errno; + util_poolset_close(set, DELETE_CREATED_PARTS); + errno = oerrno; + return -1; + +err_poolset_free: + oerrno = errno; + util_poolset_free(set); + errno = oerrno; + return -1; +} + +/* + * util_pool_create -- create a new memory pool (set or a single file) + * + * On success returns 0 and a pointer to a newly allocated structure + * containing the info of all the parts of the pool set and replicas. + */ +int +util_pool_create(struct pool_set **setp, const char *path, size_t poolsize, + size_t minsize, size_t minpartsize, const struct pool_attr *attr, + unsigned *nlanes, int can_have_rep) +{ + LOG(3, "setp %p path %s poolsize %zu minsize %zu minpartsize %zu " + "attr %p nlanes %p can_have_rep %i", setp, path, poolsize, + minsize, minpartsize, attr, nlanes, can_have_rep); + + return util_pool_create_uuids(setp, path, poolsize, minsize, + minpartsize, attr, nlanes, can_have_rep, POOL_LOCAL); +} + +/* + * util_replica_open_local -- (internal) open a memory pool local replica + */ +static int +util_replica_open_local(struct pool_set *set, unsigned repidx, int flags) +{ + LOG(3, "set %p repidx %u flags %d", set, repidx, flags); + + int remaining_retries = 10; + int retry_for_contiguous_addr; + size_t mapsize; + size_t hdrsize = (set->options & (OPTION_SINGLEHDR | OPTION_NOHDRS)) ? 
+ 0 : Mmap_align; + struct pool_replica *rep = set->replica[repidx]; + void *addr = NULL; + + do { + retry_for_contiguous_addr = 0; + + /* determine a hint address for mmap() if not specified */ + if (addr == NULL) + addr = util_map_hint(rep->resvsize, 0); + if (addr == MAP_FAILED) { + LOG(1, "cannot find a contiguous region of given size"); + return -1; + } + + mapsize = rep->part[0].filesize & ~(Mmap_align - 1); + + /* map the first part and reserve space for remaining parts */ + if (util_map_part(&rep->part[0], addr, rep->resvsize, 0, + flags, 0) != 0) { + LOG(2, "pool mapping failed - replica #%u part #0", + repidx); + return -1; + } + + VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, + rep->resvsize); + VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd, + rep->part[0].addr, rep->resvsize, 0); + + /* map all headers - don't care about the address */ + for (unsigned p = 0; p < rep->nhdrs; p++) { + if (util_map_hdr(&rep->part[p], flags, 0) != 0) { + LOG(2, "header mapping failed - part #%d", p); + goto err; + } + } + + addr = (char *)rep->part[0].addr + mapsize; + + /* + * map the remaining parts of the usable pool space + * (aligned to memory mapping granularity) + */ + for (unsigned p = 1; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + size_t targetsize = mapsize + + ALIGN_DOWN(part->filesize - hdrsize, + part->alignment); + if (targetsize > rep->resvsize) { + ERR( + "pool mapping failed - address space reservation too small"); + errno = EINVAL; + goto err; + } + + /* map data part */ + if (util_map_part(part, addr, 0, hdrsize, + flags | MAP_FIXED, 0) != 0) { + /* + * if we can't map the part at the address we + * asked for, unmap all the parts that are + * mapped and remap at a different address. + */ + if ((errno == EINVAL) && + (remaining_retries > 0)) { + LOG(2, "usable space mapping failed - " + "part #%d - retrying", p); + retry_for_contiguous_addr = 1; + remaining_retries--; + + util_unmap_parts(rep, 0, p - 1); + + /* release rest of the VA reserved */ + munmap(rep->part[0].addr, + rep->resvsize); + break; + } + LOG(2, "usable space mapping failed - part #%d", + p); + goto err; + } + + VALGRIND_REGISTER_PMEM_FILE(part->fd, + part->addr, part->size, hdrsize); + + mapsize += part->size; + addr = (char *)addr + part->size; + } + } while (retry_for_contiguous_addr); + + /* + * Initially part[0].size is the size of address space + * reservation for all parts from given replica. After + * mapping that space we need to overwrite part[0].size + * with its actual size to be consistent - size for each + * part should be the actual mapping size of this part + * only - it simplifies future calculations. 
+ */ + rep->part[0].size = rep->part[0].filesize & ~(Mmap_align - 1); + + if (util_replica_check_map_sync(set, repidx, 1)) + goto err; + + util_replica_set_is_pmem(rep); + + if (Prefault_at_open) + util_replica_force_page_allocation(rep); + + ASSERTeq(mapsize, rep->repsize); + + /* calculate pool size - choose the smallest replica size */ + if (rep->repsize < set->poolsize) + set->poolsize = rep->repsize; + + LOG(3, "replica addr %p", rep->part[0].addr); + + return 0; +err: + LOG(4, "error clean up"); + int oerrno = errno; + if (mapsize < rep->repsize) { + ASSERTne(rep->part[0].addr, NULL); + ASSERTne(rep->part[0].addr, MAP_FAILED); + munmap(rep->part[0].addr, rep->resvsize - mapsize); + } + for (unsigned p = 0; p < rep->nhdrs; p++) + util_unmap_hdr(&rep->part[p]); + for (unsigned p = 0; p < rep->nparts; p++) + util_unmap_part(&rep->part[p]); + errno = oerrno; + return -1; +} + +/* + * util_replica_open_remote -- open a memory pool for remote replica + */ +int +util_replica_open_remote(struct pool_set *set, unsigned repidx, int flags) +{ + LOG(3, "set %p repidx %u flags %d", set, repidx, flags); + + struct pool_replica *rep = set->replica[repidx]; + + ASSERTne(rep->remote, NULL); + ASSERTne(rep->part, NULL); + ASSERTeq(rep->nparts, 1); + ASSERTeq(rep->nhdrs, 1); + + struct pool_set_part *part = rep->part; + + part->size = rep->repsize; + ASSERT(IS_PAGE_ALIGNED(part->size)); + part->remote_hdr = Zalloc(part->size + Pagesize); + if (!part->remote_hdr) { + ERR("!Zalloc"); + return -1; + } + + part->hdr = PAGE_ALIGN_UP(part->remote_hdr); + part->addr = PAGE_ALIGN_UP(part->remote_hdr); + part->hdrsize = POOL_HDR_SIZE; + + LOG(3, "replica #%u addr %p", repidx, rep->part[0].addr); + + return 0; +} + +/* + * util_replica_open -- open a memory pool replica + */ +int +util_replica_open(struct pool_set *set, unsigned repidx, int flags) +{ + LOG(3, "set %p repidx %u flags %d", set, repidx, flags); + + if (set->replica[repidx]->remote) + return util_replica_open_remote(set, repidx, flags); + + return util_replica_open_local(set, repidx, flags); +} + +/* + * util_replica_set_attr -- overwrite existing replica attributes + */ +int +util_replica_set_attr(struct pool_replica *rep, + const struct rpmem_pool_attr *rattr) +{ + LOG(3, "rep %p, rattr %p", rep, rattr); + ASSERT(rattr != NULL || rep->nhdrs == 0); + + if (rattr != NULL && rep->nhdrs == 0) { + ERR( + "cannot set pool attributes for a replica without headers (with the NOHDRS option)"); + errno = EINVAL; + return -1; + } + int flags = MAP_SHARED; + + /* map all headers - don't care about the address */ + for (unsigned p = 0; p < rep->nparts; p++) { + if (util_map_hdr(&rep->part[p], flags, 0) != 0) { + LOG(2, "header mapping failed - part #%d", p); + goto err; + } + } + + for (unsigned p = 0; p < rep->nhdrs; p++) { + ASSERTne(rattr, NULL); + + struct pool_hdr *hdrp = HDR(rep, p); + ASSERTne(hdrp, NULL); + util_convert2h_hdr_nocheck(hdrp); + + util_set_rpmem_attr(hdrp, rattr); + + if (hdrp == HDR(rep, 0)) + memcpy(hdrp->uuid, rattr->uuid, POOL_HDR_UUID_LEN); + if (hdrp == HDRP(rep, 0)) + memcpy(hdrp->next_part_uuid, rattr->uuid, + POOL_HDR_UUID_LEN); + if (hdrp == HDRN(rep, 0)) + memcpy(hdrp->prev_part_uuid, rattr->uuid, + POOL_HDR_UUID_LEN); + + util_convert2le_hdr(hdrp); + + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, + 1, POOL_HDR_CSUM_END_OFF(hdrp)); + + /* store pool's header */ + util_persist_auto(rep->is_pmem, hdrp, sizeof(*hdrp)); + } + + /* unmap all headers */ + for (unsigned p = 0; p < rep->nhdrs; p++) + 
	util_unmap_hdr(&rep->part[p]);
+
+	return 0;
+err:
+	for (unsigned p = 0; p < rep->nhdrs; p++) {
+		util_unmap_hdr(&rep->part[p]);
+	}
+	return -1;
+}
+
+/*
+ * util_pool_hdr2attr -- get pool attributes from a pool header
+ */
+void
+util_pool_hdr2attr(struct pool_attr *attr, struct pool_hdr *hdr)
+{
+	LOG(3, "attr %p, hdr %p", attr, hdr);
+	ASSERTne(attr, NULL);
+	ASSERTne(hdr, NULL);
+	memset(attr, 0, sizeof(*attr));
+	memcpy(attr->signature, hdr->signature, POOL_HDR_SIG_LEN);
+	attr->major = hdr->major;
+	attr->features.compat = hdr->features.compat;
+	attr->features.incompat = hdr->features.incompat;
+	attr->features.ro_compat = hdr->features.ro_compat;
+	memcpy(attr->poolset_uuid, hdr->poolset_uuid, POOL_HDR_UUID_LEN);
+}
+
+/*
+ * util_pool_attr2hdr -- copy pool attributes into pool header
+ */
+void
+util_pool_attr2hdr(struct pool_hdr *hdr, const struct pool_attr *attr)
+{
+	LOG(3, "hdr %p, attr %p", hdr, attr);
+	ASSERTne(hdr, NULL);
+	ASSERTne(attr, NULL);
+	memcpy(hdr->signature, attr->signature, POOL_HDR_SIG_LEN);
+	hdr->major = attr->major;
+	hdr->features.compat = attr->features.compat;
+	hdr->features.incompat = attr->features.incompat;
+	hdr->features.ro_compat = attr->features.ro_compat;
+}
+
+/*
+ * util_unmap_all_hdrs -- unmap all pool set headers
+ */
+static void
+util_unmap_all_hdrs(struct pool_set *set)
+{
+	LOG(3, "set %p", set);
+
+	for (unsigned r = 0; r < set->nreplicas; r++) {
+		struct pool_replica *rep = set->replica[r];
+		if (rep->remote == NULL) {
+			for (unsigned p = 0; p < rep->nhdrs; p++)
+				util_unmap_hdr(&rep->part[p]);
+		} else {
+			/*
+			 * hdr & hdrsize were set only for util_header_check(),
+			 * they will not be used any more. The memory will be
+			 * freed by util_replica_close()
+			 */
+			rep->part[0].hdr = NULL;
+			rep->part[0].hdrsize = 0;
+		}
+	}
+}
+
+/*
+ * util_replica_check -- check headers, check UUIDs, check replica linkage
+ */
+static int
+util_replica_check(struct pool_set *set, const struct pool_attr *attr)
+{
+	LOG(3, "set %p attr %p", set, attr);
+
+	/* read shutdown state toggle from header */
+	set->ignore_sds |= IGNORE_SDS(HDR(REP(set, 0), 0));
+
+	for (unsigned r = 0; r < set->nreplicas; r++) {
+		struct pool_replica *rep = set->replica[r];
+		for (unsigned p = 0; p < rep->nhdrs; p++) {
+			if (util_header_check(set, r, p, attr) != 0) {
+				LOG(2, "header check failed - part #%d", p);
+				return -1;
+			}
+			set->rdonly |= rep->part[p].rdonly;
+		}
+
+		if (memcmp(HDR(REPP(set, r), 0)->uuid,
+				HDR(REP(set, r), 0)->prev_repl_uuid,
+				POOL_HDR_UUID_LEN) ||
+		    memcmp(HDR(REPN(set, r), 0)->uuid,
+				HDR(REP(set, r), 0)->next_repl_uuid,
+				POOL_HDR_UUID_LEN)) {
+			ERR("wrong replica UUID");
+			errno = EINVAL;
+			return -1;
+		}
+		if (!set->ignore_sds && !rep->remote && rep->nhdrs) {
+			struct shutdown_state sds;
+			shutdown_state_init(&sds, NULL);
+			for (unsigned p = 0; p < rep->nparts; p++) {
+				if (shutdown_state_add_part(&sds,
+						PART(rep, p)->fd, NULL))
+					return -1;
+			}
+
+			ASSERTne(rep->nhdrs, 0);
+			ASSERTne(rep->nparts, 0);
+			if (shutdown_state_check(&sds, &HDR(rep, 0)->sds,
+					rep)) {
+				LOG(2, "ADR failure detected");
+				errno = EINVAL;
+				return -1;
+			}
+			shutdown_state_set_dirty(&HDR(rep, 0)->sds,
+				rep);
+		}
+	}
+	return 0;
+}
+
+/*
+ * util_pool_has_device_dax -- (internal) check if poolset has any device dax
+ */
+int
+util_pool_has_device_dax(struct pool_set *set)
+{
+	for (unsigned r = 0; r < set->nreplicas; ++r) {
+		struct pool_replica *rep = REP(set, r);
+		/* either all the parts must be Device DAX or none */
+		if (PART(rep, 0)->is_dev_dax)
return 1; + } + return 0; +} + +/* + * util_pool_open_nocheck -- open a memory pool (set or a single file) + * + * This function opens a pool set without checking the header values. + */ +int +util_pool_open_nocheck(struct pool_set *set, unsigned flags) +{ + LOG(3, "set %p flags 0x%x", set, flags); + + int cow = flags & POOL_OPEN_COW; + + if (cow && util_pool_has_device_dax(set)) { + ERR("device dax cannot be mapped privately"); + errno = ENOTSUP; + return -1; + } + + int mmap_flags = cow ? MAP_PRIVATE|MAP_NORESERVE : MAP_SHARED; + int oerrno; + + ASSERTne(set, NULL); + ASSERT(set->nreplicas > 0); + + if (flags & POOL_OPEN_CHECK_BAD_BLOCKS) { + /* check if any bad block recovery file exists */ + int bfe = badblocks_recovery_file_exists(set); + if (bfe > 0) { + ERR( + "error: a bad block recovery file exists, run 'pmempool sync --bad-blocks' utility to try to recover the pool"); + errno = EINVAL; + return -1; + } + if (bfe < 0) { + LOG(1, + "an error occurred when checking whether recovery file exists."); + return -1; + } + + int bbs = badblocks_check_poolset(set, 0 /* not create */); + if (bbs < 0) { + LOG(1, "failed to check pool set for bad blocks"); + return -1; + } + + if (bbs > 0) { + if (flags & POOL_OPEN_IGNORE_BAD_BLOCKS) { + LOG(1, + "WARNING: pool set contains bad blocks, ignoring"); + } else { + ERR( + "pool set contains bad blocks and cannot be opened, run 'pmempool sync --bad-blocks' utility to try to recover the pool"); + errno = EIO; + return -1; + } + } + } + + if (set->remote && util_remote_load()) { + ERR("the pool set requires a remote replica, " + "but the '%s' library cannot be loaded", + LIBRARY_REMOTE); + return -1; + } + + int ret = util_poolset_files_local(set, 0 /* minpartsize */, 0); + if (ret != 0) + goto err_poolset; + + set->rdonly = 0; + + for (unsigned r = 0; r < set->nreplicas; r++) { + if (util_replica_open(set, r, mmap_flags) != 0) { + LOG(2, "replica #%u open failed", r); + goto err_replica; + } + } + + if (set->remote) { + ret = util_poolset_files_remote(set, 0, NULL, 0); + if (ret != 0) + goto err_replica; + } + + util_unmap_all_hdrs(set); + + return 0; + +err_replica: + LOG(4, "error clean up"); + oerrno = errno; + for (unsigned r = 0; r < set->nreplicas; r++) + util_replica_close(set, r); + errno = oerrno; +err_poolset: + oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; + return -1; +} + +/* + * util_read_compat_features -- (internal) read compat features from the header + */ +static int +util_read_compat_features(struct pool_set *set, uint32_t *compat_features) +{ + LOG(3, "set %p pcompat_features %p", set, compat_features); + + *compat_features = 0; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + + if (util_part_open(part, 0, 0 /* create */)) { + LOG(1, "!cannot open the part -- \"%s\"", + part->path); + /* try to open the next part */ + continue; + } + + if (util_map_hdr(part, MAP_SHARED, 0) != 0) { + LOG(1, "header mapping failed -- \"%s\"", + part->path); + util_part_fdclose(part); + return -1; + } + + struct pool_hdr *hdrp = part->hdr; + *compat_features = hdrp->features.compat; + + util_unmap_hdr(part); + util_part_fdclose(part); + + /* exit on the first successfully opened part */ + return 0; + } + } + + return 0; +} + +/* + * unlink_remote_replicas -- removes remote replicas from poolset + * + * It is necessary when COW flag is set 
because remote replicas + * cannot be mapped privately + */ +static int +unlink_remote_replicas(struct pool_set *set) +{ + unsigned i = 0; + while (i < set->nreplicas) { + if (set->replica[i]->remote == NULL) { + i++; + continue; + } + + util_replica_close(set, i); + int ret = util_replica_close_remote(set->replica[i], i, + DO_NOT_DELETE_PARTS); + if (ret != 0) + return ret; + + size_t size = sizeof(set->replica[i]) * + (set->nreplicas - i - 1); + memmove(&set->replica[i], &set->replica[i + 1], size); + set->nreplicas--; + } + + set->remote = 0; + return 0; +} + +/* + * util_pool_open -- open a memory pool (set or a single file) + * + * This routine does all the work, but takes a rdonly flag so internal + * calls can map a read-only pool if required. + */ +int +util_pool_open(struct pool_set **setp, const char *path, size_t minpartsize, + const struct pool_attr *attr, unsigned *nlanes, void *addr, + unsigned flags) +{ + LOG(3, "setp %p path %s minpartsize %zu attr %p nlanes %p " + "addr %p flags 0x%x ", setp, path, minpartsize, attr, nlanes, + addr, flags); + + int cow = flags & POOL_OPEN_COW; + int mmap_flags = cow ? MAP_PRIVATE|MAP_NORESERVE : MAP_SHARED; + int oerrno; + + /* do not check minsize */ + int ret = util_poolset_create_set(setp, path, 0, 0, + flags & POOL_OPEN_IGNORE_SDS); + if (ret < 0) { + LOG(2, "cannot open pool set -- '%s'", path); + return -1; + } + + if ((*setp)->replica[0]->nparts == 0) { + errno = ENOENT; + ERR("!no parts in replicas"); + goto err_poolset_free; + } + + if (cow && (*setp)->replica[0]->part[0].is_dev_dax) { + ERR("device dax cannot be mapped privately"); + errno = ENOTSUP; + goto err_poolset_free; + } + + struct pool_set *set = *setp; + + ASSERT(set->nreplicas > 0); + + uint32_t compat_features; + + if (util_read_compat_features(set, &compat_features)) { + LOG(1, "reading compat features failed"); + goto err_poolset_free; + } + + if (compat_features & POOL_FEAT_CHECK_BAD_BLOCKS) { + /* check if any bad block recovery file exists */ + int bfe = badblocks_recovery_file_exists(set); + if (bfe > 0) { + ERR( + "error: a bad block recovery file exists, run 'pmempool sync --bad-blocks' utility to try to recover the pool"); + errno = EINVAL; + goto err_poolset_free; + } + + if (bfe < 0) { + LOG(1, + "an error occurred when checking whether recovery file exists."); + goto err_poolset_free; + } + + int bbs = badblocks_check_poolset(set, 0 /* not create */); + if (bbs < 0) { + LOG(1, + "failed to check pool set for bad blocks -- '%s'", + path); + goto err_poolset_free; + } + + if (bbs > 0) { + if (flags & POOL_OPEN_IGNORE_BAD_BLOCKS) { + LOG(1, + "WARNING: pool set contains bad blocks, ignoring -- '%s'", + path); + } else { + ERR( + "pool set contains bad blocks and cannot be opened, run 'pmempool sync --bad-blocks' utility to try to recover the pool -- '%s'", + path); + errno = EIO; + goto err_poolset_free; + } + } + } + + if (set->remote && util_remote_load()) { + ERR( + "the pool set requires a remote replica, but the '%s' library cannot be loaded", + LIBRARY_REMOTE); + goto err_poolset_free; + } + + ret = util_poolset_files_local(set, minpartsize, 0); + if (ret != 0) + goto err_poolset; + + for (unsigned r = 0; r < set->nreplicas; r++) { + if (util_replica_open(set, r, mmap_flags) != 0) { + LOG(2, "replica #%u open failed", r); + goto err_replica; + } + } + + if (set->remote) { + /* do not check minsize */ + ret = util_poolset_files_remote(set, 0, nlanes, 0); + if (ret != 0) + goto err_replica; + } + + /* check headers, check UUID's, check replicas linkage */ 
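+	/* a NULL attr means the caller opted out of this verification */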
+ if (attr != NULL && util_replica_check(set, attr)) + goto err_replica; + + /* unmap all headers */ + util_unmap_all_hdrs(set); + + /* remove all remote replicas from poolset when cow */ + if (cow && set->remote) { + ret = unlink_remote_replicas(set); + if (ret != 0) + goto err_replica; + } + + return 0; + +err_replica: + LOG(4, "error clean up"); + oerrno = errno; + for (unsigned r = 0; r < set->nreplicas; r++) + util_replica_close(set, r); + errno = oerrno; +err_poolset: + oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; + return -1; + +err_poolset_free: + oerrno = errno; + util_poolset_free(*setp); + errno = oerrno; + return -1; +} + +/* + * util_pool_open_remote -- open a remote pool set file + * + * This routine does all the work, but takes a rdonly flag so internal + * calls can map a read-only pool if required. + */ +int +util_pool_open_remote(struct pool_set **setp, const char *path, int cow, + size_t minpartsize, struct rpmem_pool_attr *rattr) +{ + LOG(3, "setp %p path %s cow %d minpartsize %zu rattr %p", + setp, path, cow, minpartsize, rattr); + + int flags = cow ? MAP_PRIVATE|MAP_NORESERVE : MAP_SHARED; + int oerrno; + + /* do not check minsize */ + int ret = util_poolset_create_set(setp, path, 0, 0, 0); + if (ret < 0) { + LOG(2, "cannot open pool set -- '%s'", path); + return -1; + } + + if (cow && (*setp)->replica[0]->part[0].is_dev_dax) { + ERR("device dax cannot be mapped privately"); + errno = ENOTSUP; + return -1; + } + + struct pool_set *set = *setp; + + if (set->nreplicas > 1) { + LOG(2, "remote pool set cannot have replicas"); + goto err_poolset; + } + + uint32_t compat_features; + + if (util_read_compat_features(set, &compat_features)) { + LOG(1, "reading compat features failed"); + goto err_poolset; + } + + if (compat_features & POOL_FEAT_CHECK_BAD_BLOCKS) { + /* check if there are any bad blocks */ + int bbs = badblocks_check_poolset(set, 0 /* not create */); + if (bbs < 0) { + LOG(1, + "failed to check the remote replica for bad blocks -- '%s'", + path); + goto err_poolset; + } + + if (bbs > 0) { + ERR( + "remote replica contains bad blocks and cannot be opened, run 'pmempool sync --bad-blocks' utility to recreate it -- '%s'", + path); + errno = EIO; + goto err_poolset; + } + } + + ret = util_poolset_files_local(set, minpartsize, 0); + if (ret != 0) + goto err_poolset; + + if (util_replica_open(set, 0, flags) != 0) { + LOG(2, "replica open failed"); + goto err_replica; + } + + struct pool_replica *rep = set->replica[0]; + + set->rdonly |= rep->part[0].rdonly; + + /* check headers, check UUID's, check replicas linkage */ + for (unsigned p = 0; p < rep->nhdrs; p++) { + if (util_header_check_remote(set, p) != 0) { + LOG(2, "header check failed - part #%d", p); + goto err_replica; + } + set->rdonly |= rep->part[p].rdonly; + } + + if (rep->nhdrs > 0) { + /* header exists, copy pool attributes */ + struct pool_hdr *hdr = rep->part[0].hdr; + util_get_rpmem_attr(rattr, hdr); + } else { + /* header does not exist, zero pool attributes */ + memset(rattr, 0, sizeof(*rattr)); + } + + /* unmap all headers */ + for (unsigned p = 0; p < rep->nhdrs; p++) + util_unmap_hdr(&rep->part[p]); + + return 0; + +err_replica: + LOG(4, "error clean up"); + oerrno = errno; + util_replica_close(set, 0); + errno = oerrno; +err_poolset: + oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; + return -1; +} + +/* + * util_is_poolset_file -- check if specified file is a poolset file + * + * Return value: + * -1 - error + * 0 - not a 
poolset + * 1 - is a poolset + */ +int +util_is_poolset_file(const char *path) +{ + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + if (type == TYPE_DEVDAX) + return 0; + + int fd = util_file_open(path, NULL, 0, O_RDONLY); + if (fd < 0) + return -1; + + int ret = 0; + ssize_t sret; + char signature[POOLSET_HDR_SIG_LEN]; + size_t rd = 0; + do { + sret = util_read(fd, &signature[rd], sizeof(signature) - rd); + if (sret > 0) + rd += (size_t)sret; + } while (sret > 0); + if (sret < 0) { + ERR("!read"); + ret = -1; + goto out; + } else if (rd != sizeof(signature)) { + ret = 0; + goto out; + } + + if (memcmp(signature, POOLSET_HDR_SIG, POOLSET_HDR_SIG_LEN) == 0) + ret = 1; +out: + os_close(fd); + return ret; +} +/* + * util_poolset_foreach_part_struct -- walk through all poolset file parts + * of the given set + * + * Stops processing if callback returns non-zero value. + * The value returned by callback is returned to the caller. + */ +int +util_poolset_foreach_part_struct(struct pool_set *set, + int (*callback)(struct part_file *pf, void *arg), void *arg) +{ + LOG(3, "set %p callback %p arg %p", set, callback, arg); + + ASSERTne(callback, NULL); + + int ret; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct part_file cbdata; + if (set->replica[r]->remote) { + cbdata.is_remote = 1; + cbdata.remote = set->replica[r]->remote; + cbdata.part = NULL; + ret = (*callback)(&cbdata, arg); + if (ret) + return ret; + } else { + cbdata.is_remote = 0; + cbdata.remote = NULL; + for (unsigned p = 0; p < set->replica[r]->nparts; p++) { + cbdata.part = &set->replica[r]->part[p]; + ret = (*callback)(&cbdata, arg); + if (ret) + return ret; + } + } + } + + return 0; +} + +/* + * util_poolset_foreach_part -- walk through all poolset file parts + * + * Stops processing if callback returns non-zero value. + * The value returned by callback is returned to the caller. + * + * Return value: + * 0 - all part files have been processed + * -1 - parsing poolset file error + */ +int +util_poolset_foreach_part(const char *path, + int (*callback)(struct part_file *pf, void *arg), void *arg) +{ + LOG(3, "path %s callback %p arg %p", path, callback, arg); + + ASSERTne(callback, NULL); + + int fd = os_open(path, O_RDONLY); + if (fd < 0) { + ERR("!open: path \"%s\"", path); + return -1; + } + + struct pool_set *set; + int ret = util_poolset_parse(&set, path, fd); + if (ret) { + ERR("util_poolset_parse failed -- '%s'", path); + ret = -1; + goto err_close; + } + + ret = util_poolset_foreach_part_struct(set, callback, arg); + + /* + * Make sure callback does not return -1, + * because this value is reserved for parsing + * error. 
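+	 *
+	 * A hypothetical callback honoring this contract (the name and
+	 * the counter argument are illustrative, not part of this
+	 * patch) returns 0 to continue and any non-zero value other
+	 * than the reserved -1 to stop:
+	 *
+	 *	static int
+	 *	count_local_parts_cb(struct part_file *pf, void *arg)
+	 *	{
+	 *		if (!pf->is_remote)
+	 *			(*(unsigned *)arg)++;
+	 *		return 0;
+	 *	}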
+ */ + ASSERTne(ret, -1); + util_poolset_free(set); + +err_close: + os_close(fd); + return ret; +} + +/* + * util_poolset_size -- get size of poolset, returns 0 on error + */ +size_t +util_poolset_size(const char *path) +{ + int fd = os_open(path, O_RDONLY); + if (fd < 0) + return 0; + + size_t size = 0; + struct pool_set *set; + if (util_poolset_parse(&set, path, fd)) + goto err_close; + + size = set->poolsize; + + util_poolset_free(set); +err_close: + os_close(fd); + return size; +} + +/* + * util_replica_fdclose -- close all parts of given replica + */ +void +util_replica_fdclose(struct pool_replica *rep) +{ + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + util_part_fdclose(part); + } +} + +/* + * util_replica_deep_common -- performs common calculations + * on all parts from replica to define intersection ranges + * for final flushing operations that take place in + * os_part_deep_common function. + */ +int +util_replica_deep_common(const void *addr, size_t len, struct pool_set *set, + unsigned replica_id, int flush) +{ + LOG(3, "addr %p len %zu set %p replica_id %u flush %d", + addr, len, set, replica_id, flush); + + struct pool_replica *rep = set->replica[replica_id]; + uintptr_t rep_start = (uintptr_t)rep->part[0].addr; + uintptr_t rep_end = rep_start + rep->repsize; + uintptr_t start = (uintptr_t)addr; + uintptr_t end = start + len; + + ASSERT(start >= rep_start); + ASSERT(end <= rep_end); + + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + uintptr_t part_start = (uintptr_t)part->addr; + uintptr_t part_end = part_start + part->size; + /* init intersection start and end addresses */ + uintptr_t range_start = start; + uintptr_t range_end = end; + + if (part_start > end || part_end < start) + continue; + /* recalculate intersection addresses */ + if (part_start > start) + range_start = part_start; + if (part_end < end) + range_end = part_end; + size_t range_len = range_end - range_start; + + LOG(15, "perform deep flushing for replica %u " + "part %p, addr %p, len %lu", + replica_id, part, (void *)range_start, range_len); + if (os_part_deep_common(rep, p, (void *)range_start, + range_len, flush)) { + LOG(1, "os_part_deep_common(%p, %p, %lu)", + part, (void *)range_start, range_len); + return -1; + } + } + return 0; +} + +/* + * util_replica_deep_persist -- wrapper for util_replica_deep_common + * Calling the target precedes initialization of function that + * partly defines way of deep replica flushing. + */ +int +util_replica_deep_persist(const void *addr, size_t len, struct pool_set *set, + unsigned replica_id) +{ + LOG(3, "addr %p len %zu set %p replica_id %u", + addr, len, set, replica_id); + + int flush = 1; + return util_replica_deep_common(addr, len, set, replica_id, flush); +} + +/* + * util_replica_deep_drain -- wrapper for util_replica_deep_common + * Calling the target precedes initialization of function that + * partly defines way of deep replica flushing. 
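+ *
+ * A hypothetical call draining the whole of replica 0 (the address and
+ * length are taken from the replica's first part mapping):
+ *
+ *	util_replica_deep_drain(REP(set, 0)->part[0].addr,
+ *		REP(set, 0)->repsize, set, 0);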
+ */ +int +util_replica_deep_drain(const void *addr, size_t len, struct pool_set *set, + unsigned replica_id) +{ + LOG(3, "addr %p len %zu set %p replica_id %u", + addr, len, set, replica_id); + + int flush = 0; + return util_replica_deep_common(addr, len, set, replica_id, flush); +} diff --git a/src/pmdk/src/common/set.h b/src/pmdk/src/common/set.h new file mode 100644 index 000000000..ba4603dd9 --- /dev/null +++ b/src/pmdk/src/common/set.h @@ -0,0 +1,440 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * set.h -- internal definitions for set module + */ + +#ifndef PMDK_SET_H +#define PMDK_SET_H 1 + +#include +#include +#include + +#include + +#include "out.h" +#include "vec.h" +#include "pool_hdr.h" +#include "librpmem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * pool sets & replicas + */ +#define POOLSET_HDR_SIG "PMEMPOOLSET" +#define POOLSET_HDR_SIG_LEN 11 /* does NOT include '\0' */ + +#define POOLSET_REPLICA_SIG "REPLICA" +#define POOLSET_REPLICA_SIG_LEN 7 /* does NOT include '\0' */ + +#define POOLSET_OPTION_SIG "OPTION" +#define POOLSET_OPTION_SIG_LEN 6 /* does NOT include '\0' */ + +/* pool set option flags */ +enum pool_set_option_flag { + OPTION_UNKNOWN = 0x0, + OPTION_SINGLEHDR = 0x1, /* pool headers only in the first part */ + OPTION_NOHDRS = 0x2, /* no pool headers, remote replicas only */ +}; + +struct pool_set_option { + const char *name; + enum pool_set_option_flag flag; +}; + +#define POOL_LOCAL 0 +#define POOL_REMOTE 1 + +#define REPLICAS_DISABLED 0 +#define REPLICAS_ENABLED 1 + +/* util_pool_open flags */ +#define POOL_OPEN_COW 1 /* copy-on-write mode */ +#define POOL_OPEN_IGNORE_SDS 2 /* ignore shutdown state */ +#define POOL_OPEN_IGNORE_BAD_BLOCKS 4 /* ignore bad blocks */ +#define POOL_OPEN_CHECK_BAD_BLOCKS 8 /* check bad blocks */ + +enum del_parts_mode { + DO_NOT_DELETE_PARTS, /* do not delete part files */ + DELETE_CREATED_PARTS, /* delete only newly created parts files */ + DELETE_ALL_PARTS /* force delete all parts files */ +}; + +struct pool_set_part { + /* populated by a pool set file parser */ + const char *path; + size_t filesize; /* aligned to page size */ + int fd; + int flags; /* stores flags used when opening the file */ + /* valid only if fd >= 0 */ + int is_dev_dax; /* indicates if the part is on device dax */ + size_t alignment; /* internal alignment (Device DAX only) */ + int created; /* indicates newly created (zeroed) file */ + + /* util_poolset_open/create */ + void *remote_hdr; /* allocated header for remote replica */ + void *hdr; /* base address of header */ + size_t hdrsize; /* size of the header mapping */ + int hdr_map_sync; /* header mapped with MAP_SYNC */ + void *addr; /* base address of the mapping */ + size_t size; /* size of the mapping - page aligned */ + int map_sync; /* part has been mapped with MAP_SYNC flag */ + int rdonly; /* is set based on compat features, affects */ + /* the whole poolset */ + uuid_t uuid; + int has_bad_blocks; /* part file contains bad blocks */ + int sds_dirty_modified; /* sds dirty flag was set */ +}; + +struct pool_set_directory { + const char *path; + size_t resvsize; /* size of the address space reservation */ + +}; + +struct remote_replica { + void *rpp; /* RPMEMpool opaque handle */ + char *node_addr; /* address of a remote node */ + /* poolset descriptor is a pool set file name on a remote node */ + char *pool_desc; /* descriptor of a poolset */ +}; + +struct pool_replica { + unsigned nparts; + unsigned nallocated; + unsigned nhdrs; /* should be 0, 1 or nparts */ + size_t repsize; /* total size of all the parts (mappings) */ + size_t resvsize; /* min size of the address space reservation */ + int is_pmem; /* true if all the parts are in PMEM */ + struct remote_replica *remote; /* not NULL if the replica */ + /* is a remote one */ + VEC(, struct pool_set_directory) directory; + struct pool_set_part part[]; +}; + +struct pool_set { + char *path; /* path of the poolset file */ + unsigned nreplicas; + uuid_t uuid; + int rdonly; + int zeroed; /* true if all the parts are new files 
*/ + size_t poolsize; /* the smallest replica size */ + int has_bad_blocks; /* pool set contains bad blocks */ + int remote; /* true if contains a remote replica */ + unsigned options; /* enabled pool set options */ + + int directory_based; + size_t resvsize; + + unsigned next_id; + unsigned next_directory_id; + + int ignore_sds; /* don't use shutdown state */ + struct pool_replica *replica[]; +}; + +struct part_file { + int is_remote; + /* + * Pointer to the part file structure - + * - not-NULL only for a local part file + */ + struct pool_set_part *part; + /* + * Pointer to the replica structure - + * - not-NULL only for a remote replica + */ + struct remote_replica *remote; +}; + +struct pool_attr { + char signature[POOL_HDR_SIG_LEN]; /* pool signature */ + uint32_t major; /* format major version number */ + features_t features; /* features flags */ + unsigned char poolset_uuid[POOL_HDR_UUID_LEN]; /* pool uuid */ + unsigned char first_part_uuid[POOL_HDR_UUID_LEN]; /* first part uuid */ + unsigned char prev_repl_uuid[POOL_HDR_UUID_LEN]; /* prev replica uuid */ + unsigned char next_repl_uuid[POOL_HDR_UUID_LEN]; /* next replica uuid */ + unsigned char arch_flags[POOL_HDR_ARCH_LEN]; /* arch flags */ +}; + +/* get index of the (r)th replica */ +static inline unsigned +REPidx(const struct pool_set *set, unsigned r) +{ + ASSERTne(set->nreplicas, 0); + return r % set->nreplicas; +} + +/* get index of the (r + 1)th replica */ +static inline unsigned +REPNidx(const struct pool_set *set, unsigned r) +{ + ASSERTne(set->nreplicas, 0); + return (r + 1) % set->nreplicas; +} + +/* get index of the (r - 1)th replica */ +static inline unsigned +REPPidx(const struct pool_set *set, unsigned r) +{ + ASSERTne(set->nreplicas, 0); + return (set->nreplicas + r - 1) % set->nreplicas; +} + +/* get index of the (r)th part */ +static inline unsigned +PARTidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nparts, 0); + return p % rep->nparts; +} + +/* get index of the (r + 1)th part */ +static inline unsigned +PARTNidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nparts, 0); + return (p + 1) % rep->nparts; +} + +/* get index of the (r - 1)th part */ +static inline unsigned +PARTPidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nparts, 0); + return (rep->nparts + p - 1) % rep->nparts; +} + +/* get index of the (r)th part */ +static inline unsigned +HDRidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nhdrs, 0); + return p % rep->nhdrs; +} + +/* get index of the (r + 1)th part */ +static inline unsigned +HDRNidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nhdrs, 0); + return (p + 1) % rep->nhdrs; +} + +/* get index of the (r - 1)th part */ +static inline unsigned +HDRPidx(const struct pool_replica *rep, unsigned p) +{ + ASSERTne(rep->nhdrs, 0); + return (rep->nhdrs + p - 1) % rep->nhdrs; +} + +/* get (r)th replica */ +static inline struct pool_replica * +REP(const struct pool_set *set, unsigned r) +{ + return set->replica[REPidx(set, r)]; +} + +/* get (r + 1)th replica */ +static inline struct pool_replica * +REPN(const struct pool_set *set, unsigned r) +{ + return set->replica[REPNidx(set, r)]; +} + +/* get (r - 1)th replica */ +static inline struct pool_replica * +REPP(const struct pool_set *set, unsigned r) +{ + return set->replica[REPPidx(set, r)]; +} + +/* get (p)th part */ +static inline struct pool_set_part * +PART(struct pool_replica *rep, unsigned p) +{ + return &rep->part[PARTidx(rep, p)]; +} + +/* get (p + 1)th part */ 
+static inline struct pool_set_part * +PARTN(struct pool_replica *rep, unsigned p) +{ + return &rep->part[PARTNidx(rep, p)]; +} + +/* get (p - 1)th part */ +static inline struct pool_set_part * +PARTP(struct pool_replica *rep, unsigned p) +{ + return &rep->part[PARTPidx(rep, p)]; +} + +/* get (p)th header */ +static inline struct pool_hdr * +HDR(struct pool_replica *rep, unsigned p) +{ + return (struct pool_hdr *)(rep->part[HDRidx(rep, p)].hdr); +} + +/* get (p + 1)th header */ +static inline struct pool_hdr * +HDRN(struct pool_replica *rep, unsigned p) +{ + return (struct pool_hdr *)(rep->part[HDRNidx(rep, p)].hdr); +} + +/* get (p - 1)th header */ +static inline struct pool_hdr * +HDRP(struct pool_replica *rep, unsigned p) +{ + return (struct pool_hdr *)(rep->part[HDRPidx(rep, p)].hdr); +} + +extern int Prefault_at_open; +extern int Prefault_at_create; +extern int SDS_at_create; +extern int Fallocate_at_create; +extern int COW_at_open; + +int util_poolset_parse(struct pool_set **setp, const char *path, int fd); +int util_poolset_read(struct pool_set **setp, const char *path); +int util_poolset_create_set(struct pool_set **setp, const char *path, + size_t poolsize, size_t minsize, int ignore_sds); +int util_poolset_open(struct pool_set *set); +void util_poolset_close(struct pool_set *set, enum del_parts_mode del); +void util_poolset_free(struct pool_set *set); +int util_poolset_chmod(struct pool_set *set, mode_t mode); +void util_poolset_fdclose(struct pool_set *set); +void util_poolset_fdclose_always(struct pool_set *set); +int util_is_poolset_file(const char *path); +int util_poolset_foreach_part_struct(struct pool_set *set, + int (*cb)(struct part_file *pf, void *arg), void *arg); +int util_poolset_foreach_part(const char *path, + int (*cb)(struct part_file *pf, void *arg), void *arg); +size_t util_poolset_size(const char *path); + +int util_replica_deep_common(const void *addr, size_t len, + struct pool_set *set, unsigned replica_id, int flush); +int util_replica_deep_persist(const void *addr, size_t len, + struct pool_set *set, unsigned replica_id); +int util_replica_deep_drain(const void *addr, size_t len, + struct pool_set *set, unsigned replica_id); + +int util_pool_create(struct pool_set **setp, const char *path, size_t poolsize, + size_t minsize, size_t minpartsize, const struct pool_attr *attr, + unsigned *nlanes, int can_have_rep); +int util_pool_create_uuids(struct pool_set **setp, const char *path, + size_t poolsize, size_t minsize, size_t minpartsize, + const struct pool_attr *attr, unsigned *nlanes, int can_have_rep, + int remote); + +int util_part_open(struct pool_set_part *part, size_t minsize, int create_part); +void util_part_fdclose(struct pool_set_part *part); +int util_replica_open(struct pool_set *set, unsigned repidx, int flags); +int util_replica_set_attr(struct pool_replica *rep, + const struct rpmem_pool_attr *rattr); +void util_pool_hdr2attr(struct pool_attr *attr, struct pool_hdr *hdr); +void util_pool_attr2hdr(struct pool_hdr *hdr, + const struct pool_attr *attr); +int util_replica_close(struct pool_set *set, unsigned repidx); +int util_map_part(struct pool_set_part *part, void *addr, size_t size, + size_t offset, int flags, int rdonly); +int util_unmap_part(struct pool_set_part *part); +int util_unmap_parts(struct pool_replica *rep, unsigned start_index, + unsigned end_index); +int util_header_create(struct pool_set *set, unsigned repidx, unsigned partidx, + const struct pool_attr *attr, int overwrite); + +int util_map_hdr(struct pool_set_part *part, int 
flags, int rdonly); +void util_unmap_hdr(struct pool_set_part *part); + +int util_pool_has_device_dax(struct pool_set *set); + +int util_pool_open_nocheck(struct pool_set *set, unsigned flags); +int util_pool_open(struct pool_set **setp, const char *path, size_t minpartsize, + const struct pool_attr *attr, unsigned *nlanes, void *addr, + unsigned flags); +int util_pool_open_remote(struct pool_set **setp, const char *path, int cow, + size_t minpartsize, struct rpmem_pool_attr *rattr); + +void *util_pool_extend(struct pool_set *set, size_t *size, size_t minpartsize); + +void util_remote_init(void); +void util_remote_fini(void); + +int util_update_remote_header(struct pool_set *set, unsigned repn); +void util_remote_init_lock(void); +void util_remote_destroy_lock(void); +int util_pool_close_remote(RPMEMpool *rpp); +void util_remote_unload(void); +void util_replica_fdclose(struct pool_replica *rep); +int util_poolset_remote_open(struct pool_replica *rep, unsigned repidx, + size_t minsize, int create, void *pool_addr, + size_t pool_size, unsigned *nlanes); +int util_remote_load(void); +int util_replica_open_remote(struct pool_set *set, unsigned repidx, int flags); +int util_poolset_remote_replica_open(struct pool_set *set, unsigned repidx, + size_t minsize, int create, unsigned *nlanes); +int util_replica_close_local(struct pool_replica *rep, unsigned repn, + enum del_parts_mode del); +int util_replica_close_remote(struct pool_replica *rep, unsigned repn, + enum del_parts_mode del); + +extern int (*Rpmem_persist)(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane, unsigned flags); +extern int (*Rpmem_deep_persist)(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane); +extern int (*Rpmem_read)(RPMEMpool *rpp, void *buff, size_t offset, + size_t length, unsigned lane); +extern int (*Rpmem_close)(RPMEMpool *rpp); + +extern int (*Rpmem_remove)(const char *target, + const char *pool_set_name, int flags); + +extern int (*Rpmem_set_attr)(RPMEMpool *rpp, + const struct rpmem_pool_attr *rattr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/set_badblocks.c b/src/pmdk/src/common/set_badblocks.c new file mode 100644 index 000000000..1ee9687c3 --- /dev/null +++ b/src/pmdk/src/common/set_badblocks.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * set_badblocks.c - common part of implementation of bad blocks API + */ +#define _GNU_SOURCE + +#include +#include +#include + +#include "file.h" +#include "os.h" +#include "out.h" +#include "set_badblocks.h" +#include "badblocks.h" + +/* helper structure for badblocks_check_file_cb() */ +struct check_file_cb { + int n_files_bbs; /* number of files with bad blocks */ + int create; /* poolset is just being created */ +}; + +/* + * badblocks_check_file_cb -- (internal) callback checking bad blocks + * in the given file + */ +static int +badblocks_check_file_cb(struct part_file *pf, void *arg) +{ + LOG(3, "part_file %p arg %p", pf, arg); + + struct check_file_cb *pcfcb = arg; + + if (pf->is_remote) { + /* + * Remote replicas are checked for bad blocks + * while opening in util_pool_open_remote(). 
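+		 * Nothing needs to be verified for a remote part here.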
+ */ + return 0; + } + + int exists = util_file_exists(pf->part->path); + if (exists < 0) + return -1; + + if (!exists) + /* the part does not exist, so it has no bad blocks */ + return 0; + + int ret = badblocks_check_file(pf->part->path); + if (ret < 0) { + ERR("checking the pool file for bad blocks failed -- '%s'", + pf->part->path); + return -1; + } + + if (ret > 0) { + ERR("part file contains bad blocks -- '%s'", pf->part->path); + pcfcb->n_files_bbs++; + pf->part->has_bad_blocks = 1; + } + + return 0; +} + +/* + * badblocks_check_poolset -- checks if the pool set contains bad blocks + * + * Return value: + * -1 error + * 0 pool set does not contain bad blocks + * 1 pool set contains bad blocks + */ +int +badblocks_check_poolset(struct pool_set *set, int create) +{ + LOG(3, "set %p create %i", set, create); + + struct check_file_cb cfcb; + + cfcb.n_files_bbs = 0; + cfcb.create = create; + + if (util_poolset_foreach_part_struct(set, badblocks_check_file_cb, + &cfcb)) { + return -1; + } + + if (cfcb.n_files_bbs) { + LOG(1, "%i pool file(s) contain bad blocks", cfcb.n_files_bbs); + set->has_bad_blocks = 1; + } + + return (cfcb.n_files_bbs > 0); +} + +/* + * badblocks_clear_poolset_cb -- (internal) callback clearing bad blocks + * in the given file + */ +static int +badblocks_clear_poolset_cb(struct part_file *pf, void *arg) +{ + LOG(3, "part_file %p arg %p", pf, arg); + + int *create = arg; + + if (pf->is_remote) { /* XXX not supported yet */ + LOG(1, + "WARNING: clearing bad blocks in remote replicas is not supported yet -- '%s:%s'", + pf->remote->node_addr, pf->remote->pool_desc); + return 0; + } + + if (*create) { + /* + * Poolset is just being created - check if file exists + * and if we can read it. + */ + int exists = util_file_exists(pf->part->path); + if (exists < 0) + return -1; + + if (!exists) + return 0; + } + + int ret = badblocks_clear_all(pf->part->path); + if (ret < 0) { + ERR("clearing bad blocks in the pool file failed -- '%s'", + pf->part->path); + errno = EIO; + return -1; + } + + pf->part->has_bad_blocks = 0; + + return 0; +} + +/* + * badblocks_clear_poolset -- clears bad blocks in the pool set + */ +int +badblocks_clear_poolset(struct pool_set *set, int create) +{ + LOG(3, "set %p create %i", set, create); + + if (util_poolset_foreach_part_struct(set, badblocks_clear_poolset_cb, + &create)) { + return -1; + } + + set->has_bad_blocks = 0; + + return 0; +} + +/* + * badblocks_recovery_file_alloc -- allocate name of bad block recovery file, + * the allocated name has to be freed + * using Free() + */ +char * +badblocks_recovery_file_alloc(const char *file, unsigned rep, unsigned part) +{ + LOG(3, "file %s rep %u part %u", file, rep, part); + + char bbs_suffix[64]; + char *path; + + sprintf(bbs_suffix, "_r%u_p%u_badblocks.txt", rep, part); + + size_t len_file = strlen(file); + size_t len_bbs_suffix = strlen(bbs_suffix); + size_t len_path = len_file + len_bbs_suffix; + + path = Malloc(len_path + 1); + if (path == NULL) { + ERR("!Malloc"); + return NULL; + } + + strcpy(path, file); + strcat(path, bbs_suffix); + + return path; +} + +/* + * badblocks_recovery_file_exists -- check if any bad block recovery file exists + * + * Returns: + * 0 when there are no bad block recovery files and + * 1 when there is at least one bad block recovery file. 
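+ *
+ * Recovery files are looked up next to the poolset file under the
+ * "<set-path>_r<rep>_p<part>_badblocks.txt" names produced by
+ * badblocks_recovery_file_alloc() above.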
+ */ +int +badblocks_recovery_file_exists(struct pool_set *set) +{ + LOG(3, "set %p", set); + + int recovery_file_exists = 0; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = PART(rep, p)->path; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + if (!exists) { + /* part file does not exist - skip it */ + continue; + } + + char *rec_file = + badblocks_recovery_file_alloc(set->path, r, p); + if (rec_file == NULL) { + LOG(1, + "allocating name of bad block recovery file failed"); + return -1; + } + + exists = util_file_exists(rec_file); + if (exists < 0) { + Free(rec_file); + return -1; + } + + if (exists) { + LOG(3, "bad block recovery file exists: %s", + rec_file); + + recovery_file_exists = 1; + } + + Free(rec_file); + + if (recovery_file_exists) + return 1; + } + } + + return 0; +} diff --git a/src/pmdk/src/common/set_badblocks.h b/src/pmdk/src/common/set_badblocks.h new file mode 100644 index 000000000..dbb59b80e --- /dev/null +++ b/src/pmdk/src/common/set_badblocks.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * set_badblocks.h - poolset part of bad blocks API + */ + +#ifndef PMDK_SET_BADBLOCKS_H +#define PMDK_SET_BADBLOCKS_H 1 + +#include "set.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int badblocks_check_poolset(struct pool_set *set, int create); +int badblocks_clear_poolset(struct pool_set *set, int create); + +char *badblocks_recovery_file_alloc(const char *file, + unsigned rep, unsigned part); +int badblocks_recovery_file_exists(struct pool_set *set); + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_SET_BADBLOCKS_H */ diff --git a/src/pmdk/src/common/shutdown_state.c b/src/pmdk/src/common/shutdown_state.c new file mode 100644 index 000000000..3a2bb11fa --- /dev/null +++ b/src/pmdk/src/common/shutdown_state.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +/* + * shutdown_state.c -- unsafe shudown detection + */ + +#include +#include +#include +#include "shutdown_state.h" +#include "out.h" +#include "util.h" +#include "os_deep.h" +#include "set.h" +#include "libpmem2.h" +#include "badblocks.h" +#include "../libpmem2/pmem2_utils.h" + +#define FLUSH_SDS(sds, rep) \ + if ((rep) != NULL) os_part_deep_common(rep, 0, sds, sizeof(*(sds)), 1) + +/* + * shutdown_state_checksum -- (internal) counts SDS checksum and flush it + */ +static void +shutdown_state_checksum(struct shutdown_state *sds, struct pool_replica *rep) +{ + LOG(3, "sds %p", sds); + + util_checksum(sds, sizeof(*sds), &sds->checksum, 1, 0); + FLUSH_SDS(sds, rep); +} + +/* + * shutdown_state_init -- initializes shutdown_state struct + */ +int +shutdown_state_init(struct shutdown_state *sds, struct pool_replica *rep) +{ + /* check if we didn't change size of shutdown_state accidentally */ + COMPILE_ERROR_ON(sizeof(struct shutdown_state) != 64); + LOG(3, "sds %p", sds); + + memset(sds, 0, sizeof(*sds)); + + shutdown_state_checksum(sds, rep); + + return 0; +} + +/* + * shutdown_state_add_part -- adds file uuid and usc to shutdown_state struct + * + * if path does not exist it will fail which does NOT mean shutdown failure + */ +int +shutdown_state_add_part(struct shutdown_state *sds, int fd, + struct pool_replica *rep) +{ + LOG(3, "sds %p, fd %d", sds, fd); + + size_t len = 0; + char 
*uid; + uint64_t usc; + + struct pmem2_source *src; + + if (pmem2_source_from_fd(&src, fd)) + return 1; + + int ret = pmem2_source_device_usc(src, &usc); + + if (ret == PMEM2_E_NOSUPP) { + usc = 0; + } else if (ret != 0) { + if (ret == -EPERM) { + /* overwrite error message */ + ERR( + "Cannot read unsafe shutdown count. For more information please check https://github.com/pmem/pmdk/issues/4207"); + } + LOG(2, "cannot read unsafe shutdown count for %d", fd); + goto err; + } + + ret = pmem2_source_device_id(src, NULL, &len); + if (ret != PMEM2_E_NOSUPP && ret != 0) { + ERR("cannot read uuid of %d", fd); + goto err; + } + + len += 4 - len % 4; + uid = Zalloc(len); + + if (uid == NULL) { + ERR("!Zalloc"); + goto err; + } + + ret = pmem2_source_device_id(src, uid, &len); + if (ret != PMEM2_E_NOSUPP && ret != 0) { + ERR("cannot read uuid of %d", fd); + Free(uid); + goto err; + } + + sds->usc = htole64(le64toh(sds->usc) + usc); + + uint64_t tmp; + util_checksum(uid, len, &tmp, 1, 0); + sds->uuid = htole64(le64toh(sds->uuid) + tmp); + + FLUSH_SDS(sds, rep); + Free(uid); + pmem2_source_delete(&src); + shutdown_state_checksum(sds, rep); + return 0; +err: + pmem2_source_delete(&src); + return 1; +} + +/* + * shutdown_state_set_dirty -- sets dirty pool flag + */ +void +shutdown_state_set_dirty(struct shutdown_state *sds, struct pool_replica *rep) +{ + LOG(3, "sds %p", sds); + + sds->dirty = 1; + rep->part[0].sds_dirty_modified = 1; + + FLUSH_SDS(sds, rep); + + shutdown_state_checksum(sds, rep); +} + +/* + * shutdown_state_clear_dirty -- clears dirty pool flag + */ +void +shutdown_state_clear_dirty(struct shutdown_state *sds, struct pool_replica *rep) +{ + LOG(3, "sds %p", sds); + + struct pool_set_part part = rep->part[0]; + /* + * If a dirty flag was set in previous program execution it should be + * preserved as it stores information about potential ADR failure. 
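+	 * Hence only a flag set by this process (tracked via the
+	 * sds_dirty_modified field) is cleared below.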
+ */ + if (part.sds_dirty_modified != 1) + return; + + sds->dirty = 0; + part.sds_dirty_modified = 0; + + FLUSH_SDS(sds, rep); + + shutdown_state_checksum(sds, rep); +} + +/* + * shutdown_state_reinit -- (internal) reinitializes shutdown_state struct + */ +static void +shutdown_state_reinit(struct shutdown_state *curr_sds, + struct shutdown_state *pool_sds, struct pool_replica *rep) +{ + LOG(3, "curr_sds %p, pool_sds %p", curr_sds, pool_sds); + shutdown_state_init(pool_sds, rep); + pool_sds->uuid = htole64(curr_sds->uuid); + pool_sds->usc = htole64(curr_sds->usc); + pool_sds->dirty = 0; + + FLUSH_SDS(pool_sds, rep); + + shutdown_state_checksum(pool_sds, rep); +} + +/* + * shutdown_state_check -- compares and fixes shutdown state + */ +int +shutdown_state_check(struct shutdown_state *curr_sds, + struct shutdown_state *pool_sds, struct pool_replica *rep) +{ + LOG(3, "curr_sds %p, pool_sds %p", curr_sds, pool_sds); + + if (util_is_zeroed(pool_sds, sizeof(*pool_sds)) && + !util_is_zeroed(curr_sds, sizeof(*curr_sds))) { + shutdown_state_reinit(curr_sds, pool_sds, rep); + return 0; + } + + bool is_uuid_usc_correct = + le64toh(pool_sds->usc) == le64toh(curr_sds->usc) && + le64toh(pool_sds->uuid) == le64toh(curr_sds->uuid); + + bool is_checksum_correct = util_checksum(pool_sds, + sizeof(*pool_sds), &pool_sds->checksum, 0, 0); + + int dirty = pool_sds->dirty; + + if (!is_checksum_correct) { + /* the program was killed during opening or closing the pool */ + LOG(2, "incorrect checksum - SDS will be reinitialized"); + shutdown_state_reinit(curr_sds, pool_sds, rep); + return 0; + } + + if (is_uuid_usc_correct) { + if (dirty == 0) + return 0; + /* + * the program was killed when the pool was opened + * but there wasn't an ADR failure + */ + LOG(2, + "the pool was not closed - SDS will be reinitialized"); + shutdown_state_reinit(curr_sds, pool_sds, rep); + return 0; + } + if (dirty == 0) { + /* an ADR failure but the pool was closed */ + LOG(2, + "an ADR failure was detected but the pool was closed - SDS will be reinitialized"); + shutdown_state_reinit(curr_sds, pool_sds, rep); + return 0; + } + /* an ADR failure - the pool might be corrupted */ + ERR("an ADR failure was detected, the pool might be corrupted"); + return 1; +} diff --git a/src/pmdk/src/common/shutdown_state.h b/src/pmdk/src/common/shutdown_state.h new file mode 100644 index 000000000..1f9fa4ce8 --- /dev/null +++ b/src/pmdk/src/common/shutdown_state.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * shutdown_state.h -- unsafe shudown detection + */ + +#ifndef PMDK_SHUTDOWN_STATE_H +#define PMDK_SHUTDOWN_STATE_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct pool_replica; +struct shutdown_state { + uint64_t usc; + uint64_t uuid; /* UID checksum */ + uint8_t dirty; + uint8_t reserved[39]; + uint64_t checksum; +}; + +int shutdown_state_init(struct shutdown_state *sds, struct pool_replica *rep); +int shutdown_state_add_part(struct shutdown_state *sds, int fd, + struct pool_replica *rep); +void shutdown_state_set_dirty(struct shutdown_state *sds, + struct pool_replica *rep); +void shutdown_state_clear_dirty(struct shutdown_state *sds, + struct pool_replica *rep); + +int shutdown_state_check(struct shutdown_state *curr_sds, + struct shutdown_state *pool_sds, struct pool_replica *rep); + +#ifdef __cplusplus +} +#endif + +#endif /* shutdown_state.h */ diff --git a/src/pmdk/src/common/sys_util.h b/src/pmdk/src/common/sys_util.h new file mode 100644 index 
000000000..fbe0c10d6 --- /dev/null +++ b/src/pmdk/src/common/sys_util.h @@ -0,0 +1,315 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * sys_util.h -- internal utility wrappers around system functions + */ + +#ifndef PMDK_SYS_UTIL_H +#define PMDK_SYS_UTIL_H 1 + +#include + +#include "os_thread.h" +#include "out.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * util_mutex_init -- os_mutex_init variant that never fails from + * caller perspective. If os_mutex_init failed, this function aborts + * the program. + */ +static inline void +util_mutex_init(os_mutex_t *m) +{ + int tmp = os_mutex_init(m); + if (tmp) { + errno = tmp; + FATAL("!os_mutex_init"); + } +} + +/* + * util_mutex_destroy -- os_mutex_destroy variant that never fails from + * caller perspective. If os_mutex_destroy failed, this function aborts + * the program. + */ +static inline void +util_mutex_destroy(os_mutex_t *m) +{ + int tmp = os_mutex_destroy(m); + if (tmp) { + errno = tmp; + FATAL("!os_mutex_destroy"); + } +} + +/* + * util_mutex_lock -- os_mutex_lock variant that never fails from + * caller perspective. If os_mutex_lock failed, this function aborts + * the program. + */ +static inline void +util_mutex_lock(os_mutex_t *m) +{ + int tmp = os_mutex_lock(m); + if (tmp) { + errno = tmp; + FATAL("!os_mutex_lock"); + } +} + +/* + * util_mutex_trylock -- os_mutex_trylock variant that never fails from + * caller perspective (other than EBUSY). If util_mutex_trylock failed, this + * function aborts the program. + * Returns 0 if locked successfully, otherwise returns EBUSY. + */ +static inline int +util_mutex_trylock(os_mutex_t *m) +{ + int tmp = os_mutex_trylock(m); + if (tmp && tmp != EBUSY) { + errno = tmp; + FATAL("!os_mutex_trylock"); + } + return tmp; +} + +/* + * util_mutex_unlock -- os_mutex_unlock variant that never fails from + * caller perspective. If os_mutex_unlock failed, this function aborts + * the program. + */ +static inline void +util_mutex_unlock(os_mutex_t *m) +{ + int tmp = os_mutex_unlock(m); + if (tmp) { + errno = tmp; + FATAL("!os_mutex_unlock"); + } +} + +/* + * util_rwlock_init -- os_rwlock_init variant that never fails from + * caller perspective. If os_rwlock_init failed, this function aborts + * the program. + */ +static inline void +util_rwlock_init(os_rwlock_t *m) +{ + int tmp = os_rwlock_init(m); + if (tmp) { + errno = tmp; + FATAL("!os_rwlock_init"); + } +} + +/* + * util_rwlock_rdlock -- os_rwlock_rdlock variant that never fails from + * caller perspective. If os_rwlock_rdlock failed, this function aborts + * the program. + */ +static inline void +util_rwlock_rdlock(os_rwlock_t *m) +{ + int tmp = os_rwlock_rdlock(m); + if (tmp) { + errno = tmp; + FATAL("!os_rwlock_rdlock"); + } +} + +/* + * util_rwlock_wrlock -- os_rwlock_wrlock variant that never fails from + * caller perspective. If os_rwlock_wrlock failed, this function aborts + * the program. + */ +static inline void +util_rwlock_wrlock(os_rwlock_t *m) +{ + int tmp = os_rwlock_wrlock(m); + if (tmp) { + errno = tmp; + FATAL("!os_rwlock_wrlock"); + } +} + +/* + * util_rwlock_unlock -- os_rwlock_unlock variant that never fails from + * caller perspective. If os_rwlock_unlock failed, this function aborts + * the program. + */ +static inline void +util_rwlock_unlock(os_rwlock_t *m) +{ + int tmp = os_rwlock_unlock(m); + if (tmp) { + errno = tmp; + FATAL("!os_rwlock_unlock"); + } +} + +/* + * util_rwlock_destroy -- os_rwlock_destroy variant that never fails from + * caller perspective. 
If os_rwlock_destroy failed, this function aborts + * the program. + */ +static inline void +util_rwlock_destroy(os_rwlock_t *m) +{ + int tmp = os_rwlock_destroy(m); + if (tmp) { + errno = tmp; + FATAL("!os_rwlock_destroy"); + } +} + +/* + * util_spin_init -- os_spin_init variant that logs on fail and sets errno. + */ +static inline int +util_spin_init(os_spinlock_t *lock, int pshared) +{ + int tmp = os_spin_init(lock, pshared); + if (tmp) { + errno = tmp; + ERR("!os_spin_init"); + } + return tmp; +} + +/* + * util_spin_destroy -- os_spin_destroy variant that never fails from + * caller perspective. If os_spin_destroy failed, this function aborts + * the program. + */ +static inline void +util_spin_destroy(os_spinlock_t *lock) +{ + int tmp = os_spin_destroy(lock); + if (tmp) { + errno = tmp; + FATAL("!os_spin_destroy"); + } +} + +/* + * util_spin_lock -- os_spin_lock variant that never fails from caller + * perspective. If os_spin_lock failed, this function aborts the program. + */ +static inline void +util_spin_lock(os_spinlock_t *lock) +{ + int tmp = os_spin_lock(lock); + if (tmp) { + errno = tmp; + FATAL("!os_spin_lock"); + } +} + +/* + * util_spin_unlock -- os_spin_unlock variant that never fails + * from caller perspective. If os_spin_unlock failed, + * this function aborts the program. + */ +static inline void +util_spin_unlock(os_spinlock_t *lock) +{ + int tmp = os_spin_unlock(lock); + if (tmp) { + errno = tmp; + FATAL("!os_spin_unlock"); + } +} + +/* + * util_semaphore_init -- os_semaphore_init variant that never fails + * from caller perspective. If os_semaphore_init failed, + * this function aborts the program. + */ +static inline void +util_semaphore_init(os_semaphore_t *sem, unsigned value) +{ + if (os_semaphore_init(sem, value)) + FATAL("!os_semaphore_init"); +} + +/* + * util_semaphore_destroy -- deletes a semaphore instance + */ +static inline void +util_semaphore_destroy(os_semaphore_t *sem) +{ + if (os_semaphore_destroy(sem) != 0) + FATAL("!os_semaphore_destroy"); +} + +/* + * util_semaphore_wait -- decreases the value of the semaphore + */ +static inline void +util_semaphore_wait(os_semaphore_t *sem) +{ + errno = 0; + + int ret; + do { + ret = os_semaphore_wait(sem); + } while (errno == EINTR); /* signal interrupt */ + + if (ret != 0) + FATAL("!os_semaphore_wait"); +} + +/* + * util_semaphore_trywait -- tries to decrease the value of the semaphore + */ +static inline int +util_semaphore_trywait(os_semaphore_t *sem) +{ + errno = 0; + int ret; + do { + ret = os_semaphore_trywait(sem); + } while (errno == EINTR); /* signal interrupt */ + + if (ret != 0 && errno != EAGAIN) + FATAL("!os_semaphore_trywait"); + + return ret; +} + +/* + * util_semaphore_post -- increases the value of the semaphore + */ +static inline void +util_semaphore_post(os_semaphore_t *sem) +{ + if (os_semaphore_post(sem) != 0) + FATAL("!os_semaphore_post"); +} + +static inline void +util_cond_init(os_cond_t *__restrict cond) +{ + if (os_cond_init(cond)) + FATAL("!os_cond_init"); +} + +static inline void +util_cond_destroy(os_cond_t *__restrict cond) +{ + if (os_cond_destroy(cond)) + FATAL("!os_cond_destroy"); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/util_pmem.h b/src/pmdk/src/common/util_pmem.h new file mode 100644 index 000000000..a31b6b8f7 --- /dev/null +++ b/src/pmdk/src/common/util_pmem.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * util_pmem.h -- internal definitions for pmem utils + */ + 
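+/*
+ * A hypothetical use of the helpers defined below (addr, len and rep
+ * are illustrative): after storing to a mapped pool, a caller makes
+ * the write durable with
+ *
+ *	util_persist(rep->is_pmem, addr, len);
+ *
+ * which falls back to pmem_msync() when is_pmem is false.
+ */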
+#ifndef PMDK_UTIL_PMEM_H +#define PMDK_UTIL_PMEM_H 1 + +#include "libpmem.h" +#include "out.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * util_persist -- flush to persistence + */ +static inline void +util_persist(int is_pmem, const void *addr, size_t len) +{ + LOG(3, "is_pmem %d, addr %p, len %zu", is_pmem, addr, len); + + if (is_pmem) + pmem_persist(addr, len); + else if (pmem_msync(addr, len)) + FATAL("!pmem_msync"); +} + +/* + * util_persist_auto -- flush to persistence + */ +static inline void +util_persist_auto(int is_pmem, const void *addr, size_t len) +{ + LOG(3, "is_pmem %d, addr %p, len %zu", is_pmem, addr, len); + + util_persist(is_pmem || pmem_is_pmem(addr, len), addr, len); +} + +#ifdef __cplusplus +} +#endif + +#endif /* util_pmem.h */ diff --git a/src/pmdk/src/common/uuid.c b/src/pmdk/src/common/uuid.c new file mode 100644 index 000000000..94fdd737c --- /dev/null +++ b/src/pmdk/src/common/uuid.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2018, Intel Corporation */ + +/* + * uuid.c -- uuid utilities + */ + +#include +#include +#include +#include "uuid.h" +#include "out.h" + +/* + * util_uuid_to_string -- generate a string form of the uuid + */ +int +util_uuid_to_string(const uuid_t u, char *buf) +{ + int len; /* size that is returned from sprintf call */ + + if (buf == NULL) { + LOG(2, "invalid buffer for uuid string"); + return -1; + } + + if (u == NULL) { + LOG(2, "invalid uuid structure"); + return -1; + } + + struct uuid *uuid = (struct uuid *)u; + len = snprintf(buf, POOL_HDR_UUID_STR_LEN, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid->time_low, uuid->time_mid, uuid->time_hi_and_ver, + uuid->clock_seq_hi, uuid->clock_seq_low, uuid->node[0], + uuid->node[1], uuid->node[2], uuid->node[3], uuid->node[4], + uuid->node[5]); + + if (len != POOL_HDR_UUID_STR_LEN - 1) { + LOG(2, "snprintf(uuid): %d", len); + return -1; + } + + return 0; +} + +/* + * util_uuid_from_string -- generate a binary form of the uuid + * + * uuid string read from /proc/sys/kernel/random/uuid. UUID string + * format example: + * f81d4fae-7dec-11d0-a765-00a0c91e6bf6 + */ +int +util_uuid_from_string(const char *uuid, struct uuid *ud) +{ + if (strlen(uuid) != 36) { + LOG(2, "invalid uuid string"); + return -1; + } + + if (uuid[8] != '-' || uuid[13] != '-' || uuid[18] != '-' || + uuid[23] != '-') { + LOG(2, "invalid uuid string"); + return -1; + } + + int n = sscanf(uuid, + "%08x-%04hx-%04hx-%02hhx%02hhx-" + "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx", + &ud->time_low, &ud->time_mid, &ud->time_hi_and_ver, + &ud->clock_seq_hi, &ud->clock_seq_low, &ud->node[0], + &ud->node[1], &ud->node[2], &ud->node[3], &ud->node[4], + &ud->node[5]); + + if (n != 11) { + LOG(2, "sscanf(uuid)"); + return -1; + } + + return 0; +} diff --git a/src/pmdk/src/common/uuid.h b/src/pmdk/src/common/uuid.h new file mode 100644 index 000000000..5d817bbf1 --- /dev/null +++ b/src/pmdk/src/common/uuid.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * uuid.h -- internal definitions for uuid module + */ + +#ifndef PMDK_UUID_H +#define PMDK_UUID_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Structure for binary version of uuid. 
From RFC4122, + * https://tools.ietf.org/html/rfc4122 + */ +struct uuid { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_ver; + uint8_t clock_seq_hi; + uint8_t clock_seq_low; + uint8_t node[6]; +}; + +#define POOL_HDR_UUID_LEN 16 /* uuid byte length */ +#define POOL_HDR_UUID_STR_LEN 37 /* uuid string length */ +#define POOL_HDR_UUID_GEN_FILE "/proc/sys/kernel/random/uuid" + +typedef unsigned char uuid_t[POOL_HDR_UUID_LEN]; /* 16 byte binary uuid value */ + +int util_uuid_generate(uuid_t uuid); +int util_uuid_to_string(const uuid_t u, char *buf); +int util_uuid_from_string(const char uuid[POOL_HDR_UUID_STR_LEN], + struct uuid *ud); + +/* + * uuidcmp -- compare two uuids + */ +static inline int +uuidcmp(const uuid_t uuid1, const uuid_t uuid2) +{ + return memcmp(uuid1, uuid2, POOL_HDR_UUID_LEN); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/common/uuid_freebsd.c b/src/pmdk/src/common/uuid_freebsd.c new file mode 100644 index 000000000..0a771c9a6 --- /dev/null +++ b/src/pmdk/src/common/uuid_freebsd.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2017, Intel Corporation */ + +/* + * uuid_freebsd.c -- FreeBSD-specific implementation for UUID generation + */ + +#include "uuid.h" + +/* XXX Can't include because it also defines uuid_t */ +void uuid_generate(uuid_t); + +/* + * util_uuid_generate -- generate a uuid + * + * Uses the available FreeBSD uuid_generate library function. + */ +int +util_uuid_generate(uuid_t uuid) +{ + uuid_generate(uuid); + + return 0; +} diff --git a/src/pmdk/src/common/uuid_linux.c b/src/pmdk/src/common/uuid_linux.c new file mode 100644 index 000000000..979e0ad52 --- /dev/null +++ b/src/pmdk/src/common/uuid_linux.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2017, Intel Corporation */ + +/* + * uuid_linux.c -- pool set utilities with OS-specific implementation + */ + +#include +#include +#include + +#include "uuid.h" +#include "os.h" +#include "out.h" + +/* + * util_uuid_generate -- generate a uuid + * + * This function reads the uuid string from /proc/sys/kernel/random/uuid + * It converts this string into the binary uuid format as specified in + * https://www.ietf.org/rfc/rfc4122.txt + */ +int +util_uuid_generate(uuid_t uuid) +{ + char uu[POOL_HDR_UUID_STR_LEN]; + + int fd = os_open(POOL_HDR_UUID_GEN_FILE, O_RDONLY); + if (fd < 0) { + /* Fatal error */ + LOG(2, "!open(uuid)"); + return -1; + } + ssize_t num = read(fd, uu, POOL_HDR_UUID_STR_LEN); + if (num < POOL_HDR_UUID_STR_LEN) { + /* Fatal error */ + LOG(2, "!read(uuid)"); + os_close(fd); + return -1; + } + os_close(fd); + + uu[POOL_HDR_UUID_STR_LEN - 1] = '\0'; + int ret = util_uuid_from_string(uu, (struct uuid *)uuid); + if (ret < 0) + return ret; + + return 0; +} diff --git a/src/pmdk/src/common/uuid_windows.c b/src/pmdk/src/common/uuid_windows.c new file mode 100644 index 000000000..9c62a70fe --- /dev/null +++ b/src/pmdk/src/common/uuid_windows.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2017, Intel Corporation */ + +/* + * uuid_windows.c -- pool set utilities with OS-specific implementation + */ + +#include "uuid.h" +#include "out.h" + +/* + * util_uuid_generate -- generate a uuid + */ +int +util_uuid_generate(uuid_t uuid) +{ + HRESULT res = CoCreateGuid((GUID *)(uuid)); + if (res != S_OK) { + ERR("CoCreateGuid"); + return -1; + } + return 0; +} diff --git a/src/pmdk/src/common/vec.h b/src/pmdk/src/common/vec.h new file mode 100644 index 000000000..c1ba60c95 --- 
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * vec.h -- vector interface
+ */
+
+#ifndef PMDK_VEC_H
+#define PMDK_VEC_H 1
+
+#include <stddef.h>
+#include "valgrind_internal.h"
+#include "util.h"
+#include "out.h"
+#include "alloc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VEC_INIT_SIZE (64)
+
+#define VEC(name, type)\
+struct name {\
+	type *buffer;\
+	size_t size;\
+	size_t capacity;\
+}
+
+#define VEC_INITIALIZER {NULL, 0, 0}
+
+#define VEC_INIT(vec) do {\
+	(vec)->buffer = NULL;\
+	(vec)->size = 0;\
+	(vec)->capacity = 0;\
+} while (0)
+
+#define VEC_MOVE(vecl, vecr) do {\
+	Free((vecl)->buffer);\
+	(vecl)->buffer = (vecr)->buffer;\
+	(vecl)->size = (vecr)->size;\
+	(vecl)->capacity = (vecr)->capacity;\
+	(vecr)->buffer = NULL;\
+	(vecr)->size = 0;\
+	(vecr)->capacity = 0;\
+} while (0)
+
+#define VEC_REINIT(vec) do {\
+	VALGRIND_ANNOTATE_NEW_MEMORY((vec), sizeof(*vec));\
+	VALGRIND_ANNOTATE_NEW_MEMORY((vec)->buffer,\
+		(sizeof(*(vec)->buffer) * ((vec)->capacity)));\
+	(vec)->size = 0;\
+} while (0)
+
+static inline int
+vec_reserve(void *vec, size_t ncapacity, size_t s)
+{
+	size_t ncap = ncapacity == 0 ? VEC_INIT_SIZE : ncapacity;
+	VEC(vvec, void) *vecp = (struct vvec *)vec;
+	void *tbuf = Realloc(vecp->buffer, s * ncap);
+	if (tbuf == NULL) {
+		ERR("!Realloc");
+		return -1;
+	}
+	vecp->buffer = tbuf;
+	vecp->capacity = ncap;
+	return 0;
+}
+
+#define VEC_RESERVE(vec, ncapacity)\
+(((vec)->size == 0 || (ncapacity) > (vec)->size) ?\
+	vec_reserve((void *)vec, ncapacity, sizeof(*(vec)->buffer)) :\
+	0)
+
+#define VEC_POP_BACK(vec) do {\
+	(vec)->size -= 1;\
+} while (0)
+
+#define VEC_FRONT(vec)\
+(vec)->buffer[0]
+
+#define VEC_BACK(vec)\
+(vec)->buffer[(vec)->size - 1]
+
+#define VEC_ERASE_BY_POS(vec, pos) do {\
+	if ((pos) != ((vec)->size - 1))\
+		(vec)->buffer[(pos)] = VEC_BACK(vec);\
+	VEC_POP_BACK(vec);\
+} while (0)
+
+#define VEC_ERASE_BY_PTR(vec, element) do {\
+	if ((element) != &VEC_BACK(vec))\
+		*(element) = VEC_BACK(vec);\
+	VEC_POP_BACK(vec);\
+} while (0)
+
+#define VEC_INSERT(vec, element)\
+((vec)->buffer[(vec)->size - 1] = (element), 0)
+
+#define VEC_INC_SIZE(vec)\
+(((vec)->size++), 0)
+
+#define VEC_INC_BACK(vec)\
+((vec)->capacity == (vec)->size ?\
+	(VEC_RESERVE((vec), ((vec)->capacity * 2)) == 0 ?\
+		VEC_INC_SIZE(vec) : -1) :\
+	VEC_INC_SIZE(vec))
+
+#define VEC_PUSH_BACK(vec, element)\
+(VEC_INC_BACK(vec) == 0 ? VEC_INSERT(vec, element) : -1)
+
+#define VEC_FOREACH(el, vec)\
+for (size_t _vec_i = 0;\
+	_vec_i < (vec)->size && (((el) = (vec)->buffer[_vec_i]), 1);\
+	++_vec_i)
+
+#define VEC_FOREACH_REVERSE(el, vec)\
+for (size_t _vec_i = ((vec)->size);\
+	_vec_i != 0 && (((el) = (vec)->buffer[_vec_i - 1]), 1);\
+	--_vec_i)
+
+#define VEC_FOREACH_BY_POS(elpos, vec)\
+for ((elpos) = 0; (elpos) < (vec)->size; ++(elpos))
+
+#define VEC_FOREACH_BY_PTR(el, vec)\
+for (size_t _vec_i = 0;\
+	_vec_i < (vec)->size && (((el) = &(vec)->buffer[_vec_i]), 1);\
+	++_vec_i)
+
+#define VEC_SIZE(vec)\
+((vec)->size)
+
+#define VEC_CAPACITY(vec)\
+((vec)->capacity)
+
+#define VEC_ARR(vec)\
+((vec)->buffer)
+
+#define VEC_GET(vec, id)\
+(&(vec)->buffer[id])
+
+#define VEC_CLEAR(vec) do {\
+	(vec)->size = 0;\
+} while (0)
+
+#define VEC_DELETE(vec) do {\
+	Free((vec)->buffer);\
+	(vec)->buffer = NULL;\
+	(vec)->size = 0;\
+	(vec)->capacity = 0;\
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PMDK_VEC_H */
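A short sketch of how these macros combine in practice (a hypothetical caller, not part of the patch; Free and Realloc resolve to the overridable allocator wrappers from alloc.h):

#include "vec.h"

/* illustrative only: collect the squares of 0..9 in a growable vector */
static int
vec_example(void)
{
	VEC(int_vec, int) v = VEC_INITIALIZER;
	int n;

	for (int i = 0; i < 10; ++i) {
		if (VEC_PUSH_BACK(&v, i * i) != 0) {
			VEC_DELETE(&v); /* push failed: allocation error */
			return -1;
		}
	}

	VEC_FOREACH(n, &v)
		LOG(4, "element %d", n);

	VEC_DELETE(&v);
	return 0;
}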
diff --git a/src/pmdk/src/common/vecq.h b/src/pmdk/src/common/vecq.h
new file mode 100644
index 000000000..98c77b615
--- /dev/null
+++ b/src/pmdk/src/common/vecq.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2018-2020, Intel Corporation */
+
+/*
+ * vecq.h -- vector queue (FIFO) interface
+ */
+
+#ifndef PMDK_VECQ_H
+#define PMDK_VECQ_H 1
+
+#include <stddef.h>
+#include "util.h"
+#include "out.h"
+#include "alloc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VECQ_INIT_SIZE (64)
+
+#define VECQ(name, type)\
+struct name {\
+	type *buffer;\
+	size_t capacity;\
+	size_t front;\
+	size_t back;\
+}
+
+#define VECQ_INIT(vec) do {\
+	(vec)->buffer = NULL;\
+	(vec)->capacity = 0;\
+	(vec)->front = 0;\
+	(vec)->back = 0;\
+} while (0)
+
+#define VECQ_REINIT(vec) do {\
+	VALGRIND_ANNOTATE_NEW_MEMORY((vec), sizeof(*vec));\
+	VALGRIND_ANNOTATE_NEW_MEMORY((vec)->buffer,\
+		(sizeof(*(vec)->buffer) * ((vec)->capacity)));\
+	(vec)->front = 0;\
+	(vec)->back = 0;\
+} while (0)
+
+#define VECQ_FRONT_POS(vec)\
+((vec)->front & ((vec)->capacity - 1))
+
+#define VECQ_BACK_POS(vec)\
+((vec)->back & ((vec)->capacity - 1))
+
+#define VECQ_FRONT(vec)\
+(vec)->buffer[VECQ_FRONT_POS(vec)]
+
+#define VECQ_BACK(vec)\
+(vec)->buffer[VECQ_BACK_POS(vec)]
+
+#define VECQ_DEQUEUE(vec)\
+((vec)->buffer[(((vec)->front++) & ((vec)->capacity - 1))])
+
+#define VECQ_SIZE(vec)\
+((vec)->back - (vec)->front)
+
+static inline int
+realloc_set(void **buf, size_t s)
+{
+	void *tbuf = Realloc(*buf, s);
+	if (tbuf == NULL) {
+		ERR("!Realloc");
+		return -1;
+	}
+	*buf = tbuf;
+	return 0;
+}
+
+#define VECQ_NCAPACITY(vec)\
+((vec)->capacity == 0 ? VECQ_INIT_SIZE : (vec)->capacity * 2)
+#define VECQ_GROW(vec)\
+(realloc_set((void **)&(vec)->buffer,\
+		VECQ_NCAPACITY(vec) * sizeof(*(vec)->buffer)) ? -1 :\
+	(memcpy((vec)->buffer + (vec)->capacity, (vec)->buffer,\
+		VECQ_FRONT_POS(vec) * sizeof(*(vec)->buffer)),\
+	(vec)->front = VECQ_FRONT_POS(vec),\
+	(vec)->back = (vec)->front + (vec)->capacity,\
+	(vec)->capacity = VECQ_NCAPACITY(vec),\
+	0\
+))
+
+#define VECQ_INSERT(vec, element)\
+(VECQ_BACK(vec) = element, (vec)->back += 1, 0)
+
+#define VECQ_ENQUEUE(vec, element)\
+((vec)->capacity == VECQ_SIZE(vec) ?\
+	(VECQ_GROW(vec) == 0 ?
VECQ_INSERT(vec, element) : -1) :\ +VECQ_INSERT(vec, element)) + +#define VECQ_CAPACITY(vec)\ +((vec)->capacity) + +#define VECQ_FOREACH(el, vec)\ +for (size_t _vec_i = 0;\ + _vec_i < VECQ_SIZE(vec) &&\ + (((el) = (vec)->buffer[_vec_i & ((vec)->capacity - 1)]), 1);\ + ++_vec_i) + +#define VECQ_FOREACH_REVERSE(el, vec)\ +for (size_t _vec_i = VECQ_SIZE(vec);\ + _vec_i > 0 &&\ + (((el) = (vec)->buffer[(_vec_i - 1) & ((vec)->capacity - 1)]), 1);\ + --_vec_i) + +#define VECQ_CLEAR(vec) do {\ + (vec)->front = 0;\ + (vec)->back = 0;\ +} while (0) + +#define VECQ_DELETE(vec) do {\ + Free((vec)->buffer);\ + (vec)->buffer = NULL;\ + (vec)->capacity = 0;\ + (vec)->front = 0;\ + (vec)->back = 0;\ +} while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_VECQ_H */ diff --git a/src/pmdk/src/core/Makefile b/src/pmdk/src/core/Makefile new file mode 100644 index 000000000..c7526550b --- /dev/null +++ b/src/pmdk/src/core/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2020, Intel Corporation + +# +# src/core/Makefile -- Makefile for core +# + +LIBRARY_NAME = pmemcore + +include pmemcore.inc + +include ../Makefile.inc diff --git a/src/pmdk/src/core/alloc.c b/src/pmdk/src/core/alloc.c new file mode 100644 index 000000000..e267e4e62 --- /dev/null +++ b/src/pmdk/src/core/alloc.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +#include + +#include "alloc.h" +#include "fault_injection.h" +#include "out.h" + +Malloc_func fn_malloc = malloc; +Realloc_func fn_realloc = realloc; + +#if FAULT_INJECTION +static __thread int malloc_num; +static __thread int fail_malloc_num; +static __thread const char *fail_malloc_from; + +void * +_flt_Malloc(size_t size, const char *func) +{ + if (fail_malloc_from && strcmp(func, fail_malloc_from) == 0) { + if (++malloc_num == fail_malloc_num) { + errno = ENOMEM; + return NULL; + } + } + return fn_malloc(size); +} + +static __thread int realloc_num; +static __thread int fail_realloc_num; +static __thread const char *fail_realloc_from; + +void * +_flt_Realloc(void *ptr, size_t size, const char *func) +{ + if (fail_realloc_from && strcmp(func, fail_realloc_from) == 0) { + if (++realloc_num == fail_realloc_num) { + errno = ENOMEM; + return NULL; + } + } + return fn_realloc(ptr, size); +} + +void +core_inject_fault_at(enum pmem_allocation_type type, int nth, const char *at) +{ + switch (type) { + case PMEM_MALLOC: + malloc_num = 0; + fail_malloc_num = nth; + fail_malloc_from = at; + break; + case PMEM_REALLOC: + realloc_num = 0; + fail_realloc_num = nth; + fail_realloc_from = at; + break; + default: + FATAL("unknown allocation type"); + } +} + +int +core_fault_injection_enabled(void) +{ + return 1; +} +#else +void *_Malloc(size_t size) { + return fn_malloc(size); +} + +void *_Realloc(void *ptr, size_t size) { + return fn_realloc(ptr, size); +} +#endif + +void set_func_malloc(void *(*malloc_func)(size_t size)) { + fn_malloc = (malloc_func == NULL) ? malloc : malloc_func; +} + +void set_func_realloc(void *(*realloc_func)(void *ptr, size_t size)) { + fn_realloc = (realloc_func == NULL) ? realloc : realloc_func; +} + +/* + * our versions of malloc & friends start off pointing to the libc versions + */ +Free_func Free = free; +Strdup_func Strdup = strdup; + +/* + * Zalloc -- allocate zeroed memory + */ +void * +Zalloc(size_t sz) +{ + void *ret = Malloc(sz); + if (!ret) + return NULL; + return memset(ret, 0, sz); +} + +/* + * util_set_alloc_funcs -- allow one to override malloc, etc. 
+ */
+void
+util_set_alloc_funcs(void *(*malloc_func)(size_t size),
+	void (*free_func)(void *ptr),
+	void *(*realloc_func)(void *ptr, size_t size),
+	char *(*strdup_func)(const char *s))
+{
+	set_func_malloc(malloc_func);
+	Free = (free_func == NULL) ? free : free_func;
+	set_func_realloc(realloc_func);
+	Strdup = (strdup_func == NULL) ? strdup : strdup_func;
+}
diff --git a/src/pmdk/src/core/alloc.h b/src/pmdk/src/core/alloc.h
new file mode 100644
index 000000000..a3c5c2bcc
--- /dev/null
+++ b/src/pmdk/src/core/alloc.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2019-2020, Intel Corporation */
+
+#ifndef COMMON_ALLOC_H
+#define COMMON_ALLOC_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *(*Malloc_func)(size_t size);
+typedef void *(*Realloc_func)(void *ptr, size_t size);
+
+extern Malloc_func fn_malloc;
+extern Realloc_func fn_realloc;
+
+#if FAULT_INJECTION
+void *_flt_Malloc(size_t, const char *);
+void *_flt_Realloc(void *, size_t, const char *);
+
+#define Malloc(size) _flt_Malloc(size, __func__)
+#define Realloc(ptr, size) _flt_Realloc(ptr, size, __func__)
+#else
+void *_Malloc(size_t);
+void *_Realloc(void *, size_t);
+
+#define Malloc(size) _Malloc(size)
+#define Realloc(ptr, size) _Realloc(ptr, size)
+#endif
+
+void set_func_malloc(void *(*malloc_func)(size_t size));
+void set_func_realloc(void *(*realloc_func)(void *ptr, size_t size));
+
+/*
+ * overridable names for malloc & friends used by this library
+ */
+typedef void (*Free_func)(void *ptr);
+typedef char *(*Strdup_func)(const char *s);
+
+extern Free_func Free;
+extern Strdup_func Strdup;
+extern void *Zalloc(size_t sz);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/pmdk/src/core/errno_freebsd.h b/src/pmdk/src/core/errno_freebsd.h
new file mode 100644
index 000000000..e207abac4
--- /dev/null
+++ b/src/pmdk/src/core/errno_freebsd.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * errno_freebsd.h -- map Linux errno's to something close on FreeBSD
+ */
+
+#ifndef PMDK_ERRNO_FREEBSD_H
+#define PMDK_ERRNO_FREEBSD_H 1
+
+#ifdef __FreeBSD__
+#define EBADFD EBADF
+#define ELIBACC EINVAL
+#define EMEDIUMTYPE EOPNOTSUPP
+#define ENOMEDIUM ENODEV
+#define EREMOTEIO EIO
+#endif
+
+#endif /* PMDK_ERRNO_FREEBSD_H */
diff --git a/src/pmdk/src/core/fault_injection.h b/src/pmdk/src/core/fault_injection.h
new file mode 100644
index 000000000..748d1ab51
--- /dev/null
+++ b/src/pmdk/src/core/fault_injection.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2019-2020, Intel Corporation */
+
+#ifndef CORE_FAULT_INJECTION
+#define CORE_FAULT_INJECTION
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum pmem_allocation_type { PMEM_MALLOC, PMEM_REALLOC };
+
+#if FAULT_INJECTION
+void core_inject_fault_at(enum pmem_allocation_type type,
+	int nth, const char *at);
+
+int core_fault_injection_enabled(void);
+
+#else
+static inline void
+core_inject_fault_at(enum pmem_allocation_type type, int nth, const char *at)
+{
+	abort();
+}
+
+static inline int
+core_fault_injection_enabled(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
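As a usage sketch (illustrative only, not part of the patch): in a build compiled with FAULT_INJECTION enabled, a test can force the Nth allocation made inside a named function to fail with ENOMEM. Note also that passing NULL to util_set_alloc_funcs above restores the libc defaults.

#include "alloc.h"
#include "fault_injection.h"

/* illustrative only: make the 2nd Malloc performed inside a function
 * named foo() return NULL with errno == ENOMEM ("foo" is hypothetical) */
static void
inject_example(void)
{
	if (core_fault_injection_enabled())
		core_inject_fault_at(PMEM_MALLOC, 2, "foo");
}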
diff --git a/src/pmdk/src/core/fs.h b/src/pmdk/src/core/fs.h
new file mode 100644
index 000000000..2323204ad
--- /dev/null
+++ b/src/pmdk/src/core/fs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * fs.h -- file system traversal abstraction layer
+ */
+
+#ifndef PMDK_FS_H
+#define PMDK_FS_H 1
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct fs;
+
+enum fs_entry_type {
+	FS_ENTRY_FILE,
+	FS_ENTRY_DIRECTORY,
+	FS_ENTRY_SYMLINK,
+	FS_ENTRY_OTHER,
+
+	MAX_FS_ENTRY_TYPES
+};
+
+struct fs_entry {
+	enum fs_entry_type type;
+
+	const char *name;
+	size_t namelen;
+
+	const char *path;
+	size_t pathlen;
+	/* the depth of the traversal */
+	/* XXX long on FreeBSD. Linux uses short. No harm in it being bigger */
+	long level;
+};
+
+struct fs *fs_new(const char *path);
+void fs_delete(struct fs *f);
+
+/* this call invalidates the previous entry */
+struct fs_entry *fs_read(struct fs *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PMDK_FS_H */
diff --git a/src/pmdk/src/core/fs_posix.c b/src/pmdk/src/core/fs_posix.c
new file mode 100644
index 000000000..ec27f4399
--- /dev/null
+++ b/src/pmdk/src/core/fs_posix.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * fs_posix.c -- file system traversal Posix implementation
+ */
+
+#include <fts.h>
+#include "util.h"
+#include "out.h"
+#include "vec.h"
+#include "fs.h"
+
+struct fs {
+	FTS *ft;
+	struct fs_entry entry;
+};
+
+/*
+ * fs_new -- creates fs traversal instance
+ */
+struct fs *
+fs_new(const char *path)
+{
+	struct fs *f = Zalloc(sizeof(*f));
+	if (f == NULL)
+		goto error_fs_alloc;
+
+	const char *paths[2] = {path, NULL};
+	f->ft = fts_open((char * const *)paths, FTS_COMFOLLOW | FTS_XDEV, NULL);
+	if (f->ft == NULL)
+		goto error_fts_open;
+
+	return f;
+
+error_fts_open:
+	Free(f);
+error_fs_alloc:
+	return NULL;
+}
+
+/*
+ * fs_read -- reads an entry from the fs path
+ */
+struct fs_entry *
+fs_read(struct fs *f)
+{
+	FTSENT *entry = fts_read(f->ft);
+	if (entry == NULL)
+		return NULL;
+
+	switch (entry->fts_info) {
+	case FTS_D:
+		f->entry.type = FS_ENTRY_DIRECTORY;
+		break;
+	case FTS_F:
+		f->entry.type = FS_ENTRY_FILE;
+		break;
+	case FTS_SL:
+		f->entry.type = FS_ENTRY_SYMLINK;
+		break;
+	default:
+		f->entry.type = FS_ENTRY_OTHER;
+		break;
+	}
+
+	f->entry.name = entry->fts_name;
+	f->entry.namelen = entry->fts_namelen;
+	f->entry.path = entry->fts_path;
+	f->entry.pathlen = entry->fts_pathlen;
+	f->entry.level = entry->fts_level;
+
+	return &f->entry;
+}
+
+/*
+ * fs_delete -- deletes a fs traversal instance
+ */
+void
+fs_delete(struct fs *f)
+{
+	fts_close(f->ft);
+	Free(f);
+}
diff --git a/src/pmdk/src/core/fs_windows.c b/src/pmdk/src/core/fs_windows.c
new file mode 100644
index 000000000..5fb7c2564
--- /dev/null
+++ b/src/pmdk/src/core/fs_windows.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * fs_windows.c -- file system traversal windows implementation
+ */
+
+#include <windows.h>
+#include "alloc.h"
+#include "fs.h"
+#include "out.h"
+#include "util.h"
+
+struct fs {
+	size_t dirlen;
+	WIN32_FIND_DATAW ffd;
+	HANDLE hFind;
+	int first_done;
+	const char *dir;
+
+	struct fs_entry entry;
+};
+
+/*
+ * fs_new -- creates fs traversal instance
+ */
+struct fs *
+fs_new(const char *path)
+{
+	size_t pathlen = strlen(path);
+	char *search_path = Malloc(strlen(path) + sizeof("\\*\0"));
+	if (search_path == NULL)
+		goto error_spath_alloc;
+
+	strcpy(search_path, path);
+	strcpy(search_path + pathlen, "\\*\0");
+
+	wchar_t *pathw = util_toUTF16(search_path);
+	if (pathw == NULL)
+		goto error_path_alloc;
+
+	struct fs *f = Zalloc(sizeof(*f));
+	if (f == NULL)
+		goto error_fs_alloc;
+
+	f->first_done = 0;
+
+	f->hFind = FindFirstFileW(pathw, &f->ffd);
+	if
(f->hFind == INVALID_HANDLE_VALUE) + goto error_fff; + + f->dir = path; + f->dirlen = pathlen; + util_free_UTF16(pathw); + Free(search_path); + + return f; + +error_fff: + Free(f); +error_fs_alloc: + util_free_UTF16(pathw); +error_path_alloc: + Free(search_path); +error_spath_alloc: + return NULL; +} + +/* + * fs_read -- reads an entry from the fs path + */ +struct fs_entry * +fs_read(struct fs *f) +{ + util_free_UTF8((char *)f->entry.name); + Free((char *)f->entry.path); + f->entry.name = NULL; + f->entry.path = NULL; + + if (f->first_done) { + if (FindNextFileW(f->hFind, &f->ffd) == 0) + return NULL; + } else { + f->first_done = 1; + } + + if (f->ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + f->entry.type = FS_ENTRY_DIRECTORY; + else + f->entry.type = FS_ENTRY_FILE; + + f->entry.name = util_toUTF8(f->ffd.cFileName); + if (f->entry.name == NULL) + return NULL; + f->entry.namelen = strnlen(f->entry.name, MAX_PATH); + + f->entry.pathlen = f->dirlen + f->entry.namelen + 1; + char *path = Zalloc(f->entry.pathlen + 1); + if (path == NULL) { + util_free_UTF8((char *)f->entry.name); + return NULL; + } + strcpy(path, f->dir); + path[f->dirlen] = '\\'; + strcpy(path + f->dirlen + 1, f->entry.name); + f->entry.path = path; + f->entry.level = 1; + + return &f->entry; +} + +/* + * fs_delete -- deletes a fs traversal instance + */ +void +fs_delete(struct fs *f) +{ + util_free_UTF8((char *)f->entry.name); + Free((char *)f->entry.path); + + FindClose(f->hFind); + Free(f); +} diff --git a/src/pmdk/src/core/libpmemcore.vcxproj b/src/pmdk/src/core/libpmemcore.vcxproj new file mode 100644 index 000000000..e2b1020f2 --- /dev/null +++ b/src/pmdk/src/core/libpmemcore.vcxproj @@ -0,0 +1,135 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + {901f04db-e1a5-4a41-8b81-9d31c19acd59} + + + + {2FA3155B-6F26-4D15-AC03-9D82D48DBC42} + Win32Proj + libpmemcore + 10.0.17134.0 + + + + StaticLibrary + true + v140 + NotSet + + + StaticLibrary + true + v140 + NotSet + + + + + + + + + + + + + + + true + .lib + $(SolutionDir)\include;$(SolutionDir)\windows\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);.; + + + true + .lib + $(SolutionDir)\include;$(SolutionDir)\windows\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);.; + + + + NotUsing + Level3 + PMDK_UTF8_API;NTDDI_VERSION=NTDDI_WIN10_RS1;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + platform.h + CompileAsC + MultiThreadedDebugDLL + false + true + + + Console + true + + + ntdll.lib;%(AdditionalDependencies) + true + + + + + + + + + NotUsing + Level3 + PMDK_UTF8_API;NTDDI_VERSION=NTDDI_WIN10_RS1;_DEBUG;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + platform.h + CompileAsC + MaxSpeed + MultiThreadedDLL + Default + false + ProgramDatabase + true + + + Console + true + + + ntdll.lib;%(AdditionalDependencies) + true + + + + + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/core/libpmemcore.vcxproj.filters b/src/pmdk/src/core/libpmemcore.vcxproj.filters new file mode 100644 index 000000000..7907f89b7 --- /dev/null +++ b/src/pmdk/src/core/libpmemcore.vcxproj.filters @@ -0,0 +1,71 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + 
Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + 
\ No newline at end of file
diff --git a/src/pmdk/src/core/os.h b/src/pmdk/src/core/os.h
new file mode 100644
index 000000000..9e984d971
--- /dev/null
+++ b/src/pmdk/src/core/os.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * os.h -- os abstraction layer
+ */
+
+#ifndef PMDK_OS_H
+#define PMDK_OS_H 1
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "errno_freebsd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _WIN32
+#define OS_DIR_SEPARATOR '/'
+#define OS_DIR_SEP_STR "/"
+#else
+#define OS_DIR_SEPARATOR '\\'
+#define OS_DIR_SEP_STR "\\"
+#endif
+
+#ifndef _WIN32
+
+/* madvise() */
+#ifdef __FreeBSD__
+#define os_madvise minherit
+#define MADV_DONTFORK INHERIT_NONE
+#else
+#define os_madvise madvise
+#endif
+
+/* dlopen() */
+#ifdef __FreeBSD__
+#define RTLD_DEEPBIND 0 /* XXX */
+#endif
+
+/* major(), minor() */
+#ifdef __FreeBSD__
+#define os_major (unsigned)major
+#define os_minor (unsigned)minor
+#else
+#define os_major major
+#define os_minor minor
+#endif
+
+#endif /* #ifndef _WIN32 */
+
+struct iovec;
+
+/* os_flock */
+#define OS_LOCK_SH 1
+#define OS_LOCK_EX 2
+#define OS_LOCK_NB 4
+#define OS_LOCK_UN 8
+
+#ifndef _WIN32
+typedef struct stat os_stat_t;
+#define os_fstat fstat
+#define os_lseek lseek
+#else
+typedef struct _stat64 os_stat_t;
+#define os_fstat _fstat64
+#define os_lseek _lseeki64
+#endif
+
+#define os_close close
+#define os_fclose fclose
+
+#ifndef _WIN32
+typedef off_t os_off_t;
+#else
+/* XXX: os_off_t defined in platform.h */
+#endif
+int os_open(const char *pathname, int flags, ...);
+int os_fsync(int fd);
+int os_fsync_dir(const char *dir_name);
+int os_stat(const char *pathname, os_stat_t *buf);
+int os_unlink(const char *pathname);
+int os_access(const char *pathname, int mode);
+FILE *os_fopen(const char *pathname, const char *mode);
+FILE *os_fdopen(int fd, const char *mode);
+int os_chmod(const char *pathname, mode_t mode);
+int os_mkstemp(char *temp);
+int os_posix_fallocate(int fd, os_off_t offset, os_off_t len);
+int os_ftruncate(int fd, os_off_t length);
+int os_flock(int fd, int operation);
+ssize_t os_writev(int fd, const struct iovec *iov, int iovcnt);
+int os_clock_gettime(int id, struct timespec *ts);
+unsigned os_rand_r(unsigned *seedp);
+int os_unsetenv(const char *name);
+int os_setenv(const char *name, const char *value, int overwrite);
+char *os_getenv(const char *name);
+const char *os_strsignal(int sig);
+int os_execv(const char *path, char *const argv[]);
+
+/*
+ * XXX: missing APIs (used in ut_file.c)
+ *
+ * rename
+ * read
+ * write
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* os.h */
diff --git a/src/pmdk/src/core/os_posix.c b/src/pmdk/src/core/os_posix.c
new file mode 100644
index 000000000..c953a4046
--- /dev/null
+++ b/src/pmdk/src/core/os_posix.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * os_posix.c -- abstraction layer for basic Posix functions
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#ifdef __FreeBSD__
+#include <sys/mount.h>
+#endif
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include "util.h"
+#include "out.h"
+#include "os.h"
+
+/*
+ * os_open -- open abstraction layer
+ */
+int
+os_open(const char *pathname, int flags, ...)
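+/*
+ * os_open mirrors open(2): when O_CREAT (or, where defined, O_TMPFILE) is
+ * present in flags, the caller must supply a third mode argument, which
+ * the body below retrieves with va_arg.
+ */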
+{ + int mode_required = (flags & O_CREAT) == O_CREAT; + +#ifdef O_TMPFILE + mode_required |= (flags & O_TMPFILE) == O_TMPFILE; +#endif + + if (mode_required) { + va_list arg; + va_start(arg, flags); + /* Clang requires int due to auto-promotion */ + int mode = va_arg(arg, int); + va_end(arg); + return open(pathname, flags, (mode_t)mode); + } else { + return open(pathname, flags); + } +} + +/* + * os_fsync -- fsync abstraction layer + */ +int +os_fsync(int fd) +{ + return fsync(fd); +} + +/* + * os_fsync_dir -- fsync the directory + */ +int +os_fsync_dir(const char *dir_name) +{ + int fd = os_open(dir_name, O_RDONLY | O_DIRECTORY); + if (fd < 0) + return -1; + + int ret = os_fsync(fd); + + os_close(fd); + + return ret; +} + +/* + * os_stat -- stat abstraction layer + */ +int +os_stat(const char *pathname, os_stat_t *buf) +{ + return stat(pathname, buf); +} + +/* + * os_unlink -- unlink abstraction layer + */ +int +os_unlink(const char *pathname) +{ + return unlink(pathname); +} + +/* + * os_access -- access abstraction layer + */ +int +os_access(const char *pathname, int mode) +{ + return access(pathname, mode); +} + +/* + * os_fopen -- fopen abstraction layer + */ +FILE * +os_fopen(const char *pathname, const char *mode) +{ + return fopen(pathname, mode); +} + +/* + * os_fdopen -- fdopen abstraction layer + */ +FILE * +os_fdopen(int fd, const char *mode) +{ + return fdopen(fd, mode); +} + +/* + * os_chmod -- chmod abstraction layer + */ +int +os_chmod(const char *pathname, mode_t mode) +{ + return chmod(pathname, mode); +} + +/* + * os_mkstemp -- mkstemp abstraction layer + */ +int +os_mkstemp(char *temp) +{ + return mkstemp(temp); +} + +/* + * os_posix_fallocate -- posix_fallocate abstraction layer + */ +int +os_posix_fallocate(int fd, os_off_t offset, off_t len) +{ + +#ifdef __FreeBSD__ + struct stat fbuf; + struct statfs fsbuf; +/* + * XXX Workaround for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=223287 + * + * FreeBSD implements posix_fallocate with a simple block allocation/zero + * loop. If the requested size is unreasonably large, this can result in + * an uninterruptable system call that will suck up all the space in the + * file system and could take hours to fail. To avoid this, make a crude + * check to see if the requested allocation is larger than the available + * space in the file system (minus any blocks already allocated to the + * file), and if so, immediately return ENOSPC. We do the check only if + * the offset is 0; otherwise, trying to figure out how many additional + * blocks are required is too complicated. + * + * This workaround is here mostly to fail "absurdly" large requests for + * testing purposes; however, it is coded to allow normal (albeit slow) + * operation if the space can actually be allocated. Because of the way + * PMDK uses posix_fallocate, supporting Linux-style fallocate in + * FreeBSD should be considered. + */ + if (offset == 0) { + if (fstatfs(fd, &fsbuf) == -1 || fstat(fd, &fbuf) == -1) + return errno; + + size_t reqd_blocks = + ((size_t)len + (fsbuf.f_bsize - 1)) / fsbuf.f_bsize; + if (fbuf.st_blocks > 0) { + if (reqd_blocks >= (size_t)fbuf.st_blocks) + reqd_blocks -= (size_t)fbuf.st_blocks; + else + reqd_blocks = 0; + } + if (reqd_blocks > (size_t)fsbuf.f_bavail) + return ENOSPC; + } +#endif + +/* + * First, try to alloc the whole thing in one go. This allows ENOSPC to + * fail immediately -- allocating piece by piece would fill the storage + * just to abort halfway. 
+ */ + int err = posix_fallocate(fd, offset, len); + if (err != ENOMEM && err != EINTR) + return err; + +/* + * Workaround for a bug in tmpfs where it fails large but reasonable + * requests that exceed available DRAM but fit within swap space. And + * even if a request fits within DRAM, tmpfs will evict other tasks + * just to reserve space. + * + * We also want to survive random unrelated signals. Profilers spam + * the program with SIGVTALRM/SIGPROF, anything run from a terminal can + * receive SIGNWINCH, etc. As fallocate is a long-running syscall, + * let's restart it, but in a way that avoids infinite loops. + * + * Thus: + * * limit a single syscall to 1GB + * * ignore sporadic signals + * * on repeated failures, start reducing syscall size + * * ... but not below 1MB + */ + os_off_t chunk = 1LL << 30; /* 1GB */ + int tries = 0; + + while (len) { + if (chunk > len) + chunk = len; + + int err = posix_fallocate(fd, offset, chunk); + if (!err) { + offset += chunk; + len -= chunk; + tries = 0; + } else if (err != ENOMEM && err != EINTR) { + return err; + } else if (++tries == 5) { + tries = 0; + chunk /= 2; + + /* + * Within memory pressure or a signal storm, small + * allocs are more likely to get through, but once we + * get this small, something is badly wrong. + */ + if (chunk < 1LL << 20) /* 1MB */ + return err; + } + } + + return 0; +} + +/* + * os_ftruncate -- ftruncate abstraction layer + */ +int +os_ftruncate(int fd, os_off_t length) +{ + return ftruncate(fd, length); +} + +/* + * os_flock -- flock abstraction layer + */ +int +os_flock(int fd, int operation) +{ + int opt = 0; + if (operation & OS_LOCK_EX) + opt |= LOCK_EX; + if (operation & OS_LOCK_SH) + opt |= LOCK_SH; + if (operation & OS_LOCK_UN) + opt |= LOCK_UN; + if (operation & OS_LOCK_NB) + opt |= LOCK_NB; + + return flock(fd, opt); +} + +/* + * os_writev -- writev abstraction layer + */ +ssize_t +os_writev(int fd, const struct iovec *iov, int iovcnt) +{ + return writev(fd, iov, iovcnt); +} + +/* + * os_clock_gettime -- clock_gettime abstraction layer + */ +int +os_clock_gettime(int id, struct timespec *ts) +{ + return clock_gettime(id, ts); +} + +/* + * os_rand_r -- rand_r abstraction layer + */ +unsigned +os_rand_r(unsigned *seedp) +{ + return (unsigned)rand_r(seedp); +} + +/* + * os_unsetenv -- unsetenv abstraction layer + */ +int +os_unsetenv(const char *name) +{ + return unsetenv(name); +} + +/* + * os_setenv -- setenv abstraction layer + */ +int +os_setenv(const char *name, const char *value, int overwrite) +{ + return setenv(name, value, overwrite); +} + +/* + * secure_getenv -- provide GNU secure_getenv for FreeBSD + */ +#if defined(__FreeBSD__) +static char * +secure_getenv(const char *name) +{ + if (issetugid() != 0) + return NULL; + + return getenv(name); +} +#endif + +/* + * os_getenv -- getenv abstraction layer + */ +char * +os_getenv(const char *name) +{ + return secure_getenv(name); +} + +/* + * os_strsignal -- strsignal abstraction layer + */ +const char * +os_strsignal(int sig) +{ + return strsignal(sig); +} + +int +os_execv(const char *path, char *const argv[]) +{ + return execv(path, argv); +} diff --git a/src/pmdk/src/core/os_thread.h b/src/pmdk/src/core/os_thread.h new file mode 100644 index 000000000..7369a4913 --- /dev/null +++ b/src/pmdk/src/core/os_thread.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * os_thread.h -- os thread abstraction layer + */ + +#ifndef OS_THREAD_H +#define OS_THREAD_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union { + long long align; + char padding[44]; /* linux: 40 windows: 44 */ +} os_mutex_t; + +typedef union { + long long align; + char padding[56]; /* linux: 56 windows: 13 */ +} os_rwlock_t; + +typedef union { + long long align; + char padding[48]; /* linux: 48 windows: 12 */ +} os_cond_t; + +typedef union { + long long align; + char padding[32]; /* linux: 8 windows: 32 */ +} os_thread_t; + +typedef union { + long long align; /* linux: long windows: 8 FreeBSD: 12 */ + char padding[16]; /* 16 to be safe */ +} os_once_t; + +#define OS_ONCE_INIT { .padding = {0} } + +typedef unsigned os_tls_key_t; + +typedef union { + long long align; + char padding[56]; /* linux: 56 windows: 8 */ +} os_semaphore_t; + +typedef union { + long long align; + char padding[56]; /* linux: 56 windows: 8 */ +} os_thread_attr_t; + +typedef union { + long long align; + char padding[512]; +} os_cpu_set_t; + +#ifdef __FreeBSD__ +#define cpu_set_t cpuset_t +typedef uintptr_t os_spinlock_t; +#else +typedef volatile int os_spinlock_t; /* XXX: not implemented on windows */ +#endif + +void os_cpu_zero(os_cpu_set_t *set); +void os_cpu_set(size_t cpu, os_cpu_set_t *set); + +#ifndef _WIN32 +#define _When_(...) 
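+/* (SAL annotations such as _When_ are MSVC-only; they expand to nothing here) */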
+#endif +int os_once(os_once_t *o, void (*func)(void)); + +int os_tls_key_create(os_tls_key_t *key, void (*destructor)(void *)); +int os_tls_key_delete(os_tls_key_t key); +int os_tls_set(os_tls_key_t key, const void *value); +void *os_tls_get(os_tls_key_t key); + +int os_mutex_init(os_mutex_t *__restrict mutex); +int os_mutex_destroy(os_mutex_t *__restrict mutex); +_When_(return == 0, _Acquires_lock_(mutex->lock)) +int os_mutex_lock(os_mutex_t *__restrict mutex); +_When_(return == 0, _Acquires_lock_(mutex->lock)) +int os_mutex_trylock(os_mutex_t *__restrict mutex); +int os_mutex_unlock(os_mutex_t *__restrict mutex); + +/* XXX - non POSIX */ +int os_mutex_timedlock(os_mutex_t *__restrict mutex, + const struct timespec *abstime); + +int os_rwlock_init(os_rwlock_t *__restrict rwlock); +int os_rwlock_destroy(os_rwlock_t *__restrict rwlock); +int os_rwlock_rdlock(os_rwlock_t *__restrict rwlock); +int os_rwlock_wrlock(os_rwlock_t *__restrict rwlock); +int os_rwlock_tryrdlock(os_rwlock_t *__restrict rwlock); +_When_(return == 0, _Acquires_exclusive_lock_(rwlock->lock)) +int os_rwlock_trywrlock(os_rwlock_t *__restrict rwlock); +_When_(rwlock->is_write != 0, _Requires_exclusive_lock_held_(rwlock->lock)) +_When_(rwlock->is_write == 0, _Requires_shared_lock_held_(rwlock->lock)) +int os_rwlock_unlock(os_rwlock_t *__restrict rwlock); +int os_rwlock_timedrdlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime); +int os_rwlock_timedwrlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime); + +int os_spin_init(os_spinlock_t *lock, int pshared); +int os_spin_destroy(os_spinlock_t *lock); +int os_spin_lock(os_spinlock_t *lock); +int os_spin_unlock(os_spinlock_t *lock); +int os_spin_trylock(os_spinlock_t *lock); + +int os_cond_init(os_cond_t *__restrict cond); +int os_cond_destroy(os_cond_t *__restrict cond); +int os_cond_broadcast(os_cond_t *__restrict cond); +int os_cond_signal(os_cond_t *__restrict cond); +int os_cond_timedwait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex, const struct timespec *abstime); +int os_cond_wait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex); + +/* threading */ + +int os_thread_create(os_thread_t *thread, const os_thread_attr_t *attr, + void *(*start_routine)(void *), void *arg); + +int os_thread_join(os_thread_t *thread, void **result); + +void os_thread_self(os_thread_t *thread); + +/* thread affinity */ + +int os_thread_setaffinity_np(os_thread_t *thread, size_t set_size, + const os_cpu_set_t *set); + +int os_thread_atfork(void (*prepare)(void), void (*parent)(void), + void (*child)(void)); + +int os_semaphore_init(os_semaphore_t *sem, unsigned value); +int os_semaphore_destroy(os_semaphore_t *sem); +int os_semaphore_wait(os_semaphore_t *sem); +int os_semaphore_trywait(os_semaphore_t *sem); +int os_semaphore_post(os_semaphore_t *sem); + +#ifdef __cplusplus +} +#endif +#endif /* OS_THREAD_H */ diff --git a/src/pmdk/src/core/os_thread_posix.c b/src/pmdk/src/core/os_thread_posix.c new file mode 100644 index 000000000..2d9b83b3c --- /dev/null +++ b/src/pmdk/src/core/os_thread_posix.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +/* + * os_thread_posix.c -- Posix thread abstraction layer + */ + +#define _GNU_SOURCE +#include +#ifdef __FreeBSD__ +#include +#endif +#include + +#include "os_thread.h" +#include "util.h" + +typedef struct { + pthread_t thread; +} internal_os_thread_t; + +/* + * os_once -- pthread_once abstraction layer + */ +int +os_once(os_once_t 
*o, void (*func)(void)) +{ + COMPILE_ERROR_ON(sizeof(os_once_t) < sizeof(pthread_once_t)); + return pthread_once((pthread_once_t *)o, func); +} + +/* + * os_tls_key_create -- pthread_key_create abstraction layer + */ +int +os_tls_key_create(os_tls_key_t *key, void (*destructor)(void *)) +{ + COMPILE_ERROR_ON(sizeof(os_tls_key_t) < sizeof(pthread_key_t)); + return pthread_key_create((pthread_key_t *)key, destructor); +} + +/* + * os_tls_key_delete -- pthread_key_delete abstraction layer + */ +int +os_tls_key_delete(os_tls_key_t key) +{ + return pthread_key_delete((pthread_key_t)key); +} + +/* + * os_tls_setspecific -- pthread_key_setspecific abstraction layer + */ +int +os_tls_set(os_tls_key_t key, const void *value) +{ + return pthread_setspecific((pthread_key_t)key, value); +} + +/* + * os_tls_get -- pthread_key_getspecific abstraction layer + */ +void * +os_tls_get(os_tls_key_t key) +{ + return pthread_getspecific((pthread_key_t)key); +} + +/* + * os_mutex_init -- pthread_mutex_init abstraction layer + */ +int +os_mutex_init(os_mutex_t *__restrict mutex) +{ + COMPILE_ERROR_ON(sizeof(os_mutex_t) < sizeof(pthread_mutex_t)); + return pthread_mutex_init((pthread_mutex_t *)mutex, NULL); +} + +/* + * os_mutex_destroy -- pthread_mutex_destroy abstraction layer + */ +int +os_mutex_destroy(os_mutex_t *__restrict mutex) +{ + return pthread_mutex_destroy((pthread_mutex_t *)mutex); +} + +/* + * os_mutex_lock -- pthread_mutex_lock abstraction layer + */ +int +os_mutex_lock(os_mutex_t *__restrict mutex) +{ + return pthread_mutex_lock((pthread_mutex_t *)mutex); +} + +/* + * os_mutex_trylock -- pthread_mutex_trylock abstraction layer + */ +int +os_mutex_trylock(os_mutex_t *__restrict mutex) +{ + return pthread_mutex_trylock((pthread_mutex_t *)mutex); +} + +/* + * os_mutex_unlock -- pthread_mutex_unlock abstraction layer + */ +int +os_mutex_unlock(os_mutex_t *__restrict mutex) +{ + return pthread_mutex_unlock((pthread_mutex_t *)mutex); +} + +/* + * os_mutex_timedlock -- pthread_mutex_timedlock abstraction layer + */ +int +os_mutex_timedlock(os_mutex_t *__restrict mutex, + const struct timespec *abstime) +{ + return pthread_mutex_timedlock((pthread_mutex_t *)mutex, abstime); +} + +/* + * os_rwlock_init -- pthread_rwlock_init abstraction layer + */ +int +os_rwlock_init(os_rwlock_t *__restrict rwlock) +{ + COMPILE_ERROR_ON(sizeof(os_rwlock_t) < sizeof(pthread_rwlock_t)); + return pthread_rwlock_init((pthread_rwlock_t *)rwlock, NULL); +} + +/* + * os_rwlock_destroy -- pthread_rwlock_destroy abstraction layer + */ +int +os_rwlock_destroy(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_destroy((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_rdlock - pthread_rwlock_rdlock abstraction layer + */ +int +os_rwlock_rdlock(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_rdlock((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_wrlock -- pthread_rwlock_wrlock abstraction layer + */ +int +os_rwlock_wrlock(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_wrlock((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_unlock -- pthread_rwlock_unlock abstraction layer + */ +int +os_rwlock_unlock(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_tryrdlock -- pthread_rwlock_tryrdlock abstraction layer + */ +int +os_rwlock_tryrdlock(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_tryrdlock((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_tryrwlock -- pthread_rwlock_trywrlock abstraction layer + */ +int 
+os_rwlock_trywrlock(os_rwlock_t *__restrict rwlock) +{ + return pthread_rwlock_trywrlock((pthread_rwlock_t *)rwlock); +} + +/* + * os_rwlock_timedrdlock -- pthread_rwlock_timedrdlock abstraction layer + */ +int +os_rwlock_timedrdlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime) +{ + return pthread_rwlock_timedrdlock((pthread_rwlock_t *)rwlock, abstime); +} + +/* + * os_rwlock_timedwrlock -- pthread_rwlock_timedwrlock abstraction layer + */ +int +os_rwlock_timedwrlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime) +{ + return pthread_rwlock_timedwrlock((pthread_rwlock_t *)rwlock, abstime); +} + +/* + * os_spin_init -- pthread_spin_init abstraction layer + */ +int +os_spin_init(os_spinlock_t *lock, int pshared) +{ + COMPILE_ERROR_ON(sizeof(os_spinlock_t) < sizeof(pthread_spinlock_t)); + return pthread_spin_init((pthread_spinlock_t *)lock, pshared); +} + +/* + * os_spin_destroy -- pthread_spin_destroy abstraction layer + */ +int +os_spin_destroy(os_spinlock_t *lock) +{ + return pthread_spin_destroy((pthread_spinlock_t *)lock); +} + +/* + * os_spin_lock -- pthread_spin_lock abstraction layer + */ +int +os_spin_lock(os_spinlock_t *lock) +{ + return pthread_spin_lock((pthread_spinlock_t *)lock); +} + +/* + * os_spin_unlock -- pthread_spin_unlock abstraction layer + */ +int +os_spin_unlock(os_spinlock_t *lock) +{ + return pthread_spin_unlock((pthread_spinlock_t *)lock); +} + +/* + * os_spin_trylock -- pthread_spin_trylock abstraction layer + */ + +int +os_spin_trylock(os_spinlock_t *lock) +{ + return pthread_spin_trylock((pthread_spinlock_t *)lock); +} +/* + * os_cond_init -- pthread_cond_init abstraction layer + */ +int +os_cond_init(os_cond_t *__restrict cond) +{ + COMPILE_ERROR_ON(sizeof(os_cond_t) < sizeof(pthread_cond_t)); + return pthread_cond_init((pthread_cond_t *)cond, NULL); +} + +/* + * os_cond_destroy -- pthread_cond_destroy abstraction layer + */ +int +os_cond_destroy(os_cond_t *__restrict cond) +{ + return pthread_cond_destroy((pthread_cond_t *)cond); +} + +/* + * os_cond_broadcast -- pthread_cond_broadcast abstraction layer + */ +int +os_cond_broadcast(os_cond_t *__restrict cond) +{ + return pthread_cond_broadcast((pthread_cond_t *)cond); +} + +/* + * os_cond_signal -- pthread_cond_signal abstraction layer + */ +int +os_cond_signal(os_cond_t *__restrict cond) +{ + return pthread_cond_signal((pthread_cond_t *)cond); +} + +/* + * os_cond_timedwait -- pthread_cond_timedwait abstraction layer + */ +int +os_cond_timedwait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex, const struct timespec *abstime) +{ + return pthread_cond_timedwait((pthread_cond_t *)cond, + (pthread_mutex_t *)mutex, abstime); +} + +/* + * os_cond_wait -- pthread_cond_wait abstraction layer + */ +int +os_cond_wait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex) +{ + return pthread_cond_wait((pthread_cond_t *)cond, + (pthread_mutex_t *)mutex); +} + +/* + * os_thread_create -- pthread_create abstraction layer + */ +int +os_thread_create(os_thread_t *thread, const os_thread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + COMPILE_ERROR_ON(sizeof(os_thread_t) < sizeof(internal_os_thread_t)); + internal_os_thread_t *thread_info = (internal_os_thread_t *)thread; + + return pthread_create(&thread_info->thread, (pthread_attr_t *)attr, + start_routine, arg); +} + +/* + * os_thread_join -- pthread_join abstraction layer + */ +int +os_thread_join(os_thread_t *thread, void **result) +{ + internal_os_thread_t *thread_info = (internal_os_thread_t 
*)thread; + + return pthread_join(thread_info->thread, result); +} + +/* + * os_thread_self -- pthread_self abstraction layer + */ +void +os_thread_self(os_thread_t *thread) +{ + internal_os_thread_t *thread_info = (internal_os_thread_t *)thread; + + thread_info->thread = pthread_self(); +} + +/* + * os_thread_atfork -- pthread_atfork abstraction layer + */ +int +os_thread_atfork(void (*prepare)(void), void (*parent)(void), + void (*child)(void)) +{ + return pthread_atfork(prepare, parent, child); +} + +/* + * os_thread_setaffinity_np -- pthread_atfork abstraction layer + */ +int +os_thread_setaffinity_np(os_thread_t *thread, size_t set_size, + const os_cpu_set_t *set) +{ + COMPILE_ERROR_ON(sizeof(os_cpu_set_t) < sizeof(cpu_set_t)); + internal_os_thread_t *thread_info = (internal_os_thread_t *)thread; + + return pthread_setaffinity_np(thread_info->thread, set_size, + (cpu_set_t *)set); +} + +/* + * os_cpu_zero -- CP_ZERO abstraction layer + */ +void +os_cpu_zero(os_cpu_set_t *set) +{ + CPU_ZERO((cpu_set_t *)set); +} + +/* + * os_cpu_set -- CP_SET abstraction layer + */ +void +os_cpu_set(size_t cpu, os_cpu_set_t *set) +{ + CPU_SET(cpu, (cpu_set_t *)set); +} + +/* + * os_semaphore_init -- initializes semaphore instance + */ +int +os_semaphore_init(os_semaphore_t *sem, unsigned value) +{ + COMPILE_ERROR_ON(sizeof(os_semaphore_t) < sizeof(sem_t)); + return sem_init((sem_t *)sem, 0, value); +} + +/* + * os_semaphore_destroy -- destroys a semaphore instance + */ +int +os_semaphore_destroy(os_semaphore_t *sem) +{ + return sem_destroy((sem_t *)sem); +} + +/* + * os_semaphore_wait -- decreases the value of the semaphore + */ +int +os_semaphore_wait(os_semaphore_t *sem) +{ + return sem_wait((sem_t *)sem); +} + +/* + * os_semaphore_trywait -- tries to decrease the value of the semaphore + */ +int +os_semaphore_trywait(os_semaphore_t *sem) +{ + return sem_trywait((sem_t *)sem); +} + +/* + * os_semaphore_post -- increases the value of the semaphore + */ +int +os_semaphore_post(os_semaphore_t *sem) +{ + return sem_post((sem_t *)sem); +} diff --git a/src/pmdk/src/core/os_thread_windows.c b/src/pmdk/src/core/os_thread_windows.c new file mode 100644 index 000000000..8a2f1e9a5 --- /dev/null +++ b/src/pmdk/src/core/os_thread_windows.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * os_thread_windows.c -- (imperfect) POSIX-like threads for Windows + * + * Loosely inspired by: + * http://locklessinc.com/articles/pthreads_on_windows/ + */ + +#include +#include +#include +#include +#include "os_thread.h" +#include "util.h" +#include "out.h" + +typedef struct { + unsigned attr; + CRITICAL_SECTION lock; +} internal_os_mutex_t; + +typedef struct { + unsigned attr; + char is_write; + SRWLOCK lock; +} internal_os_rwlock_t; + +typedef struct { + unsigned attr; + CONDITION_VARIABLE cond; +} internal_os_cond_t; + +typedef long long internal_os_once_t; + +typedef struct { + HANDLE handle; +} internal_semaphore_t; + +typedef struct { + GROUP_AFFINITY affinity; +} internal_os_cpu_set_t; + +typedef struct { + HANDLE thread_handle; + void *arg; + void *(*start_routine)(void *); + void *result; +} internal_os_thread_t; + +/* number of useconds between 1970-01-01T00:00:00Z and 1601-01-01T00:00:00Z */ +#define DELTA_WIN2UNIX (11644473600000000ull) +#define TIMED_LOCK(action, ts) {\ + if ((action) == TRUE)\ + return 0;\ + unsigned long long et = (ts)->tv_sec * 1000000000 + (ts)->tv_nsec;\ + while (1) {\ + FILETIME _t;\ + GetSystemTimeAsFileTime(&_t);\ + ULARGE_INTEGER _UI = {\ + .HighPart = _t.dwHighDateTime,\ + .LowPart = _t.dwLowDateTime,\ + };\ + if (100 * _UI.QuadPart - 1000 * DELTA_WIN2UNIX >= et)\ + return ETIMEDOUT;\ + if ((action) == TRUE)\ + return 0;\ + Sleep(1);\ + }\ + return ETIMEDOUT;\ +} + +/* + * os_mutex_init -- initializes mutex + */ +int +os_mutex_init(os_mutex_t *__restrict mutex) +{ + COMPILE_ERROR_ON(sizeof(os_mutex_t) < sizeof(internal_os_mutex_t)); + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + InitializeCriticalSection(&mutex_internal->lock); + return 0; +} + +/* + * os_mutex_destroy -- destroys mutex + */ +int +os_mutex_destroy(os_mutex_t *__restrict mutex) +{ + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + DeleteCriticalSection(&mutex_internal->lock); + return 0; +} + +/* + * os_mutex_lock -- locks mutex + */ +_Use_decl_annotations_ +int +os_mutex_lock(os_mutex_t *__restrict mutex) +{ + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + EnterCriticalSection(&mutex_internal->lock); + + if (mutex_internal->lock.RecursionCount > 1) { + LeaveCriticalSection(&mutex_internal->lock); + FATAL("deadlock detected"); + } + return 0; +} + +/* + * os_mutex_trylock -- tries lock mutex + */ +_Use_decl_annotations_ +int +os_mutex_trylock(os_mutex_t *__restrict mutex) +{ + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + if (TryEnterCriticalSection(&mutex_internal->lock) == FALSE) + return EBUSY; + + if (mutex_internal->lock.RecursionCount > 1) { + LeaveCriticalSection(&mutex_internal->lock); + return EBUSY; + } + + return 0; +} + +/* + * os_mutex_timedlock -- tries lock mutex with timeout + */ +int +os_mutex_timedlock(os_mutex_t *__restrict mutex, + const struct timespec *abstime) +{ + TIMED_LOCK((os_mutex_trylock(mutex) == 0), abstime); +} + 
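The TIMED_LOCK macro above converts the absolute wall-clock deadline in the timespec to the Windows epoch and polls the try-variant until the deadline passes. A hypothetical caller (an illustrative sketch, not part of the patch) builds such a deadline like this:

#include <errno.h>
#include <time.h>
#include "os_thread.h"

/* illustrative only: try to take m, giving up after two seconds */
static int
timedlock_example(os_mutex_t *m)
{
	struct timespec deadline;

	/* the timeout is an absolute point in time, not a relative delay */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 2;

	return os_mutex_timedlock(m, &deadline); /* 0 on success, ETIMEDOUT */
}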
+/* + * os_mutex_unlock -- unlocks mutex + */ +int +os_mutex_unlock(os_mutex_t *__restrict mutex) +{ + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + LeaveCriticalSection(&mutex_internal->lock); + return 0; +} + +/* + * os_rwlock_init -- initializes rwlock + */ +int +os_rwlock_init(os_rwlock_t *__restrict rwlock) +{ + COMPILE_ERROR_ON(sizeof(os_rwlock_t) < sizeof(internal_os_rwlock_t)); + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + InitializeSRWLock(&rwlock_internal->lock); + return 0; +} + +/* + * os_rwlock_destroy -- destroys rwlock + */ +int +os_rwlock_destroy(os_rwlock_t *__restrict rwlock) +{ + /* do nothing */ + UNREFERENCED_PARAMETER(rwlock); + + return 0; +} + +/* + * os_rwlock_rdlock -- get shared lock + */ +int +os_rwlock_rdlock(os_rwlock_t *__restrict rwlock) +{ + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + AcquireSRWLockShared(&rwlock_internal->lock); + rwlock_internal->is_write = 0; + return 0; +} + +/* + * os_rwlock_wrlock -- get exclusive lock + */ +int +os_rwlock_wrlock(os_rwlock_t *__restrict rwlock) +{ + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + AcquireSRWLockExclusive(&rwlock_internal->lock); + rwlock_internal->is_write = 1; + return 0; +} + +/* + * os_rwlock_tryrdlock -- tries get shared lock + */ +int +os_rwlock_tryrdlock(os_rwlock_t *__restrict rwlock) +{ + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + if (TryAcquireSRWLockShared(&rwlock_internal->lock) == FALSE) { + return EBUSY; + } else { + rwlock_internal->is_write = 0; + return 0; + } +} + +/* + * os_rwlock_trywrlock -- tries get exclusive lock + */ +_Use_decl_annotations_ +int +os_rwlock_trywrlock(os_rwlock_t *__restrict rwlock) +{ + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + if (TryAcquireSRWLockExclusive(&rwlock_internal->lock) == FALSE) { + return EBUSY; + } else { + rwlock_internal->is_write = 1; + return 0; + } +} + +/* + * os_rwlock_timedrdlock -- gets shared lock with timeout + */ +int +os_rwlock_timedrdlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime) +{ + TIMED_LOCK((os_rwlock_tryrdlock(rwlock) == 0), abstime); +} + +/* + * os_rwlock_timedwrlock -- gets exclusive lock with timeout + */ +int +os_rwlock_timedwrlock(os_rwlock_t *__restrict rwlock, + const struct timespec *abstime) +{ + TIMED_LOCK((os_rwlock_trywrlock(rwlock) == 0), abstime); +} + +/* + * os_rwlock_unlock -- unlocks rwlock + */ +_Use_decl_annotations_ +int +os_rwlock_unlock(os_rwlock_t *__restrict rwlock) +{ + internal_os_rwlock_t *rwlock_internal = (internal_os_rwlock_t *)rwlock; + if (rwlock_internal->is_write) + ReleaseSRWLockExclusive(&rwlock_internal->lock); + else + ReleaseSRWLockShared(&rwlock_internal->lock); + return 0; +} + +/* + * os_cond_init -- initializes condition variable + */ +int +os_cond_init(os_cond_t *__restrict cond) +{ + COMPILE_ERROR_ON(sizeof(os_cond_t) < sizeof(internal_os_cond_t)); + + internal_os_cond_t *cond_internal = (internal_os_cond_t *)cond; + InitializeConditionVariable(&cond_internal->cond); + return 0; +} + +/* + * os_cond_destroy -- destroys condition variable + */ +int +os_cond_destroy(os_cond_t *__restrict cond) +{ + /* do nothing */ + UNREFERENCED_PARAMETER(cond); + + return 0; +} + +/* + * os_cond_broadcast -- broadcast condition variable + */ +int +os_cond_broadcast(os_cond_t *__restrict cond) +{ + internal_os_cond_t *cond_internal = (internal_os_cond_t *)cond; + 
WakeAllConditionVariable(&cond_internal->cond); + return 0; +} + +/* + * os_cond_wait -- signal condition variable + */ +int +os_cond_signal(os_cond_t *__restrict cond) +{ + internal_os_cond_t *cond_internal = (internal_os_cond_t *)cond; + WakeConditionVariable(&cond_internal->cond); + return 0; +} +/* + * get_rel_wait -- (internal) convert timespec to windows timeout + */ +static DWORD +get_rel_wait(const struct timespec *abstime) +{ + struct __timeb64 t; + _ftime64_s(&t); + time_t now_ms = t.time * 1000 + t.millitm; + time_t ms = (time_t)(abstime->tv_sec * 1000 + + abstime->tv_nsec / 1000000); + + return (DWORD)(ms - now_ms); +} + +/* + * os_cond_timedwait -- waits on condition variable with timeout + */ +int +os_cond_timedwait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex, const struct timespec *abstime) +{ + internal_os_cond_t *cond_internal = (internal_os_cond_t *)cond; + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + BOOL ret; + SetLastError(0); + ret = SleepConditionVariableCS(&cond_internal->cond, + &mutex_internal->lock, get_rel_wait(abstime)); + if (ret == FALSE) + return (GetLastError() == ERROR_TIMEOUT) ? ETIMEDOUT : EINVAL; + + return 0; +} + +/* + * os_cond_wait -- waits on condition variable + */ +int +os_cond_wait(os_cond_t *__restrict cond, + os_mutex_t *__restrict mutex) +{ + internal_os_cond_t *cond_internal = (internal_os_cond_t *)cond; + internal_os_mutex_t *mutex_internal = (internal_os_mutex_t *)mutex; + /* XXX - return error code based on GetLastError() */ + BOOL ret; + ret = SleepConditionVariableCS(&cond_internal->cond, + &mutex_internal->lock, INFINITE); + return (ret == FALSE) ? EINVAL : 0; +} + +/* + * os_once -- once-only function call + */ +int +os_once(os_once_t *once, void (*func)(void)) +{ + internal_os_once_t *once_internal = (internal_os_once_t *)once; + internal_os_once_t tmp; + + while ((tmp = *once_internal) != 2) { + if (tmp == 1) + continue; /* another thread is already calling func() */ + + /* try to be the first one... 
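(the CAS below takes the state from 0 to 1, electing this thread to run func(); a second CAS later marks completion with 2)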
*/ + if (!util_bool_compare_and_swap64(once_internal, tmp, 1)) + continue; /* sorry, another thread was faster */ + + func(); + + if (!util_bool_compare_and_swap64(once_internal, 1, 2)) { + ERR("error setting once"); + return -1; + } + } + + return 0; +} + +/* + * os_tls_key_create -- creates a new tls key + */ +int +os_tls_key_create(os_tls_key_t *key, void (*destructor)(void *)) +{ + *key = FlsAlloc(destructor); + if (*key == TLS_OUT_OF_INDEXES) + return EAGAIN; + return 0; +} + +/* + * os_tls_key_delete -- deletes key from tls + */ +int +os_tls_key_delete(os_tls_key_t key) +{ + if (!FlsFree(key)) + return EINVAL; + return 0; +} + +/* + * os_tls_set -- sets a value in tls + */ +int +os_tls_set(os_tls_key_t key, const void *value) +{ + if (!FlsSetValue(key, (LPVOID)value)) + return ENOENT; + return 0; +} + +/* + * os_tls_get -- gets a value from tls + */ +void * +os_tls_get(os_tls_key_t key) +{ + return FlsGetValue(key); +} + +/* threading */ + +/* + * os_thread_start_routine_wrapper is a start routine for _beginthreadex() and + * it helps: + * + * - wrap the os_thread_create's start function + */ +static unsigned __stdcall +os_thread_start_routine_wrapper(void *arg) +{ + internal_os_thread_t *thread_info = (internal_os_thread_t *)arg; + + thread_info->result = thread_info->start_routine(thread_info->arg); + + return 0; +} + +/* + * os_thread_create -- starts a new thread + */ +int +os_thread_create(os_thread_t *thread, const os_thread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ + COMPILE_ERROR_ON(sizeof(os_thread_t) < sizeof(internal_os_thread_t)); + internal_os_thread_t *thread_info = (internal_os_thread_t *)thread; + + thread_info->start_routine = start_routine; + thread_info->arg = arg; + + thread_info->thread_handle = (HANDLE)_beginthreadex(NULL, 0, + os_thread_start_routine_wrapper, thread_info, CREATE_SUSPENDED, + NULL); + if (thread_info->thread_handle == 0) { + free(thread_info); + return errno; + } + + if (ResumeThread(thread_info->thread_handle) == -1) { + free(thread_info); + return EAGAIN; + } + + return 0; +} + +/* + * os_thread_join -- joins a thread + */ +int +os_thread_join(os_thread_t *thread, void **result) +{ + internal_os_thread_t *internal_thread = (internal_os_thread_t *)thread; + WaitForSingleObject(internal_thread->thread_handle, INFINITE); + CloseHandle(internal_thread->thread_handle); + + if (result != NULL) + *result = internal_thread->result; + + return 0; +} + +/* + * os_thread_self -- returns handle to calling thread + */ +void +os_thread_self(os_thread_t *thread) +{ + internal_os_thread_t *internal_thread = (internal_os_thread_t *)thread; + internal_thread->thread_handle = GetCurrentThread(); +} + +/* + * os_cpu_zero -- clears cpu set + */ +void +os_cpu_zero(os_cpu_set_t *set) +{ + internal_os_cpu_set_t *internal_set = (internal_os_cpu_set_t *)set; + + memset(&internal_set->affinity, 0, sizeof(internal_set->affinity)); +} + +/* + * os_cpu_set -- adds cpu to set + */ +void +os_cpu_set(size_t cpu, os_cpu_set_t *set) +{ + internal_os_cpu_set_t *internal_set = (internal_os_cpu_set_t *)set; + int sum = 0; + int group_max = GetActiveProcessorGroupCount(); + int group = 0; + while (group < group_max) { + sum += GetActiveProcessorCount(group); + if (sum > cpu) { + /* + * XXX: can't set affinity to two different cpu groups + */ + if (internal_set->affinity.Group != group) { + internal_set->affinity.Mask = 0; + internal_set->affinity.Group = group; + } + + cpu -= sum - GetActiveProcessorCount(group); + internal_set->affinity.Mask |= 1LL << cpu; + 
return; + } + + group++; + } + FATAL("os_cpu_set cpu out of bounds"); +} + +/* + * os_thread_setaffinity_np -- sets affinity of the thread + */ +int +os_thread_setaffinity_np(os_thread_t *thread, size_t set_size, + const os_cpu_set_t *set) +{ + internal_os_cpu_set_t *internal_set = (internal_os_cpu_set_t *)set; + internal_os_thread_t *internal_thread = (internal_os_thread_t *)thread; + + int ret = SetThreadGroupAffinity(internal_thread->thread_handle, + &internal_set->affinity, NULL); + return ret != 0 ? 0 : EINVAL; +} + +/* + * os_semaphore_init -- initializes a new semaphore instance + */ +int +os_semaphore_init(os_semaphore_t *sem, unsigned value) +{ + internal_semaphore_t *internal_sem = (internal_semaphore_t *)sem; + internal_sem->handle = CreateSemaphore(NULL, + value, LONG_MAX, NULL); + + return internal_sem->handle != 0 ? 0 : -1; +} + +/* + * os_semaphore_destroy -- destroys a semaphore instance + */ +int +os_semaphore_destroy(os_semaphore_t *sem) +{ + internal_semaphore_t *internal_sem = (internal_semaphore_t *)sem; + BOOL ret = CloseHandle(internal_sem->handle); + return ret ? 0 : -1; +} + +/* + * os_semaphore_wait -- decreases the value of the semaphore + */ +int +os_semaphore_wait(os_semaphore_t *sem) +{ + internal_semaphore_t *internal_sem = (internal_semaphore_t *)sem; + DWORD ret = WaitForSingleObject(internal_sem->handle, INFINITE); + return ret == WAIT_OBJECT_0 ? 0 : -1; +} + +/* + * os_semaphore_trywait -- tries to decrease the value of the semaphore + */ +int +os_semaphore_trywait(os_semaphore_t *sem) +{ + internal_semaphore_t *internal_sem = (internal_semaphore_t *)sem; + DWORD ret = WaitForSingleObject(internal_sem->handle, 0); + + if (ret == WAIT_TIMEOUT) + errno = EAGAIN; + + return ret == WAIT_OBJECT_0 ? 0 : -1; +} + +/* + * os_semaphore_post -- increases the value of the semaphore + */ +int +os_semaphore_post(os_semaphore_t *sem) +{ + internal_semaphore_t *internal_sem = (internal_semaphore_t *)sem; + BOOL ret = ReleaseSemaphore(internal_sem->handle, 1, NULL); + return ret ? 0 : -1; +} diff --git a/src/pmdk/src/core/os_windows.c b/src/pmdk/src/core/os_windows.c new file mode 100644 index 000000000..e6e7c5b51 --- /dev/null +++ b/src/pmdk/src/core/os_windows.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * os_windows.c -- windows abstraction layer + */ +#include +#include +#include +#include +#include +#include "alloc.h" +#include "util.h" +#include "os.h" +#include "out.h" + +#define UTF8_BOM "\xEF\xBB\xBF" + +/* + * os_open -- open abstraction layer + */ +int +os_open(const char *pathname, int flags, ...) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return -1; + + int ret; + + if (flags & O_CREAT) { + va_list arg; + va_start(arg, flags); + mode_t mode = va_arg(arg, mode_t); + va_end(arg); + ret = _wopen(path, flags, mode); + } else { + ret = _wopen(path, flags); + } + util_free_UTF16(path); + /* BOM skipping should not modify errno */ + int orig_errno = errno; + /* + * text files on windows can contain BOM. As we open files + * in binary mode we have to detect bom and skip it + */ + if (ret != -1) { + char bom[3]; + if (_read(ret, bom, sizeof(bom)) != 3 || + memcmp(bom, UTF8_BOM, 3) != 0) { + /* UTF-8 bom not found - reset file to the beginning */ + _lseek(ret, 0, SEEK_SET); + } + } + errno = orig_errno; + return ret; +} + +/* + * os_fsync -- fsync abstraction layer + */ +int +os_fsync(int fd) +{ + HANDLE handle = (HANDLE) _get_osfhandle(fd); + + if (handle == INVALID_HANDLE_VALUE) { + errno = EBADF; + return -1; + } + + if (!FlushFileBuffers(handle)) { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * os_fsync_dir -- fsync the directory + */ +int +os_fsync_dir(const char *dir_name) +{ + /* XXX not used and not implemented */ + ASSERT(0); + return -1; +} + +/* + * os_stat -- stat abstraction layer + */ +int +os_stat(const char *pathname, os_stat_t *buf) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return -1; + + int ret = _wstat64(path, buf); + + util_free_UTF16(path); + return ret; +} + +/* + * os_unlink -- unlink abstraction layer + */ +int +os_unlink(const char *pathname) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return -1; + + int ret = _wunlink(path); + util_free_UTF16(path); + return ret; +} + +/* + * os_access -- access abstraction layer + */ +int +os_access(const char *pathname, int mode) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return -1; + + int ret = _waccess(path, mode); + util_free_UTF16(path); + return ret; +} + +/* + * os_skipBOM -- (internal) Skip BOM in file stream + * + * text files on windows can contain BOM. We have to detect bom and skip it. 
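+ * For example, a text file saved by Notepad as UTF-8 starts with the
+ * three bytes 0xEF 0xBB 0xBF (see UTF8_BOM above); after this call the
+ * stream is positioned past them, so parsers never see the marker.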
+ */ +static void +os_skipBOM(FILE *file) +{ + if (file == NULL) + return; + + /* BOM skipping should not modify errno */ + int orig_errno = errno; + /* UTF-8 BOM */ + uint8_t bom[3]; + size_t read_num = fread(bom, sizeof(bom[0]), sizeof(bom), file); + if (read_num != ARRAY_SIZE(bom)) + goto out; + + if (memcmp(bom, UTF8_BOM, ARRAY_SIZE(bom)) != 0) { + /* UTF-8 bom not found - reset file to the beginning */ + fseek(file, 0, SEEK_SET); + } + +out: + errno = orig_errno; +} + +/* + * os_fopen -- fopen abstraction layer + */ +FILE * +os_fopen(const char *pathname, const char *mode) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return NULL; + + wchar_t *wmode = util_toUTF16(mode); + if (wmode == NULL) { + util_free_UTF16(path); + return NULL; + } + + FILE *ret = _wfopen(path, wmode); + + util_free_UTF16(path); + util_free_UTF16(wmode); + + os_skipBOM(ret); + return ret; +} + +/* + * os_fdopen -- fdopen abstraction layer + */ +FILE * +os_fdopen(int fd, const char *mode) +{ + FILE *ret = fdopen(fd, mode); + os_skipBOM(ret); + return ret; +} + +/* + * os_chmod -- chmod abstraction layer + */ +int +os_chmod(const char *pathname, mode_t mode) +{ + wchar_t *path = util_toUTF16(pathname); + if (path == NULL) + return -1; + + int ret = _wchmod(path, mode); + util_free_UTF16(path); + return ret; +} + +/* + * os_mkstemp -- generate a unique temporary filename from template + */ +int +os_mkstemp(char *temp) +{ + unsigned rnd; + wchar_t *utemp = util_toUTF16(temp); + if (utemp == NULL) + return -1; + + wchar_t *path = _wmktemp(utemp); + if (path == NULL) { + util_free_UTF16(utemp); + return -1; + } + + wchar_t *npath = Malloc(sizeof(*npath) * (wcslen(path) + _MAX_FNAME)); + if (npath == NULL) { + util_free_UTF16(utemp); + return -1; + } + + wcscpy(npath, path); + + util_free_UTF16(utemp); + /* + * Use rand_s() to generate a more unique temporary file name than + * _mktemp() does. With multiple threads and multiple files, name + * conflicts used to occur even after close(). It also resolved an + * issue with the system removing multiple such files at once. + */ + rand_s(&rnd); + + int ret = _snwprintf(npath + wcslen(npath), _MAX_FNAME, L"%u", rnd); + if (ret < 0) + goto out; + + /* + * Use O_TEMPORARY flag to make sure the file is deleted when + * the last file descriptor is closed. Also, it prevents opening + * this file from another process. + */ + ret = _wopen(npath, O_RDWR | O_CREAT | O_EXCL | O_TEMPORARY, + S_IWRITE | S_IREAD); + +out: + Free(npath); + return ret; +} + +/* + * os_posix_fallocate -- allocate file space + */ +int +os_posix_fallocate(int fd, os_off_t offset, os_off_t len) +{ + /* + * From POSIX: + * "EINVAL -- The len argument was zero or the offset argument was + * less than zero." + * + * From Linux man-page: + * "EINVAL -- offset was less than 0, or len was less than or + * equal to 0" + */ + if (offset < 0 || len <= 0) + return EINVAL; + + /* + * From POSIX: + * "EFBIG -- The value of offset+len is greater than the maximum + * file size." + * + * Overflow can't be checked for by _chsize_s, since it only gets + * the sum. + */ + if (offset + len < offset) + return EFBIG; + + HANDLE handle = (HANDLE)_get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) { + return errno; + } + + FILE_ATTRIBUTE_TAG_INFO attributes; + if (!GetFileInformationByHandleEx(handle, FileAttributeTagInfo, + &attributes, sizeof(attributes))) { + return EINVAL; + } + /* + * To physically allocate space on windows we have to remove + * the sparse file and file compression flags.
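+ * (that is exactly what the FSCTL_SET_SPARSE and FSCTL_SET_COMPRESSION
+ * DeviceIoControl() calls below do).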
This method is much faster + * than using _chsize_s which has terrible performance. DAX on + * windows doesn't support sparse files and file compression, so + * this workaround is acceptable. + */ + if (attributes.FileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) { + DWORD unused; + FILE_SET_SPARSE_BUFFER buffer; + buffer.SetSparse = FALSE; + + if (!DeviceIoControl(handle, FSCTL_SET_SPARSE, &buffer, + sizeof(buffer), NULL, 0, &unused, + NULL)) { + return EINVAL; + } + } + if (attributes.FileAttributes & FILE_ATTRIBUTE_COMPRESSED) { + DWORD unused; + USHORT buffer = 0; /* COMPRESSION_FORMAT_NONE */ + + if (!DeviceIoControl(handle, FSCTL_SET_COMPRESSION, + &buffer, sizeof(buffer), NULL, 0, + &unused, NULL)) { + return EINVAL; + } + } + + /* + * posix_fallocate should not clobber errno, but + * _filelengthi64 might set errno. + */ + int orig_errno = errno; + + __int64 current_size = _filelengthi64(fd); + + int file_length_errno = errno; + errno = orig_errno; + + if (current_size < 0) + return file_length_errno; + + __int64 requested_size = offset + len; + + if (requested_size <= current_size) + return 0; + + int ret = os_ftruncate(fd, requested_size); + + /* os_ftruncate() returns -1 and sets errno on failure */ + if (ret) + return errno; + + return 0; +} + +/* + * os_ftruncate -- truncate a file to a specified length + */ +int +os_ftruncate(int fd, os_off_t length) +{ + LARGE_INTEGER distanceToMove = {0}; + distanceToMove.QuadPart = length; + HANDLE handle = (HANDLE)_get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) + return -1; + + if (!SetFilePointerEx(handle, distanceToMove, NULL, FILE_BEGIN)) { + errno = EINVAL; + return -1; + } + + if (!SetEndOfFile(handle)) { + errno = EINVAL; + return -1; + } + return 0; +} + +/* + * os_flock -- apply or remove an advisory lock on an open file + */ +int +os_flock(int fd, int operation) +{ + int flags = 0; + SYSTEM_INFO systemInfo; + + GetSystemInfo(&systemInfo); + + switch (operation & (OS_LOCK_EX | OS_LOCK_SH | OS_LOCK_UN)) { + case OS_LOCK_EX: + case OS_LOCK_SH: + if (operation & OS_LOCK_NB) + flags = _LK_NBLCK; + else + flags = _LK_LOCK; + break; + + case OS_LOCK_UN: + flags = _LK_UNLCK; + break; + + default: + errno = EINVAL; + return -1; + } + + os_off_t filelen = _filelengthi64(fd); + if (filelen < 0) + return -1; + + /* for our purpose it's enough to lock the first page of the file */ + long len = (filelen > systemInfo.dwPageSize) ? + systemInfo.dwPageSize : (long)filelen; + + int res = _locking(fd, flags, len); + if (res != 0 && errno == EACCES) + errno = EWOULDBLOCK; /* for consistency with flock() */ + + return res; +} + +/* + * os_writev -- windows version of writev function + * + * XXX: _write and other similar functions are 32 bit on windows; + * if the size of the data is bigger than 2^32, this function + * will not be atomic. + */ +ssize_t +os_writev(int fd, const struct iovec *iov, int iovcnt) +{ + size_t size = 0; + + /* XXX: _write is 32 bit on windows */ + for (int i = 0; i < iovcnt; i++) + size += iov[i].iov_len; + + void *buf = malloc(size); + if (buf == NULL) + return -1; /* malloc() sets errno to ENOMEM */ + + char *it_buf = buf; + for (int i = 0; i < iovcnt; i++) { + memcpy(it_buf, iov[i].iov_base, iov[i].iov_len); + it_buf += iov[i].iov_len; + } + + char *pos = buf; + ssize_t written = 0; + while (size > 0) { + int ret = _write(fd, pos, size >= MAXUINT ?
+ MAXUINT : (unsigned)size); + if (ret == -1) { + written = -1; + break; + } + written += ret; + size -= ret; + pos += ret; + } + + free(buf); + return written; +} + +#define NSEC_IN_SEC 1000000000ull +/* number of useconds between 1970-01-01T00:00:00Z and 1601-01-01T00:00:00Z */ +#define DELTA_WIN2UNIX (11644473600000000ull) + +/* + * os_clock_gettime -- returns elapsed time since the system was restarted + * or since Epoch, depending on the mode id + */ +int +os_clock_gettime(int id, struct timespec *ts) +{ + switch (id) { + case CLOCK_MONOTONIC: + { + LARGE_INTEGER time; + LARGE_INTEGER frequency; + + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&time); + + ts->tv_sec = time.QuadPart / frequency.QuadPart; + ts->tv_nsec = (long)( + (time.QuadPart % frequency.QuadPart) * + NSEC_IN_SEC / frequency.QuadPart); + } + break; + + case CLOCK_REALTIME: + { + FILETIME ctime_ft; + GetSystemTimeAsFileTime(&ctime_ft); + ULARGE_INTEGER ctime = { + .HighPart = ctime_ft.dwHighDateTime, + .LowPart = ctime_ft.dwLowDateTime, + }; + ts->tv_sec = (ctime.QuadPart - DELTA_WIN2UNIX * 10) + / 10000000; + ts->tv_nsec = ((ctime.QuadPart - DELTA_WIN2UNIX * 10) + % 10000000) * 100; + } + break; + + default: + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * os_setenv -- change or add an environment variable + */ +int +os_setenv(const char *name, const char *value, int overwrite) +{ + errno_t err; + + /* + * If the caller doesn't want to overwrite, make sure that an + * environment variable with the same name doesn't already exist. + */ + if (!overwrite && getenv(name)) + return 0; + + /* + * _putenv_s returns a non-zero error code on failure, but setenv + * needs to return -1 on failure, so let's translate the error code. + */ + if ((err = _putenv_s(name, value)) != 0) { + errno = err; + return -1; + } + + return 0; +} + +/* + * os_unsetenv -- remove an environment variable + */ +int +os_unsetenv(const char *name) +{ + errno_t err; + if ((err = _putenv_s(name, "")) != 0) { + errno = err; + return -1; + } + + return 0; +} + +/* + * os_getenv -- getenv abstraction layer + */ +char * +os_getenv(const char *name) +{ + return getenv(name); +} + +/* + * rand_r -- rand_r for windows + * + * XXX: RAND_MAX is equal to 0x7fff on Windows, so to get a 32 bit random + * number we need to merge two numbers returned by rand_s(). + * It is not the best solution, as subsequences returned by rand_s are + * not guaranteed to be independent. + * + * XXX: Windows doesn't implement a deterministic thread-safe pseudorandom + * generator (a generator which can be initialized with a seed). + * We have to choose between a deterministic non-thread-safe generator + * (rand(), srand()) and a non-deterministic thread-safe generator (rand_s()). + * As thread-safety is more important, the seed parameter is ignored in this + * implementation.
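+ * (Note that in the bitwise OR below the upper 16 bits of part2 overlap
+ * the bits of part1 shifted left, so the result mixes the two calls
+ * rather than splitting cleanly into 16-bit halves.)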
+ */ +unsigned +os_rand_r(unsigned *seedp) +{ + UNREFERENCED_PARAMETER(seedp); + unsigned part1, part2; + rand_s(&part1); + rand_s(&part2); + return part1 << 16 | part2; +} + +/* + * sys_siglist -- map of signal to human readable messages like sys_siglist + */ +const char * const sys_siglist[] = { + "Unknown signal 0", /* 0 */ + "Hangup", /* 1 */ + "Interrupt", /* 2 */ + "Quit", /* 3 */ + "Illegal instruction", /* 4 */ + "Trace/breakpoint trap", /* 5 */ + "Aborted", /* 6 */ + "Bus error", /* 7 */ + "Floating point exception", /* 8 */ + "Killed", /* 9 */ + "User defined signal 1", /* 10 */ + "Segmentation fault", /* 11 */ + "User defined signal 2", /* 12 */ + "Broken pipe", /* 13 */ + "Alarm clock", /* 14 */ + "Terminated", /* 15 */ + "Stack fault", /* 16 */ + "Child exited", /* 17 */ + "Continued", /* 18 */ + "Stopped (signal)", /* 19 */ + "Stopped", /* 20 */ + "Stopped (tty input)", /* 21 */ + "Stopped (tty output)", /* 22 */ + "Urgent I/O condition", /* 23 */ + "CPU time limit exceeded", /* 24 */ + "File size limit exceeded", /* 25 */ + "Virtual timer expired", /* 26 */ + "Profiling timer expired", /* 27 */ + "Window changed", /* 28 */ + "I/O possible", /* 29 */ + "Power failure", /* 30 */ + "Bad system call", /* 31 */ + "Unknown signal 32" /* 32 */ +}; +int sys_siglist_size = ARRAYSIZE(sys_siglist); + +/* + * string constants for strsignal + * XXX: ideally this should have the signal number as the suffix but then we + * should use a buffer from thread local storage, so deferring the same till + * we need it + * NOTE: In Linux strsignal uses TLS for the same reason but if it fails to get + * a thread local buffer it falls back to using a static buffer trading the + * thread safety. + */ +#define STR_REALTIME_SIGNAL "Real-time signal" +#define STR_UNKNOWN_SIGNAL "Unknown signal" + +/* + * strsignal -- returns a string describing the signal number 'sig' + * + * XXX: According to POSIX, this one is of type 'char *', but in our + * implementation it returns 'const char *'. + */ +const char * +os_strsignal(int sig) +{ + if (sig >= 0 && sig < ARRAYSIZE(sys_siglist)) + return sys_siglist[sig]; + else if (sig >= 34 && sig <= 64) + return STR_REALTIME_SIGNAL; + else + return STR_UNKNOWN_SIGNAL; +} + +int +os_execv(const char *path, char *const argv[]) +{ + wchar_t *wpath = util_toUTF16(path); + if (wpath == NULL) + return -1; + + int argc = 0; + while (argv[argc]) + argc++; + + int ret; + wchar_t **wargv = Zalloc((argc + 1) * sizeof(wargv[0])); + if (!wargv) { + ret = -1; + goto wargv_alloc_failed; + } + + for (int i = 0; i < argc; ++i) { + wargv[i] = util_toUTF16(argv[i]); + if (!wargv[i]) { + ret = -1; + goto end; + } + } + + intptr_t iret = _wexecv(wpath, wargv); + if (iret == 0) + ret = 0; + else + ret = -1; + +end: + for (int i = 0; i < argc; ++i) + util_free_UTF16(wargv[i]); + Free(wargv); + +wargv_alloc_failed: + util_free_UTF16(wpath); + + return ret; +} diff --git a/src/pmdk/src/core/out.c b/src/pmdk/src/core/out.c new file mode 100644 index 000000000..445885e81 --- /dev/null +++ b/src/pmdk/src/core/out.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * out.c -- support for logging, tracing, and assertion output + * + * Macros like LOG(), OUT, ASSERT(), etc. end up here. 
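+ *
+ * A format string that begins with '!' gets ": " plus the decoded errno
+ * appended to the message, and one that begins with "!!" gets the decoded
+ * GetLastError() value instead (Windows only) -- see out_common() below.
+ * E.g. FATAL("!malloc") logs the message together with strerror(errno).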
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "out.h" +#include "os.h" +#include "os_thread.h" +#include "valgrind_internal.h" +#include "util.h" + +/* XXX - modify Linux makefiles to generate srcversion.h and remove #ifdef */ +#ifdef _WIN32 +#include "srcversion.h" +#endif + +static const char *Log_prefix; +static int Log_level; +static FILE *Out_fp; +static unsigned Log_alignment; + +#ifndef NO_LIBPTHREAD +#define MAXPRINT 8192 /* maximum expected log line */ +#else +#define MAXPRINT 256 /* maximum expected log line for libpmem */ +#endif + +struct errormsg +{ + char msg[MAXPRINT]; +#ifdef _WIN32 + wchar_t wmsg[MAXPRINT]; +#endif +}; + +#ifndef NO_LIBPTHREAD + +static os_once_t Last_errormsg_key_once = OS_ONCE_INIT; +static os_tls_key_t Last_errormsg_key; + +static void +_Last_errormsg_key_alloc(void) +{ + int pth_ret = os_tls_key_create(&Last_errormsg_key, free); + if (pth_ret) + FATAL("!os_tls_key_create"); + + VALGRIND_ANNOTATE_HAPPENS_BEFORE(&Last_errormsg_key_once); +} + +static void +Last_errormsg_key_alloc(void) +{ + os_once(&Last_errormsg_key_once, _Last_errormsg_key_alloc); + /* + * Workaround Helgrind's bug: + * https://bugs.kde.org/show_bug.cgi?id=337735 + */ + VALGRIND_ANNOTATE_HAPPENS_AFTER(&Last_errormsg_key_once); +} + +static inline void +Last_errormsg_fini(void) +{ + void *p = os_tls_get(Last_errormsg_key); + if (p) { + free(p); + (void) os_tls_set(Last_errormsg_key, NULL); + } + (void) os_tls_key_delete(Last_errormsg_key); +} + +static inline struct errormsg * +Last_errormsg_get(void) +{ + Last_errormsg_key_alloc(); + + struct errormsg *errormsg = os_tls_get(Last_errormsg_key); + if (errormsg == NULL) { + errormsg = malloc(sizeof(struct errormsg)); + if (errormsg == NULL) + FATAL("!malloc"); + /* make sure it contains empty string initially */ + errormsg->msg[0] = '\0'; + int ret = os_tls_set(Last_errormsg_key, errormsg); + if (ret) + FATAL("!os_tls_set"); + } + return errormsg; +} + +#else + +/* + * We don't want libpmem to depend on libpthread. Instead of using pthread + * API to dynamically allocate thread-specific error message buffer, we put + * it into TLS. However, keeping a pretty large static buffer (8K) in TLS + * may lead to some issues, so the maximum message length is reduced. + * Fortunately, it looks like the longest error message in libpmem should + * not be longer than about 90 chars (in case of pmem_check_version()). + */ + +static __thread struct errormsg Last_errormsg; + +static inline void +Last_errormsg_key_alloc(void) +{ +} + +static inline void +Last_errormsg_fini(void) +{ +} + +static inline const struct errormsg * +Last_errormsg_get(void) +{ + return &Last_errormsg; +} + +#endif /* NO_LIBPTHREAD */ + +/* + * out_init -- initialize the log + * + * This is called from the library initialization code.
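+ *
+ * A typical caller (the names below are only illustrative) passes its log
+ * prefix and the names of its environment variables, e.g.:
+ *
+ *	out_init("mylib", "MYLIB_LOG_LEVEL", "MYLIB_LOG_FILE", 1, 0);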
+ */ +void +out_init(const char *log_prefix, const char *log_level_var, + const char *log_file_var, int major_version, + int minor_version) +{ + static int once; + + /* only need to initialize the out module once */ + if (once) + return; + once++; + + Log_prefix = log_prefix; + +#ifdef DEBUG + char *log_level; + char *log_file; + + if ((log_level = os_getenv(log_level_var)) != NULL) { + Log_level = atoi(log_level); + if (Log_level < 0) { + Log_level = 0; + } + } + + if ((log_file = os_getenv(log_file_var)) != NULL && + log_file[0] != '\0') { + + /* reserve more than enough space for a PID + '\0' */ + char log_file_pid[PATH_MAX]; + size_t len = strlen(log_file); + if (len > 0 && log_file[len - 1] == '-') { + if (util_snprintf(log_file_pid, PATH_MAX, "%s%d", + log_file, getpid()) < 0) { + ERR("snprintf: %d", errno); + abort(); + } + log_file = log_file_pid; + } + + if ((Out_fp = os_fopen(log_file, "w")) == NULL) { + char buff[UTIL_MAX_ERR_MSG]; + util_strerror(errno, buff, UTIL_MAX_ERR_MSG); + fprintf(stderr, "Error (%s): %s=%s: %s\n", + log_prefix, log_file_var, + log_file, buff); + abort(); + } + } +#endif /* DEBUG */ + + char *log_alignment = os_getenv("PMDK_LOG_ALIGN"); + if (log_alignment) { + int align = atoi(log_alignment); + if (align > 0) + Log_alignment = (unsigned)align; + } + + if (Out_fp == NULL) + Out_fp = stderr; + else + setlinebuf(Out_fp); + +#ifdef DEBUG + static char namepath[PATH_MAX]; + LOG(1, "pid %d: program: %s", getpid(), + util_getexecname(namepath, PATH_MAX)); +#endif + LOG(1, "%s version %d.%d", log_prefix, major_version, minor_version); + + static __attribute__((used)) const char *version_msg = + "src version: " SRCVERSION; + LOG(1, "%s", version_msg); +#if VG_PMEMCHECK_ENABLED + /* + * Attribute "used" to prevent compiler from optimizing out the variable + * when LOG expands to no code (!DEBUG) + */ + static __attribute__((used)) const char *pmemcheck_msg = + "compiled with support for Valgrind pmemcheck"; + LOG(1, "%s", pmemcheck_msg); +#endif /* VG_PMEMCHECK_ENABLED */ +#if VG_HELGRIND_ENABLED + static __attribute__((used)) const char *helgrind_msg = + "compiled with support for Valgrind helgrind"; + LOG(1, "%s", helgrind_msg); +#endif /* VG_HELGRIND_ENABLED */ +#if VG_MEMCHECK_ENABLED + static __attribute__((used)) const char *memcheck_msg = + "compiled with support for Valgrind memcheck"; + LOG(1, "%s", memcheck_msg); +#endif /* VG_MEMCHECK_ENABLED */ +#if VG_DRD_ENABLED + static __attribute__((used)) const char *drd_msg = + "compiled with support for Valgrind drd"; + LOG(1, "%s", drd_msg); +#endif /* VG_DRD_ENABLED */ +#if SDS_ENABLED + static __attribute__((used)) const char *shutdown_state_msg = + "compiled with support for shutdown state"; + LOG(1, "%s", shutdown_state_msg); +#endif +#if NDCTL_ENABLED + static __attribute__((used)) const char *ndctl_ge_63_msg = + "compiled with libndctl 63+"; + LOG(1, "%s", ndctl_ge_63_msg); +#endif + + Last_errormsg_key_alloc(); +} + +/* + * out_fini -- close the log file + * + * This is called to close log file before process stop. 
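+ * It also frees the thread-local buffer that holds the last error
+ * message (see Last_errormsg_fini() above).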
+ */ +void +out_fini(void) +{ + if (Out_fp != NULL && Out_fp != stderr) { + fclose(Out_fp); + Out_fp = stderr; + } + + Last_errormsg_fini(); +} + +/* + * out_print_func -- default print_func, goes to stderr or Out_fp + */ +static void +out_print_func(const char *s) +{ + /* to suppress drd false-positive */ + /* XXX: confirm real nature of this issue: pmem/issues#863 */ +#ifdef SUPPRESS_FPUTS_DRD_ERROR + VALGRIND_ANNOTATE_IGNORE_READS_BEGIN(); + VALGRIND_ANNOTATE_IGNORE_WRITES_BEGIN(); +#endif + fputs(s, Out_fp); +#ifdef SUPPRESS_FPUTS_DRD_ERROR + VALGRIND_ANNOTATE_IGNORE_READS_END(); + VALGRIND_ANNOTATE_IGNORE_WRITES_END(); +#endif +} + +/* + * calling Print(s) calls the current print_func... + */ +typedef void (*Print_func)(const char *s); +typedef int (*Vsnprintf_func)(char *str, size_t size, const char *format, + va_list ap); +static Print_func Print = out_print_func; +static Vsnprintf_func Vsnprintf = vsnprintf; + +/* + * out_set_print_func -- allow override of print_func used by out module + */ +void +out_set_print_func(void (*print_func)(const char *s)) +{ + LOG(3, "print %p", print_func); + + Print = (print_func == NULL) ? out_print_func : print_func; +} + +/* + * out_set_vsnprintf_func -- allow override of vsnprintf_func used by out module + */ +void +out_set_vsnprintf_func(int (*vsnprintf_func)(char *str, size_t size, + const char *format, va_list ap)) +{ + LOG(3, "vsnprintf %p", vsnprintf_func); + + Vsnprintf = (vsnprintf_func == NULL) ? vsnprintf : vsnprintf_func; +} + +/* + * out_snprintf -- (internal) custom snprintf implementation + */ +FORMAT_PRINTF(3, 4) +static int +out_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + + va_start(ap, format); + ret = Vsnprintf(str, size, format, ap); + va_end(ap); + + return (ret); +} + +/* + * out_common -- common output code, all output goes through here + */ +static void +out_common(const char *file, int line, const char *func, int level, + const char *suffix, const char *fmt, va_list ap) +{ + int oerrno = errno; + char buf[MAXPRINT]; + unsigned cc = 0; + int ret; + const char *sep = ""; + char errstr[UTIL_MAX_ERR_MSG] = ""; + + unsigned long olast_error = 0; +#ifdef _WIN32 + if (fmt && fmt[0] == '!' 
&& fmt[1] == '!') + olast_error = GetLastError(); +#endif + + if (file) { + char *f = strrchr(file, OS_DIR_SEPARATOR); + if (f) + file = f + 1; + ret = out_snprintf(&buf[cc], MAXPRINT - cc, + "<%s>: <%d> [%s:%d %s] ", + Log_prefix, level, file, line, func); + if (ret < 0) { + Print("out_snprintf failed"); + goto end; + } + cc += (unsigned)ret; + if (cc < Log_alignment) { + memset(buf + cc, ' ', Log_alignment - cc); + cc = Log_alignment; + } + } + + if (fmt) { + if (*fmt == '!') { + sep = ": "; + fmt++; + if (*fmt == '!') { + fmt++; + /* it will abort on non Windows OS */ + util_strwinerror(olast_error, errstr, + UTIL_MAX_ERR_MSG); + } else { + util_strerror(oerrno, errstr, UTIL_MAX_ERR_MSG); + } + + } + ret = Vsnprintf(&buf[cc], MAXPRINT - cc, fmt, ap); + if (ret < 0) { + Print("Vsnprintf failed"); + goto end; + } + cc += (unsigned)ret; + } + + out_snprintf(&buf[cc], MAXPRINT - cc, "%s%s%s", sep, errstr, suffix); + + Print(buf); + +end: + errno = oerrno; +#ifdef _WIN32 + SetLastError(olast_error); +#endif +} + +/* + * out_error -- common error output code, all error messages go through here + */ +static void +out_error(const char *file, int line, const char *func, + const char *suffix, const char *fmt, va_list ap) +{ + int oerrno = errno; + unsigned long olast_error = 0; +#ifdef _WIN32 + olast_error = GetLastError(); +#endif + unsigned cc = 0; + int ret; + const char *sep = ""; + char errstr[UTIL_MAX_ERR_MSG] = ""; + + char *errormsg = (char *)out_get_errormsg(); + + if (fmt) { + if (*fmt == '!') { + sep = ": "; + fmt++; + if (*fmt == '!') { + fmt++; + /* it will abort on non Windows OS */ + util_strwinerror(olast_error, errstr, + UTIL_MAX_ERR_MSG); + } else { + util_strerror(oerrno, errstr, UTIL_MAX_ERR_MSG); + } + } + + ret = Vsnprintf(&errormsg[cc], MAXPRINT, fmt, ap); + if (ret < 0) { + strcpy(errormsg, "Vsnprintf failed"); + goto end; + } + cc += (unsigned)ret; + out_snprintf(&errormsg[cc], MAXPRINT - cc, "%s%s", + sep, errstr); + } + +#ifdef DEBUG + if (Log_level >= 1) { + char buf[MAXPRINT]; + cc = 0; + + if (file) { + char *f = strrchr(file, OS_DIR_SEPARATOR); + if (f) + file = f + 1; + ret = out_snprintf(&buf[cc], MAXPRINT, + "<%s>: <1> [%s:%d %s] ", + Log_prefix, file, line, func); + if (ret < 0) { + Print("out_snprintf failed"); + goto end; + } + cc += (unsigned)ret; + if (cc < Log_alignment) { + memset(buf + cc, ' ', Log_alignment - cc); + cc = Log_alignment; + } + } + + out_snprintf(&buf[cc], MAXPRINT - cc, "%s%s", errormsg, + suffix); + + Print(buf); + } +#endif + +end: + errno = oerrno; +#ifdef _WIN32 + SetLastError(olast_error); +#endif +} + +/* + * out -- output a line, newline added automatically + */ +void +out(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + out_common(NULL, 0, NULL, 0, "\n", fmt, ap); + + va_end(ap); +} + +/* + * out_nonl -- output a line, no newline added automatically + */ +void +out_nonl(int level, const char *fmt, ...) +{ + va_list ap; + + if (Log_level < level) + return; + + va_start(ap, fmt); + out_common(NULL, 0, NULL, level, "", fmt, ap); + + va_end(ap); +} + +/* + * out_log -- output a log line if Log_level >= level + */ +void +out_log(const char *file, int line, const char *func, int level, + const char *fmt, ...) +{ + va_list ap; + + if (Log_level < level) + return; + + va_start(ap, fmt); + out_common(file, line, func, level, "\n", fmt, ap); + + va_end(ap); +} + +/* + * out_fatal -- output a fatal error & die (i.e. assertion failure) + */ +void +out_fatal(const char *file, int line, const char *func, + const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + out_common(file, line, func, 1, "\n", fmt, ap); + + va_end(ap); + + abort(); +} + +/* + * out_err -- output an error message + */ +void +out_err(const char *file, int line, const char *func, + const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + out_error(file, line, func, "\n", fmt, ap); + + va_end(ap); +} + +/* + * out_get_errormsg -- get the last error message + */ +const char * +out_get_errormsg(void) +{ + const struct errormsg *errormsg = Last_errormsg_get(); + return &errormsg->msg[0]; +} + +#ifdef _WIN32 +/* + * out_get_errormsgW -- get the last error message in wchar_t + */ +const wchar_t * +out_get_errormsgW(void) +{ + struct errormsg *errormsg = Last_errormsg_get(); + const char *utf8 = &errormsg->msg[0]; + wchar_t *utf16 = &errormsg->wmsg[0]; + if (util_toUTF16_buff(utf8, utf16, sizeof(errormsg->wmsg)) != 0) + FATAL("!Failed to convert string"); + + return (const wchar_t *)utf16; +} +#endif diff --git a/src/pmdk/src/core/out.h b/src/pmdk/src/core/out.h new file mode 100644 index 000000000..be81c3163 --- /dev/null +++ b/src/pmdk/src/core/out.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * out.h -- definitions for "out" module + */ + +#ifndef PMDK_OUT_H +#define PMDK_OUT_H 1 + +#include +#include +#include + +#include "util.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Suppress errors which are after appropriate ASSERT* macro for nondebug + * builds. + */ +#if !defined(DEBUG) && (defined(__clang_analyzer__) || defined(__COVERITY__) ||\ + defined(__KLOCWORK__)) +#define OUT_FATAL_DISCARD_NORETURN __attribute__((noreturn)) +#else +#define OUT_FATAL_DISCARD_NORETURN +#endif + +#ifndef EVALUATE_DBG_EXPRESSIONS +#if defined(DEBUG) || defined(__clang_analyzer__) || defined(__COVERITY__) ||\ + defined(__KLOCWORK__) +#define EVALUATE_DBG_EXPRESSIONS 1 +#else +#define EVALUATE_DBG_EXPRESSIONS 0 +#endif +#endif + +#ifdef DEBUG + +#define OUT_LOG out_log +#define OUT_NONL out_nonl +#define OUT_FATAL out_fatal +#define OUT_FATAL_ABORT out_fatal + +#else + +static __attribute__((always_inline)) inline void +out_log_discard(const char *file, int line, const char *func, int level, + const char *fmt, ...) +{ + (void) file; + (void) line; + (void) func; + (void) level; + (void) fmt; +} + +static __attribute__((always_inline)) inline void +out_nonl_discard(int level, const char *fmt, ...) +{ + (void) level; + (void) fmt; +} + +static __attribute__((always_inline)) OUT_FATAL_DISCARD_NORETURN inline void +out_fatal_discard(const char *file, int line, const char *func, + const char *fmt, ...) +{ + (void) file; + (void) line; + (void) func; + (void) fmt; +} + +static __attribute__((always_inline)) NORETURN inline void +out_fatal_abort(const char *file, int line, const char *func, + const char *fmt, ...) 
+{ + (void) file; + (void) line; + (void) func; + (void) fmt; + + abort(); +} + +#define OUT_LOG out_log_discard +#define OUT_NONL out_nonl_discard +#define OUT_FATAL out_fatal_discard +#define OUT_FATAL_ABORT out_fatal_abort + +#endif + +#if defined(__KLOCWORK__) +#define TEST_ALWAYS_TRUE_EXPR(cnd) +#define TEST_ALWAYS_EQ_EXPR(lhs, rhs) +#define TEST_ALWAYS_NE_EXPR(lhs, rhs) +#else +#define TEST_ALWAYS_TRUE_EXPR(cnd)\ + if (__builtin_constant_p(cnd))\ + ASSERT_COMPILE_ERROR_ON(cnd); +#define TEST_ALWAYS_EQ_EXPR(lhs, rhs)\ + if (__builtin_constant_p(lhs) && __builtin_constant_p(rhs))\ + ASSERT_COMPILE_ERROR_ON((lhs) == (rhs)); +#define TEST_ALWAYS_NE_EXPR(lhs, rhs)\ + if (__builtin_constant_p(lhs) && __builtin_constant_p(rhs))\ + ASSERT_COMPILE_ERROR_ON((lhs) != (rhs)); +#endif + +/* produce debug/trace output */ +#define LOG(level, ...) do { \ + if (!EVALUATE_DBG_EXPRESSIONS) break;\ + OUT_LOG(__FILE__, __LINE__, __func__, level, __VA_ARGS__);\ +} while (0) + +/* produce debug/trace output without prefix and new line */ +#define LOG_NONL(level, ...) do { \ + if (!EVALUATE_DBG_EXPRESSIONS) break; \ + OUT_NONL(level, __VA_ARGS__); \ +} while (0) + +/* produce output and exit */ +#define FATAL(...)\ + OUT_FATAL_ABORT(__FILE__, __LINE__, __func__, __VA_ARGS__) + +/* assert a condition is true at runtime */ +#define ASSERT_rt(cnd) do { \ + if (!EVALUATE_DBG_EXPRESSIONS || (cnd)) break; \ + OUT_FATAL(__FILE__, __LINE__, __func__, "assertion failure: %s", #cnd);\ +} while (0) + +/* assertion with extra info printed if assertion fails at runtime */ +#define ASSERTinfo_rt(cnd, info) do { \ + if (!EVALUATE_DBG_EXPRESSIONS || (cnd)) break; \ + OUT_FATAL(__FILE__, __LINE__, __func__, \ + "assertion failure: %s (%s = %s)", #cnd, #info, info);\ +} while (0) + +/* assert two integer values are equal at runtime */ +#define ASSERTeq_rt(lhs, rhs) do { \ + if (!EVALUATE_DBG_EXPRESSIONS || ((lhs) == (rhs))) break; \ + OUT_FATAL(__FILE__, __LINE__, __func__,\ + "assertion failure: %s (0x%llx) == %s (0x%llx)", #lhs,\ + (unsigned long long)(lhs), #rhs, (unsigned long long)(rhs)); \ +} while (0) + +/* assert two integer values are not equal at runtime */ +#define ASSERTne_rt(lhs, rhs) do { \ + if (!EVALUATE_DBG_EXPRESSIONS || ((lhs) != (rhs))) break; \ + OUT_FATAL(__FILE__, __LINE__, __func__,\ + "assertion failure: %s (0x%llx) != %s (0x%llx)", #lhs,\ + (unsigned long long)(lhs), #rhs, (unsigned long long)(rhs)); \ +} while (0) + +/* assert a condition is true */ +#define ASSERT(cnd)\ + do {\ + /*\ + * Detect useless asserts on always true expression. Please use\ + * COMPILE_ERROR_ON(!cnd) or ASSERT_rt(cnd) in such cases.\ + */\ + TEST_ALWAYS_TRUE_EXPR(cnd);\ + ASSERT_rt(cnd);\ + } while (0) + +/* assertion with extra info printed if assertion fails */ +#define ASSERTinfo(cnd, info)\ + do {\ + /* See comment in ASSERT. */\ + TEST_ALWAYS_TRUE_EXPR(cnd);\ + ASSERTinfo_rt(cnd, info);\ + } while (0) + +/* assert two integer values are equal */ +#define ASSERTeq(lhs, rhs)\ + do {\ + /* See comment in ASSERT. */\ + TEST_ALWAYS_EQ_EXPR(lhs, rhs);\ + ASSERTeq_rt(lhs, rhs);\ + } while (0) + +/* assert two integer values are not equal */ +#define ASSERTne(lhs, rhs)\ + do {\ + /* See comment in ASSERT.
*/\ + TEST_ALWAYS_NE_EXPR(lhs, rhs);\ + ASSERTne_rt(lhs, rhs);\ + } while (0) + +#define ERR(...)\ + out_err(__FILE__, __LINE__, __func__, __VA_ARGS__) + +void out_init(const char *log_prefix, const char *log_level_var, + const char *log_file_var, int major_version, + int minor_version); +void out_fini(void); +void out(const char *fmt, ...) FORMAT_PRINTF(1, 2); +void out_nonl(int level, const char *fmt, ...) FORMAT_PRINTF(2, 3); +void out_log(const char *file, int line, const char *func, int level, + const char *fmt, ...) FORMAT_PRINTF(5, 6); +void out_err(const char *file, int line, const char *func, + const char *fmt, ...) FORMAT_PRINTF(4, 5); +void NORETURN out_fatal(const char *file, int line, const char *func, + const char *fmt, ...) FORMAT_PRINTF(4, 5); +void out_set_print_func(void (*print_func)(const char *s)); +void out_set_vsnprintf_func(int (*vsnprintf_func)(char *str, size_t size, + const char *format, va_list ap)); + +#ifdef _WIN32 +#ifndef PMDK_UTF8_API +#define out_get_errormsg out_get_errormsgW +#else +#define out_get_errormsg out_get_errormsgU +#endif +#endif + +#ifndef _WIN32 +const char *out_get_errormsg(void); +#else +const char *out_get_errormsgU(void); +const wchar_t *out_get_errormsgW(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/core/pmemcore.h b/src/pmdk/src/core/pmemcore.h new file mode 100644 index 000000000..8291ad1a3 --- /dev/null +++ b/src/pmdk/src/core/pmemcore.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +/* + * pmemcore.h -- definitions for "core" module + */ + +#ifndef PMEMCORE_H +#define PMEMCORE_H 1 + +#include "util.h" +#include "out.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * core_init -- core module initialization + */ +static inline void +core_init(const char *log_prefix, const char *log_level_var, + const char *log_file_var, int major_version, + int minor_version) +{ + util_init(); + out_init(log_prefix, log_level_var, log_file_var, major_version, + minor_version); +} + +/* + * core_fini -- core module cleanup + */ +static inline void +core_fini(void) +{ + out_fini(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/core/pmemcore.inc b/src/pmdk/src/core/pmemcore.inc new file mode 100644 index 000000000..83320917b --- /dev/null +++ b/src/pmdk/src/core/pmemcore.inc @@ -0,0 +1,41 @@ +# Copyright 2020, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# src/pmemcore.inc -- core SOURCE definitions for PMDK libraries +# + +SOURCE +=\ + $(CORE)/alloc.c\ + $(CORE)/fs_posix.c\ + $(CORE)/os_posix.c\ + $(CORE)/os_thread_posix.c\ + $(CORE)/out.c\ + $(CORE)/util.c\ + $(CORE)/util_posix.c diff --git a/src/pmdk/src/core/util.c b/src/pmdk/src/core/util.c new file mode 100644 index 000000000..b0fd121df --- /dev/null +++ b/src/pmdk/src/core/util.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * util.c -- very basic utilities + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "os.h" +#include "valgrind_internal.h" +#include "alloc.h" + +/* library-wide page size */ +unsigned long long Pagesize; + +/* allocation/mmap granularity */ +unsigned long long Mmap_align; + +#if ANY_VG_TOOL_ENABLED +/* Initialized to true if the process is running inside Valgrind. */ +unsigned _On_valgrind; +#endif + +#if VG_HELGRIND_ENABLED +/* Initialized to true if the process is running inside Valgrind helgrind. */ +unsigned _On_helgrind; +#endif + +#if VG_DRD_ENABLED +/* Initialized to true if the process is running inside Valgrind drd. */ +unsigned _On_drd; +#endif + +#if VG_HELGRIND_ENABLED || VG_DRD_ENABLED +/* Initialized to true if the process is running inside Valgrind drd or hg. */ +unsigned _On_drd_or_hg; +#endif + +#if VG_MEMCHECK_ENABLED +/* Initialized to true if the process is running inside Valgrind memcheck. */ +unsigned _On_memcheck; +#endif + +#if VG_PMEMCHECK_ENABLED +/* Initialized to true if the process is running inside Valgrind pmemcheck. 
*/ +unsigned _On_pmemcheck; + +#define LIB_LOG_LEN 20 +#define FUNC_LOG_LEN 50 +#define SUFFIX_LEN 7 + +/* true if pmreorder instrumentation has to be enabled */ +int _Pmreorder_emit; + +/* + * util_emit_log -- emits lib and func name with appropriate suffix + * to pmemcheck store log file + */ +void +util_emit_log(const char *lib, const char *func, int order) +{ + char lib_name[LIB_LOG_LEN]; + char func_name[FUNC_LOG_LEN]; + char suffix[SUFFIX_LEN]; + size_t lib_len = strlen(lib); + size_t func_len = strlen(func); + + if (order == 0) + strcpy(suffix, ".BEGIN"); + else + strcpy(suffix, ".END"); + + size_t suffix_len = strlen(suffix); + + if (lib_len + suffix_len + 1 > LIB_LOG_LEN) { + VALGRIND_EMIT_LOG("Library name is too long"); + return; + } + + if (func_len + suffix_len + 1 > FUNC_LOG_LEN) { + VALGRIND_EMIT_LOG("Function name is too long"); + return; + } + + strcpy(lib_name, lib); + strcat(lib_name, suffix); + strcpy(func_name, func); + strcat(func_name, suffix); + + if (order == 0) { + VALGRIND_EMIT_LOG(func_name); + VALGRIND_EMIT_LOG(lib_name); + } else { + VALGRIND_EMIT_LOG(lib_name); + VALGRIND_EMIT_LOG(func_name); + } +} +#endif + +/* + * util_is_zeroed -- check if given memory range is all zero + */ +int +util_is_zeroed(const void *addr, size_t len) +{ + const char *a = addr; + + if (len == 0) + return 1; + + if (a[0] == 0 && memcmp(a, a + 1, len - 1) == 0) + return 1; + + return 0; +} + +/* + * util_checksum_compute -- compute Fletcher64-like checksum + * + * csump points to where the checksum lives, so that location + * is treated as zeros while calculating the checksum. The + * checksummed data is assumed to be in little endian order. + */ +uint64_t +util_checksum_compute(void *addr, size_t len, uint64_t *csump, size_t skip_off) +{ + if (len % 4 != 0) + abort(); + + uint32_t *p32 = addr; + uint32_t *p32end = (uint32_t *)((char *)addr + len); + uint32_t *skip; + uint32_t lo32 = 0; + uint32_t hi32 = 0; + + if (skip_off) + skip = (uint32_t *)((char *)addr + skip_off); + else + skip = (uint32_t *)((char *)addr + len); + + while (p32 < p32end) + if (p32 == (uint32_t *)csump || p32 >= skip) { + /* lo32 += 0; treat first 32-bits as zero */ + p32++; + hi32 += lo32; + /* lo32 += 0; treat second 32-bits as zero */ + p32++; + hi32 += lo32; + } else { + lo32 += le32toh(*p32); + ++p32; + hi32 += lo32; + } + + return (uint64_t)hi32 << 32 | lo32; +} + +/* + * util_checksum -- compute Fletcher64-like checksum + * + * csump points to where the checksum lives, so that location + * is treated as zeros while calculating the checksum. + * If insert is true, the calculated checksum is inserted into + * the range at *csump. Otherwise the calculated checksum is + * checked against *csump and the result returned (true means + * the range checksummed correctly). + */ +int +util_checksum(void *addr, size_t len, uint64_t *csump, + int insert, size_t skip_off) +{ + uint64_t csum = util_checksum_compute(addr, len, csump, skip_off); + + if (insert) { + *csump = htole64(csum); + return 1; + } + + return *csump == htole64(csum); +} + +/* + * util_checksum_seq -- compute sequential Fletcher64-like checksum + * + * Merges checksum from the old buffer with checksum for current buffer. 
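+ * Chaining calls yields the same value as a single pass over the
+ * concatenated buffers, e.g.:
+ *
+ *	csum = util_checksum_seq(hdr, hdr_len, 0);
+ *	csum = util_checksum_seq(data, data_len, csum);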
+ */ +uint64_t +util_checksum_seq(const void *addr, size_t len, uint64_t csum) +{ + if (len % 4 != 0) + abort(); + const uint32_t *p32 = addr; + const uint32_t *p32end = (const uint32_t *)((const char *)addr + len); + uint32_t lo32 = (uint32_t)csum; + uint32_t hi32 = (uint32_t)(csum >> 32); + while (p32 < p32end) { + lo32 += le32toh(*p32); + ++p32; + hi32 += lo32; + } + return (uint64_t)hi32 << 32 | lo32; +} + +/* + * util_fgets -- fgets wrapper with conversion CRLF to LF + */ +char * +util_fgets(char *buffer, int max, FILE *stream) +{ + char *str = fgets(buffer, max, stream); + if (str == NULL) + goto end; + + int len = (int)strlen(str); + if (len < 2) + goto end; + if (str[len - 2] == '\r' && str[len - 1] == '\n') { + str[len - 2] = '\n'; + str[len - 1] = '\0'; + } +end: + return str; +} + +struct suff { + const char *suff; + uint64_t mag; +}; + +/* + * util_parse_size -- parse size from string + */ +int +util_parse_size(const char *str, size_t *sizep) +{ + const struct suff suffixes[] = { + { "B", 1ULL }, + { "K", 1ULL << 10 }, /* JEDEC */ + { "M", 1ULL << 20 }, + { "G", 1ULL << 30 }, + { "T", 1ULL << 40 }, + { "P", 1ULL << 50 }, + { "KiB", 1ULL << 10 }, /* IEC */ + { "MiB", 1ULL << 20 }, + { "GiB", 1ULL << 30 }, + { "TiB", 1ULL << 40 }, + { "PiB", 1ULL << 50 }, + { "kB", 1000ULL }, /* SI */ + { "MB", 1000ULL * 1000 }, + { "GB", 1000ULL * 1000 * 1000 }, + { "TB", 1000ULL * 1000 * 1000 * 1000 }, + { "PB", 1000ULL * 1000 * 1000 * 1000 * 1000 } + }; + + int res = -1; + unsigned i; + size_t size = 0; + char unit[9] = {0}; + + int ret = sscanf(str, "%zu%8s", &size, unit); + if (ret == 1) { + res = 0; + } else if (ret == 2) { + for (i = 0; i < ARRAY_SIZE(suffixes); ++i) { + if (strcmp(suffixes[i].suff, unit) == 0) { + size = size * suffixes[i].mag; + res = 0; + break; + } + } + } else { + return -1; + } + + if (sizep && res == 0) + *sizep = size; + return res; +} + +/* + * util_init -- initialize the utils + * + * This is called from the library initialization code. + */ +void +util_init(void) +{ + /* XXX - replace sysconf() with util_get_sys_xxx() */ + if (Pagesize == 0) + Pagesize = (unsigned long) sysconf(_SC_PAGESIZE); + +#ifndef _WIN32 + Mmap_align = Pagesize; +#else + if (Mmap_align == 0) { + SYSTEM_INFO si; + GetSystemInfo(&si); + Mmap_align = si.dwAllocationGranularity; + } +#endif + +#if ANY_VG_TOOL_ENABLED + _On_valgrind = RUNNING_ON_VALGRIND; +#endif + +#if VG_MEMCHECK_ENABLED + if (_On_valgrind) { + unsigned tmp; + unsigned result; + unsigned res = VALGRIND_GET_VBITS(&tmp, &result, sizeof(tmp)); + _On_memcheck = res ? 1 : 0; + } else { + _On_memcheck = 0; + } +#endif + +#if VG_DRD_ENABLED + if (_On_valgrind) + _On_drd = DRD_GET_DRD_THREADID ? 1 : 0; + else + _On_drd = 0; +#endif + +#if VG_HELGRIND_ENABLED + if (_On_valgrind) { + unsigned tmp; + unsigned result; + /* + * As of now (pmem-3.15) VALGRIND_HG_GET_ABITS is broken on + * the upstream version of Helgrind headers. It generates + * a sign-conversion error and actually returns UINT32_MAX-1 + * when not running under Helgrind. + */ + long res = VALGRIND_HG_GET_ABITS(&tmp, &result, sizeof(tmp)); + _On_helgrind = res != -2 ? 
1 : 0; + } else { + _On_helgrind = 0; + } +#endif + +#if VG_DRD_ENABLED || VG_HELGRIND_ENABLED + _On_drd_or_hg = (unsigned)(On_helgrind + On_drd); +#endif + +#if VG_PMEMCHECK_ENABLED + if (On_valgrind) { + char *pmreorder_env = os_getenv("PMREORDER_EMIT_LOG"); + if (pmreorder_env) + _Pmreorder_emit = atoi(pmreorder_env); + + VALGRIND_PMC_REGISTER_PMEM_MAPPING(&_On_pmemcheck, + sizeof(_On_pmemcheck)); + unsigned pmc = (unsigned)VALGRIND_PMC_CHECK_IS_PMEM_MAPPING( + &_On_pmemcheck, sizeof(_On_pmemcheck)); + VALGRIND_PMC_REMOVE_PMEM_MAPPING(&_On_pmemcheck, + sizeof(_On_pmemcheck)); + _On_pmemcheck = pmc ? 1 : 0; + } else { + _On_pmemcheck = 0; + _Pmreorder_emit = 0; + } +#endif +} + +/* + * util_concat_str -- concatenate two strings + */ +char * +util_concat_str(const char *s1, const char *s2) +{ + char *result = malloc(strlen(s1) + strlen(s2) + 1); + if (!result) + return NULL; + + strcpy(result, s1); + strcat(result, s2); + + return result; +} + +/* + * util_localtime -- a wrapper for localtime function + * + * localtime can set nonzero errno even if it succeeds (e.g. when there is no + * /etc/localtime file under Linux) and we do not want the errno to be polluted + * in such cases. + */ +struct tm * +util_localtime(const time_t *timep) +{ + int oerrno = errno; + struct tm *tm = localtime(timep); + if (tm != NULL) + errno = oerrno; + + return tm; +} + +/* + * util_safe_strcpy -- copies string from src to dst, returns -1 + * when length of source string (including null-terminator) + * is greater than max_length, 0 otherwise + * + * For gcc (found in version 8.1.1) calling this function with + * max_length equal to dst size produces -Wstringop-truncation warning + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85902 + */ +#ifdef STRINGOP_TRUNCATION_SUPPORTED +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" +#endif +int +util_safe_strcpy(char *dst, const char *src, size_t max_length) +{ + if (max_length == 0) + return -1; + + strncpy(dst, src, max_length); + + return dst[max_length - 1] == '\0' ? 0 : -1; +} +#ifdef STRINGOP_TRUNCATION_SUPPORTED +#pragma GCC diagnostic pop +#endif + +#define PARSER_MAX_LINE (PATH_MAX + 1024) + +/* + * util_snprintf -- run snprintf; in case of truncation or a failure + * return a negative value, or the number of characters printed otherwise. + */ +int +util_snprintf(char *str, size_t size, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + int ret = vsnprintf(str, size, format, ap); + va_end(ap); + + if (ret < 0) { + if (!errno) + errno = EIO; + goto err; + } else if ((size_t)ret >= size) { + errno = ENOBUFS; + goto err; + } + + return ret; +err: + return -1; +} + +/* + * util_readline -- read line from stream + */ +char * +util_readline(FILE *fh) +{ + size_t bufsize = PARSER_MAX_LINE; + size_t position = 0; + char *buffer = NULL; + + do { + char *tmp = buffer; + buffer = Realloc(buffer, bufsize); + if (buffer == NULL) { + Free(tmp); + return NULL; + } + + /* ensure if we can cast bufsize to int */ + char *s = util_fgets(buffer + position, (int)bufsize / 2, fh); + if (s == NULL) { + Free(buffer); + return NULL; + } + + position = strlen(buffer); + bufsize *= 2; + } while (!feof(fh) && buffer[position - 1] != '\n'); + + return buffer; +} diff --git a/src/pmdk/src/core/util.h b/src/pmdk/src/core/util.h new file mode 100644 index 000000000..bf6339ec0 --- /dev/null +++ b/src/pmdk/src/core/util.h @@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ +/* + * Copyright (c) 2016-2020, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * util.h -- internal definitions for util module + */ + +#ifndef PMDK_UTIL_H +#define PMDK_UTIL_H 1 + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#include /* popcnt, bitscan */ +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern unsigned long long Pagesize; +extern unsigned long long Mmap_align; + +#if defined(__x86_64) || defined(_M_X64) || defined(__aarch64__) +#define CACHELINE_SIZE 64ULL +#elif defined(__PPC64__) +#define CACHELINE_SIZE 128ULL +#else +#error unable to recognize architecture at compile time +#endif + +#define PAGE_ALIGNED_DOWN_SIZE(size) ((size) & ~(Pagesize - 1)) +#define PAGE_ALIGNED_UP_SIZE(size)\ + PAGE_ALIGNED_DOWN_SIZE((size) + (Pagesize - 1)) +#define IS_PAGE_ALIGNED(size) (((size) & (Pagesize - 1)) == 0) +#define IS_MMAP_ALIGNED(size) (((size) & (Mmap_align - 1)) == 0) +#define PAGE_ALIGN_UP(addr) ((void *)PAGE_ALIGNED_UP_SIZE((uintptr_t)(addr))) + +#define ALIGN_UP(size, align) (((size) + (align) - 1) & ~((align) - 1)) +#define ALIGN_DOWN(size, align) ((size) & ~((align) - 1)) + +#define ADDR_SUM(vp, lp) ((void *)((char *)(vp) + (lp))) + +#define util_alignof(t) offsetof(struct {char _util_c; t _util_m; }, _util_m) +#define FORMAT_PRINTF(a, b) __attribute__((__format__(__printf__, (a), (b)))) + +void util_init(void); +int util_is_zeroed(const void *addr, size_t len); +uint64_t util_checksum_compute(void *addr, size_t len, uint64_t *csump, + size_t skip_off); +int util_checksum(void *addr, size_t len, uint64_t *csump, + int insert, size_t skip_off); +uint64_t util_checksum_seq(const void *addr, size_t len, uint64_t csum); +int util_parse_size(const char *str, size_t *sizep); +char *util_fgets(char *buffer, int max, FILE *stream); +char *util_getexecname(char *path, size_t pathlen); +char *util_part_realpath(const char *path); +int util_compare_file_inodes(const char *path1, const char *path2); +void *util_aligned_malloc(size_t alignment, size_t size); +void util_aligned_free(void *ptr); +struct tm *util_localtime(const time_t *timep); +int util_safe_strcpy(char *dst, const char *src, size_t max_length); +void util_emit_log(const char *lib, const char *func, int order); +char *util_readline(FILE *fh); +int util_snprintf(char *str, size_t size, + const char *format, ...) 
FORMAT_PRINTF(3, 4); + +#ifdef _WIN32 +char *util_toUTF8(const wchar_t *wstr); +wchar_t *util_toUTF16(const char *wstr); +void util_free_UTF8(char *str); +void util_free_UTF16(wchar_t *str); +int util_toUTF16_buff(const char *in, wchar_t *out, size_t out_size); +int util_toUTF8_buff(const wchar_t *in, char *out, size_t out_size); +void util_suppress_errmsg(void); +int util_lasterror_to_errno(unsigned long err); +#endif + +#define UTIL_MAX_ERR_MSG 128 +void util_strerror(int errnum, char *buff, size_t bufflen); +void util_strwinerror(unsigned long err, char *buff, size_t bufflen); + +void util_set_alloc_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)); + +/* + * Macro calculates number of elements in given table + */ +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifdef _MSC_VER +#define force_inline inline __forceinline +#define NORETURN __declspec(noreturn) +#define barrier() _ReadWriteBarrier() +#else +#define force_inline __attribute__((always_inline)) inline +#define NORETURN __attribute__((noreturn)) +#define barrier() asm volatile("" ::: "memory") +#endif + +#ifdef _MSC_VER +typedef UNALIGNED uint64_t ua_uint64_t; +typedef UNALIGNED uint32_t ua_uint32_t; +typedef UNALIGNED uint16_t ua_uint16_t; +#else +typedef uint64_t ua_uint64_t __attribute__((aligned(1))); +typedef uint32_t ua_uint32_t __attribute__((aligned(1))); +typedef uint16_t ua_uint16_t __attribute__((aligned(1))); +#endif + +#define util_get_not_masked_bits(x, mask) ((x) & ~(mask)) + +/* + * util_setbit -- setbit macro substitution which properly deals with types + */ +static inline void +util_setbit(uint8_t *b, uint32_t i) +{ + b[i / 8] = (uint8_t)(b[i / 8] | (uint8_t)(1 << (i % 8))); +} + +/* + * util_clrbit -- clrbit macro substitution which properly deals with types + */ +static inline void +util_clrbit(uint8_t *b, uint32_t i) +{ + b[i / 8] = (uint8_t)(b[i / 8] & (uint8_t)(~(1 << (i % 8)))); +} + +#define util_isset(a, i) isset(a, i) +#define util_isclr(a, i) isclr(a, i) + +#define util_flag_isset(a, f) ((a) & (f)) +#define util_flag_isclr(a, f) (((a) & (f)) == 0) + +/* + * util_is_pow2 -- returns !0 when there's only 1 bit set in v, 0 otherwise + */ +static force_inline int +util_is_pow2(uint64_t v) +{ + return v && !(v & (v - 1)); +} + +/* + * util_div_ceil -- divides a by b and rounds up the result + */ +static force_inline unsigned +util_div_ceil(unsigned a, unsigned b) +{ + return (unsigned)(((unsigned long)a + b - 1) / b); +} + +/* + * util_bool_compare_and_swap -- perform an atomic compare and swap + * util_fetch_and_* -- perform an operation atomically, return old value + * util_synchronize -- issue a full memory barrier + * util_popcount -- count number of set bits + * util_lssb_index -- return index of least significant set bit, + * undefined on zero + * util_mssb_index -- return index of most significant set bit + * undefined on zero + * + * XXX assertions needed on (value != 0) in both versions of bitscans + * + */ + +#ifndef _MSC_VER +/* + * ISO C11 -- 7.17.1.4 + * memory_order - an enumerated type whose enumerators identify memory ordering + * constraints. 
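+ * The enumerators below deliberately alias the GCC/Clang __ATOMIC_*
+ * constants, so they can be passed straight through to the compiler
+ * built-ins used in this header.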
+ +/* + * util_bool_compare_and_swap -- perform an atomic compare and swap + * util_fetch_and_* -- perform an operation atomically, return old value + * util_synchronize -- issue a full memory barrier + * util_popcount -- count number of set bits + * util_lssb_index -- return index of least significant set bit, + * undefined on zero + * util_mssb_index -- return index of most significant set bit + * undefined on zero + * + * XXX assertions needed on (value != 0) in both versions of bitscans + * + */ + +#ifndef _MSC_VER +/* + * ISO C11 -- 7.17.1.4 + * memory_order - an enumerated type whose enumerators identify memory ordering + * constraints. + */ +typedef enum { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_consume = __ATOMIC_CONSUME, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +/* + * ISO C11 -- 7.17.7.2 The atomic_load generic functions + * Integer width specific versions as supplement for: + * + * + * #include <stdatomic.h> + * C atomic_load(volatile A *object); + * C atomic_load_explicit(volatile A *object, memory_order order); + * + * The atomic_load interface doesn't return the loaded value, but instead + * copies it to a specified address -- see comments at the MSVC version. + * + * Also, instead of generic functions, two versions are available: + * for 32 bit fundamental integers, and for 64 bit ones. + */ +#define util_atomic_load_explicit32 __atomic_load +#define util_atomic_load_explicit64 __atomic_load + +/* + * ISO C11 -- 7.17.7.1 The atomic_store generic functions + * Integer width specific versions as supplement for: + * + * #include <stdatomic.h> + * void atomic_store(volatile A *object, C desired); + * void atomic_store_explicit(volatile A *object, C desired, + * memory_order order); + */ +#define util_atomic_store_explicit32 __atomic_store_n +#define util_atomic_store_explicit64 __atomic_store_n + +/* + * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html + * https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + * https://clang.llvm.org/docs/LanguageExtensions.html#builtin-functions + */ +#define util_bool_compare_and_swap32 __sync_bool_compare_and_swap +#define util_bool_compare_and_swap64 __sync_bool_compare_and_swap +#define util_fetch_and_add32 __sync_fetch_and_add +#define util_fetch_and_add64 __sync_fetch_and_add +#define util_fetch_and_sub32 __sync_fetch_and_sub +#define util_fetch_and_sub64 __sync_fetch_and_sub +#define util_fetch_and_and32 __sync_fetch_and_and +#define util_fetch_and_and64 __sync_fetch_and_and +#define util_fetch_and_or32 __sync_fetch_and_or +#define util_fetch_and_or64 __sync_fetch_and_or +#define util_synchronize __sync_synchronize +#define util_popcount(value) ((unsigned char)__builtin_popcount(value)) +#define util_popcount64(value) ((unsigned char)__builtin_popcountll(value)) +#define util_lssb_index(value) ((unsigned char)__builtin_ctz(value)) +#define util_lssb_index64(value) ((unsigned char)__builtin_ctzll(value)) +#define util_mssb_index(value) ((unsigned char)(31 - __builtin_clz(value))) +#define util_mssb_index64(value) ((unsigned char)(63 - __builtin_clzll(value))) + +#else + +/* ISO C11 -- 7.17.1.4 */ +typedef enum { + memory_order_relaxed, + memory_order_consume, + memory_order_acquire, + memory_order_release, + memory_order_acq_rel, + memory_order_seq_cst +} memory_order; + +/* + * ISO C11 -- 7.17.7.2 The atomic_load generic functions + * Integer width specific versions as supplement for: + * + * + * #include <stdatomic.h> + * C atomic_load(volatile A *object); + * C atomic_load_explicit(volatile A *object, memory_order order); + * + * The atomic_load interface doesn't return the loaded value, but instead + * copies it to a specified address. + * The MSVC specific implementation needs to trigger a barrier (at least + * compiler barrier) after the load from the volatile value. The actual load + * from the volatile value itself is expected to be atomic.
+ * + * The actual interface here: + * #include "util.h" + * void util_atomic_load32(volatile A *object, A *destination); + * void util_atomic_load64(volatile A *object, A *destination); + * void util_atomic_load_explicit32(volatile A *object, A *destination, + * memory_order order); + * void util_atomic_load_explicit64(volatile A *object, A *destination, + * memory_order order); + */ + +#ifndef _M_X64 +#error MSVC ports of util_atomic_ only work on X86_64 +#endif + +#if _MSC_VER >= 2000 +#error util_atomic_ utility functions not tested with this version of VC++ +#error These utility functions are not future proof, as they are not +#error based on publicly available documentation. +#endif + +#define util_atomic_load_explicit(object, dest, order)\ + do {\ + COMPILE_ERROR_ON(order != memory_order_seq_cst &&\ + order != memory_order_consume &&\ + order != memory_order_acquire &&\ + order != memory_order_relaxed);\ + *dest = *object;\ + if (order == memory_order_seq_cst ||\ + order == memory_order_consume ||\ + order == memory_order_acquire)\ + _ReadWriteBarrier();\ + } while (0) + +#define util_atomic_load_explicit32 util_atomic_load_explicit +#define util_atomic_load_explicit64 util_atomic_load_explicit + +/* ISO C11 -- 7.17.7.1 The atomic_store generic functions */ + +#define util_atomic_store_explicit64(object, desired, order)\ + do {\ + COMPILE_ERROR_ON(order != memory_order_seq_cst &&\ + order != memory_order_release &&\ + order != memory_order_relaxed);\ + if (order == memory_order_seq_cst) {\ + _InterlockedExchange64(\ + (volatile long long *)object, desired);\ + } else {\ + if (order == memory_order_release)\ + _ReadWriteBarrier();\ + *object = desired;\ + }\ + } while (0) + +#define util_atomic_store_explicit32(object, desired, order)\ + do {\ + COMPILE_ERROR_ON(order != memory_order_seq_cst &&\ + order != memory_order_release &&\ + order != memory_order_relaxed);\ + if (order == memory_order_seq_cst) {\ + _InterlockedExchange(\ + (volatile long *)object, desired);\ + } else {\ + if (order == memory_order_release)\ + _ReadWriteBarrier();\ + *object = desired;\ + }\ + } while (0) + +/* + * https://msdn.microsoft.com/en-us/library/hh977022.aspx + */ + +static __inline int +bool_compare_and_swap32_VC(volatile LONG *ptr, + LONG oldval, LONG newval) +{ + LONG old = InterlockedCompareExchange(ptr, newval, oldval); + return (old == oldval); +} + +static __inline int +bool_compare_and_swap64_VC(volatile LONG64 *ptr, + LONG64 oldval, LONG64 newval) +{ + LONG64 old = InterlockedCompareExchange64(ptr, newval, oldval); + return (old == oldval); +} + +#define util_bool_compare_and_swap32(p, o, n)\ + bool_compare_and_swap32_VC((LONG *)(p), (LONG)(o), (LONG)(n)) +#define util_bool_compare_and_swap64(p, o, n)\ + bool_compare_and_swap64_VC((LONG64 *)(p), (LONG64)(o), (LONG64)(n)) +#define util_fetch_and_add32(ptr, value)\ + InterlockedExchangeAdd((LONG *)(ptr), value) +#define util_fetch_and_add64(ptr, value)\ + InterlockedExchangeAdd64((LONG64 *)(ptr), value) +#define util_fetch_and_sub32(ptr, value)\ + InterlockedExchangeSubtract((LONG *)(ptr), value) +#define util_fetch_and_sub64(ptr, value)\ + InterlockedExchangeAdd64((LONG64 *)(ptr), -((LONG64)(value))) +#define util_fetch_and_and32(ptr, value)\ + InterlockedAnd((LONG *)(ptr), value) +#define util_fetch_and_and64(ptr, value)\ + InterlockedAnd64((LONG64 *)(ptr), value) +#define util_fetch_and_or32(ptr, value)\ + InterlockedOr((LONG *)(ptr), value) +#define util_fetch_and_or64(ptr, value)\ + InterlockedOr64((LONG64 *)(ptr), value) + +static
__inline void +util_synchronize(void) +{ + MemoryBarrier(); +} + +#define util_popcount(value) (unsigned char)__popcnt(value) +#define util_popcount64(value) (unsigned char)__popcnt64(value) + +static __inline unsigned char +util_lssb_index(int value) +{ + unsigned long ret; + _BitScanForward(&ret, value); + return (unsigned char)ret; +} + +static __inline unsigned char +util_lssb_index64(long long value) +{ + unsigned long ret; + _BitScanForward64(&ret, value); + return (unsigned char)ret; +} + +static __inline unsigned char +util_mssb_index(int value) +{ + unsigned long ret; + _BitScanReverse(&ret, value); + return (unsigned char)ret; +} + +static __inline unsigned char +util_mssb_index64(long long value) +{ + unsigned long ret; + _BitScanReverse64(&ret, value); + return (unsigned char)ret; +} + +#endif + +/* ISO C11 -- 7.17.7 Operations on atomic types */ +#define util_atomic_load32(object, dest)\ + util_atomic_load_explicit32(object, dest, memory_order_seq_cst) +#define util_atomic_load64(object, dest)\ + util_atomic_load_explicit64(object, dest, memory_order_seq_cst) + +#define util_atomic_store32(object, desired)\ + util_atomic_store_explicit32(object, desired, memory_order_seq_cst) +#define util_atomic_store64(object, desired)\ + util_atomic_store_explicit64(object, desired, memory_order_seq_cst) + +/* + * util_get_printable_ascii -- convert non-printable ascii to dot '.' + */ +static inline char +util_get_printable_ascii(char c) +{ + return isprint((unsigned char)c) ? c : '.'; +} + +char *util_concat_str(const char *s1, const char *s2); + +#if !defined(likely) +#if defined(__GNUC__) +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (!!(x)) +#define unlikely(x) (!!(x)) +#endif +#endif + +#if defined(__CHECKER__) +#define COMPILE_ERROR_ON(cond) +#define ASSERT_COMPILE_ERROR_ON(cond) +#elif defined(_MSC_VER) +#define COMPILE_ERROR_ON(cond) C_ASSERT(!(cond)) +/* XXX - can't be done with C_ASSERT() unless we have __builtin_constant_p() */ +#define ASSERT_COMPILE_ERROR_ON(cond) do {} while (0) +#else +#define COMPILE_ERROR_ON(cond) ((void)sizeof(char[(cond) ? 
-1 : 1])) +#define ASSERT_COMPILE_ERROR_ON(cond) COMPILE_ERROR_ON(cond) +#endif + +#ifndef _MSC_VER +#define ATTR_CONSTRUCTOR __attribute__((constructor)) static +#define ATTR_DESTRUCTOR __attribute__((destructor)) static +#else +#define ATTR_CONSTRUCTOR +#define ATTR_DESTRUCTOR +#endif + +#ifndef _MSC_VER +#define CONSTRUCTOR(fun) ATTR_CONSTRUCTOR +#else +#ifdef __cplusplus +#define CONSTRUCTOR(fun) \ +void fun(); \ +struct _##fun { \ + _##fun() { \ + fun(); \ + } \ +}; static _##fun foo; \ +static +#else +#define CONSTRUCTOR(fun) \ + MSVC_CONSTR(fun) \ + static +#endif +#endif + +#ifdef __GNUC__ +#define CHECK_FUNC_COMPATIBLE(func1, func2)\ + COMPILE_ERROR_ON(!__builtin_types_compatible_p(typeof(func1),\ + typeof(func2))) +#else +#define CHECK_FUNC_COMPATIBLE(func1, func2) do {} while (0) +#endif /* __GNUC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* util.h */ diff --git a/src/pmdk/src/core/util_posix.c b/src/pmdk/src/core/util_posix.c new file mode 100644 index 000000000..98765a1d4 --- /dev/null +++ b/src/pmdk/src/core/util_posix.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * util_posix.c -- Abstraction layer for misc utilities (Posix implementation) + */ + +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/stat.h> +#include "os.h" +#include "out.h" +#include "util.h" + +/* pass through for Posix */ +void +util_strerror(int errnum, char *buff, size_t bufflen) +{ + strerror_r(errnum, buff, bufflen); +} + +/* + * util_strwinerror -- should never be called on posix OS - abort() + */ +void +util_strwinerror(unsigned long err, char *buff, size_t bufflen) +{ + abort(); +} + +/* + * util_part_realpath -- get canonicalized absolute pathname + * + * As paths used in a poolset file have to be absolute (checked when parsing + * a poolset file), here we only have to resolve symlinks. + */ +char * +util_part_realpath(const char *path) +{ + return realpath(path, NULL); +} + +/* + * util_compare_file_inodes -- compare device and inodes of two files; + * this resolves hard links + */ +int +util_compare_file_inodes(const char *path1, const char *path2) +{ + struct stat sb1, sb2; + if (os_stat(path1, &sb1)) { + if (errno != ENOENT) { + ERR("!stat failed for %s", path1); + return -1; + } + LOG(1, "stat failed for %s", path1); + errno = 0; + return strcmp(path1, path2) != 0; + } + + if (os_stat(path2, &sb2)) { + if (errno != ENOENT) { + ERR("!stat failed for %s", path2); + return -1; + } + LOG(1, "stat failed for %s", path2); + errno = 0; + return strcmp(path1, path2) != 0; + } + + return sb1.st_dev != sb2.st_dev || sb1.st_ino != sb2.st_ino; +} + +/* + * util_aligned_malloc -- allocate aligned memory + */ +void * +util_aligned_malloc(size_t alignment, size_t size) +{ + void *retval = NULL; + + errno = posix_memalign(&retval, alignment, size); + + return retval; +} + +/* + * util_aligned_free -- free allocated memory in util_aligned_malloc + */ +void +util_aligned_free(void *ptr) +{ + free(ptr); +}
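/*
 * Illustrative sketch (not part of the imported file): util_aligned_malloc()
 * follows posix_memalign() semantics -- the alignment must be a power of two
 * and a multiple of sizeof(void *); on failure the function returns NULL
 * with errno set. The result must be released with util_aligned_free().
 */
#include <stdio.h>

static int
aligned_alloc_example(void)
{
	void *buf = util_aligned_malloc(4096, 64 * 1024);
	if (buf == NULL) {
		perror("util_aligned_malloc");
		return -1;
	}

	/* ... use the page-aligned 64 KiB buffer ... */

	util_aligned_free(buf);
	return 0;
}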
+ +/* + * util_getexecname -- return name of current executable + */ +char * +util_getexecname(char *path, size_t pathlen) +{ + ASSERT(pathlen != 0); + ssize_t cc; + +#ifdef __FreeBSD__ +#include <sys/types.h> +#include <sys/sysctl.h> + + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + + cc = (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) ? + -1 : (ssize_t)pathlen; +#else + cc = readlink("/proc/self/exe", path, pathlen); +#endif + if (cc == -1) { + strncpy(path, "unknown", pathlen); + path[pathlen - 1] = '\0'; + } else { + path[cc] = '\0'; + } + + return path; +} diff --git a/src/pmdk/src/core/util_windows.c b/src/pmdk/src/core/util_windows.c new file mode 100644 index 000000000..123ee6122 --- /dev/null +++ b/src/pmdk/src/core/util_windows.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * util_windows.c -- misc utilities with OS-specific implementation + */ + +#include <errno.h> +#include <malloc.h> +#include <stdio.h> +#include <string.h> + +#include "alloc.h" +#include "util.h" +#include "out.h" +#include "os.h" + +/* Windows CRT doesn't support all errors, add unmapped here */ +#define ENOTSUP_STR "Operation not supported" +#define ECANCELED_STR "Operation canceled" +#define ENOERROR 0 +#define ENOERROR_STR "Success" +#define UNMAPPED_STR "Unmapped error" + +/* + * util_strerror -- return string describing error number + * + * XXX: There are many other POSIX error codes that are not recognized by + * strerror_s(), so eventually we may want to implement this in a similar + * fashion as strsignal(). + */ +void +util_strerror(int errnum, char *buff, size_t bufflen) +{ + switch (errnum) { + case ENOERROR: + strcpy_s(buff, bufflen, ENOERROR_STR); + break; + case ENOTSUP: + strcpy_s(buff, bufflen, ENOTSUP_STR); + break; + case ECANCELED: + strcpy_s(buff, bufflen, ECANCELED_STR); + break; + default: + if (strerror_s(buff, bufflen, errnum)) + strcpy_s(buff, bufflen, UNMAPPED_STR); + } +} + +/* + * util_strwinerror -- return string describing Windows error codes + */ +void +util_strwinerror(unsigned long err, char *buff, size_t bufflen) +{ + wchar_t *error_str; + + if (FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + err, + 0, + (LPWSTR)&error_str, + 0, NULL) == 0) { + sprintf_s(buff, bufflen, "GetLastError() == %lu", err); + return; + } + + if (util_toUTF8_buff(error_str, buff, bufflen)) { + LocalFree(error_str); + sprintf_s(buff, bufflen, "GetLastError() == %lu", err); + return; + } + + /* let's do the error message without '\r' and '\n' at the end */ + size_t len = strlen(buff); + for (size_t i = len - 1; i > 0; i--) { + if (buff[i] != '\r' && buff[i] != '\n') { + buff[i + 1] = '\0'; + break; + } + } + + LocalFree(error_str); +} + +/* + * util_part_realpath -- get canonicalized absolute pathname for a part file + * + * On Windows, paths cannot be symlinks and paths used in a poolset have to + * be absolute (checked when parsing a poolset file), so we just return + * the path.
+ */ +char * +util_part_realpath(const char *path) +{ + return strdup(path); +} + +/* + * util_compare_file_inodes -- compare device and inodes of two files + */ +int +util_compare_file_inodes(const char *path1, const char *path2) +{ + return strcmp(path1, path2) != 0; +} + +/* + * util_aligned_malloc -- allocate aligned memory + */ +void * +util_aligned_malloc(size_t alignment, size_t size) +{ + return _aligned_malloc(size, alignment); +} + +/* + * util_aligned_free -- free allocated memory in util_aligned_malloc + */ +void +util_aligned_free(void *ptr) +{ + _aligned_free(ptr); +} + +/* + * util_toUTF8 -- allocating conversion from wide char string to UTF8 + */ +char * +util_toUTF8(const wchar_t *wstr) +{ + int size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wstr, -1, + NULL, 0, NULL, NULL); + if (size == 0) + goto err; + + char *str = Malloc(size * sizeof(char)); + if (str == NULL) + goto out; + + if (WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wstr, -1, str, + size, NULL, NULL) == 0) { + Free(str); + goto err; + } + +out: + return str; + +err: + errno = EINVAL; + return NULL; +} + +/* + * util_free_UTF8 -- free UTF8 string + */ +void +util_free_UTF8(char *str) +{ + Free(str); +} + +/* + * util_toUTF16 -- allocating conversion from UTF8 to wide char string + */ +wchar_t * +util_toUTF16(const char *str) +{ + int size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, -1, + NULL, 0); + if (size == 0) + goto err; + + wchar_t *wstr = Malloc(size * sizeof(wchar_t)); + if (wstr == NULL) + goto out; + + if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str, -1, wstr, + size) == 0) { + Free(wstr); + goto err; + } + +out: + return wstr; + +err: + errno = EINVAL; + return NULL; +} + +/* + * util_free_UTF16 -- free wide char string + */ +void +util_free_UTF16(wchar_t *wstr) +{ + Free(wstr); +} + +/* + * util_toUTF16_buff -- non-allocating conversion from UTF8 to wide char string + * + * The user is responsible for supplying a large enough out buffer. + */ +int +util_toUTF16_buff(const char *in, wchar_t *out, size_t out_size) +{ + ASSERT(out != NULL); + + int size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, in, + -1, NULL, 0); + if (size == 0 || out_size < size) + goto err; + + if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, in, -1, + out, size) == 0) + goto err; + + return 0; +err: + errno = EINVAL; + return -1; +}
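/*
 * Illustrative sketch (not part of the imported file): round-tripping a
 * string through the allocating converters above. Both return NULL and set
 * errno to EINVAL on invalid input; each result is released with the
 * matching helper. The file name used here is only an example.
 */
#include <stdio.h>

static void
utf_conversion_example(void)
{
	wchar_t *wpath = util_toUTF16("pool.set");
	if (wpath == NULL)
		return;

	char *path = util_toUTF8(wpath);
	if (path != NULL) {
		printf("%s\n", path);
		util_free_UTF8(path);
	}

	util_free_UTF16(wpath);
}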
+/* + * util_toUTF8_buff -- non-allocating conversion from wide char string to UTF8 + * + * The user is responsible for supplying a large enough out buffer. + */ +int +util_toUTF8_buff(const wchar_t *in, char *out, size_t out_size) +{ + ASSERT(out != NULL); + + int size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, in, -1, + NULL, 0, NULL, NULL); + if (size == 0 || out_size < size) + goto err; + + if (WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, in, -1, + out, size, NULL, NULL) == 0) + goto err; + + return 0; +err: + errno = EINVAL; + return -1; +} + +/* + * util_getexecname -- return name of current executable + */ +char * +util_getexecname(char *path, size_t pathlen) +{ + ssize_t cc; + if ((cc = GetModuleFileNameA(NULL, path, (DWORD)pathlen)) == 0) + strcpy(path, "unknown"); + else + path[cc] = '\0'; + + return path; +} + +/* + * util_suppress_errmsg -- suppresses "abort" window on Windows if env variable + * is set, useful for automatic tests + */ +void +util_suppress_errmsg(void) +{ + if (os_getenv("PMDK_NO_ABORT_MSG") != NULL) { + DWORD err = GetErrorMode(); + SetErrorMode(err | SEM_NOGPFAULTERRORBOX | + SEM_FAILCRITICALERRORS); + _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); + } +} + +static int Lasterror_to_errno[16000] = { + [ERROR_ACCESS_DENIED] = EACCES, + [ERROR_FILE_NOT_FOUND] = ENOENT, + [ERROR_INVALID_ACCESS] = EACCES, + [ERROR_INVALID_ADDRESS] = EINVAL, + [ERROR_INVALID_FUNCTION] = EINVAL, + [ERROR_INVALID_HANDLE] = EINVAL, + [ERROR_INVALID_PARAMETER] = EINVAL, + [ERROR_LOCK_FAILED] = EACCES, + [ERROR_MAPPED_ALIGNMENT] = EINVAL, + [ERROR_NOT_ENOUGH_MEMORY] = ENOMEM, + [ERROR_NOT_SUPPORTED] = ENOTSUP, + [ERROR_OUTOFMEMORY] = ENOMEM, + [ERROR_PATH_NOT_FOUND] = ENOENT, + [ERROR_TOO_MANY_OPEN_FILES] = EMFILE, +}; + +/* + * util_lasterror_to_errno - converts Windows error codes to errno + */ +int +util_lasterror_to_errno(unsigned long err) +{ + if (err >= ARRAY_SIZE(Lasterror_to_errno)) + return -1; + + /* no error */ + if (err == 0) + return 0; + + int ret = Lasterror_to_errno[err]; + + /* 0 is used to signal a missing entry in the Lasterror_to_errno array */ + if (ret == 0) + return -1; + + return ret; +} diff --git a/src/pmdk/src/core/valgrind/.cstyleignore b/src/pmdk/src/core/valgrind/.cstyleignore new file mode 100644 index 000000000..049c1774e --- /dev/null +++ b/src/pmdk/src/core/valgrind/.cstyleignore @@ -0,0 +1,5 @@ +drd.h +helgrind.h +memcheck.h +pmemcheck.h +valgrind.h diff --git a/src/pmdk/src/core/valgrind/README b/src/pmdk/src/core/valgrind/README new file mode 100644 index 000000000..b5fb4f6a4 --- /dev/null +++ b/src/pmdk/src/core/valgrind/README @@ -0,0 +1,2 @@ +Files in this directory were imported from Valgrind 3.14: +https://github.com/pmem/valgrind diff --git a/src/pmdk/src/core/valgrind/drd.h b/src/pmdk/src/core/valgrind/drd.h new file mode 100644 index 000000000..d63b3dd20 --- /dev/null +++ b/src/pmdk/src/core/valgrind/drd.h @@ -0,0 +1,571 @@ +/* + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (drd.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of DRD, a Valgrind tool for verification of + multithreaded programs. + + Copyright (C) 2006-2017 Bart Van Assche . + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1.
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (drd.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + +#ifndef __VALGRIND_DRD_H +#define __VALGRIND_DRD_H + + +#include "valgrind.h" + + +/** Obtain the thread ID assigned by Valgrind's core. */ +#define DRD_GET_VALGRIND_THREADID \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__DRD_GET_VALGRIND_THREAD_ID, \ + 0, 0, 0, 0, 0) + +/** Obtain the thread ID assigned by DRD. */ +#define DRD_GET_DRD_THREADID \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__DRD_GET_DRD_THREAD_ID, \ + 0, 0, 0, 0, 0) + + +/** Tell DRD not to complain about data races for the specified variable. */ +#define DRD_IGNORE_VAR(x) ANNOTATE_BENIGN_RACE_SIZED(&(x), sizeof(x), "") + +/** Tell DRD to no longer ignore data races for the specified variable. */ +#define DRD_STOP_IGNORING_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_FINISH_SUPPRESSION, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * Tell DRD to trace all memory accesses for the specified variable + * until the memory that was allocated for the variable is freed. + */ +#define DRD_TRACE_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_START_TRACE_ADDR, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * Tell DRD to stop tracing memory accesses for the specified variable. + */ +#define DRD_STOP_TRACING_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_STOP_TRACE_ADDR, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * @defgroup RaceDetectionAnnotations Data race detection annotations. + * + * @see See also the source file producer-consumer. + */ +#define ANNOTATE_PCQ_CREATE(pcq) do { } while(0) + +/** Tell DRD that a FIFO queue has been destroyed. */ +#define ANNOTATE_PCQ_DESTROY(pcq) do { } while(0) + +/** + * Tell DRD that an element has been added to the FIFO queue at address pcq. 
+ */ +#define ANNOTATE_PCQ_PUT(pcq) do { } while(0) + +/** + * Tell DRD that an element has been removed from the FIFO queue at address pcq, + * and that DRD should insert a happens-before relationship between the memory + * accesses that occurred before the corresponding ANNOTATE_PCQ_PUT(pcq) + * annotation and the memory accesses after this annotation. Correspondence + * between PUT and GET annotations happens in FIFO order. Since locking + * of the queue is needed anyway to add elements to or to remove elements from + * the queue, for DRD all four FIFO annotations are defined as no-ops. + */ +#define ANNOTATE_PCQ_GET(pcq) do { } while(0) + +/** + * Tell DRD that data races at the specified address are expected and must not + * be reported. + */ +#define ANNOTATE_BENIGN_RACE(addr, descr) \ + ANNOTATE_BENIGN_RACE_SIZED(addr, sizeof(*addr), descr) + +/* Same as ANNOTATE_BENIGN_RACE(addr, descr), but applies to + the memory range [addr, addr + size). */ +#define ANNOTATE_BENIGN_RACE_SIZED(addr, size, descr) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_START_SUPPRESSION, \ + addr, size, 0, 0, 0) + +/** Tell DRD to ignore all reads performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_BEGIN() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_LOADS, \ + 0, 0, 0, 0, 0); + + +/** Tell DRD to no longer ignore the reads performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_END() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_LOADS, \ + 1, 0, 0, 0, 0); + +/** Tell DRD to ignore all writes performed by the current thread. */ +#define ANNOTATE_IGNORE_WRITES_BEGIN() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_STORES, \ + 0, 0, 0, 0, 0) + +/** Tell DRD to no longer ignore the writes performed by the current thread. */ +#define ANNOTATE_IGNORE_WRITES_END() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_STORES, \ + 1, 0, 0, 0, 0) + +/** Tell DRD to ignore all memory accesses performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { ANNOTATE_IGNORE_READS_BEGIN(); ANNOTATE_IGNORE_WRITES_BEGIN(); } while(0) + +/** + * Tell DRD to no longer ignore the memory accesses performed by the current + * thread. + */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { ANNOTATE_IGNORE_READS_END(); ANNOTATE_IGNORE_WRITES_END(); } while(0) + +/** + * Tell DRD that size bytes starting at addr has been allocated by a custom + * memory allocator. + */ +#define ANNOTATE_NEW_MEMORY(addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_CLEAN_MEMORY, \ + addr, size, 0, 0, 0) + +/** Ask DRD to report every access to the specified address. */ +#define ANNOTATE_TRACE_MEMORY(addr) DRD_TRACE_VAR(*(char*)(addr)) + +/** + * Tell DRD to assign the specified name to the current thread. This name will + * be used in error messages printed by DRD. + */ +#define ANNOTATE_THREAD_NAME(name) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_SET_THREAD_NAME, \ + name, 0, 0, 0, 0) + +/*@}*/ + + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. +*/ +enum { + /* Ask the DRD tool to discard all information about memory accesses */ + /* and client objects for the specified range. This client request is */ + /* binary compatible with the similarly named Helgrind client request. 
*/ + VG_USERREQ__DRD_CLEAN_MEMORY = VG_USERREQ_TOOL_BASE('H','G'), + /* args: Addr, SizeT. */ + + /* Ask the DRD tool the thread ID assigned by Valgrind. */ + VG_USERREQ__DRD_GET_VALGRIND_THREAD_ID = VG_USERREQ_TOOL_BASE('D','R'), + /* args: none. */ + /* Ask the DRD tool the thread ID assigned by DRD. */ + VG_USERREQ__DRD_GET_DRD_THREAD_ID, + /* args: none. */ + + /* To tell the DRD tool to suppress data race detection on the */ + /* specified address range. */ + VG_USERREQ__DRD_START_SUPPRESSION, + /* args: start address, size in bytes */ + /* To tell the DRD tool no longer to suppress data race detection on */ + /* the specified address range. */ + VG_USERREQ__DRD_FINISH_SUPPRESSION, + /* args: start address, size in bytes */ + + /* To ask the DRD tool to trace all accesses to the specified range. */ + VG_USERREQ__DRD_START_TRACE_ADDR, + /* args: Addr, SizeT. */ + /* To ask the DRD tool to stop tracing accesses to the specified range. */ + VG_USERREQ__DRD_STOP_TRACE_ADDR, + /* args: Addr, SizeT. */ + + /* Tell DRD whether or not to record memory loads in the calling thread. */ + VG_USERREQ__DRD_RECORD_LOADS, + /* args: Bool. */ + /* Tell DRD whether or not to record memory stores in the calling thread. */ + VG_USERREQ__DRD_RECORD_STORES, + /* args: Bool. */ + + /* Set the name of the thread that performs this client request. */ + VG_USERREQ__DRD_SET_THREAD_NAME, + /* args: null-terminated character string. */ + + /* Tell DRD that a DRD annotation has not yet been implemented. */ + VG_USERREQ__DRD_ANNOTATION_UNIMP, + /* args: char*. */ + + /* Tell DRD that a user-defined semaphore synchronization object + * is about to be created. */ + VG_USERREQ__DRD_ANNOTATE_SEM_INIT_PRE, + /* args: Addr, UInt value. */ + /* Tell DRD that a user-defined semaphore synchronization object + * has been destroyed. */ + VG_USERREQ__DRD_ANNOTATE_SEM_DESTROY_POST, + /* args: Addr. */ + /* Tell DRD that a user-defined semaphore synchronization + * object is going to be acquired (semaphore wait). */ + VG_USERREQ__DRD_ANNOTATE_SEM_WAIT_PRE, + /* args: Addr. */ + /* Tell DRD that a user-defined semaphore synchronization + * object has been acquired (semaphore wait). */ + VG_USERREQ__DRD_ANNOTATE_SEM_WAIT_POST, + /* args: Addr. */ + /* Tell DRD that a user-defined semaphore synchronization + * object is about to be released (semaphore post). */ + VG_USERREQ__DRD_ANNOTATE_SEM_POST_PRE, + /* args: Addr. */ + + /* Tell DRD to ignore the inter-thread ordering introduced by a mutex. */ + VG_USERREQ__DRD_IGNORE_MUTEX_ORDERING, + /* args: Addr. */ + + /* Tell DRD that a user-defined reader-writer synchronization object + * has been created. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_CREATE + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 14, + /* args: Addr. */ + /* Tell DRD that a user-defined reader-writer synchronization object + * is about to be destroyed. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_DESTROY + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 15, + /* args: Addr. */ + /* Tell DRD that a lock on a user-defined reader-writer synchronization + * object has been acquired. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_ACQUIRED + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 17, + /* args: Addr, Int is_rw. */ + /* Tell DRD that a lock on a user-defined reader-writer synchronization + * object is about to be released. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_RELEASED + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 18, + /* args: Addr, Int is_rw. */ + + /* Tell DRD that a Helgrind annotation has not yet been implemented. 
*/ + VG_USERREQ__HELGRIND_ANNOTATION_UNIMP + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 32, + /* args: char*. */ + + /* Tell DRD to insert a happens-before annotation. */ + VG_USERREQ__DRD_ANNOTATE_HAPPENS_BEFORE + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 33, + /* args: Addr. */ + /* Tell DRD to insert a happens-after annotation. */ + VG_USERREQ__DRD_ANNOTATE_HAPPENS_AFTER + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 34, + /* args: Addr. */ + +}; + + +/** + * @addtogroup RaceDetectionAnnotations + */ +/*@{*/ + +#ifdef __cplusplus +/* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racy reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... = ANNOTATE_UNPROTECTED_READ(x); */ +template <typename T> +inline T ANNOTATE_UNPROTECTED_READ(const volatile T& x) { + ANNOTATE_IGNORE_READS_BEGIN(); + const T result = x; + ANNOTATE_IGNORE_READS_END(); + return result; +} +/* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ +#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + static class static_var##_annotator \ + { \ + public: \ + static_var##_annotator() \ + { \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, sizeof(static_var), \ + #static_var ": " description); \ + } \ + } the_##static_var##_annotator; \ + } +#endif + +/*@}*/ + +#endif /* __VALGRIND_DRD_H */ diff --git a/src/pmdk/src/core/valgrind/helgrind.h b/src/pmdk/src/core/valgrind/helgrind.h new file mode 100644 index 000000000..4579948d7 --- /dev/null +++ b/src/pmdk/src/core/valgrind/helgrind.h @@ -0,0 +1,841 @@ +/* + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one file + (helgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- + + This file is part of Helgrind, a Valgrind tool for detecting errors + in threaded programs. + + Copyright (C) 2007-2017 OpenWorks LLP + <info@open-works.co.uk> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (helgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + +#ifndef __HELGRIND_H +#define __HELGRIND_H + +#include "valgrind.h" + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. */ +typedef + enum { + VG_USERREQ__HG_CLEAN_MEMORY = VG_USERREQ_TOOL_BASE('H','G'), + + /* The rest are for Helgrind's internal use. Not for end-user + use. Do not use them unless you are a Valgrind developer. */ + + /* Notify the tool what this thread's pthread_t is. */ + _VG_USERREQ__HG_SET_MY_PTHREAD_T = VG_USERREQ_TOOL_BASE('H','G') + + 256, + _VG_USERREQ__HG_PTH_API_ERROR, /* char*, int */ + _VG_USERREQ__HG_PTHREAD_JOIN_POST, /* pthread_t of quitter */ + _VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, /* pth_mx_t*, long mbRec */ + _VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, /* pth_mx_t*, long isInit */ + _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST, /* pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_MUTEX_ACQUIRE_PRE, /* void*, long isTryLock */ + _VG_USERREQ__HG_PTHREAD_MUTEX_ACQUIRE_POST, /* void* */ + _VG_USERREQ__HG_PTHREAD_COND_SIGNAL_PRE, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_COND_BROADCAST_PRE, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, /* pth_cond_t*, pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, /* pth_cond_t*, pth_mx_t* */ + _VG_USERREQ__HG_PTHREAD_COND_DESTROY_PRE, /* pth_cond_t*, long isInit */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST, /* pth_rwlk_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE, /* pth_rwlk_t* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_PRE, /* pth_rwlk_t*, long isW */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_ACQUIRED, /* void*, long isW */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_RELEASED, /* void* */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_POST, /* pth_rwlk_t* */ + _VG_USERREQ__HG_POSIX_SEM_INIT_POST, /* sem_t*, ulong value */ + _VG_USERREQ__HG_POSIX_SEM_DESTROY_PRE, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_RELEASED, /* void* */ + _VG_USERREQ__HG_POSIX_SEM_ACQUIRED, /* void* */ + _VG_USERREQ__HG_PTHREAD_BARRIER_INIT_PRE, /* pth_bar_t*, ulong, ulong */ + _VG_USERREQ__HG_PTHREAD_BARRIER_WAIT_PRE, /* pth_bar_t* */ + _VG_USERREQ__HG_PTHREAD_BARRIER_DESTROY_PRE, /* pth_bar_t* */ + _VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_PRE, /* pth_slk_t* */ + _VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_POST, /* pth_slk_t* */ + _VG_USERREQ__HG_PTHREAD_SPIN_LOCK_PRE, /* pth_slk_t* */ + _VG_USERREQ__HG_PTHREAD_SPIN_LOCK_POST, /* pth_slk_t* */ + _VG_USERREQ__HG_PTHREAD_SPIN_DESTROY_PRE, /* pth_slk_t* */ + 
_VG_USERREQ__HG_CLIENTREQ_UNIMP, /* char* */ + _VG_USERREQ__HG_USERSO_SEND_PRE, /* arbitrary UWord SO-tag */ + _VG_USERREQ__HG_USERSO_RECV_POST, /* arbitrary UWord SO-tag */ + _VG_USERREQ__HG_USERSO_FORGET_ALL, /* arbitrary UWord SO-tag */ + _VG_USERREQ__HG_RESERVED2, /* Do not use */ + _VG_USERREQ__HG_RESERVED3, /* Do not use */ + _VG_USERREQ__HG_RESERVED4, /* Do not use */ + _VG_USERREQ__HG_ARANGE_MAKE_UNTRACKED, /* Addr a, ulong len */ + _VG_USERREQ__HG_ARANGE_MAKE_TRACKED, /* Addr a, ulong len */ + _VG_USERREQ__HG_PTHREAD_BARRIER_RESIZE_PRE, /* pth_bar_t*, ulong */ + _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK, /* Addr start_of_block */ + _VG_USERREQ__HG_PTHREAD_COND_INIT_POST, /* pth_cond_t*, pth_cond_attr_t*/ + _VG_USERREQ__HG_GNAT_MASTER_HOOK, /* void*d,void*m,Word ml */ + _VG_USERREQ__HG_GNAT_MASTER_COMPLETED_HOOK, /* void*s,Word ml */ + _VG_USERREQ__HG_GET_ABITS, /* Addr a,Addr abits, ulong len */ + _VG_USERREQ__HG_PTHREAD_CREATE_BEGIN, + _VG_USERREQ__HG_PTHREAD_CREATE_END, + _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_PRE, /* pth_mx_t*,long isTryLock */ + _VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, /* pth_mx_t *,long tookLock */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_LOCK_POST, /* pth_rwlk_t*,long isW,long */ + _VG_USERREQ__HG_PTHREAD_RWLOCK_UNLOCK_PRE, /* pth_rwlk_t* */ + _VG_USERREQ__HG_POSIX_SEM_POST_PRE, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_POST_POST, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_WAIT_PRE, /* sem_t* */ + _VG_USERREQ__HG_POSIX_SEM_WAIT_POST, /* sem_t*, long tookLock */ + _VG_USERREQ__HG_PTHREAD_COND_SIGNAL_POST, /* pth_cond_t* */ + _VG_USERREQ__HG_PTHREAD_COND_BROADCAST_POST,/* pth_cond_t* */ + _VG_USERREQ__HG_RTLD_BIND_GUARD, /* int flags */ + _VG_USERREQ__HG_RTLD_BIND_CLEAR, /* int flags */ + _VG_USERREQ__HG_GNAT_DEPENDENT_MASTER_JOIN /* void*d, void*m */ + } Vg_TCheckClientRequest; + + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*--- Implementation-only facilities. Not for end-user use. ---*/ +/*--- For end-user facilities see below (the next section in ---*/ +/*--- this file.) ---*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +/* Do a client request. These are macros rather than functions so + as to avoid having an extra frame in stack traces. + + NB: these duplicate definitions in hg_intercepts.c. But here, we + have to make do with weaker typing (no definition of Word etc) and + no assertions, whereas in helgrind.h we can use those facilities. + Obviously it's important the two sets of definitions are kept in + sync. + + The commented-out asserts should actually hold, but unfortunately + they can't be allowed to be visible here, because that would + require the end-user code to #include <assert.h>. +*/
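/*
 * Illustrative sketch (not part of the imported file): the DO_CREQ_*
 * helpers below are the building blocks of the public annotation macros.
 * A hypothetical two-argument wrapper, mirroring VALGRIND_HG_MUTEX_INIT_POST
 * defined further below, would look like:
 *
 *	#define MY_MUTEX_INIT_POST(_mutex, _mbRec) \
 *		DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, \
 *			void*, (_mutex), long, (_mbRec))
 */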
+ +#define DO_CREQ_v_W(_creqF, _ty1F,_arg1F) \ + do { \ + long int _arg1; \ + /* assert(sizeof(_ty1F) == sizeof(long int)); */ \ + _arg1 = (long int)(_arg1F); \ + VALGRIND_DO_CLIENT_REQUEST_STMT( \ + (_creqF), \ + _arg1, 0,0,0,0); \ + } while (0) + +#define DO_CREQ_W_W(_resF, _dfltF, _creqF, _ty1F,_arg1F) \ + do { \ + long int _qzz_res; \ + long int _arg1; \ + /* assert(sizeof(_ty1F) == sizeof(long int)); */ \ + _arg1 = (long int)(_arg1F); \ + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + (_dfltF), \ + (_creqF), \ + _arg1, 0,0,0,0); \ + _resF = _qzz_res; \ + } while (0) + +#define DO_CREQ_v_WW(_creqF, _ty1F,_arg1F, _ty2F,_arg2F) \ + do { \ + long int _arg1, _arg2; \ + /* assert(sizeof(_ty1F) == sizeof(long int)); */ \ + /* assert(sizeof(_ty2F) == sizeof(long int)); */ \ + _arg1 = (long int)(_arg1F); \ + _arg2 = (long int)(_arg2F); \ + VALGRIND_DO_CLIENT_REQUEST_STMT( \ + (_creqF), \ + _arg1,_arg2,0,0,0); \ + } while (0) + +#define DO_CREQ_v_WWW(_creqF, _ty1F,_arg1F, \ + _ty2F,_arg2F, _ty3F, _arg3F) \ + do { \ + long int _arg1, _arg2, _arg3; \ + /* assert(sizeof(_ty1F) == sizeof(long int)); */ \ + /* assert(sizeof(_ty2F) == sizeof(long int)); */ \ + /* assert(sizeof(_ty3F) == sizeof(long int)); */ \ + _arg1 = (long int)(_arg1F); \ + _arg2 = (long int)(_arg2F); \ + _arg3 = (long int)(_arg3F); \ + VALGRIND_DO_CLIENT_REQUEST_STMT( \ + (_creqF), \ + _arg1,_arg2,_arg3,0,0); \ + } while (0) + +#define DO_CREQ_W_WWW(_resF, _dfltF, _creqF, _ty1F,_arg1F, \ + _ty2F,_arg2F, _ty3F, _arg3F) \ + do { \ + long int _qzz_res; \ + long int _arg1, _arg2, _arg3; \ + /* assert(sizeof(_ty1F) == sizeof(long int)); */ \ + _arg1 = (long int)(_arg1F); \ + _arg2 = (long int)(_arg2F); \ + _arg3 = (long int)(_arg3F); \ + /* \ + * XXX: here PMDK's version deviates from upstream;\ + * without the fix, this code generates \ + * a sign-conversion warning, which PMDK's \ + * "awesome" build system promotes to an error \ + */ \ + _qzz_res = (long)VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + (_dfltF), \ + (_creqF), \ + _arg1,_arg2,_arg3,0,0); \ + _resF = _qzz_res; \ + } while (0) + + + +#define _HG_CLIENTREQ_UNIMP(_qzz_str) \ + DO_CREQ_v_W(_VG_USERREQ__HG_CLIENTREQ_UNIMP, \ + (char*),(_qzz_str)) + + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*--- Helgrind-native requests. These allow access to ---*/ +/*--- the same set of annotation primitives that are used ---*/ +/*--- to build the POSIX pthread wrappers. ---*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +/* ---------------------------------------------------------- + For describing ordinary mutexes (non-rwlocks). For rwlock + descriptions see ANNOTATE_RWLOCK_* below. + ---------------------------------------------------------- */ + +/* Notify here immediately after mutex creation. _mbRec == 0 for a + non-recursive mutex, 1 for a recursive mutex. */ +#define VALGRIND_HG_MUTEX_INIT_POST(_mutex, _mbRec) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_INIT_POST, \ + void*,(_mutex), long,(_mbRec)) + +/* Notify here immediately before mutex acquisition. _isTryLock == 0 + for a normal acquisition, 1 for a "try" style acquisition. */ +#define VALGRIND_HG_MUTEX_LOCK_PRE(_mutex, _isTryLock) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_ACQUIRE_PRE, \ + void*,(_mutex), long,(_isTryLock)) + +/* Notify here immediately after a successful mutex acquisition.
*/ +#define VALGRIND_HG_MUTEX_LOCK_POST(_mutex) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_ACQUIRE_POST, \ + void*,(_mutex)) + +/* Notify here immediately before a mutex release. */ +#define VALGRIND_HG_MUTEX_UNLOCK_PRE(_mutex) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, \ + void*,(_mutex)) + +/* Notify here immediately after a mutex release. */ +#define VALGRIND_HG_MUTEX_UNLOCK_POST(_mutex) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_POST, \ + void*,(_mutex)) + +/* Notify here immediately before mutex destruction. */ +#define VALGRIND_HG_MUTEX_DESTROY_PRE(_mutex) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, \ + void*,(_mutex)) + +/* ---------------------------------------------------------- + For describing semaphores. + ---------------------------------------------------------- */ + +/* Notify here immediately after semaphore creation. */ +#define VALGRIND_HG_SEM_INIT_POST(_sem, _value) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_POSIX_SEM_INIT_POST, \ + void*, (_sem), unsigned long, (_value)) + +/* Notify here immediately after a semaphore wait (an acquire-style + operation) */ +#define VALGRIND_HG_SEM_WAIT_POST(_sem) \ + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_ACQUIRED, \ + void*,(_sem)) + +/* Notify here immediately before semaphore post (a release-style + operation) */ +#define VALGRIND_HG_SEM_POST_PRE(_sem) \ + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_RELEASED, \ + void*,(_sem)) + +/* Notify here immediately before semaphore destruction. */ +#define VALGRIND_HG_SEM_DESTROY_PRE(_sem) \ + DO_CREQ_v_W(_VG_USERREQ__HG_POSIX_SEM_DESTROY_PRE, \ + void*, (_sem)) + +/* ---------------------------------------------------------- + For describing barriers. + ---------------------------------------------------------- */ + +/* Notify here immediately before barrier creation. _count is the + capacity. _resizable == 0 means the barrier may not be resized, 1 + means it may be. */ +#define VALGRIND_HG_BARRIER_INIT_PRE(_bar, _count, _resizable) \ + DO_CREQ_v_WWW(_VG_USERREQ__HG_PTHREAD_BARRIER_INIT_PRE, \ + void*,(_bar), \ + unsigned long,(_count), \ + unsigned long,(_resizable)) + +/* Notify here immediately before arrival at a barrier. */ +#define VALGRIND_HG_BARRIER_WAIT_PRE(_bar) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_BARRIER_WAIT_PRE, \ + void*,(_bar)) + +/* Notify here immediately before a resize (change of barrier + capacity). If _newcount >= the existing capacity, then there is no + change in the state of any threads waiting at the barrier. If + _newcount < the existing capacity, and >= _newcount threads are + currently waiting at the barrier, then this notification is + considered to also have the effect of telling the checker that all + waiting threads have now moved past the barrier. (I can't think of + any other sane semantics.) */ +#define VALGRIND_HG_BARRIER_RESIZE_PRE(_bar, _newcount) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_BARRIER_RESIZE_PRE, \ + void*,(_bar), \ + unsigned long,(_newcount)) + +/* Notify here immediately before barrier destruction. */ +#define VALGRIND_HG_BARRIER_DESTROY_PRE(_bar) \ + DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_BARRIER_DESTROY_PRE, \ + void*,(_bar)) + +/* ---------------------------------------------------------- + For describing memory ownership changes. + ---------------------------------------------------------- */ + +/* Clean memory state. This makes Helgrind forget everything it knew + about the specified memory range. 
Effectively this announces that + the specified memory range now "belongs" to the calling thread, so + that: (1) the calling thread can access it safely without + synchronisation, and (2) all other threads must sync with this one + to access it safely. This is particularly useful for memory + allocators that wish to recycle memory. */ +#define VALGRIND_HG_CLEAN_MEMORY(_qzz_start, _qzz_len) \ + DO_CREQ_v_WW(VG_USERREQ__HG_CLEAN_MEMORY, \ + void*,(_qzz_start), \ + unsigned long,(_qzz_len)) + +/* The same, but for the heap block starting at _qzz_blockstart. This + allows painting when we only know the address of an object, but not + its size, which is sometimes the case in C++ code involving + inheritance, and in which RTTI is not, for whatever reason, + available. Returns the number of bytes painted, which can be zero + for a zero-sized block. Hence, return values >= 0 indicate success + (the block was found), and the value -1 indicates block not + found, and -2 is returned when not running on Helgrind. */ +#define VALGRIND_HG_CLEAN_MEMORY_HEAPBLOCK(_qzz_blockstart) \ + (__extension__ \ + ({long int _npainted; \ + DO_CREQ_W_W(_npainted, (-2)/*default*/, \ + _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK, \ + void*,(_qzz_blockstart)); \ + _npainted; \ + })) + +/* ---------------------------------------------------------- + For error control. + ---------------------------------------------------------- */ + +/* Tell H that an address range is not to be "tracked" until further + notice. This puts it in the NOACCESS state, in which case we + ignore all reads and writes to it. Useful for ignoring ranges of + memory where there might be races we don't want to see. If the + memory is subsequently reallocated via malloc/new/stack allocation, + then it is put back in the trackable state. Hence it is safe in + the situation where checking is disabled, the containing area is + deallocated and later reallocated for some other purpose. */ +#define VALGRIND_HG_DISABLE_CHECKING(_qzz_start, _qzz_len) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_ARANGE_MAKE_UNTRACKED, \ + void*,(_qzz_start), \ + unsigned long,(_qzz_len)) + +/* And put it back into the normal "tracked" state, that is, make it + once again subject to the normal race-checking machinery. This + puts it in the same state as new memory allocated by this thread -- + that is, basically owned exclusively by this thread. */ +#define VALGRIND_HG_ENABLE_CHECKING(_qzz_start, _qzz_len) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_ARANGE_MAKE_TRACKED, \ + void*,(_qzz_start), \ + unsigned long,(_qzz_len)) + + +/* Checks the accessibility bits for addresses [zza..zza+zznbytes-1]. + If zzabits array is provided, copy the accessibility bits in zzabits. + Return values: + -2 if not running on helgrind + -1 if any parts of zzabits is not addressable + >= 0 : success. + When success, it returns the nr of addressable bytes found. + So, to check that a whole range is addressable, check + VALGRIND_HG_GET_ABITS(addr,NULL,len) == len + In addition, if you want to examine the addressability of each + byte of the range, you need to provide a non NULL ptr as + second argument, pointing to an array of unsigned char + of length len. + Addressable bytes are indicated with 0xff. + Non-addressable bytes are indicated with 0x00. 
+*/ +#define VALGRIND_HG_GET_ABITS(zza,zzabits,zznbytes) \ + (__extension__ \ + ({long int _res; \ + /* \ + * XXX: here PMDK's version deviates from upstream; \ + * without the fix, this macro doesn't return \ + * the default value correctly \ + */ \ + DO_CREQ_W_WWW(_res, (-2LL)/*default*/, \ + _VG_USERREQ__HG_GET_ABITS, \ + void*,(zza), void*,(zzabits), \ + unsigned long,(zznbytes)); \ + _res; \ + })) + +/* End-user request for Ada applications compiled with GNAT. + Helgrind understands the Ada concept of Ada task dependencies and + terminations. See Ada Reference Manual section 9.3 "Task Dependence + - Termination of Tasks". + However, in some cases, the master of (terminated) tasks completes + only when the application exits. An example of this is dynamically + allocated tasks with an access type defined at Library Level. + By default, the state of such tasks in Helgrind will be 'exited but + join not done yet'. Many tasks in such a state are however causing + Helgrind CPU and memory to increase significantly. + VALGRIND_HG_GNAT_DEPENDENT_MASTER_JOIN can be used to indicate + to Helgrind that a not yet completed master has however already + 'seen' the termination of a dependent : this is conceptually the + same as a pthread_join and causes the cleanup of the dependent + as done by Helgrind when a master completes. + This allows to avoid the overhead in helgrind caused by such tasks. + A typical usage for a master to indicate it has done conceptually a join + with a dependent task before the master completes is: + while not Dep_Task'Terminated loop + ... do whatever to wait for Dep_Task termination. + end loop; + VALGRIND_HG_GNAT_DEPENDENT_MASTER_JOIN + (Dep_Task'Identity, + Ada.Task_Identification.Current_Task); + Note that VALGRIND_HG_GNAT_DEPENDENT_MASTER_JOIN should be a binding + to a C function built with the below macro. */ +#define VALGRIND_HG_GNAT_DEPENDENT_MASTER_JOIN(_qzz_dep, _qzz_master) \ + DO_CREQ_v_WW(_VG_USERREQ__HG_GNAT_DEPENDENT_MASTER_JOIN, \ + void*,(_qzz_dep), \ + void*,(_qzz_master)) + +/*----------------------------------------------------------------*/ +/*--- ---*/ +/*--- ThreadSanitizer-compatible requests ---*/ +/*--- (mostly unimplemented) ---*/ +/*--- ---*/ +/*----------------------------------------------------------------*/ + +/* A quite-broad set of annotations, as used in the ThreadSanitizer + project. This implementation aims to be a (source-level) + compatible implementation of the macros defined in: + + http://code.google.com/p/data-race-test/source + /browse/trunk/dynamic_annotations/dynamic_annotations.h + + (some of the comments below are taken from the above file) + + The implementation here is very incomplete, and intended as a + starting point. Many of the macros are unimplemented. Rather than + allowing unimplemented macros to silently do nothing, they cause an + assertion. Intention is to implement them on demand. + + The major use of these macros is to make visible to race detectors, + the behaviour (effects) of user-implemented synchronisation + primitives, that the detectors could not otherwise deduce from the + normal observation of pthread etc calls. + + Some of the macros are no-ops in Helgrind. That's because Helgrind + is a pure happens-before detector, whereas ThreadSanitizer uses a + hybrid lockset and happens-before scheme, which requires more + accurate annotations for correct operation. + + The macros are listed in the same order as in dynamic_annotations.h + (URL just above). 
+ + I should point out that I am less than clear about the intended + semantics of quite a number of them. Comments and clarifications + welcomed! +*/ + +/* ---------------------------------------------------------------- + These four allow description of user-level condition variables, + apparently in the style of POSIX's pthread_cond_t. Currently + unimplemented and will assert. + ---------------------------------------------------------------- +*/ +/* Report that wait on the condition variable at address CV has + succeeded and the lock at address LOCK is now held. CV and LOCK + are completely arbitrary memory addresses which presumably mean + something to the application, but are meaningless to Helgrind. */ +#define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + _HG_CLIENTREQ_UNIMP("ANNOTATE_CONDVAR_LOCK_WAIT") + +/* Report that wait on the condition variable at CV has succeeded. + Variant w/o lock. */ +#define ANNOTATE_CONDVAR_WAIT(cv) \ + _HG_CLIENTREQ_UNIMP("ANNOTATE_CONDVAR_WAIT") + +/* Report that we are about to signal on the condition variable at + address CV. */ +#define ANNOTATE_CONDVAR_SIGNAL(cv) \ + _HG_CLIENTREQ_UNIMP("ANNOTATE_CONDVAR_SIGNAL") + +/* Report that we are about to signal_all on the condition variable at + CV. */ +#define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + _HG_CLIENTREQ_UNIMP("ANNOTATE_CONDVAR_SIGNAL_ALL") + + +/* ---------------------------------------------------------------- + Create completely arbitrary happens-before edges between threads. + + If threads T1 .. Tn all do ANNOTATE_HAPPENS_BEFORE(obj) and later + (w.r.t. some notional global clock for the computation) thread Tm + does ANNOTATE_HAPPENS_AFTER(obj), then Helgrind will regard all + memory accesses done by T1 .. Tn before the ..BEFORE.. call as + happening-before all memory accesses done by Tm after the + ..AFTER.. call. Hence Helgrind won't complain about races if Tm's + accesses afterwards are to the same locations as accesses before by + any of T1 .. Tn. + + OBJ is a machine word (unsigned long, or void*), is completely + arbitrary, and denotes the identity of some synchronisation object + you're modelling. + + You must do the _BEFORE call just before the real sync event on the + signaller's side, and _AFTER just after the real sync event on the + waiter's side. + + If none of the rest of these macros make sense to you, at least + take the time to understand these two. They form the very essence + of describing arbitrary inter-thread synchronisation events to + Helgrind. You can get a long way just with them alone. + + See also, extensive discussion on semantics of this in + https://bugs.kde.org/show_bug.cgi?id=243935 + + ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) is interim until such time + as bug 243935 is fully resolved. It instructs Helgrind to forget + about any ANNOTATE_HAPPENS_BEFORE calls on the specified object, in + effect putting it back in its original state. Once in that state, + a use of ANNOTATE_HAPPENS_AFTER on it has no effect on the calling + thread. + + An implementation may optionally release resources it has + associated with 'obj' when ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) + happens. Users are recommended to use + ANNOTATE_HAPPENS_BEFORE_FORGET_ALL to indicate when a + synchronisation object is no longer needed, so as to avoid + potential indefinite resource leaks. 
+   ----------------------------------------------------------------
+*/
+#define ANNOTATE_HAPPENS_BEFORE(obj) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_USERSO_SEND_PRE, void*,(obj))
+
+#define ANNOTATE_HAPPENS_AFTER(obj) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_USERSO_RECV_POST, void*,(obj))
+
+#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(obj) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_USERSO_FORGET_ALL, void*,(obj))
+
+/* ----------------------------------------------------------------
+   Memory publishing. The TSan sources say:
+
+   Report that the bytes in the range [pointer, pointer+size) are about
+   to be published safely. The race checker will create a happens-before
+   arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to
+   subsequent accesses to this memory.
+
+   I'm not sure I understand what this means exactly, nor whether it
+   is relevant for a pure h-b detector. Leaving unimplemented for
+   now.
+   ----------------------------------------------------------------
+*/
+#define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PUBLISH_MEMORY_RANGE")
+
+/* DEPRECATED. Don't use it. */
+/* #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) */
+
+/* DEPRECATED. Don't use it. */
+/* #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) */
+
+
+/* ----------------------------------------------------------------
+   TSan sources say:
+
+   Instruct the tool to create a happens-before arc between
+   MU->Unlock() and MU->Lock(). This annotation may slow down the
+   race detector; normally it is used only when it would be
+   difficult to annotate each of the mutex's critical sections
+   individually using the annotations above.
+
+   If MU is a posix pthread_mutex_t then Helgrind will do this anyway.
+   In any case, leave as unimp for now. I'm unsure about the intended
+   behaviour.
+   ----------------------------------------------------------------
+*/
+#define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX")
+
+/* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */
+/* #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) */
+
+
+/* ----------------------------------------------------------------
+   TSan sources say:
+
+   Annotations useful when defining memory allocators, or when
+   memory that was protected in one way starts to be protected in
+   another.
+
+   Report that new memory at "address" of size "size" has been
+   allocated. This might be used when the memory has been retrieved
+   from a free list and is about to be reused, or when the locking
+   discipline for a variable changes.
+
+   AFAICS this is the same as VALGRIND_HG_CLEAN_MEMORY.
+   ----------------------------------------------------------------
+*/
+#define ANNOTATE_NEW_MEMORY(address, size) \
+   VALGRIND_HG_CLEAN_MEMORY((address), (size))
+
+
+/* ----------------------------------------------------------------
+   TSan sources say:
+
+   Annotations useful when defining FIFO queues that transfer data
+   between threads.
+
+   All unimplemented. Am not claiming to understand this (yet).
+   ----------------------------------------------------------------
+*/
+
+/* Report that the producer-consumer queue object at address PCQ has
+   been created. The ANNOTATE_PCQ_* annotations should be used only
+   for FIFO queues. For non-FIFO queues use ANNOTATE_HAPPENS_BEFORE
+   (for put) and ANNOTATE_HAPPENS_AFTER (for get). */
+#define ANNOTATE_PCQ_CREATE(pcq) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PCQ_CREATE")
+
+/* Report that the queue at address PCQ is about to be destroyed. */
+#define ANNOTATE_PCQ_DESTROY(pcq) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PCQ_DESTROY")
+
+/* Report that we are about to put an element into a FIFO queue at
+   address PCQ. */
+#define ANNOTATE_PCQ_PUT(pcq) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PCQ_PUT")
+
+/* Report that we've just got an element from a FIFO queue at address
+   PCQ. */
+#define ANNOTATE_PCQ_GET(pcq) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_PCQ_GET")
+
+
+/* ----------------------------------------------------------------
+   Annotations that suppress errors. It is usually better to express
+   the program's synchronisation using the other annotations, but
+   these can be used when all else fails.
+
+   Currently these are all unimplemented. I can't think of a simple
+   way to implement them without at least some performance overhead.
+   ----------------------------------------------------------------
+*/
+
+/* Report that we may have a benign race at "pointer", with size
+   "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the
+   point where "pointer" has been allocated, preferably close to the point
+   where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC.
+
+   XXX: what's this actually supposed to do? And what's the type of
+   DESCRIPTION? When does the annotation stop having an effect?
+*/
+#define ANNOTATE_BENIGN_RACE(pointer, description) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_BENIGN_RACE")
+
+/* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to
+   the memory range [address, address+size). */
+#define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \
+   VALGRIND_HG_DISABLE_CHECKING(address, size)
+
+/* Request the analysis tool to ignore all reads in the current thread
+   until ANNOTATE_IGNORE_READS_END is called. Useful to ignore
+   intentionally racy reads, while still checking other reads and all
+   writes. */
+#define ANNOTATE_IGNORE_READS_BEGIN() \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_IGNORE_READS_BEGIN")
+
+/* Stop ignoring reads. */
+#define ANNOTATE_IGNORE_READS_END() \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_IGNORE_READS_END")
+
+/* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */
+#define ANNOTATE_IGNORE_WRITES_BEGIN() \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_IGNORE_WRITES_BEGIN")
+
+/* Stop ignoring writes. */
+#define ANNOTATE_IGNORE_WRITES_END() \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_IGNORE_WRITES_END")
+
+/* Start ignoring all memory accesses (reads and writes). */
+#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
+   do { \
+      ANNOTATE_IGNORE_READS_BEGIN(); \
+      ANNOTATE_IGNORE_WRITES_BEGIN(); \
+   } while (0)
+
+/* Stop ignoring all memory accesses. */
+#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
+   do { \
+      ANNOTATE_IGNORE_WRITES_END(); \
+      ANNOTATE_IGNORE_READS_END(); \
+   } while (0)
+
+
+/* ----------------------------------------------------------------
+   Annotations useful for debugging.
+
+   Again, so far unimplemented, partly for performance reasons.
+   ----------------------------------------------------------------
+*/
+
+/* Request to trace every access to ADDRESS. */
+#define ANNOTATE_TRACE_MEMORY(address) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_TRACE_MEMORY")
+
+/* Report the current thread name to a race detector. */
+#define ANNOTATE_THREAD_NAME(name) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_THREAD_NAME")
+
+
+/* ----------------------------------------------------------------
+   Annotations for describing behaviour of user-implemented lock
+   primitives. In all cases, the LOCK argument is a completely
+   arbitrary machine word (unsigned long, or void*) and can be any
+   value which gives a unique identity to the lock objects being
+   modelled.
+
+   We just pretend they're ordinary posix rwlocks. That'll probably
+   give some rather confusing wording in error messages, claiming that
+   the arbitrary LOCK values are pthread_rwlock_t*'s, when in fact
+   they are not. Ah well.
+   ----------------------------------------------------------------
+*/
+/* Report that a lock has just been created at address LOCK. */
+#define ANNOTATE_RWLOCK_CREATE(lock) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_INIT_POST, \
+               void*,(lock))
+
+/* Report that the lock at address LOCK is about to be destroyed. */
+#define ANNOTATE_RWLOCK_DESTROY(lock) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_DESTROY_PRE, \
+               void*,(lock))
+
+/* Report that the lock at address LOCK has just been acquired.
+   is_w=1 for writer lock, is_w=0 for reader lock. */
+#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \
+   DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_RWLOCK_ACQUIRED, \
+                void*,(lock), unsigned long,(is_w))
+
+/* Report that the lock at address LOCK is about to be released. */
+#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \
+   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_RWLOCK_RELEASED, \
+               void*,(lock)) /* is_w is ignored */
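+
+/* For illustration (an editor's sketch, not part of the upstream
+   header): a minimal user-implemented lock might be described to
+   Helgrind as follows ('mylock_t', 'impl_acquire' and 'impl_release'
+   are hypothetical names for the real implementation):
+
+      mylock_t lk;
+      ANNOTATE_RWLOCK_CREATE(&lk);
+
+      impl_acquire(&lk);                    // really take the lock
+      ANNOTATE_RWLOCK_ACQUIRED(&lk, 1);     // 1 = acquired as a writer
+      ... critical section ...
+      ANNOTATE_RWLOCK_RELEASED(&lk, 1);     // report just before release
+      impl_release(&lk);                    // really drop the lock
+
+      ANNOTATE_RWLOCK_DESTROY(&lk);
+*/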
+
+
+/* ----------------------------------------------------------------
+   Annotations useful when implementing barriers. They are not
+   normally needed by modules that merely use barriers.
+   The "barrier" argument is a pointer to the barrier object.
+   ----------------------------------------------------------------
+*/
+
+/* Report that the "barrier" has been initialized with initial
+   "count". If 'reinitialization_allowed' is true, initialization is
+   allowed to happen multiple times w/o calling barrier_destroy(). */
+#define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_BARRIER_INIT")
+
+/* Report that we are about to enter barrier_wait("barrier"). */
+#define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_BARRIER_WAIT_BEFORE")
+
+/* Report that we just exited barrier_wait("barrier"). */
+#define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_BARRIER_WAIT_AFTER")
+
+/* Report that the "barrier" has been destroyed. */
+#define ANNOTATE_BARRIER_DESTROY(barrier) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_BARRIER_DESTROY")
+
+
+/* ----------------------------------------------------------------
+   Annotations useful for testing race detectors.
+   ----------------------------------------------------------------
+*/
+
+/* Report that we expect a race on the variable at ADDRESS. Use only
+   in unit tests for a race detector. */
+#define ANNOTATE_EXPECT_RACE(address, description) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_EXPECT_RACE")
+
+/* A no-op. Insert where you like to test the interceptors. */
+#define ANNOTATE_NO_OP(arg) \
+   _HG_CLIENTREQ_UNIMP("ANNOTATE_NO_OP")
+
+/* Force the race detector to flush its state. The actual effect depends on
+ * the implementation of the detector.
*/ +#define ANNOTATE_FLUSH_STATE() \ + _HG_CLIENTREQ_UNIMP("ANNOTATE_FLUSH_STATE") + +#endif /* __HELGRIND_H */ diff --git a/src/pmdk/src/core/valgrind/memcheck.h b/src/pmdk/src/core/valgrind/memcheck.h new file mode 100644 index 000000000..fafe78711 --- /dev/null +++ b/src/pmdk/src/core/valgrind/memcheck.h @@ -0,0 +1,320 @@ + +/* + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (memcheck.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of MemCheck, a heavyweight Valgrind tool for + detecting memory errors. + + Copyright (C) 2000-2017 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (memcheck.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +#ifndef __MEMCHECK_H +#define __MEMCHECK_H + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query memory permissions + inside your own programs. + + See comment near the top of valgrind.h on how to use them. +*/ + +#include "valgrind.h" + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. 
*/ +typedef + enum { + VG_USERREQ__MAKE_MEM_NOACCESS = VG_USERREQ_TOOL_BASE('M','C'), + VG_USERREQ__MAKE_MEM_UNDEFINED, + VG_USERREQ__MAKE_MEM_DEFINED, + VG_USERREQ__DISCARD, + VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE, + VG_USERREQ__CHECK_MEM_IS_DEFINED, + VG_USERREQ__DO_LEAK_CHECK, + VG_USERREQ__COUNT_LEAKS, + + VG_USERREQ__GET_VBITS, + VG_USERREQ__SET_VBITS, + + VG_USERREQ__CREATE_BLOCK, + + VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE, + + /* Not next to VG_USERREQ__COUNT_LEAKS because it was added later. */ + VG_USERREQ__COUNT_LEAK_BLOCKS, + + VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE, + VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE, + + VG_USERREQ__CHECK_MEM_IS_UNADDRESSABLE, + VG_USERREQ__CHECK_MEM_IS_UNDEFINED, + + /* This is just for memcheck's internal use - don't use it */ + _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR + = VG_USERREQ_TOOL_BASE('M','C') + 256 + } Vg_MemCheckClientRequest; + + + +/* Client-code macros to manipulate the state of memory. */ + +/* Mark memory at _qzz_addr as unaddressable for _qzz_len bytes. */ +#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__MAKE_MEM_NOACCESS, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Similarly, mark memory at _qzz_addr as addressable but undefined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__MAKE_MEM_UNDEFINED, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Similarly, mark memory at _qzz_addr as addressable and defined + for _qzz_len bytes. */ +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__MAKE_MEM_DEFINED, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Similar to VALGRIND_MAKE_MEM_DEFINED except that addressability is + not altered: bytes which are addressable are marked as defined, + but those which are not addressable are left unchanged. */ +#define VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Create a block-description handle. The description is an ascii + string which is included in any messages pertaining to addresses + within the specified memory range. Has no other effect on the + properties of the memory range. */ +#define VALGRIND_CREATE_BLOCK(_qzz_addr,_qzz_len, _qzz_desc) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CREATE_BLOCK, \ + (_qzz_addr), (_qzz_len), (_qzz_desc), \ + 0, 0) + +/* Discard a block-description-handle. Returns 1 for an + invalid handle, 0 for a valid handle. */ +#define VALGRIND_DISCARD(_qzz_blkindex) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__DISCARD, \ + 0, (_qzz_blkindex), 0, 0, 0) + + +/* Client-code macros to check the state of memory. */ + +/* Check that memory at _qzz_addr is addressable for _qzz_len bytes. + If suitable addressability is not established, Valgrind prints an + error message and returns the address of the first offending byte. + Otherwise it returns zero. */ +#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Check that memory at _qzz_addr is addressable and defined for + _qzz_len bytes. 
If suitable addressability and definedness are not + established, Valgrind prints an error message and returns the + address of the first offending byte. Otherwise it returns zero. */ +#define VALGRIND_CHECK_MEM_IS_DEFINED(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__CHECK_MEM_IS_DEFINED, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Use this macro to force the definedness and addressability of an + lvalue to be checked. If suitable addressability and definedness + are not established, Valgrind prints an error message and returns + the address of the first offending byte. Otherwise it returns + zero. */ +#define VALGRIND_CHECK_VALUE_IS_DEFINED(__lvalue) \ + VALGRIND_CHECK_MEM_IS_DEFINED( \ + (volatile unsigned char *)&(__lvalue), \ + (unsigned long)(sizeof (__lvalue))) + +/* Check that memory at _qzz_addr is unaddressable for _qzz_len bytes. + If any byte in this range is addressable, Valgrind returns the + address of the first offending byte. Otherwise it returns zero. */ +#define VALGRIND_CHECK_MEM_IS_UNADDRESSABLE(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__CHECK_MEM_IS_UNADDRESSABLE,\ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Check that memory at _qzz_addr is undefined for _qzz_len bytes. If any + byte in this range is defined or unaddressable, Valgrind returns the + address of the first offending byte. Otherwise it returns zero. */ +#define VALGRIND_CHECK_MEM_IS_UNDEFINED(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__CHECK_MEM_IS_UNDEFINED, \ + (_qzz_addr), (_qzz_len), 0, 0, 0) + +/* Do a full memory leak check (like --leak-check=full) mid-execution. */ +#define VALGRIND_DO_LEAK_CHECK \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK, \ + 0, 0, 0, 0, 0) + +/* Same as VALGRIND_DO_LEAK_CHECK but only showing the entries for + which there was an increase in leaked bytes or leaked nr of blocks + since the previous leak search. */ +#define VALGRIND_DO_ADDED_LEAK_CHECK \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK, \ + 0, 1, 0, 0, 0) + +/* Same as VALGRIND_DO_ADDED_LEAK_CHECK but showing entries with + increased or decreased leaked bytes/blocks since previous leak + search. */ +#define VALGRIND_DO_CHANGED_LEAK_CHECK \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK, \ + 0, 2, 0, 0, 0) + +/* Do a summary memory leak check (like --leak-check=summary) mid-execution. */ +#define VALGRIND_DO_QUICK_LEAK_CHECK \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK, \ + 1, 0, 0, 0, 0) + +/* Return number of leaked, dubious, reachable and suppressed bytes found by + all previous leak checks. They must be lvalues. */ +#define VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed) \ + /* For safety on 64-bit platforms we assign the results to private + unsigned long variables, then assign these to the lvalues the user + specified, which works no matter what type 'leaked', 'dubious', etc + are. We also initialise '_qzz_leaked', etc because + VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as + defined. 
*/                                                                \
+   {                                                                 \
+    unsigned long _qzz_leaked    = 0, _qzz_dubious    = 0;           \
+    unsigned long _qzz_reachable = 0, _qzz_suppressed = 0;           \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(                                 \
+                               VG_USERREQ__COUNT_LEAKS,              \
+                               &_qzz_leaked, &_qzz_dubious,          \
+                               &_qzz_reachable, &_qzz_suppressed, 0);\
+    leaked     = _qzz_leaked;                                        \
+    dubious    = _qzz_dubious;                                       \
+    reachable  = _qzz_reachable;                                     \
+    suppressed = _qzz_suppressed;                                    \
+   }
+
+/* Return number of leaked, dubious, reachable and suppressed blocks found
+   by all previous leak checks. They must be lvalues. */
+#define VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed) \
+   /* For safety on 64-bit platforms we assign the results to private
+      unsigned long variables, then assign these to the lvalues the user
+      specified, which works no matter what type 'leaked', 'dubious', etc
+      are. We also initialise '_qzz_leaked', etc because
+      VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
+      defined. */                                                    \
+   {                                                                 \
+    unsigned long _qzz_leaked    = 0, _qzz_dubious    = 0;           \
+    unsigned long _qzz_reachable = 0, _qzz_suppressed = 0;           \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(                                 \
+                               VG_USERREQ__COUNT_LEAK_BLOCKS,        \
+                               &_qzz_leaked, &_qzz_dubious,          \
+                               &_qzz_reachable, &_qzz_suppressed, 0);\
+    leaked     = _qzz_leaked;                                        \
+    dubious    = _qzz_dubious;                                       \
+    reachable  = _qzz_reachable;                                     \
+    suppressed = _qzz_suppressed;                                    \
+   }
+
+
+/* Get the validity data for addresses [zza..zza+zznbytes-1] and copy it
+   into the provided zzvbits array. Return values:
+      0   if not running on valgrind
+      1   success
+      2   [previously indicated unaligned arrays; these are now allowed]
+      3   if any parts of zza/zzvbits are not addressable.
+   The metadata is not copied in cases 0, 2 or 3 so it should be
+   impossible to segfault your system by using this call.
+*/
+#define VALGRIND_GET_VBITS(zza,zzvbits,zznbytes)                     \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                     \
+                                    VG_USERREQ__GET_VBITS,           \
+                                    (const char*)(zza),              \
+                                    (char*)(zzvbits),                \
+                                    (zznbytes), 0, 0)
+
+/* Set the validity data for addresses [zza..zza+zznbytes-1], copying it
+   from the provided zzvbits array. Return values:
+      0   if not running on valgrind
+      1   success
+      2   [previously indicated unaligned arrays; these are now allowed]
+      3   if any parts of zza/zzvbits are not addressable.
+   The metadata is not copied in cases 0, 2 or 3 so it should be
+   impossible to segfault your system by using this call.
+*/
+#define VALGRIND_SET_VBITS(zza,zzvbits,zznbytes)                     \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                     \
+                                    VG_USERREQ__SET_VBITS,           \
+                                    (const char*)(zza),              \
+                                    (const char*)(zzvbits),          \
+                                    (zznbytes), 0, 0 )
+
+/* Disable and re-enable reporting of addressing errors in the
+   specified address range. */
+#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(_qzz_addr,_qzz_len) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,          \
+       VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE,            \
+       (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(_qzz_addr,_qzz_len) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,          \
+       VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE,             \
+       (_qzz_addr), (_qzz_len), 0, 0, 0)
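+
+/* For illustration (an editor's sketch, not part of the upstream
+   header): a custom allocator typically drives the state-changing
+   macros like this ('pool_alloc' and 'pool_free' are hypothetical):
+
+      void *p = pool_alloc(pool, n);
+      VALGRIND_MAKE_MEM_UNDEFINED(p, n);   // addressable, contents undefined
+      ... client uses p ...
+      pool_free(pool, p);
+      VALGRIND_MAKE_MEM_NOACCESS(p, n);    // off-limits until reused
+*/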
+
+#endif
+
diff --git a/src/pmdk/src/core/valgrind/pmemcheck.h b/src/pmdk/src/core/valgrind/pmemcheck.h
new file mode 100644
index 000000000..94796db97
--- /dev/null
+++ b/src/pmdk/src/core/valgrind/pmemcheck.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+#ifndef __PMEMCHECK_H
+#define __PMEMCHECK_H
+
+
+/* This file is for inclusion into client (your!) code.
+
+   You can use these macros to manipulate and query memory permissions
+   inside your own programs.
+
+   See comment near the top of valgrind.h on how to use them.
+*/
+
+#include "valgrind.h"
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
+   This enum comprises an ABI exported by Valgrind to programs
+   which use client requests. DO NOT CHANGE THE ORDER OF THESE
+   ENTRIES, NOR DELETE ANY -- add new ones at the end. */
+typedef
+   enum {
+      VG_USERREQ__PMC_REGISTER_PMEM_MAPPING = VG_USERREQ_TOOL_BASE('P','C'),
+      VG_USERREQ__PMC_REGISTER_PMEM_FILE,
+      VG_USERREQ__PMC_REMOVE_PMEM_MAPPING,
+      VG_USERREQ__PMC_CHECK_IS_PMEM_MAPPING,
+      VG_USERREQ__PMC_PRINT_PMEM_MAPPINGS,
+      VG_USERREQ__PMC_DO_FLUSH,
+      VG_USERREQ__PMC_DO_FENCE,
+      VG_USERREQ__PMC_RESERVED1,   /* Do not use. */
+      VG_USERREQ__PMC_WRITE_STATS,
+      VG_USERREQ__PMC_RESERVED2,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED3,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED4,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED5,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED7,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED8,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED9,   /* Do not use. */
+      VG_USERREQ__PMC_RESERVED10,  /* Do not use. */
+      VG_USERREQ__PMC_SET_CLEAN,
+      /* transaction support */
+      VG_USERREQ__PMC_START_TX,
+      VG_USERREQ__PMC_START_TX_N,
+      VG_USERREQ__PMC_END_TX,
+      VG_USERREQ__PMC_END_TX_N,
+      VG_USERREQ__PMC_ADD_TO_TX,
+      VG_USERREQ__PMC_ADD_TO_TX_N,
+      VG_USERREQ__PMC_REMOVE_FROM_TX,
+      VG_USERREQ__PMC_REMOVE_FROM_TX_N,
+      VG_USERREQ__PMC_ADD_THREAD_TO_TX_N,
+      VG_USERREQ__PMC_REMOVE_THREAD_FROM_TX_N,
+      VG_USERREQ__PMC_ADD_TO_GLOBAL_TX_IGNORE,
+      VG_USERREQ__PMC_RESERVED6,   /* Do not use. */
+      VG_USERREQ__PMC_EMIT_LOG,
+   } Vg_PMemCheckClientRequest;
+
+
+
+/* Client-code macros to manipulate pmem mappings */
+
+/** Register a persistent memory mapping region */
+#define VALGRIND_PMC_REGISTER_PMEM_MAPPING(_qzz_addr, _qzz_len)          \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REGISTER_PMEM_MAPPING,       \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Register a persistent memory file */
+#define VALGRIND_PMC_REGISTER_PMEM_FILE(_qzz_desc, _qzz_addr_base,       \
+                                        _qzz_size, _qzz_offset)          \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REGISTER_PMEM_FILE,          \
+                            (_qzz_desc), (_qzz_addr_base), (_qzz_size),  \
+                            (_qzz_offset), 0)
+
+/** Remove a persistent memory mapping region */
+#define VALGRIND_PMC_REMOVE_PMEM_MAPPING(_qzz_addr,_qzz_len)             \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REMOVE_PMEM_MAPPING,         \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Check if the given range is a registered persistent memory mapping */
+#define VALGRIND_PMC_CHECK_IS_PMEM_MAPPING(_qzz_addr,_qzz_len)           \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_CHECK_IS_PMEM_MAPPING,       \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Print registered persistent memory mappings */
+#define VALGRIND_PMC_PRINT_PMEM_MAPPINGS                                 \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_PRINT_PMEM_MAPPINGS, \
+                            0, 0, 0, 0, 0)
+
+/** Register a CLFLUSH-like operation */
+#define VALGRIND_PMC_DO_FLUSH(_qzz_addr,_qzz_len)                        \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_DO_FLUSH,                    \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Register an SFENCE */
+#define VALGRIND_PMC_DO_FENCE                                            \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_DO_FENCE,            \
+                            0, 0, 0, 0, 0)
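+
+/* For illustration (an editor's sketch, not part of the upstream
+   header): a store to registered persistent memory is typically made
+   durable, and reported to pmemcheck, as a flush of the affected
+   range followed by a fence, mirroring what durability primitives
+   such as libpmem's pmem_persist() do ('addr', 'src' and 'len' are
+   arbitrary names):
+
+      memcpy(addr, src, len);            // modify pmem
+      VALGRIND_PMC_DO_FLUSH(addr, len);  // report CLFLUSH-like op
+      VALGRIND_PMC_DO_FENCE;             // report SFENCE
+*/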
+
+/** Write tool stats */
+#define VALGRIND_PMC_WRITE_STATS                                         \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_WRITE_STATS,         \
+                            0, 0, 0, 0, 0)
+
+/** Emit user log */
+#define VALGRIND_PMC_EMIT_LOG(_qzz_emit_log)                             \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_EMIT_LOG,                    \
+                            (_qzz_emit_log), 0, 0, 0, 0)
+
+/** Set a region of persistent memory as clean */
+#define VALGRIND_PMC_SET_CLEAN(_qzz_addr,_qzz_len)                       \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_SET_CLEAN,                   \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Support for transactions */
+
+/** Start an implicit persistent memory transaction */
+#define VALGRIND_PMC_START_TX                                            \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_START_TX,            \
+                            0, 0, 0, 0, 0)
+
+/** Start an explicit persistent memory transaction */
+#define VALGRIND_PMC_START_TX_N(_qzz_txn)                                \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_START_TX_N,                  \
+                            (_qzz_txn), 0, 0, 0, 0)
+
+/** End an implicit persistent memory transaction */
+#define VALGRIND_PMC_END_TX                                              \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_END_TX,              \
+                            0, 0, 0, 0, 0)
+
+/** End an explicit persistent memory transaction */
+#define VALGRIND_PMC_END_TX_N(_qzz_txn)                                  \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_END_TX_N,                    \
+                            (_qzz_txn), 0, 0, 0, 0)
+
+/** Add a persistent memory region to the implicit transaction */
+#define VALGRIND_PMC_ADD_TO_TX(_qzz_addr,_qzz_len)                       \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_ADD_TO_TX,                   \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Add a persistent memory region to an explicit transaction */
+#define VALGRIND_PMC_ADD_TO_TX_N(_qzz_txn,_qzz_addr,_qzz_len)            \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_ADD_TO_TX_N,                 \
+                            (_qzz_txn), (_qzz_addr), (_qzz_len), 0, 0)
+
+/** Remove a persistent memory region from the implicit transaction */
+#define VALGRIND_PMC_REMOVE_FROM_TX(_qzz_addr,_qzz_len)                  \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REMOVE_FROM_TX,              \
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+/** Remove a persistent memory region from an explicit transaction */
+#define VALGRIND_PMC_REMOVE_FROM_TX_N(_qzz_txn,_qzz_addr,_qzz_len)       \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REMOVE_FROM_TX_N,            \
+                            (_qzz_txn), (_qzz_addr), (_qzz_len), 0, 0)
+
+/** Add a thread to an explicit persistent memory transaction */
+#define VALGRIND_PMC_ADD_THREAD_TX_N(_qzz_txn)                           \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_ADD_THREAD_TO_TX_N,          \
+                            (_qzz_txn), 0, 0, 0, 0)
+
+/** Remove a thread from an explicit persistent memory transaction */
+#define VALGRIND_PMC_REMOVE_THREAD_FROM_TX_N(_qzz_txn)                   \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,              \
+                            VG_USERREQ__PMC_REMOVE_THREAD_FROM_TX_N,     \
+                            (_qzz_txn), 0, 0, 0, 0)
+
+/** Add a persistent memory region to the global transaction-ignore list */
+#define VALGRIND_PMC_ADD_TO_GLOBAL_TX_IGNORE(_qzz_addr,_qzz_len)         \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PMC_ADD_TO_GLOBAL_TX_IGNORE,\
+                            (_qzz_addr), (_qzz_len), 0, 0, 0)
+
+#endif
diff --git a/src/pmdk/src/core/valgrind/valgrind.h b/src/pmdk/src/core/valgrind/valgrind.h
new file mode 100644
index 000000000..16d3f9bad
--- /dev/null
+++ b/src/pmdk/src/core/valgrind/valgrind.h
@@ -0,0 +1,6647 @@
+/* -*- c -*-
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (valgrind.h) only.
The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2017 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + + +/* ------------------------------------------------------------------ */ +/* VERSION NUMBER OF VALGRIND */ +/* ------------------------------------------------------------------ */ + +/* Specify Valgrind's version number, so that user code can + conditionally compile based on our version number. Note that these + were introduced at version 3.6 and so do not exist in version 3.5 + or earlier. 
The recommended way to use them to check for "version X.Y or later"
+   is (e.g.)
+
+#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__)   \
+    && (__VALGRIND_MAJOR__ > 3                                   \
+        || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6))
+*/
+#define __VALGRIND_MAJOR__    3
+#define __VALGRIND_MINOR__    14
+
+
+#include <stdarg.h>
+
+/* Nb: this file might be included in a file compiled with -ansi. So
+   we can't use C++ style "//" comments nor the "asm" keyword (instead
+   use "__asm__"). */
+
+/* Derive some tags indicating what the target platform is. Note
+   that in this file we're using the compiler's CPP symbols for
+   identifying architectures, which are different to the ones we use
+   within the rest of Valgrind. Note, __powerpc__ is active for both
+   32 and 64-bit PPC, whereas __powerpc64__ is only active for the
+   latter (on Linux, that is).
+
+   Misc note: how to find out what's predefined in gcc by default:
+   gcc -Wp,-dM somefile.c
+*/
+#undef PLAT_x86_darwin
+#undef PLAT_amd64_darwin
+#undef PLAT_x86_win32
+#undef PLAT_amd64_win64
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
+#undef PLAT_arm_linux
+#undef PLAT_arm64_linux
+#undef PLAT_s390x_linux
+#undef PLAT_mips32_linux
+#undef PLAT_mips64_linux
+#undef PLAT_x86_solaris
+#undef PLAT_amd64_solaris
+
+
+#if defined(__APPLE__) && defined(__i386__)
+#  define PLAT_x86_darwin 1
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define PLAT_amd64_darwin 1
+#elif (defined(__MINGW32__) && !defined(__MINGW64__)) \
+      || defined(__CYGWIN32__)                        \
+      || (defined(_WIN32) && defined(_M_IX86))
+#  define PLAT_x86_win32 1
+#elif defined(__MINGW64__) \
+      || (defined(_WIN64) && defined(_M_X64))
+#  define PLAT_amd64_win64 1
+#elif defined(__linux__) && defined(__i386__)
+#  define PLAT_x86_linux 1
+#elif defined(__linux__) && defined(__x86_64__) && !defined(__ILP32__)
+#  define PLAT_amd64_linux 1
+#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__)
+#  define PLAT_ppc32_linux 1
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2
+/* Big Endian uses ELF version 1 */
+#  define PLAT_ppc64be_linux 1
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2
+/* Little Endian uses ELF version 2 */
+#  define PLAT_ppc64le_linux 1
+#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
+#  define PLAT_arm_linux 1
+#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
+#  define PLAT_arm64_linux 1
+#elif defined(__linux__) && defined(__s390__) && defined(__s390x__)
+#  define PLAT_s390x_linux 1
+#elif defined(__linux__) && defined(__mips__) && (__mips==64)
+#  define PLAT_mips64_linux 1
+#elif defined(__linux__) && defined(__mips__) && (__mips!=64)
+#  define PLAT_mips32_linux 1
+#elif defined(__sun) && defined(__i386__)
+#  define PLAT_x86_solaris 1
+#elif defined(__sun) && defined(__x86_64__)
+#  define PLAT_amd64_solaris 1
+#else
+/* If we're not compiling for our target platform, don't generate
+   any inline asms. */
+#  if !defined(NVALGRIND)
+#    define NVALGRIND 1
+#  endif
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing  */
+/* in here of use to end-users -- skip to the next section.           */
+/* ------------------------------------------------------------------ */
+
+/*
+ * VALGRIND_DO_CLIENT_REQUEST(): a statement that invokes a Valgrind client
+ * request. Accepts both pointers and integers as arguments.
+ *
+ * VALGRIND_DO_CLIENT_REQUEST_STMT(): a statement that invokes a Valgrind
+ * client request that does not return a value.
+ *
+ * VALGRIND_DO_CLIENT_REQUEST_EXPR(): a C expression that invokes a Valgrind
+ * client request and whose value equals the client request result. Accepts
+ * both pointers and integers as arguments. Note that such calls are not
+ * necessarily pure functions -- they may have side effects.
+ */
+
+#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default,            \
+                                   _zzq_request, _zzq_arg1, _zzq_arg2,  \
+                                   _zzq_arg3, _zzq_arg4, _zzq_arg5)     \
+  do { (_zzq_rlval) = VALGRIND_DO_CLIENT_REQUEST_EXPR((_zzq_default),   \
+                        (_zzq_request), (_zzq_arg1), (_zzq_arg2),       \
+                        (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0)
+
+#define VALGRIND_DO_CLIENT_REQUEST_STMT(_zzq_request, _zzq_arg1,        \
+                           _zzq_arg2,  _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+  do { (void) VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                        \
+                        (_zzq_request), (_zzq_arg1), (_zzq_arg2),       \
+                        (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0)
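+
+/* For illustration (an editor's sketch, not part of the upstream
+   header): the _EXPR form yields a value and takes a default for runs
+   outside Valgrind, while the _STMT form discards the result. Tool
+   headers build their user-facing macros on these wrappers, e.g. a
+   query that yields 0 when not running under Valgrind (this is
+   essentially how RUNNING_ON_VALGRIND, defined later in this file,
+   works):
+
+      int on_vg = (int)VALGRIND_DO_CLIENT_REQUEST_EXPR(
+                     0 /* default when not on Valgrind */,
+                     VG_USERREQ__RUNNING_ON_VALGRIND, 0, 0, 0, 0, 0);
+*/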
+
+#if defined(NVALGRIND)
+
+/* Define NVALGRIND to completely remove the Valgrind magic sequence
+   from the compiled code (analogous to NDEBUG's effects on
+   assert()). */
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                               \
+        _zzq_default, _zzq_request,                                    \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)         \
+      (_zzq_default)
+
+#else  /* ! NVALGRIND */
+
+/* The following defines the magic code sequences which the JITter
+   spots and handles magically. Don't look too closely at them as
+   they will rot your brain.
+
+   The assembly code sequences for all architectures are in this one
+   file. This is because this file must be stand-alone, and we don't
+   want to have multiple files.
+
+   For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
+   value gets put in the return slot, so that everything works when
+   this is executed not under Valgrind. Args are passed in a memory
+   block, and so there's no intrinsic limit to the number that could
+   be passed, but it's currently five.
+
+   The macro args are:
+      _zzq_rlval    result lvalue
+      _zzq_default  default value (result returned when running on real CPU)
+      _zzq_request  request code
+      _zzq_arg1..5  request params
+
+   The other two macros are used to support function wrapping, and are
+   a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the
+   guest's NRADDR pseudo-register and whatever other information is
+   needed to safely call the original function from the wrapper: on
+   ppc64-linux, the R2 value at the divert point is also needed. This
+   information is abstracted into a user-visible type, OrigFn.
+
+   VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
+   guest, but guarantees that the branch instruction will not be
+   redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
+   branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a
+   complete inline asm, since it needs to be combined with more magic
+   inline asm stuff to be useful.
+*/
+
+/* ----------------- x86-{linux,darwin,solaris} ---------------- */
+
+#if defined(PLAT_x86_linux)  ||  defined(PLAT_x86_darwin)  \
+    ||  (defined(PLAT_x86_win32) && defined(__GNUC__))     \
+    ||  defined(PLAT_x86_solaris)
+
+typedef
+   struct {
+      unsigned int nraddr; /* where's the code?
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "xchgl %%edi,%%edi\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_x86_linux || PLAT_x86_darwin || (PLAT_x86_win32 && __GNUC__) + || PLAT_x86_solaris */ + +/* ------------------------- x86-Win32 ------------------------- */ + +#if defined(PLAT_x86_win32) && !defined(__GNUC__) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#if defined(_MSC_VER) + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + __asm rol edi, 3 __asm rol edi, 13 \ + __asm rol edi, 29 __asm rol edi, 19 + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + valgrind_do_client_request_expr((uintptr_t)(_zzq_default), \ + (uintptr_t)(_zzq_request), (uintptr_t)(_zzq_arg1), \ + (uintptr_t)(_zzq_arg2), (uintptr_t)(_zzq_arg3), \ + (uintptr_t)(_zzq_arg4), (uintptr_t)(_zzq_arg5)) + +static __inline uintptr_t +valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request, + uintptr_t _zzq_arg1, uintptr_t _zzq_arg2, + uintptr_t _zzq_arg3, uintptr_t _zzq_arg4, + uintptr_t _zzq_arg5) +{ + volatile uintptr_t _zzq_args[6]; + volatile unsigned int _zzq_result; + _zzq_args[0] = (uintptr_t)(_zzq_request); + _zzq_args[1] = (uintptr_t)(_zzq_arg1); + _zzq_args[2] = (uintptr_t)(_zzq_arg2); + _zzq_args[3] = (uintptr_t)(_zzq_arg3); + _zzq_args[4] = (uintptr_t)(_zzq_arg4); + _zzq_args[5] = (uintptr_t)(_zzq_arg5); + __asm { __asm lea eax, _zzq_args __asm mov edx, _zzq_default + __SPECIAL_INSTRUCTION_PREAMBLE + /* %EDX = client_request ( %EAX ) */ + __asm xchg ebx,ebx + __asm mov _zzq_result, edx + } + return _zzq_result; +} + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm { __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + __asm xchg ecx,ecx \ + __asm mov __addr, eax \ + } \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX ERROR + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm { __SPECIAL_INSTRUCTION_PREAMBLE \ + __asm xchg edi,edi \ + } \ + } while (0) + +#else +#error Unsupported compiler. +#endif + +#endif /* PLAT_x86_win32 */ + +/* ----------------- amd64-{linux,darwin,solaris} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) \ + || defined(PLAT_amd64_solaris) \ + || (defined(PLAT_amd64_win64) && defined(__GNUC__)) + +typedef + struct { + unsigned long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "xchgq %%rdi,%%rdi\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin || PLAT_amd64_solaris */ + +/* ------------------------- amd64-Win64 ------------------------- */ + +#if defined(PLAT_amd64_win64) && !defined(__GNUC__) + +#error Unsupported compiler. + +#endif /* PLAT_amd64_win64 */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,31 ; rlwinm 0,0,13,0,31\n\t" \ + "rlwinm 0,0,29,0,31 ; rlwinm 0,0,19,0,31\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64be_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + unsigned long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned long int _zzq_args[6]; \ + unsigned long int _zzq_result; \ + unsigned long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc64be_linux */ + +#if defined(PLAT_ppc64le_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + unsigned long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned long int _zzq_args[6]; \ + unsigned long int _zzq_result; \ + unsigned long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R12 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc64le_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \ + "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("mov r3, %1\n\t" /*default*/ \ + "mov r4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = client_request ( R4 ) */ \ + "orr r10, r10, r10\n\t" \ + "mov %0, r3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "cc","memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = guest_NRADDR */ \ + "orr r11, r11, r11\n\t" \ + "mov %0, r3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R4 */ \ + "orr r12, r12, r12\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "orr r9, r9, r9\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------ arm64-linux ------------------------- */ + +#if defined(PLAT_arm64_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "ror x12, x12, #3 ; ror x12, x12, #13 \n\t" \ + "ror x12, x12, #51 ; ror x12, x12, #61 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile("mov x3, %1\n\t" /*default*/ \ + "mov x4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* X3 = client_request ( X4 ) */ \ + "orr x10, x10, x10\n\t" \ + "mov %0, x3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" ((unsigned long int)(_zzq_default)), \ + "r" (&_zzq_args[0]) \ + : "cc","memory", "x3", "x4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* X3 = guest_NRADDR */ \ + "orr x11, x11, x11\n\t" \ + "mov %0, x3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "x3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir X8 */ \ + "orr x12, x12, x12\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "orr x9, x9, x9\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_arm64_linux */ + +/* ------------------------ s390x-linux ------------------------ */ + +#if defined(PLAT_s390x_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + } + OrigFn; + +/* __SPECIAL_INSTRUCTION_PREAMBLE will be used to identify Valgrind specific + * code. This detection is implemented in platform specific toIR.c + * (e.g. VEX/priv/guest_s390_decoder.c). 
+ */ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "lr 15,15\n\t" \ + "lr 1,1\n\t" \ + "lr 2,2\n\t" \ + "lr 3,3\n\t" + +#define __CLIENT_REQUEST_CODE "lr 2,2\n\t" +#define __GET_NR_CONTEXT_CODE "lr 3,3\n\t" +#define __CALL_NO_REDIR_CODE "lr 4,4\n\t" +#define __VEX_INJECT_IR_CODE "lr 5,5\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile(/* r2 = args */ \ + "lgr 2,%1\n\t" \ + /* r3 = default */ \ + "lgr 3,%2\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CLIENT_REQUEST_CODE \ + /* results = r3 */ \ + "lgr %0, 3\n\t" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "2", "3", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + __GET_NR_CONTEXT_CODE \ + "lgr %0, 3\n\t" \ + : "=a" (__addr) \ + : \ + : "cc", "3", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_R1 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CALL_NO_REDIR_CODE + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + __VEX_INJECT_IR_CODE); \ + } while (0) + +#endif /* PLAT_s390x_linux */ + +/* ------------------------- mips32-linux ---------------- */ + +#if defined(PLAT_mips32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +/* .word 0x342 + * .word 0x742 + * .word 0xC2 + * .word 0x4C2*/ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "srl $0, $0, 13\n\t" \ + "srl $0, $0, 29\n\t" \ + "srl $0, $0, 3\n\t" \ + "srl $0, $0, 19\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("move $11, %1\n\t" /*default*/ \ + "move $12, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* T3 = client_request ( T4 ) */ \ + "or $13, $13, $13\n\t" \ + "move %0, $11\n\t" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "$11", "$12", "memory"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %t9 = guest_NRADDR */ \ + "or $14, $14, $14\n\t" \ + "move %0, $11" /*result*/ \ + : "=r" (__addr) \ + : \ + : "$11" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_T9 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%t9 */ \ + "or $15, $15, $15\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or $11, $11, $11\n\t" \ + ); \ + } while (0) + + +#endif /* PLAT_mips32_linux */ + +/* ------------------------- mips64-linux ---------------- */ + +#if defined(PLAT_mips64_linux) + +typedef + struct { + unsigned long nraddr; /* where's the code? 
*/ + } + OrigFn; + +/* dsll $0,$0, 3 + * dsll $0,$0, 13 + * dsll $0,$0, 29 + * dsll $0,$0, 19*/ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "dsll $0,$0, 3 ; dsll $0,$0,13\n\t" \ + "dsll $0,$0,29 ; dsll $0,$0,19\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile("move $11, %1\n\t" /*default*/ \ + "move $12, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* $11 = client_request ( $12 ) */ \ + "or $13, $13, $13\n\t" \ + "move %0, $11\n\t" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "$11", "$12", "memory"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* $11 = guest_NRADDR */ \ + "or $14, $14, $14\n\t" \ + "move %0, $11" /*result*/ \ + : "=r" (__addr) \ + : \ + : "$11"); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_T9 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir $25 */ \ + "or $15, $15, $15\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or $11, $11, $11\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_mips64_linux */ + +/* Insert assembly code for other platforms here... */ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. */ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a. appalling-hack) macros for doing + guaranteed-no-redirection calls, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc., functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. NOTE also: inserts + the default behaviour equivalence class tag "0000" into the name. + See pub_tool_redir.h for details -- normally you don't need to + think about this, though. */ + +/* Use an extra level of macroisation so as to ensure the soname/fnname + args are fully macro-expanded before pasting them together. 
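+
+   (A reminder on the names themselves: "Z" in the soname/fnname args
+   acts as an escape character -- for example "Zd" encodes '.' and
+   "Za" encodes '*', so "libcZdsoZa" reads as "libc.so.*".  The ZU
+   variants take an unencoded fnname, the ZZ variants a Z-encoded
+   one; see pub_tool_redir.h for the full encoding.)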
*/ +#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgw00000ZU_,soname,_,fnname) + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgw00000ZZ_,soname,_,fnname) + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. */ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Also provide end-user facilities for function replacement, rather + than wrapping. A replacement function differs from a wrapper in + that it has no way to get hold of the original function being + called, and hence no way to call onwards to it. In a replacement + function, VALGRIND_GET_ORIG_FN always returns zero. */ + +#define I_REPLACE_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgr00000ZU_,soname,_,fnname) + +#define I_REPLACE_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgr00000ZZ_,soname,_,fnname) + +/* Derivatives of the main macros below, for calling functions + returning void. */ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0) + +#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0) + +#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0) + +#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0) + +/* ----------------- x86-{linux,darwin,solaris} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) \ + || defined(PLAT_x86_solaris) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "movl %%esp,%%edi\n\t" \ + "andl $0xfffffff0,%%esp\n\t" +#define VALGRIND_RESTORE_STACK \ + "movl %%edi,%%esp\n\t" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. 
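+
+   As an illustrative sketch only (adapted from the Valgrind manual;
+   "foo" names some hypothetical two-argument function in the main
+   executable, whose soname is written as NONE), the wrapping pieces
+   above combine with these CALL_FN_ macros like so:
+
+      int I_WRAP_SONAME_FNNAME_ZU(NONE, foo)(int x, int y)
+      {
+         int    r;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);    // context of the real foo
+         CALL_FN_W_WW(r, fn, x, y);   // call it, with no redirection
+         return r;
+      }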
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned 
long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + 
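/* 8 word args = 32 bytes pushed, already a multiple of 16, so this arity needs no "subl" padding */ \
+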
"pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ 
volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_x86_linux || PLAT_x86_darwin || PLAT_x86_solaris */ + +/* ---------------- amd64-{linux,darwin,solaris} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) \ + || defined(PLAT_amd64_solaris) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* This is all pretty complex. It's so as to make stack unwinding + work reliably. See bug 243270. The basic problem is the sub and + add of 128 of %rsp in all of the following macros. If gcc believes + the CFA is in %rsp, then unwinding may fail, because what's at the + CFA is not what gcc "expected" when it constructs the CFIs for the + places where the macros are instantiated. + + But we can't just add a CFI annotation to increase the CFA offset + by 128, to match the sub of 128 from %rsp, because we don't know + whether gcc has chosen %rsp as the CFA at that point, or whether it + has chosen some other register (eg, %rbp). In the latter case, + adding a CFI annotation to change the CFA offset is simply wrong. + + So the solution is to get hold of the CFA using + __builtin_dwarf_cfa(), put it in a known register, and add a + CFI annotation to say what the register is. We choose %rbp for + this (perhaps perversely), because: + + (1) %rbp is already subject to unwinding. 
If a new register was + chosen then the unwinder would have to unwind it in all stack + traces, which is expensive, and + + (2) %rbp is already subject to precise exception updates in the + JIT. If a new register was chosen, we'd have to have precise + exceptions for it too, which reduces performance of the + generated code. + + However .. one extra complication. We can't just whack the result + of __builtin_dwarf_cfa() into %rbp and then add %rbp to the + list of trashed registers at the end of the inline assembly + fragments; gcc won't allow %rbp to appear in that list. Hence + instead we need to stash %rbp in %r15 for the duration of the asm, + and say that %r15 is trashed instead. gcc seems happy to go with + that. + + Oh .. and this all needs to be conditionalised so that it is + unchanged from before this commit, when compiled with older gccs + that don't support __builtin_dwarf_cfa. Furthermore, since + this header file is freestanding, it has to be independent of + config.h, and so the following conditionalisation cannot depend on + configure time checks. + + Although it's not clear from + 'defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM)', + this expression excludes Darwin. + .cfi directives in Darwin assembly appear to be completely + different and I haven't investigated how they work. + + For even more entertainment value, note we have to use the + completely undocumented __builtin_dwarf_cfa(), which appears to + really compute the CFA, whereas __builtin_frame_address(0) claims + to but actually doesn't. See + https://bugs.kde.org/show_bug.cgi?id=243270#c47 +*/ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"r"(__builtin_dwarf_cfa()) +# define VALGRIND_CFI_PROLOGUE \ + "movq %%rbp, %%r15\n\t" \ + "movq %2, %%rbp\n\t" \ + ".cfi_remember_state\n\t" \ + ".cfi_def_cfa rbp, 0\n\t" +# define VALGRIND_CFI_EPILOGUE \ + "movq %%r15, %%rbp\n\t" \ + ".cfi_restore_state\n\t" +#else +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE +# define VALGRIND_CFI_EPILOGUE +#endif + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "movq %%rsp,%%r14\n\t" \ + "andq $0xfffffffffffffff0,%%rsp\n\t" +#define VALGRIND_RESTORE_STACK \ + "movq %%r14,%%rsp\n\t" + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros. In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastiness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it. + Because gcc does not know about the hidden call, it may allocate + that local in the redzone. Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. 
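+
+ (For reference: the SysV AMD64 ABI defines the 128 bytes below %rsp
+ as a "red zone" which leaf code may freely use without adjusting
+ %rsp; that is where gcc may park such a local, and it is why the
+ magic constant in these macros is exactly 128.)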
+ + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = 
(unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ 
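+                   /* note: the "subq $136" above, not 128 -- with the single pushq of arg7, the total 144-byte drop keeps %rsp 16-byte aligned at the call point */ \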
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ 
+ VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) 
__FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin || PLAT_amd64_solaris */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" \ + "rlwinm 1,1,0,0,27\n\t" +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, 
"r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; 
\ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64be_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" \ + "rldicr 1,1,0,59\n\t" +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
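+
+   They also have to cope with the big-endian (ELFv1) ABI's
+   table-of-contents register: r2 must hold the callee's TOC across
+   the call and the caller's TOC afterwards.  That is why OrigFn
+   carries an r2 field on this platform, and why every sequence below
+   saves, switches and restores the TOC pointer around the hidden
+   call.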
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 
2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 
48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ 
\ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + 
_argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64be_linux */ + +/* 
------------------------- ppc64le-linux ----------------------- */ +#if defined(PLAT_ppc64le_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" \ + "rldicr 1,1,0,59\n\t" +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } 
while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + 
volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 
24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 
48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(12)\n\t" \ + "std 3,112(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(12)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(12)\n\t" \ + "std 3,112(1)\n\t" \ + /* arg10 */ \ 
+ "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64le_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4", "r12", "r14" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +/* This is a bit tricky. We store the original stack pointer in r10 + as it is callee-saves. gcc doesn't allow the use of r11 for some + reason. Also, we can't directly "bic" the stack pointer in thumb + mode since r13 isn't an allowed register number in that context. + So use r4 as a temporary, since that is about to get trashed + anyway, just after each use of this macro. Side effect is we need + to be very careful about any future changes, since + VALGRIND_ALIGN_STACK simply assumes r4 is usable. */ +#define VALGRIND_ALIGN_STACK \ + "mov r10, sp\n\t" \ + "mov r4, sp\n\t" \ + "bic r4, r4, #7\n\t" \ + "mov sp, r4\n\t" +#define VALGRIND_RESTORE_STACK \ + "mov sp, r10\n\t" + +/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, 
arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned 
long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "push {r0, r1, r2, r3} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #40] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile 
unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "ldr r2, [%1, #48] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------ arm64-linux ------------------------ */ + +#if defined(PLAT_arm64_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "x0", "x1", "x2", "x3","x4", "x5", "x6", "x7", "x8", "x9", \ + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", \ + "x18", "x19", "x20", "x30", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", \ + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", \ + "v26", "v27", "v28", "v29", "v30", "v31" + +/* x21 is callee-saved, so we can use it to save and restore SP around + the hidden call. 
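+   Otherwise the scheme mirrors arm-linux, scaled for
+   sizeof(unsigned long) == 8: nraddr sits at [%1] and argN at
+   [%1, #8*N], with x0-x7 carrying the first eight arguments.
+   Arguments beyond eight are staged through x8 into the spill area
+   before the target address itself is loaded into x8; the
+   "sub sp, sp, #0x20" (one or two stack slots) and "#0x30" (three
+   or four) adjustments keep that area a multiple of 16 bytes, so sp
+   retains the 16-byte alignment AAPCS64 demands.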
*/ +#define VALGRIND_ALIGN_STACK \ + "mov x21, sp\n\t" \ + "bic sp, x21, #15\n\t" +#define VALGRIND_RESTORE_STACK \ + "mov sp, x21\n\t" + +/* These CALL_FN_ macros assume that on arm64-linux, + sizeof(unsigned long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : 
/*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned 
long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x20 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x20 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x30 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1, #88] \n\t" \ + "str x8, [sp, #16] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11, \ + arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x30 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1, #88] \n\t" \ + "str x8, [sp, #16] \n\t" \ + "ldr x8, [%1, #96] \n\t" \ + "str x8, [sp, #24] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm64_linux */ + +/* ------------------------- s390x-linux ------------------------- */ + +#if defined(PLAT_s390x_linux) + +/* Similar workaround as amd64 (see above), but we use r11 as frame + pointer and save the old r11 in r7. r11 might be used for + argvec, therefore we copy argvec in r1 since r1 is clobbered + after the call anyway. 
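
+   (As a reminder of how these CALL_FN_ macros are meant to be driven,
+   regardless of architecture: a minimal sketch, using the wrapping
+   macros defined earlier in this header; the wrapped function name
+   "foo" is invented for illustration.  A function wrapper fetches the
+   original function with VALGRIND_GET_ORIG_FN and then dispatches
+   through the CALL_FN_ macro matching its arity:
+
+      int I_WRAP_SONAME_FNNAME_ZU(NONE, foo)(int x)
+      {
+         int    r;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);    /* capture the real "foo" */
+         CALL_FN_W_W(r, fn, x);       /* hidden, non-redirected call */
+         return r;
+      }
+   )
+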
*/ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"d"(__builtin_dwarf_cfa()) +# define VALGRIND_CFI_PROLOGUE \ + ".cfi_remember_state\n\t" \ + "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */ \ + "lgr 7,11\n\t" \ + "lgr 11,%2\n\t" \ + ".cfi_def_cfa r11, 0\n\t" +# define VALGRIND_CFI_EPILOGUE \ + "lgr 11, 7\n\t" \ + ".cfi_restore_state\n\t" +#else +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE \ + "lgr 1,%1\n\t" +# define VALGRIND_CFI_EPILOGUE +#endif + +/* Nb: On s390 the stack pointer is properly aligned *at all times* + according to the s390 GCC maintainer. (The ABI specification is not + precise in this regard.) Therefore, VALGRIND_ALIGN_STACK and + VALGRIND_RESTORE_STACK are not defined here. */ + +/* These regs are trashed by the hidden call. Note that we overwrite + r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the + function a proper return address. All others are ABI defined call + clobbers. */ +#define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \ + "f0","f1","f2","f3","f4","f5","f6","f7" + +/* Nb: Although r11 is modified in the asm snippets below (inside + VALGRIND_CFI_PROLOGUE) it is not listed in the clobber section, for + two reasons: + (1) r11 is restored in VALGRIND_CFI_EPILOGUE, so effectively it is not + modified + (2) GCC will complain that r11 cannot appear inside a clobber section, + when compiled with -O -fno-omit-frame-pointer + */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 1, 0(1)\n\t" /* target->r1 */ \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "d" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +/* The call abi has the arguments in r2-r6 and stack */ +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); 
\ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-168\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,168\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-176\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,176\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-184\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,184\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-192\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,192\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ 
+ _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-200\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,200\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-208\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,208\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-216\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "mvc 208(8,15), 96(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" 
\ + "aghi 15,216\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + + +#endif /* PLAT_s390x_linux */ + +/* ------------------------- mips32-linux ----------------------- */ + +#if defined(PLAT_mips32_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6", \ +"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ +"$25", "$31" + +/* These CALL_FN_ macros assume that on mips-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16\n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" /* arg1*/ \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + 
VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 24\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 24 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 32\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "nop\n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 32 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define 
CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 32\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 32 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 40\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 40 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 40\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) 
\n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 40 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 48\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 48 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 48\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 44(%1) \n\t" \ + "sw $4, 40($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 48 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + 
"move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 56\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 44(%1) \n\t" \ + "sw $4, 40($29) \n\t" \ + "lw $4, 48(%1) \n\t" \ + "sw $4, 44($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 56 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_mips32_linux */ + +/* ------------------------- mips64-linux ------------------------- */ + +#if defined(PLAT_mips64_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6", \ +"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ +"$25", "$31" + +/* These CALL_FN_ macros assume that on mips64-linux, + sizeof(long long) == 8. 
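
+   As with the other architectures above, each macro hands the asm one
+   contiguous block of memory laid out as { target address, arg1, ...,
+   argN } in 8-byte slots, e.g. for a three-argument call:
+
+      _argvec: [ nraddr | arg1 | arg2 | arg3 ]
+      offset:      0        8     16     24
+
+   which is why the loads below fetch argN from 8*N(%1) and the target
+   address from 0(%1).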
*/ + +#define MIPS64_LONG2REG_CAST(x) ((long long)(long)x) + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[1]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + __asm__ volatile( \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[2]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" /* arg1*/ \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[3]; \ + volatile unsigned long long _res; \ + _argvec[0] = _orig.nraddr; \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[4]; \ + volatile unsigned long long _res; \ + _argvec[0] = _orig.nraddr; \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[5]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[6]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = 
MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[7]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[8]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[9]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \ + __asm__ volatile( \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $11, 64(%1)\n\t" \ + "ld $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS 
\ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[10]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \ + _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \ + __asm__ volatile( \ + "dsubu $29, $29, 8\n\t" \ + "ld $4, 72(%1)\n\t" \ + "sd $4, 0($29)\n\t" \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $11, 64(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "daddu $29, $29, 8\n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[11]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \ + _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \ + _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \ + __asm__ volatile( \ + "dsubu $29, $29, 16\n\t" \ + "ld $4, 72(%1)\n\t" \ + "sd $4, 0($29)\n\t" \ + "ld $4, 80(%1)\n\t" \ + "sd $4, 8($29)\n\t" \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $11, 64(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "daddu $29, $29, 16\n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[12]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \ + _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \ + _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \ + _argvec[11] = MIPS64_LONG2REG_CAST(arg11); \ + __asm__ volatile( \ + "dsubu $29, $29, 24\n\t" \ + "ld $4, 72(%1)\n\t" \ + "sd $4, 0($29)\n\t" \ 
+ "ld $4, 80(%1)\n\t" \ + "sd $4, 8($29)\n\t" \ + "ld $4, 88(%1)\n\t" \ + "sd $4, 16($29)\n\t" \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $11, 64(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "daddu $29, $29, 24\n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long long _argvec[13]; \ + volatile unsigned long long _res; \ + _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \ + _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \ + _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \ + _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \ + _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \ + _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \ + _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \ + _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \ + _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \ + _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \ + _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \ + _argvec[11] = MIPS64_LONG2REG_CAST(arg11); \ + _argvec[12] = MIPS64_LONG2REG_CAST(arg12); \ + __asm__ volatile( \ + "dsubu $29, $29, 32\n\t" \ + "ld $4, 72(%1)\n\t" \ + "sd $4, 0($29)\n\t" \ + "ld $4, 80(%1)\n\t" \ + "sd $4, 8($29)\n\t" \ + "ld $4, 88(%1)\n\t" \ + "sd $4, 16($29)\n\t" \ + "ld $4, 96(%1)\n\t" \ + "sd $4, 24($29)\n\t" \ + "ld $4, 8(%1)\n\t" \ + "ld $5, 16(%1)\n\t" \ + "ld $6, 24(%1)\n\t" \ + "ld $7, 32(%1)\n\t" \ + "ld $8, 40(%1)\n\t" \ + "ld $9, 48(%1)\n\t" \ + "ld $10, 56(%1)\n\t" \ + "ld $11, 64(%1)\n\t" \ + "ld $25, 0(%1)\n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "daddu $29, $29, 32\n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) (long)_res; \ + } while (0) + +#endif /* PLAT_mips64_linux */ + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE NUMERIC VALUES OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end of the most + relevant group. */ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! 
So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* Allows the client program and/or gdbserver to execute a monitor + command. */ + VG_USERREQ__GDB_MONITOR_COMMAND = 0x1202, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__RESIZEINPLACE_BLOCK = 0x130b, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printfs to valgrind log. */ + /* The first two pass the va_list argument by value, which + assumes it is the same size as or smaller than a UWord, + which generally isn't the case. Hence are deprecated. + The second two pass the vargs by reference and so are + immune to this problem. */ + /* both :: char* fmt, va_list vargs (DEPRECATED) */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + /* both :: char* fmt, va_list* vargs */ + VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404, + + /* Stack support. */ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503, + + /* Wine support */ + VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601, + + /* Querying of debug info. */ + VG_USERREQ__MAP_IP_TO_SRCLOC = 0x1701, + + /* Disable/enable error reporting level. Takes a single + Word arg which is the delta to this thread's error + disablement indicator. Hence 1 disables or further + disables errors, and -1 moves back towards enablement. + Other values are not allowed. */ + VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801, + + /* Some requests used for Valgrind internal, such as + self-test or self-hosting. */ + /* Initialise IR injection */ + VG_USERREQ__VEX_INIT_FOR_IRI = 0x1901, + /* Used by Inner Valgrind to inform Outer Valgrind where to + find the list of inner guest threads */ + VG_USERREQ__INNER_THREADS = 0x1902 + } Vg_ClientRequest; + +#if !defined(__GNUC__) +# define __extension__ /* */ +#endif + + +/* Returns the number of Valgrinds this code is running under. That + is, 0 if running natively, 1 if running under Valgrind, 2 if + running under Valgrind which is running under another Valgrind, + etc. */ +#define RUNNING_ON_VALGRIND \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* if not */, \ + VG_USERREQ__RUNNING_ON_VALGRIND, \ + 0, 0, 0, 0, 0) \ + + +/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + + _qzz_len - 1]. Useful if you are debugging a JITter or some such, + since it provides a way to make sure valgrind will retranslate the + invalidated area. Returns no value. 
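
+   A typical pattern in a JITter (a sketch only; emit_code() stands in
+   for whatever routine regenerates the machine code):
+
+      emit_code(buf, len);                       /* rewrite the buffer */
+      VALGRIND_DISCARD_TRANSLATIONS(buf, len);   /* force retranslation */
+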
*/ +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DISCARD_TRANSLATIONS, \ + _qzz_addr, _qzz_len, 0, 0, 0) + +#define VALGRIND_INNER_THREADS(_qzz_addr) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__INNER_THREADS, \ + _qzz_addr, 0, 0, 0, 0) + + +/* These requests are for getting Valgrind itself to print something. + Possibly with a backtrace. This is a really ugly hack. The return value + is the number of characters printed, excluding the "**** " part at the + start and the backtrace (if present). */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER) +/* Modern GCC will optimize the static routine out if unused, + and unused attribute will shut down warnings about it. */ +static int VALGRIND_PRINTF(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +#endif +static int +#if defined(_MSC_VER) +__inline +#endif +VALGRIND_PRINTF(const char *format, ...) +{ +#if defined(NVALGRIND) + (void)format; + return 0; +#else /* NVALGRIND */ +#if defined(_MSC_VER) || defined(__MINGW64__) + uintptr_t _qzz_res; +#else + unsigned long _qzz_res; +#endif + va_list vargs; + va_start(vargs, format); +#if defined(_MSC_VER) || defined(__MINGW64__) + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_VALIST_BY_REF, + (uintptr_t)format, + (uintptr_t)&vargs, + 0, 0, 0); +#else + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); +#endif + va_end(vargs); + return (int)_qzz_res; +#endif /* NVALGRIND */ +} + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER) +static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +#endif +static int +#if defined(_MSC_VER) +__inline +#endif +VALGRIND_PRINTF_BACKTRACE(const char *format, ...) +{ +#if defined(NVALGRIND) + (void)format; + return 0; +#else /* NVALGRIND */ +#if defined(_MSC_VER) || defined(__MINGW64__) + uintptr_t _qzz_res; +#else + unsigned long _qzz_res; +#endif + va_list vargs; + va_start(vargs, format); +#if defined(_MSC_VER) || defined(__MINGW64__) + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, + (uintptr_t)format, + (uintptr_t)&vargs, + 0, 0, 0); +#else + _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); +#endif + va_end(vargs); + return (int)_qzz_res; +#endif /* NVALGRIND */ +} + + +/* These requests allow control to move from the simulated CPU to the + real CPU, calling an arbitrary function. + + Note that the current ThreadId is inserted as the first argument. + So this call: + + VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) + + requires f to have this signature: + + Word f(Word tid, Word arg1, Word arg2) + + where "Word" is a word-sized type. + + Note that these client requests are not entirely reliable. For example, + if you call a function with them that subsequently calls printf(), + there's a high chance Valgrind will crash. Generally, your prospects of + these working are made higher if the called function does not refer to + any global variables, and does not refer to any libc or other functions + (printf et al). Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past. 
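
+   A minimal sketch (the function name is invented, and "long" stands in
+   for the word-sized type; note the extra leading ThreadId argument, as
+   described above):
+
+      static long add_on_real_cpu(long tid, long a, long b)
+      {
+         return a + b;             /* runs on the real CPU */
+      }
+      ...
+      long sum = (long)VALGRIND_NON_SIMD_CALL2(add_on_real_cpu, 3, 4);
+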
+*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0) + + +/* Counts the number of errors that have been recorded by a tool. Nb: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. */ +#define VALGRIND_COUNT_ERRORS \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0) + +/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing + when heap blocks are allocated in order to give accurate results. This + happens automatically for the standard allocator functions such as + malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete, + delete[], etc. + + But if your program uses a custom allocator, this doesn't automatically + happen, and Valgrind will not do as well. For example, if you allocate + superblocks with mmap() and then allocate chunks of the superblocks, all + Valgrind's observations will be at the mmap() level and it won't know that + the chunks should be considered separate entities. In Memcheck's case, + that means you probably won't get heap block overrun detection (because + there won't be redzones marked as unaddressable) and you definitely won't + get any leak detection. + + The following client requests allow a custom allocator to be annotated so + that it can be handled accurately by Valgrind. + + VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated + by a malloc()-like function. For Memcheck (an illustrative case), this + does two things: + + - It records that the block has been allocated. This means any addresses + within the block mentioned in error messages will be + identified as belonging to the block. It also means that if the block + isn't freed it will be detected by the leak checker. + + - It marks the block as being addressable and undefined (if 'is_zeroed' is + not set), or addressable and defined (if 'is_zeroed' is set). This + controls how accesses to the block by the program are handled. + + 'addr' is the start of the usable block (i.e. after any + redzone), 'sizeB' is its size. 'rzB' is the redzone size if the allocator + can apply redzones -- these are blocks of padding at the start and end of + each block. Adding redzones is recommended as it makes it much more likely + Valgrind will spot block overruns. `is_zeroed' indicates if the memory is + zeroed (or filled with another predictable value), as is the case for + calloc(). + + VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a + heap block -- that will be used by the client program -- is allocated.
+ It's best to put it at the outermost level of the allocator if possible; + for example, if you have a function my_alloc() which calls + internal_alloc(), and the client request is put inside internal_alloc(), + stack traces relating to the heap block will contain entries for both + my_alloc() and internal_alloc(), which is probably not what you want. + + For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out + custom blocks from within a heap block, B, that has been allocated with + malloc/calloc/new/etc, then block B will be *ignored* during leak-checking + -- the custom blocks will take precedence. + + VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For + Memcheck, it does two things: + + - It records that the block has been deallocated. This assumes that the + block was annotated as having been allocated via + VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. + + - It marks the block as being unaddressable. + + VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a + heap block is deallocated. + + VALGRIND_RESIZEINPLACE_BLOCK informs a tool about reallocation. For + Memcheck, it does four things: + + - It records that the size of a block has been changed. This assumes that + the block was annotated as having been allocated via + VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. + + - If the block shrunk, it marks the freed memory as being unaddressable. + + - If the block grew, it marks the new area as undefined and defines a red + zone past the end of the new block. + + - The V-bits of the overlap between the old and the new block are preserved. + + VALGRIND_RESIZEINPLACE_BLOCK should be put after allocation of the new block + and before deallocation of the old block. + + In many cases, these three client requests will not be enough to get your + allocator working well with Memcheck. More specifically, if your allocator + writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call + will be necessary to mark the memory as addressable just before the zeroing + occurs, otherwise you'll get a lot of invalid write errors. For example, + you'll need to do this if your allocator recycles freed blocks, but it + zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK). + Alternatively, if your allocator reuses freed blocks for allocator-internal + data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary. + + Really, what's happening is a blurring of the lines between the client + program and the allocator... after VALGRIND_FREELIKE_BLOCK is called, the + memory should be considered unaddressable to the client program, but the + allocator knows more than the rest of the client program and so may be able + to safely access it. Extra client requests are necessary for Valgrind to + understand the distinction between the allocator and the rest of the + program. + + Ignored if addr == 0. +*/ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0) + +/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details. + Ignored if addr == 0. +*/ +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__RESIZEINPLACE_BLOCK, \ + addr, oldSizeB, newSizeB, rzB, 0) + +/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details. + Ignored if addr == 0. 
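+
+   As an illustrative sketch (my_alloc, my_free and the superblock helpers
+   are hypothetical names), a custom allocator might pair the two requests
+   like this, with rzB and is_zeroed both 0:
+
+      void *my_alloc(size_t n) {
+         void *p = carve_from_superblock(n);
+         VALGRIND_MALLOCLIKE_BLOCK(p, n, 0, 0);
+         return p;
+      }
+
+      void my_free(void *p) {
+         VALGRIND_FREELIKE_BLOCK(p, 0);
+         return_to_superblock(p);
+      }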
+*/ +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__FREELIKE_BLOCK, \ + addr, rzB, 0, 0, 0) + +/* Create a memory pool. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0) + +/* Create a memory pool with some flags specifying extended behaviour. + When flags is zero, the behaviour is identical to VALGRIND_CREATE_MEMPOOL. + + The flag VALGRIND_MEMPOOL_METAPOOL specifies that the pieces of memory + associated with the pool using VALGRIND_MEMPOOL_ALLOC will be used + by the application as superblocks to dole out MALLOC_LIKE blocks using + VALGRIND_MALLOCLIKE_BLOCK. In other words, a meta pool is a "two-level" + pool: the first level is the blocks described by VALGRIND_MEMPOOL_ALLOC. + The second-level blocks are described using VALGRIND_MALLOCLIKE_BLOCK. + Note that the association between the pool and the second-level blocks + is implicit: second-level blocks will be located inside first-level + blocks. It is necessary to use the VALGRIND_MEMPOOL_METAPOOL flag + for such two-level pools, as otherwise valgrind will detect overlapping + memory blocks, and will abort execution (e.g. during leak search). + + Such a meta pool can also be marked as an 'auto free' pool using the flag + VALGRIND_MEMPOOL_AUTO_FREE, which must be OR-ed together with the + VALGRIND_MEMPOOL_METAPOOL flag. For an 'auto free' pool, VALGRIND_MEMPOOL_FREE + will automatically free the second-level blocks that are contained + inside the first-level block freed with VALGRIND_MEMPOOL_FREE. + In other words, calling VALGRIND_MEMPOOL_FREE will cause implicit calls + to VALGRIND_FREELIKE_BLOCK for all the second-level blocks included + in the first-level block. + Note: it is an error to use the VALGRIND_MEMPOOL_AUTO_FREE flag + without the VALGRIND_MEMPOOL_METAPOOL flag. +*/ +#define VALGRIND_MEMPOOL_AUTO_FREE 1 +#define VALGRIND_MEMPOOL_METAPOOL 2 +#define VALGRIND_CREATE_MEMPOOL_EXT(pool, rzB, is_zeroed, flags) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, flags, 0) + +/* Destroy a memory pool. */ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0) + +/* Associate a piece of memory with a memory pool. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0) + +/* Disassociate a piece of memory from a memory pool. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0) + +/* Disassociate any pieces outside a particular range. */ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0) + +/* Tell the tool that a memory pool previously anchored at address poolA + has moved to anchor address poolB. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0) + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0) + +/* Return 1 if a mempool exists, else 0. */ +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0) + +/* Mark a piece of memory as being a stack.
Returns a stack id. + start is the lowest addressable stack byte, end is the highest + addressable stack byte. */ +#define VALGRIND_STACK_REGISTER(start, end) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0) + +/* Change the start and end address of the stack id. + start is the new lowest addressable stack byte, end is the new highest + addressable stack byte. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0) + +/* Load PDB debug info for Wine PE image_map. */ +#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__LOAD_PDB_DEBUGINFO, \ + fd, ptr, total_size, delta, 0) + +/* Map a code address to a source file name and line number. buf64 + must point to a 64-byte buffer in the caller's address space. The + result will be dumped in there and is guaranteed to be zero + terminated. If no info is found, the first byte is set to zero. */ +#define VALGRIND_MAP_IP_TO_SRCLOC(addr, buf64) \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__MAP_IP_TO_SRCLOC, \ + addr, buf64, 0, 0, 0) + +/* Disable error reporting for this thread. Behaves in a stack like + way, so you can safely call this multiple times provided that + VALGRIND_ENABLE_ERROR_REPORTING is called the same number of times + to re-enable reporting. The first call of this macro disables + reporting. Subsequent calls have no effect except to increase the + number of VALGRIND_ENABLE_ERROR_REPORTING calls needed to re-enable + reporting. Child threads do not inherit this setting from their + parents -- they are always created with reporting enabled. */ +#define VALGRIND_DISABLE_ERROR_REPORTING \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \ + 1, 0, 0, 0, 0) + +/* Re-enable error reporting, as per comments on + VALGRIND_DISABLE_ERROR_REPORTING. */ +#define VALGRIND_ENABLE_ERROR_REPORTING \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \ + -1, 0, 0, 0, 0) + +/* Execute a monitor command from the client program. + If a connection is opened with GDB, the output will be sent + according to the output mode set for vgdb. + If no connection is opened, output will go to the log output. + Returns 1 if command not recognised, 0 otherwise. 
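+
+   For instance (illustrative; the command string is interpreted by the
+   tool or by vgdb), a client could list its open file descriptors with:
+
+      VALGRIND_MONITOR_COMMAND("v.info open_fds");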
*/ +#define VALGRIND_MONITOR_COMMAND(command) \ + VALGRIND_DO_CLIENT_REQUEST_EXPR(0, VG_USERREQ__GDB_MONITOR_COMMAND, \ + command, 0, 0, 0, 0) + + +#undef PLAT_x86_darwin +#undef PLAT_amd64_darwin +#undef PLAT_x86_win32 +#undef PLAT_amd64_win64 +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64be_linux +#undef PLAT_ppc64le_linux +#undef PLAT_arm_linux +#undef PLAT_s390x_linux +#undef PLAT_mips32_linux +#undef PLAT_mips64_linux +#undef PLAT_x86_solaris +#undef PLAT_amd64_solaris + +#endif /* __VALGRIND_H */ diff --git a/src/pmdk/src/core/valgrind_internal.h b/src/pmdk/src/core/valgrind_internal.h new file mode 100644 index 000000000..910421a9c --- /dev/null +++ b/src/pmdk/src/core/valgrind_internal.h @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * valgrind_internal.h -- internal definitions for valgrind macros + */ + +#ifndef PMDK_VALGRIND_INTERNAL_H +#define PMDK_VALGRIND_INTERNAL_H 1 + +#if !defined(_WIN32) && !defined(__FreeBSD__) +#ifndef VALGRIND_ENABLED +#define VALGRIND_ENABLED 1 +#endif +#endif + +#if VALGRIND_ENABLED +#define VG_PMEMCHECK_ENABLED 1 +#define VG_HELGRIND_ENABLED 1 +#define VG_MEMCHECK_ENABLED 1 +#define VG_DRD_ENABLED 1 +#endif + +#if VG_PMEMCHECK_ENABLED || VG_HELGRIND_ENABLED || VG_MEMCHECK_ENABLED || \ + VG_DRD_ENABLED +#define ANY_VG_TOOL_ENABLED 1 +#else +#define ANY_VG_TOOL_ENABLED 0 +#endif + +#if ANY_VG_TOOL_ENABLED +extern unsigned _On_valgrind; +#define On_valgrind __builtin_expect(_On_valgrind, 0) +#include "valgrind/valgrind.h" +#else +#define On_valgrind (0) +#endif + +#if VG_HELGRIND_ENABLED +extern unsigned _On_helgrind; +#define On_helgrind __builtin_expect(_On_helgrind, 0) +#include "valgrind/helgrind.h" +#else +#define On_helgrind (0) +#endif + +#if VG_DRD_ENABLED +extern unsigned _On_drd; +#define On_drd __builtin_expect(_On_drd, 0) +#include "valgrind/drd.h" +#else +#define On_drd (0) +#endif + +#if VG_HELGRIND_ENABLED || VG_DRD_ENABLED + +extern unsigned _On_drd_or_hg; +#define On_drd_or_hg __builtin_expect(_On_drd_or_hg, 0) + +#define VALGRIND_ANNOTATE_HAPPENS_BEFORE(obj) do {\ + if (On_drd_or_hg) \ + ANNOTATE_HAPPENS_BEFORE((obj));\ +} while (0) + +#define VALGRIND_ANNOTATE_HAPPENS_AFTER(obj) do {\ + if (On_drd_or_hg) \ + ANNOTATE_HAPPENS_AFTER((obj));\ +} while (0) + +#define VALGRIND_ANNOTATE_NEW_MEMORY(addr, size) do {\ + if (On_drd_or_hg) \ + ANNOTATE_NEW_MEMORY((addr), (size));\ +} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_READS_BEGIN() do {\ + if (On_drd_or_hg) \ + ANNOTATE_IGNORE_READS_BEGIN();\ +} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_READS_END() do {\ + if (On_drd_or_hg) \ + ANNOTATE_IGNORE_READS_END();\ +} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_WRITES_BEGIN() do {\ + if (On_drd_or_hg) \ + ANNOTATE_IGNORE_WRITES_BEGIN();\ +} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_WRITES_END() do {\ + if (On_drd_or_hg) \ + ANNOTATE_IGNORE_WRITES_END();\ +} while (0) + +/* Supported by both helgrind and drd. 
*/ +#define VALGRIND_HG_DRD_DISABLE_CHECKING(addr, size) do {\ + if (On_drd_or_hg) \ + VALGRIND_HG_DISABLE_CHECKING((addr), (size));\ +} while (0) + +#else + +#define On_drd_or_hg (0) + +#define VALGRIND_ANNOTATE_HAPPENS_BEFORE(obj) do { (void)(obj); } while (0) + +#define VALGRIND_ANNOTATE_HAPPENS_AFTER(obj) do { (void)(obj); } while (0) + +#define VALGRIND_ANNOTATE_NEW_MEMORY(addr, size) do {\ + (void) (addr);\ + (void) (size);\ +} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_READS_BEGIN() do {} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_READS_END() do {} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_WRITES_BEGIN() do {} while (0) + +#define VALGRIND_ANNOTATE_IGNORE_WRITES_END() do {} while (0) + +#define VALGRIND_HG_DRD_DISABLE_CHECKING(addr, size) do {\ + (void) (addr);\ + (void) (size);\ +} while (0) + +#endif + +#if VG_PMEMCHECK_ENABLED + +extern unsigned _On_pmemcheck; +#define On_pmemcheck __builtin_expect(_On_pmemcheck, 0) + +#include "valgrind/pmemcheck.h" + +void pobj_emit_log(const char *func, int order); +void pmem_emit_log(const char *func, int order); +void pmem2_emit_log(const char *func, int order); +extern int _Pmreorder_emit; + +#define Pmreorder_emit __builtin_expect(_Pmreorder_emit, 0) + +#define VALGRIND_REGISTER_PMEM_MAPPING(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_REGISTER_PMEM_MAPPING((addr), (len));\ +} while (0) + +#define VALGRIND_REGISTER_PMEM_FILE(desc, base_addr, size, offset) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_REGISTER_PMEM_FILE((desc), (base_addr), (size), \ + (offset));\ +} while (0) + +#define VALGRIND_REMOVE_PMEM_MAPPING(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_REMOVE_PMEM_MAPPING((addr), (len));\ +} while (0) + +#define VALGRIND_CHECK_IS_PMEM_MAPPING(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_CHECK_IS_PMEM_MAPPING((addr), (len));\ +} while (0) + +#define VALGRIND_PRINT_PMEM_MAPPINGS do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_PRINT_PMEM_MAPPINGS;\ +} while (0) + +#define VALGRIND_DO_FLUSH(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_DO_FLUSH((addr), (len));\ +} while (0) + +#define VALGRIND_DO_FENCE do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_DO_FENCE;\ +} while (0) + +#define VALGRIND_DO_PERSIST(addr, len) do {\ + if (On_pmemcheck) {\ + VALGRIND_PMC_DO_FLUSH((addr), (len));\ + VALGRIND_PMC_DO_FENCE;\ + }\ +} while (0) + +#define VALGRIND_SET_CLEAN(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_SET_CLEAN(addr, len);\ +} while (0) + +#define VALGRIND_WRITE_STATS do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_WRITE_STATS;\ +} while (0) + +#define VALGRIND_EMIT_LOG(emit_log) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_EMIT_LOG((emit_log));\ +} while (0) + +#define VALGRIND_START_TX do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_START_TX;\ +} while (0) + +#define VALGRIND_START_TX_N(txn) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_START_TX_N(txn);\ +} while (0) + +#define VALGRIND_END_TX do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_END_TX;\ +} while (0) + +#define VALGRIND_END_TX_N(txn) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_END_TX_N(txn);\ +} while (0) + +#define VALGRIND_ADD_TO_TX(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_ADD_TO_TX(addr, len);\ +} while (0) + +#define VALGRIND_ADD_TO_TX_N(txn, addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_ADD_TO_TX_N(txn, addr, len);\ +} while (0) + +#define VALGRIND_REMOVE_FROM_TX(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_REMOVE_FROM_TX(addr, len);\ +} while (0) + +#define VALGRIND_REMOVE_FROM_TX_N(txn, addr, len) do {\ + if (On_pmemcheck)\ + 
VALGRIND_PMC_REMOVE_FROM_TX_N(txn, addr, len);\ +} while (0) + +#define VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(addr, len) do {\ + if (On_pmemcheck)\ + VALGRIND_PMC_ADD_TO_GLOBAL_TX_IGNORE(addr, len);\ +} while (0) + +/* + * Logs library and function name with proper suffix + * to pmemcheck store log file. + */ +#define PMEMOBJ_API_START()\ + if (Pmreorder_emit)\ + pobj_emit_log(__func__, 0); +#define PMEMOBJ_API_END()\ + if (Pmreorder_emit)\ + pobj_emit_log(__func__, 1); + +#define PMEM_API_START()\ + if (Pmreorder_emit)\ + pmem_emit_log(__func__, 0); +#define PMEM_API_END()\ + if (Pmreorder_emit)\ + pmem_emit_log(__func__, 1); + +#define PMEM2_API_START(func_name)\ + if (Pmreorder_emit)\ + pmem2_emit_log(func_name, 0); +#define PMEM2_API_END(func_name)\ + if (Pmreorder_emit)\ + pmem2_emit_log(func_name, 1); + +#else + +#define On_pmemcheck (0) +#define Pmreorder_emit (0) + +#define VALGRIND_REGISTER_PMEM_MAPPING(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_REGISTER_PMEM_FILE(desc, base_addr, size, offset) do {\ + (void) (desc);\ + (void) (base_addr);\ + (void) (size);\ + (void) (offset);\ +} while (0) + +#define VALGRIND_REMOVE_PMEM_MAPPING(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_CHECK_IS_PMEM_MAPPING(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_PRINT_PMEM_MAPPINGS do {} while (0) + +#define VALGRIND_DO_FLUSH(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_DO_FENCE do {} while (0) + +#define VALGRIND_DO_PERSIST(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_SET_CLEAN(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_WRITE_STATS do {} while (0) + +#define VALGRIND_EMIT_LOG(emit_log) do {\ + (void) (emit_log);\ +} while (0) + +#define VALGRIND_START_TX do {} while (0) + +#define VALGRIND_START_TX_N(txn) do { (void) (txn); } while (0) + +#define VALGRIND_END_TX do {} while (0) + +#define VALGRIND_END_TX_N(txn) do {\ + (void) (txn);\ +} while (0) + +#define VALGRIND_ADD_TO_TX(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_ADD_TO_TX_N(txn, addr, len) do {\ + (void) (txn);\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_REMOVE_FROM_TX(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_REMOVE_FROM_TX_N(txn, addr, len) do {\ + (void) (txn);\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(addr, len) do {\ + (void) (addr);\ + (void) (len);\ +} while (0) + +#define PMEMOBJ_API_START() do {} while (0) + +#define PMEMOBJ_API_END() do {} while (0) + +#define PMEM_API_START() do {} while (0) + +#define PMEM_API_END() do {} while (0) + +#define PMEM2_API_START(func_name) do {\ + (void) (func_name);\ +} while (0) + +#define PMEM2_API_END(func_name) do {\ + (void) (func_name);\ +} while (0) +#endif + +#if VG_MEMCHECK_ENABLED + +extern unsigned _On_memcheck; +#define On_memcheck __builtin_expect(_On_memcheck, 0) + +#include "valgrind/memcheck.h" + +#define VALGRIND_DO_DISABLE_ERROR_REPORTING do {\ + if (On_valgrind)\ + VALGRIND_DISABLE_ERROR_REPORTING;\ +} while (0) + +#define VALGRIND_DO_ENABLE_ERROR_REPORTING do {\ + if (On_valgrind)\ + VALGRIND_ENABLE_ERROR_REPORTING;\ +} while (0) + +#define VALGRIND_DO_CREATE_MEMPOOL(heap, rzB, is_zeroed) do {\ + if (On_memcheck)\ + VALGRIND_CREATE_MEMPOOL(heap, rzB, is_zeroed);\ +} while (0) + +#define 
VALGRIND_DO_DESTROY_MEMPOOL(heap) do {\ + if (On_memcheck)\ + VALGRIND_DESTROY_MEMPOOL(heap);\ +} while (0) + +#define VALGRIND_DO_MEMPOOL_ALLOC(heap, addr, size) do {\ + if (On_memcheck)\ + VALGRIND_MEMPOOL_ALLOC(heap, addr, size);\ +} while (0) + +#define VALGRIND_DO_MEMPOOL_FREE(heap, addr) do {\ + if (On_memcheck)\ + VALGRIND_MEMPOOL_FREE(heap, addr);\ +} while (0) + +#define VALGRIND_DO_MEMPOOL_CHANGE(heap, addrA, addrB, size) do {\ + if (On_memcheck)\ + VALGRIND_MEMPOOL_CHANGE(heap, addrA, addrB, size);\ +} while (0) + +#define VALGRIND_DO_MAKE_MEM_DEFINED(addr, len) do {\ + if (On_memcheck)\ + VALGRIND_MAKE_MEM_DEFINED(addr, len);\ +} while (0) + +#define VALGRIND_DO_MAKE_MEM_UNDEFINED(addr, len) do {\ + if (On_memcheck)\ + VALGRIND_MAKE_MEM_UNDEFINED(addr, len);\ +} while (0) + +#define VALGRIND_DO_MAKE_MEM_NOACCESS(addr, len) do {\ + if (On_memcheck)\ + VALGRIND_MAKE_MEM_NOACCESS(addr, len);\ +} while (0) + +#define VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len) do {\ + if (On_memcheck)\ + VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, len);\ +} while (0) + +#else + +#define On_memcheck (0) + +#define VALGRIND_DO_DISABLE_ERROR_REPORTING do {} while (0) + +#define VALGRIND_DO_ENABLE_ERROR_REPORTING do {} while (0) + +#define VALGRIND_DO_CREATE_MEMPOOL(heap, rzB, is_zeroed)\ + do { (void) (heap); (void) (rzB); (void) (is_zeroed); } while (0) + +#define VALGRIND_DO_DESTROY_MEMPOOL(heap)\ + do { (void) (heap); } while (0) + +#define VALGRIND_DO_MEMPOOL_ALLOC(heap, addr, size)\ + do { (void) (heap); (void) (addr); (void) (size); } while (0) + +#define VALGRIND_DO_MEMPOOL_FREE(heap, addr)\ + do { (void) (heap); (void) (addr); } while (0) + +#define VALGRIND_DO_MEMPOOL_CHANGE(heap, addrA, addrB, size)\ + do {\ + (void) (heap); (void) (addrA); (void) (addrB); (void) (size);\ + } while (0) + +#define VALGRIND_DO_MAKE_MEM_DEFINED(addr, len)\ + do { (void) (addr); (void) (len); } while (0) + +#define VALGRIND_DO_MAKE_MEM_UNDEFINED(addr, len)\ + do { (void) (addr); (void) (len); } while (0) + +#define VALGRIND_DO_MAKE_MEM_NOACCESS(addr, len)\ + do { (void) (addr); (void) (len); } while (0) + +#define VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len)\ + do { (void) (addr); (void) (len); } while (0) + +#endif + +#endif diff --git a/src/pmdk/src/freebsd/README b/src/pmdk/src/freebsd/README new file mode 100644 index 000000000..f0f065d88 --- /dev/null +++ b/src/pmdk/src/freebsd/README @@ -0,0 +1,13 @@ +Persistent Memory Development Kit + +This is src/freebsd/README. + +This directory contains FreeBSD-specific files for the Persistent Memory Development Kit. + +The subdirectory "include" contains header files that have no equivalents +on FreeBSD. Most of these files are empty, which is a cheap trick to avoid +preprocessor errors when including non-existing files. Others are redirects +for files that are in different locations on FreeBSD. This way we don't +need a lot of preprocessor conditionals in all the source code files, although +it does require conditionals in the Makefiles (which could be addressed by +using autoconf). 
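+
+For example, a redirect header can be a single forwarding line such as
+
+	#include <sys/endian.h>
+
+(assuming FreeBSD provides the endian macros there), while an empty header
+needs nothing beyond its license banner.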
diff --git a/src/pmdk/src/freebsd/include/endian.h b/src/pmdk/src/freebsd/include/endian.h new file mode 100644 index 000000000..103b89e46 --- /dev/null +++ b/src/pmdk/src/freebsd/include/endian.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * endian.h -- redirect for FreeBSD + */ + +#include diff --git a/src/pmdk/src/freebsd/include/features.h b/src/pmdk/src/freebsd/include/features.h new file mode 100644 index 000000000..b0740ec17 --- /dev/null +++ b/src/pmdk/src/freebsd/include/features.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * features.h -- Empty file redirect + */ diff --git a/src/pmdk/src/freebsd/include/linux/kdev_t.h b/src/pmdk/src/freebsd/include/linux/kdev_t.h new file mode 100644 index 000000000..4890fa063 --- /dev/null +++ b/src/pmdk/src/freebsd/include/linux/kdev_t.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * linux/kdev_t.h -- Empty file redirect + */ diff --git a/src/pmdk/src/freebsd/include/linux/limits.h b/src/pmdk/src/freebsd/include/linux/limits.h new file mode 100644 index 000000000..8c1575db8 --- /dev/null +++ b/src/pmdk/src/freebsd/include/linux/limits.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * linux/limits.h -- Empty file redirect + */ diff --git a/src/pmdk/src/freebsd/include/sys/sysmacros.h b/src/pmdk/src/freebsd/include/sys/sysmacros.h new file mode 100644 index 000000000..065392b88 --- /dev/null +++ b/src/pmdk/src/freebsd/include/sys/sysmacros.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * sys/sysmacros.h -- Empty file redirect + */ diff --git a/src/pmdk/src/include/.cstyleignore b/src/pmdk/src/include/.cstyleignore new file mode 100644 index 000000000..1bcbd0d1c --- /dev/null +++ b/src/pmdk/src/include/.cstyleignore @@ -0,0 +1 @@ +pmemcompat.h diff --git a/src/pmdk/src/include/README b/src/pmdk/src/include/README new file mode 100644 index 000000000..6cd9ebbf1 --- /dev/null +++ b/src/pmdk/src/include/README @@ -0,0 +1,27 @@ +Persistent Memory Development Kit + +This is src/include/README. + +This directory contains include files that are meant to be installed on +a system when the Persistent Memory Development Kit package is installed. +These include files provide the public information exported +by the libraries that is necessary for applications to call into +the libraries. Private include files, used only internally in +the libraries, don't live here -- they typically live next to +the source for their module. 
+ +Here you'll find: + +libpmem.h -- definitions of libpmem entry points (see libpmem(7)) + +libpmem2.h -- definitions of libpmem2 entry points (see libpmem2(7)) + +libpmemblk.h -- definitions of libpmemblk entry points (see libpmemblk(7)) + +libpmemlog.h -- definitions of libpmemlog entry points (see libpmemlog(7)) + +libpmemobj.h -- definitions of libpmemobj entry points (see libpmemobj(7)) + +Experimental libraries: + +librpmem.h -- definitions of librpmem entry points (see librpmem(7)) diff --git a/src/pmdk/src/include/libpmem.h b/src/pmdk/src/include/libpmem.h new file mode 100644 index 000000000..5eb09309e --- /dev/null +++ b/src/pmdk/src/include/libpmem.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmem.h -- definitions of libpmem entry points + * + * This library provides support for programming with persistent memory (pmem). + * + * libpmem provides support for using raw pmem directly. + * + * See libpmem(7) for details. + */ + +#ifndef LIBPMEM_H +#define LIBPMEM_H 1 + +#include + +#ifdef _WIN32 +#include + +#ifndef PMDK_UTF8_API +#define pmem_map_file pmem_map_fileW +#define pmem_check_version pmem_check_versionW +#define pmem_errormsg pmem_errormsgW +#else +#define pmem_map_file pmem_map_fileU +#define pmem_check_version pmem_check_versionU +#define pmem_errormsg pmem_errormsgU +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This limit is set arbitrary to incorporate a pool header and required + * alignment plus supply. + */ +#define PMEM_MIN_PART ((size_t)(1024 * 1024 * 2)) /* 2 MiB */ + +/* + * flags supported by pmem_map_file() + */ +#define PMEM_FILE_CREATE (1 << 0) +#define PMEM_FILE_EXCL (1 << 1) +#define PMEM_FILE_SPARSE (1 << 2) +#define PMEM_FILE_TMPFILE (1 << 3) + +#ifndef _WIN32 +void *pmem_map_file(const char *path, size_t len, int flags, mode_t mode, + size_t *mapped_lenp, int *is_pmemp); +#else +void *pmem_map_fileU(const char *path, size_t len, int flags, mode_t mode, + size_t *mapped_lenp, int *is_pmemp); +void *pmem_map_fileW(const wchar_t *path, size_t len, int flags, mode_t mode, + size_t *mapped_lenp, int *is_pmemp); +#endif + +int pmem_unmap(void *addr, size_t len); +int pmem_is_pmem(const void *addr, size_t len); +void pmem_persist(const void *addr, size_t len); +int pmem_msync(const void *addr, size_t len); +int pmem_has_auto_flush(void); +void pmem_flush(const void *addr, size_t len); +void pmem_deep_flush(const void *addr, size_t len); +int pmem_deep_drain(const void *addr, size_t len); +int pmem_deep_persist(const void *addr, size_t len); +void pmem_drain(void); +int pmem_has_hw_drain(void); + +void *pmem_memmove_persist(void *pmemdest, const void *src, size_t len); +void *pmem_memcpy_persist(void *pmemdest, const void *src, size_t len); +void *pmem_memset_persist(void *pmemdest, int c, size_t len); +void *pmem_memmove_nodrain(void *pmemdest, const void *src, size_t len); +void *pmem_memcpy_nodrain(void *pmemdest, const void *src, size_t len); +void *pmem_memset_nodrain(void *pmemdest, int c, size_t len); + +#define PMEM_F_MEM_NODRAIN (1U << 0) + +#define PMEM_F_MEM_NONTEMPORAL (1U << 1) +#define PMEM_F_MEM_TEMPORAL (1U << 2) + +#define PMEM_F_MEM_WC (1U << 3) +#define PMEM_F_MEM_WB (1U << 4) + +#define PMEM_F_MEM_NOFLUSH (1U << 5) + +#define PMEM_F_MEM_VALID_FLAGS (PMEM_F_MEM_NODRAIN | \ + PMEM_F_MEM_NONTEMPORAL | \ + PMEM_F_MEM_TEMPORAL | \ + PMEM_F_MEM_WC | \ + PMEM_F_MEM_WB | \ + PMEM_F_MEM_NOFLUSH) + +void *pmem_memmove(void *pmemdest, const void 
*src, size_t len, unsigned flags); +void *pmem_memcpy(void *pmemdest, const void *src, size_t len, unsigned flags); +void *pmem_memset(void *pmemdest, int c, size_t len, unsigned flags); + +/* + * PMEM_MAJOR_VERSION and PMEM_MINOR_VERSION provide the current version of the + * libpmem API as provided by this header file. Applications can verify that + * the version available at run-time is compatible with the version used at + * compile-time by passing these defines to pmem_check_version(). + */ +#define PMEM_MAJOR_VERSION 1 +#define PMEM_MINOR_VERSION 1 + +#ifndef _WIN32 +const char *pmem_check_version(unsigned major_required, + unsigned minor_required); +#else +const char *pmem_check_versionU(unsigned major_required, + unsigned minor_required); +const wchar_t *pmem_check_versionW(unsigned major_required, + unsigned minor_required); +#endif + +#ifndef _WIN32 +const char *pmem_errormsg(void); +#else +const char *pmem_errormsgU(void); +const wchar_t *pmem_errormsgW(void); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* libpmem.h */ diff --git a/src/pmdk/src/include/libpmem2.h b/src/pmdk/src/include/libpmem2.h new file mode 100644 index 000000000..37fb24a0d --- /dev/null +++ b/src/pmdk/src/include/libpmem2.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * libpmem2.h -- definitions of libpmem2 entry points + * + * This library provides support for programming with persistent memory (pmem). + * + * libpmem2 provides support for using raw pmem directly. + * + * See libpmem2(7) for details. + */ + +#ifndef LIBPMEM2_H +#define LIBPMEM2_H 1 + +#include +#include + +#ifdef _WIN32 +#include + +#ifndef PMDK_UTF8_API +#define pmem2_source_device_id pmem2_source_device_idW +#define pmem2_errormsg pmem2_errormsgW +#define pmem2_perror pmem2_perrorW +#else +#define pmem2_source_device_id pmem2_source_device_idU +#define pmem2_errormsg pmem2_errormsgU +#define pmem2_perror pmem2_perrorU +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define PMEM2_E_UNKNOWN (-100000) +#define PMEM2_E_NOSUPP (-100001) +#define PMEM2_E_FILE_HANDLE_NOT_SET (-100003) +#define PMEM2_E_INVALID_FILE_HANDLE (-100004) +#define PMEM2_E_INVALID_FILE_TYPE (-100005) +#define PMEM2_E_MAP_RANGE (-100006) +#define PMEM2_E_MAPPING_EXISTS (-100007) +#define PMEM2_E_GRANULARITY_NOT_SET (-100008) +#define PMEM2_E_GRANULARITY_NOT_SUPPORTED (-100009) +#define PMEM2_E_OFFSET_OUT_OF_RANGE (-100010) +#define PMEM2_E_OFFSET_UNALIGNED (-100011) +#define PMEM2_E_INVALID_ALIGNMENT_FORMAT (-100012) +#define PMEM2_E_INVALID_ALIGNMENT_VALUE (-100013) +#define PMEM2_E_INVALID_SIZE_FORMAT (-100014) +#define PMEM2_E_LENGTH_UNALIGNED (-100015) +#define PMEM2_E_MAPPING_NOT_FOUND (-100016) +#define PMEM2_E_BUFFER_TOO_SMALL (-100017) +#define PMEM2_E_SOURCE_EMPTY (-100018) +#define PMEM2_E_INVALID_SHARING_VALUE (-100019) +#define PMEM2_E_SRC_DEVDAX_PRIVATE (-100020) +#define PMEM2_E_INVALID_ADDRESS_REQUEST_TYPE (-100021) +#define PMEM2_E_ADDRESS_UNALIGNED (-100022) +#define PMEM2_E_ADDRESS_NULL (-100023) +#define PMEM2_E_DEEP_FLUSH_RANGE (-100024) +#define PMEM2_E_INVALID_REGION_FORMAT (-100025) +#define PMEM2_E_DAX_REGION_NOT_FOUND (-100026) +#define PMEM2_E_INVALID_DEV_FORMAT (-100027) +#define PMEM2_E_CANNOT_READ_BOUNDS (-100028) +#define PMEM2_E_NO_BAD_BLOCK_FOUND (-100029) +#define PMEM2_E_LENGTH_OUT_OF_RANGE (-100030) +#define PMEM2_E_INVALID_PROT_FLAG (-100031) +#define PMEM2_E_NO_ACCESS (-100032) +#define PMEM2_E_VM_RESERVATION_NOT_EMPTY (-100033) +#define 
PMEM2_E_MAP_EXISTS (-100034) +#define PMEM2_E_FILE_DESCRIPTOR_NOT_SET (-100035) + +/* source setup */ + +struct pmem2_source; + +int pmem2_source_from_fd(struct pmem2_source **src, int fd); +int pmem2_source_from_anon(struct pmem2_source **src, size_t size); +#ifdef _WIN32 +int pmem2_source_from_handle(struct pmem2_source **src, HANDLE handle); +int pmem2_source_get_handle(const struct pmem2_source *src, HANDLE *h); +#else +int pmem2_source_get_fd(const struct pmem2_source *src, int *fd); +#endif + +int pmem2_source_size(const struct pmem2_source *src, size_t *size); + +int pmem2_source_alignment(const struct pmem2_source *src, + size_t *alignment); + +int pmem2_source_delete(struct pmem2_source **src); + +/* vm reservation setup */ + +struct pmem2_vm_reservation; + +void *pmem2_vm_reservation_get_address(struct pmem2_vm_reservation *rsv); + +size_t pmem2_vm_reservation_get_size(struct pmem2_vm_reservation *rsv); + +int pmem2_vm_reservation_new(struct pmem2_vm_reservation **rsv_ptr, + void *addr, size_t size); + +int pmem2_vm_reservation_delete(struct pmem2_vm_reservation **rsv_ptr); + +/* config setup */ + +struct pmem2_config; + +int pmem2_config_new(struct pmem2_config **cfg); + +int pmem2_config_delete(struct pmem2_config **cfg); + +enum pmem2_granularity { + PMEM2_GRANULARITY_BYTE, + PMEM2_GRANULARITY_CACHE_LINE, + PMEM2_GRANULARITY_PAGE, +}; + +int pmem2_config_set_required_store_granularity(struct pmem2_config *cfg, + enum pmem2_granularity g); + +int pmem2_config_set_offset(struct pmem2_config *cfg, size_t offset); + +int pmem2_config_set_length(struct pmem2_config *cfg, size_t length); + +enum pmem2_sharing_type { + PMEM2_SHARED, + PMEM2_PRIVATE, +}; + +int pmem2_config_set_sharing(struct pmem2_config *cfg, + enum pmem2_sharing_type type); + +#define PMEM2_PROT_EXEC (1U << 29) +#define PMEM2_PROT_READ (1U << 30) +#define PMEM2_PROT_WRITE (1U << 31) +#define PMEM2_PROT_NONE 0 + +int pmem2_config_set_protection(struct pmem2_config *cfg, + unsigned prot); + +int pmem2_config_set_vm_reservation(struct pmem2_config *cfg, + struct pmem2_vm_reservation *rsv, size_t offset); + +/* mapping */ + +struct pmem2_map; +int pmem2_map_from_existing(struct pmem2_map **map, + const struct pmem2_source *src, void *addr, size_t len, + enum pmem2_granularity gran); + +int pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, + const struct pmem2_source *src); + +int pmem2_map_delete(struct pmem2_map **map_ptr); + +void *pmem2_map_get_address(struct pmem2_map *map); + +size_t pmem2_map_get_size(struct pmem2_map *map); + +enum pmem2_granularity pmem2_map_get_store_granularity(struct pmem2_map *map); + +/* flushing */ + +typedef void (*pmem2_persist_fn)(const void *ptr, size_t size); + +typedef void (*pmem2_flush_fn)(const void *ptr, size_t size); + +typedef void (*pmem2_drain_fn)(void); + +pmem2_persist_fn pmem2_get_persist_fn(struct pmem2_map *map); + +pmem2_flush_fn pmem2_get_flush_fn(struct pmem2_map *map); + +pmem2_drain_fn pmem2_get_drain_fn(struct pmem2_map *map); + +#define PMEM2_F_MEM_NODRAIN (1U << 0) + +#define PMEM2_F_MEM_NONTEMPORAL (1U << 1) +#define PMEM2_F_MEM_TEMPORAL (1U << 2) + +#define PMEM2_F_MEM_WC (1U << 3) +#define PMEM2_F_MEM_WB (1U << 4) + +#define PMEM2_F_MEM_NOFLUSH (1U << 5) + +#define PMEM2_F_MEM_VALID_FLAGS (PMEM2_F_MEM_NODRAIN | \ + PMEM2_F_MEM_NONTEMPORAL | \ + PMEM2_F_MEM_TEMPORAL | \ + PMEM2_F_MEM_WC | \ + PMEM2_F_MEM_WB | \ + PMEM2_F_MEM_NOFLUSH) + +typedef void *(*pmem2_memmove_fn)(void *pmemdest, const void *src, size_t len, + unsigned flags); + 
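+
+/*
+ * A minimal usage sketch (illustrative only; error handling is omitted and
+ * "fd", "buf" and "len" are assumed to exist in the caller, with fd open on
+ * a pmem-capable file):
+ *
+ *	struct pmem2_config *cfg;
+ *	struct pmem2_source *src;
+ *	struct pmem2_map *map;
+ *
+ *	pmem2_config_new(&cfg);
+ *	pmem2_source_from_fd(&src, fd);
+ *	pmem2_config_set_required_store_granularity(cfg,
+ *			PMEM2_GRANULARITY_PAGE);
+ *	pmem2_map_new(&map, cfg, src);
+ *
+ *	char *dst = pmem2_map_get_address(map);
+ *	pmem2_memcpy_fn cpy = pmem2_get_memcpy_fn(map);
+ *	cpy(dst, buf, len, 0);	with flags == 0 the copy is also persisted
+ *
+ *	pmem2_map_delete(&map);
+ *	pmem2_source_delete(&src);
+ *	pmem2_config_delete(&cfg);
+ */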
+typedef void *(*pmem2_memcpy_fn)(void *pmemdest, const void *src, size_t len, + unsigned flags); + +typedef void *(*pmem2_memset_fn)(void *pmemdest, int c, size_t len, + unsigned flags); + +pmem2_memmove_fn pmem2_get_memmove_fn(struct pmem2_map *map); + +pmem2_memcpy_fn pmem2_get_memcpy_fn(struct pmem2_map *map); + +pmem2_memset_fn pmem2_get_memset_fn(struct pmem2_map *map); + +/* RAS */ + +int pmem2_deep_flush(struct pmem2_map *map, void *ptr, size_t size); + +#ifndef _WIN32 +int pmem2_source_device_id(const struct pmem2_source *src, + char *id, size_t *len); +#else +int pmem2_source_device_idW(const struct pmem2_source *src, + wchar_t *id, size_t *len); + +int pmem2_source_device_idU(const struct pmem2_source *src, + char *id, size_t *len); +#endif + +int pmem2_source_device_usc(const struct pmem2_source *src, uint64_t *usc); + +struct pmem2_badblock_context; + +struct pmem2_badblock { + size_t offset; + size_t length; +}; + +int pmem2_badblock_context_new(struct pmem2_badblock_context **bbctx, + const struct pmem2_source *src); + +int pmem2_badblock_next(struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb); + +void pmem2_badblock_context_delete( + struct pmem2_badblock_context **bbctx); + +int pmem2_badblock_clear(struct pmem2_badblock_context *bbctx, + const struct pmem2_badblock *bb); + +/* error handling */ + +#ifndef _WIN32 +const char *pmem2_errormsg(void); +#else +const char *pmem2_errormsgU(void); + +const wchar_t *pmem2_errormsgW(void); +#endif + +int pmem2_err_to_errno(int); + +#ifndef _WIN32 +void pmem2_perror(const char *format, + ...) __attribute__((__format__(__printf__, 1, 2))); +#else +void pmem2_perrorU(const char *format, ...); + +void pmem2_perrorW(const wchar_t *format, ...); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* libpmem2.h */ diff --git a/src/pmdk/src/include/libpmemblk.h b/src/pmdk/src/include/libpmemblk.h new file mode 100644 index 000000000..72f6fd5b0 --- /dev/null +++ b/src/pmdk/src/include/libpmemblk.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmemblk.h -- definitions of libpmemblk entry points + * + * This library provides support for programming with persistent memory (pmem). + * + * libpmemblk provides support for arrays of atomically-writable blocks. + * + * See libpmemblk(7) for details. + */ + +#ifndef LIBPMEMBLK_H +#define LIBPMEMBLK_H 1 + +#include + +#ifdef _WIN32 +#include + +#ifndef PMDK_UTF8_API +#define pmemblk_open pmemblk_openW +#define pmemblk_create pmemblk_createW +#define pmemblk_check pmemblk_checkW +#define pmemblk_check_version pmemblk_check_versionW +#define pmemblk_errormsg pmemblk_errormsgW +#define pmemblk_ctl_get pmemblk_ctl_getW +#define pmemblk_ctl_set pmemblk_ctl_setW +#define pmemblk_ctl_exec pmemblk_ctl_execW +#else +#define pmemblk_open pmemblk_openU +#define pmemblk_create pmemblk_createU +#define pmemblk_check pmemblk_checkU +#define pmemblk_check_version pmemblk_check_versionU +#define pmemblk_errormsg pmemblk_errormsgU +#define pmemblk_ctl_get pmemblk_ctl_getU +#define pmemblk_ctl_set pmemblk_ctl_setU +#define pmemblk_ctl_exec pmemblk_ctl_execU +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * opaque type, internal to libpmemblk + */ +typedef struct pmemblk PMEMblkpool; + +/* + * PMEMBLK_MAJOR_VERSION and PMEMBLK_MINOR_VERSION provide the current version + * of the libpmemblk API as provided by this header file. 
Applications can + * verify that the version available at run-time is compatible with the version + * used at compile-time by passing these defines to pmemblk_check_version(). + */ +#define PMEMBLK_MAJOR_VERSION 1 +#define PMEMBLK_MINOR_VERSION 1 + +#ifndef _WIN32 +const char *pmemblk_check_version(unsigned major_required, + unsigned minor_required); +#else +const char *pmemblk_check_versionU(unsigned major_required, + unsigned minor_required); +const wchar_t *pmemblk_check_versionW(unsigned major_required, + unsigned minor_required); +#endif + +/* XXX - unify minimum pool size for both OS-es */ + +#ifndef _WIN32 +#if defined(__x86_64__) || defined(__M_X64__) || defined(__aarch64__) +/* minimum pool size: 16MiB + 4KiB (minimum BTT size + mmap alignment) */ +#define PMEMBLK_MIN_POOL ((size_t)((1u << 20) * 16 + (1u << 10) * 8)) +#elif defined(__PPC64__) +/* minimum pool size: 16MiB + 128KiB (minimum BTT size + mmap alignment) */ +#define PMEMBLK_MIN_POOL ((size_t)((1u << 20) * 16 + (1u << 10) * 128)) +#else +#error unable to recognize ISA at compile time +#endif +#else +/* minimum pool size: 16MiB + 64KiB (minimum BTT size + mmap alignment) */ +#define PMEMBLK_MIN_POOL ((size_t)((1u << 20) * 16 + (1u << 10) * 64)) +#endif + +/* + * This limit is set arbitrary to incorporate a pool header and required + * alignment plus supply. + */ +#define PMEMBLK_MIN_PART ((size_t)(1024 * 1024 * 2)) /* 2 MiB */ + +#define PMEMBLK_MIN_BLK ((size_t)512) + +#ifndef _WIN32 +PMEMblkpool *pmemblk_open(const char *path, size_t bsize); +#else +PMEMblkpool *pmemblk_openU(const char *path, size_t bsize); +PMEMblkpool *pmemblk_openW(const wchar_t *path, size_t bsize); +#endif + +#ifndef _WIN32 +PMEMblkpool *pmemblk_create(const char *path, size_t bsize, + size_t poolsize, mode_t mode); +#else +PMEMblkpool *pmemblk_createU(const char *path, size_t bsize, + size_t poolsize, mode_t mode); +PMEMblkpool *pmemblk_createW(const wchar_t *path, size_t bsize, + size_t poolsize, mode_t mode); +#endif + +#ifndef _WIN32 +int pmemblk_check(const char *path, size_t bsize); +#else +int pmemblk_checkU(const char *path, size_t bsize); +int pmemblk_checkW(const wchar_t *path, size_t bsize); +#endif + +void pmemblk_close(PMEMblkpool *pbp); +size_t pmemblk_bsize(PMEMblkpool *pbp); +size_t pmemblk_nblock(PMEMblkpool *pbp); +int pmemblk_read(PMEMblkpool *pbp, void *buf, long long blockno); +int pmemblk_write(PMEMblkpool *pbp, const void *buf, long long blockno); +int pmemblk_set_zero(PMEMblkpool *pbp, long long blockno); +int pmemblk_set_error(PMEMblkpool *pbp, long long blockno); + +/* + * Passing NULL to pmemblk_set_funcs() tells libpmemblk to continue to use the + * default for that function. The replacement functions must not make calls + * back into libpmemblk. 
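+ *
+ * For example, to override only the allocator pair and keep the default
+ * realloc and strdup (my_malloc and my_free are hypothetical):
+ *
+ *	pmemblk_set_funcs(my_malloc, my_free, NULL, NULL);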
+ */ +void pmemblk_set_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)); + +#ifndef _WIN32 +const char *pmemblk_errormsg(void); +#else +const char *pmemblk_errormsgU(void); +const wchar_t *pmemblk_errormsgW(void); +#endif + +#ifndef _WIN32 +/* EXPERIMENTAL */ +int pmemblk_ctl_get(PMEMblkpool *pbp, const char *name, void *arg); +int pmemblk_ctl_set(PMEMblkpool *pbp, const char *name, void *arg); +int pmemblk_ctl_exec(PMEMblkpool *pbp, const char *name, void *arg); +#else +int pmemblk_ctl_getU(PMEMblkpool *pbp, const char *name, void *arg); +int pmemblk_ctl_getW(PMEMblkpool *pbp, const wchar_t *name, void *arg); +int pmemblk_ctl_setU(PMEMblkpool *pbp, const char *name, void *arg); +int pmemblk_ctl_setW(PMEMblkpool *pbp, const wchar_t *name, void *arg); +int pmemblk_ctl_execU(PMEMblkpool *pbp, const char *name, void *arg); +int pmemblk_ctl_execW(PMEMblkpool *pbp, const wchar_t *name, void *arg); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* libpmemblk.h */ diff --git a/src/pmdk/src/include/libpmemlog.h b/src/pmdk/src/include/libpmemlog.h new file mode 100644 index 000000000..5594a0138 --- /dev/null +++ b/src/pmdk/src/include/libpmemlog.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmemlog.h -- definitions of libpmemlog entry points + * + * This library provides support for programming with persistent memory (pmem). + * + * libpmemlog provides support for pmem-resident log files. + * + * See libpmemlog(7) for details. + */ + +#ifndef LIBPMEMLOG_H +#define LIBPMEMLOG_H 1 + +#include + +#ifdef _WIN32 +#include + +#ifndef PMDK_UTF8_API +#define pmemlog_open pmemlog_openW +#define pmemlog_create pmemlog_createW +#define pmemlog_check pmemlog_checkW +#define pmemlog_check_version pmemlog_check_versionW +#define pmemlog_errormsg pmemlog_errormsgW +#define pmemlog_ctl_get pmemlog_ctl_getW +#define pmemlog_ctl_set pmemlog_ctl_setW +#define pmemlog_ctl_exec pmemlog_ctl_execW +#else +#define pmemlog_open pmemlog_openU +#define pmemlog_create pmemlog_createU +#define pmemlog_check pmemlog_checkU +#define pmemlog_check_version pmemlog_check_versionU +#define pmemlog_errormsg pmemlog_errormsgU +#define pmemlog_ctl_get pmemlog_ctl_getU +#define pmemlog_ctl_set pmemlog_ctl_setU +#define pmemlog_ctl_exec pmemlog_ctl_execU +#endif + +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * opaque type, internal to libpmemlog + */ +typedef struct pmemlog PMEMlogpool; + +/* + * PMEMLOG_MAJOR_VERSION and PMEMLOG_MINOR_VERSION provide the current + * version of the libpmemlog API as provided by this header file. + * Applications can verify that the version available at run-time + * is compatible with the version used at compile-time by passing + * these defines to pmemlog_check_version(). + */ +#define PMEMLOG_MAJOR_VERSION 1 +#define PMEMLOG_MINOR_VERSION 1 + +#ifndef _WIN32 +const char *pmemlog_check_version(unsigned major_required, + unsigned minor_required); +#else +const char *pmemlog_check_versionU(unsigned major_required, + unsigned minor_required); +const wchar_t *pmemlog_check_versionW(unsigned major_required, + unsigned minor_required); +#endif + +/* + * support for PMEM-resident log files... + */ +#define PMEMLOG_MIN_POOL ((size_t)(1024 * 1024 * 2)) /* min pool size: 2MiB */ + +/* + * This limit is set arbitrary to incorporate a pool header and required + * alignment plus supply. 
+ */ +#define PMEMLOG_MIN_PART ((size_t)(1024 * 1024 * 2)) /* 2 MiB */ + +#ifndef _WIN32 +PMEMlogpool *pmemlog_open(const char *path); +#else +PMEMlogpool *pmemlog_openU(const char *path); +PMEMlogpool *pmemlog_openW(const wchar_t *path); +#endif + +#ifndef _WIN32 +PMEMlogpool *pmemlog_create(const char *path, size_t poolsize, mode_t mode); +#else +PMEMlogpool *pmemlog_createU(const char *path, size_t poolsize, mode_t mode); +PMEMlogpool *pmemlog_createW(const wchar_t *path, size_t poolsize, mode_t mode); +#endif + +#ifndef _WIN32 +int pmemlog_check(const char *path); +#else +int pmemlog_checkU(const char *path); +int pmemlog_checkW(const wchar_t *path); +#endif + +void pmemlog_close(PMEMlogpool *plp); +size_t pmemlog_nbyte(PMEMlogpool *plp); +int pmemlog_append(PMEMlogpool *plp, const void *buf, size_t count); +int pmemlog_appendv(PMEMlogpool *plp, const struct iovec *iov, int iovcnt); +long long pmemlog_tell(PMEMlogpool *plp); +void pmemlog_rewind(PMEMlogpool *plp); +void pmemlog_walk(PMEMlogpool *plp, size_t chunksize, + int (*process_chunk)(const void *buf, size_t len, void *arg), + void *arg); + +/* + * Passing NULL to pmemlog_set_funcs() tells libpmemlog to continue to use the + * default for that function. The replacement functions must not make calls + * back into libpmemlog. + */ +void pmemlog_set_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)); + +#ifndef _WIN32 +const char *pmemlog_errormsg(void); +#else +const char *pmemlog_errormsgU(void); +const wchar_t *pmemlog_errormsgW(void); +#endif + +#ifndef _WIN32 +/* EXPERIMENTAL */ +int pmemlog_ctl_get(PMEMlogpool *plp, const char *name, void *arg); +int pmemlog_ctl_set(PMEMlogpool *plp, const char *name, void *arg); +int pmemlog_ctl_exec(PMEMlogpool *plp, const char *name, void *arg); +#else +int pmemlog_ctl_getU(PMEMlogpool *plp, const char *name, void *arg); +int pmemlog_ctl_getW(PMEMlogpool *plp, const wchar_t *name, void *arg); +int pmemlog_ctl_setU(PMEMlogpool *plp, const char *name, void *arg); +int pmemlog_ctl_setW(PMEMlogpool *plp, const wchar_t *name, void *arg); +int pmemlog_ctl_execU(PMEMlogpool *plp, const char *name, void *arg); +int pmemlog_ctl_execW(PMEMlogpool *plp, const wchar_t *name, void *arg); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* libpmemlog.h */ diff --git a/src/pmdk/src/include/libpmemobj++/README.md b/src/pmdk/src/include/libpmemobj++/README.md new file mode 100644 index 000000000..220083b31 --- /dev/null +++ b/src/pmdk/src/include/libpmemobj++/README.md @@ -0,0 +1,2 @@ +This folder contained libpmemobj C++ bindings. +They have been moved to https://github.com/pmem/libpmemobj-cpp diff --git a/src/pmdk/src/include/libpmemobj++/detail/README.md b/src/pmdk/src/include/libpmemobj++/detail/README.md new file mode 100644 index 000000000..eae27c13a --- /dev/null +++ b/src/pmdk/src/include/libpmemobj++/detail/README.md @@ -0,0 +1,2 @@ +This folder contained libpmemobj C++ bindings. 
+They have been moved to https://github.com/pmem/libpmemobj-cpp/tree/master/include/libpmemobj++/detail diff --git a/src/pmdk/src/include/libpmemobj.h b/src/pmdk/src/include/libpmemobj.h new file mode 100644 index 000000000..e1fd75aa6 --- /dev/null +++ b/src/pmdk/src/include/libpmemobj.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmemobj.h -- definitions of libpmemobj entry points + * + * This library provides support for programming with persistent memory (pmem). + * + * libpmemobj provides a pmem-resident transactional object store. + * + * See libpmemobj(7) for details. + */ + +#ifndef LIBPMEMOBJ_H +#define LIBPMEMOBJ_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* libpmemobj.h */ diff --git a/src/pmdk/src/include/libpmemobj/action.h b/src/pmdk/src/include/libpmemobj/action.h new file mode 100644 index 000000000..7e1c7bc51 --- /dev/null +++ b/src/pmdk/src/include/libpmemobj/action.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * libpmemobj/action.h -- definitions of libpmemobj action interface + */ + +#ifndef LIBPMEMOBJ_ACTION_H +#define LIBPMEMOBJ_ACTION_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define POBJ_RESERVE_NEW(pop, t, act)\ +((TOID(t))pmemobj_reserve(pop, act, sizeof(t), TOID_TYPE_NUM(t))) + +#define POBJ_RESERVE_ALLOC(pop, t, size, act)\ +((TOID(t))pmemobj_reserve(pop, act, size, TOID_TYPE_NUM(t))) + +#define POBJ_XRESERVE_NEW(pop, t, act, flags)\ +((TOID(t))pmemobj_xreserve(pop, act, sizeof(t), TOID_TYPE_NUM(t), flags)) + +#define POBJ_XRESERVE_ALLOC(pop, t, size, act, flags)\ +((TOID(t))pmemobj_xreserve(pop, act, size, TOID_TYPE_NUM(t), flags)) + +#ifdef __cplusplus +} +#endif + +#endif /* libpmemobj/action_base.h */ diff --git a/src/pmdk/src/include/libpmemobj/action_base.h b/src/pmdk/src/include/libpmemobj/action_base.h new file mode 100644 index 000000000..e9d3e93ba --- /dev/null +++ b/src/pmdk/src/include/libpmemobj/action_base.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * libpmemobj/action_base.h -- definitions of libpmemobj action interface + */ + +#ifndef LIBPMEMOBJ_ACTION_BASE_H +#define LIBPMEMOBJ_ACTION_BASE_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +enum pobj_action_type { + /* a heap action (e.g., alloc) */ + POBJ_ACTION_TYPE_HEAP, + /* a single memory operation (e.g., value set) */ + POBJ_ACTION_TYPE_MEM, + + POBJ_MAX_ACTION_TYPE +}; + +struct pobj_action_heap { + /* offset to the element being freed/allocated */ + uint64_t offset; + /* usable size of the element being allocated */ + uint64_t usable_size; +}; + +struct pobj_action { + /* + * These fields are internal for the implementation and are not + * guaranteed to be stable across different versions of the API. + * Use with caution. + * + * This structure should NEVER be stored on persistent memory! 
+ */ + enum pobj_action_type type; + uint32_t data[3]; + union { + struct pobj_action_heap heap; + uint64_t data2[14]; + }; +}; + +#define POBJ_ACTION_XRESERVE_VALID_FLAGS\ + (POBJ_XALLOC_CLASS_MASK |\ + POBJ_XALLOC_ARENA_MASK |\ + POBJ_XALLOC_ZERO) + +PMEMoid pmemobj_reserve(PMEMobjpool *pop, struct pobj_action *act, + size_t size, uint64_t type_num); +PMEMoid pmemobj_xreserve(PMEMobjpool *pop, struct pobj_action *act, + size_t size, uint64_t type_num, uint64_t flags); +void pmemobj_set_value(PMEMobjpool *pop, struct pobj_action *act, + uint64_t *ptr, uint64_t value); +void pmemobj_defer_free(PMEMobjpool *pop, PMEMoid oid, struct pobj_action *act); + +int pmemobj_publish(PMEMobjpool *pop, struct pobj_action *actv, + size_t actvcnt); +int pmemobj_tx_publish(struct pobj_action *actv, size_t actvcnt); +int pmemobj_tx_xpublish(struct pobj_action *actv, size_t actvcnt, + uint64_t flags); + +void pmemobj_cancel(PMEMobjpool *pop, struct pobj_action *actv, size_t actvcnt); + +#ifdef __cplusplus +} +#endif + +#endif /* libpmemobj/action_base.h */ diff --git a/src/pmdk/src/include/libpmemobj/atomic.h b/src/pmdk/src/include/libpmemobj/atomic.h new file mode 100644 index 000000000..ed1162828 --- /dev/null +++ b/src/pmdk/src/include/libpmemobj/atomic.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmemobj/atomic.h -- definitions of libpmemobj atomic macros + */ + +#ifndef LIBPMEMOBJ_ATOMIC_H +#define LIBPMEMOBJ_ATOMIC_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define POBJ_NEW(pop, o, t, constr, arg)\ +pmemobj_alloc((pop), (PMEMoid *)(o), sizeof(t), TOID_TYPE_NUM(t),\ + (constr), (arg)) + +#define POBJ_ALLOC(pop, o, t, size, constr, arg)\ +pmemobj_alloc((pop), (PMEMoid *)(o), (size), TOID_TYPE_NUM(t),\ + (constr), (arg)) + +#define POBJ_ZNEW(pop, o, t)\ +pmemobj_zalloc((pop), (PMEMoid *)(o), sizeof(t), TOID_TYPE_NUM(t)) + +#define POBJ_ZALLOC(pop, o, t, size)\ +pmemobj_zalloc((pop), (PMEMoid *)(o), (size), TOID_TYPE_NUM(t)) + +#define POBJ_REALLOC(pop, o, t, size)\ +pmemobj_realloc((pop), (PMEMoid *)(o), (size), TOID_TYPE_NUM(t)) + +#define POBJ_ZREALLOC(pop, o, t, size)\ +pmemobj_zrealloc((pop), (PMEMoid *)(o), (size), TOID_TYPE_NUM(t)) + +#define POBJ_FREE(o)\ +pmemobj_free((PMEMoid *)(o)) + +#ifdef __cplusplus +} +#endif + +#endif /* libpmemobj/atomic.h */ diff --git a/src/pmdk/src/include/libpmemobj/atomic_base.h b/src/pmdk/src/include/libpmemobj/atomic_base.h new file mode 100644 index 000000000..b2058f54f --- /dev/null +++ b/src/pmdk/src/include/libpmemobj/atomic_base.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * libpmemobj/atomic_base.h -- definitions of libpmemobj atomic entry points + */ + +#ifndef LIBPMEMOBJ_ATOMIC_BASE_H +#define LIBPMEMOBJ_ATOMIC_BASE_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Non-transactional atomic allocations + * + * Those functions can be used outside transactions. The allocations are always + * aligned to the cache-line boundary. + */ + +#define POBJ_XALLOC_VALID_FLAGS (POBJ_XALLOC_ZERO |\ + POBJ_XALLOC_CLASS_MASK) + +/* + * Allocates a new object from the pool and calls a constructor function before + * returning. It is guaranteed that allocated object is either properly + * initialized, or if it's interrupted before the constructor completes, the + * memory reserved for the object is automatically reclaimed. 
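+ *
+ * An illustrative constructor sketch ("struct foo", its field, "pop" and
+ * "oid" are hypothetical; the pmemobj_constr signature is the one declared
+ * in libpmemobj/base.h):
+ *
+ *	static int
+ *	foo_construct(PMEMobjpool *pop, void *ptr, void *arg)
+ *	{
+ *		struct foo *f = ptr;
+ *		f->value = *(int *)arg;
+ *		pmemobj_persist(pop, f, sizeof(*f));
+ *		return 0;	returning non-zero cancels the allocation
+ *	}
+ *
+ *	int v = 42;
+ *	pmemobj_alloc(pop, &oid, sizeof(struct foo), 1, foo_construct, &v);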
diff --git a/src/pmdk/src/include/libpmemobj/base.h b/src/pmdk/src/include/libpmemobj/base.h
new file mode 100644
index 000000000..ebb30d801
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/base.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/base.h -- definitions of base libpmemobj entry points
+ */
+
+#ifndef LIBPMEMOBJ_BASE_H
+#define LIBPMEMOBJ_BASE_H 1
+
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef _WIN32
+#include <pmemcompat.h>
+
+#ifndef PMDK_UTF8_API
+#define pmemobj_check_version pmemobj_check_versionW
+#define pmemobj_errormsg pmemobj_errormsgW
+#else
+#define pmemobj_check_version pmemobj_check_versionU
+#define pmemobj_errormsg pmemobj_errormsgU
+#endif
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * opaque type internal to libpmemobj
+ */
+typedef struct pmemobjpool PMEMobjpool;
+
+#define PMEMOBJ_MAX_ALLOC_SIZE ((size_t)0x3FFDFFFC0)
+
+/*
+ * allocation functions flags
+ */
+#define POBJ_FLAG_ZERO (((uint64_t)1) << 0)
+#define POBJ_FLAG_NO_FLUSH (((uint64_t)1) << 1)
+#define POBJ_FLAG_NO_SNAPSHOT (((uint64_t)1) << 2)
+#define POBJ_FLAG_ASSUME_INITIALIZED (((uint64_t)1) << 3)
+#define POBJ_FLAG_TX_NO_ABORT (((uint64_t)1) << 4)
+
+#define POBJ_CLASS_ID(id) (((uint64_t)(id)) << 48)
+#define POBJ_ARENA_ID(id) (((uint64_t)(id)) << 32)
+
+#define POBJ_XALLOC_CLASS_MASK ((((uint64_t)1 << 16) - 1) << 48)
+#define POBJ_XALLOC_ARENA_MASK ((((uint64_t)1 << 16) - 1) << 32)
+#define POBJ_XALLOC_ZERO POBJ_FLAG_ZERO
+#define POBJ_XALLOC_NO_FLUSH POBJ_FLAG_NO_FLUSH
+#define POBJ_XALLOC_NO_ABORT POBJ_FLAG_TX_NO_ABORT
+
+/*
+ * pmemobj_mem* flags
+ */
+#define PMEMOBJ_F_MEM_NODRAIN (1U << 0)
+
+#define PMEMOBJ_F_MEM_NONTEMPORAL (1U << 1)
+#define PMEMOBJ_F_MEM_TEMPORAL (1U << 2)
+
+#define PMEMOBJ_F_MEM_WC (1U << 3)
+#define PMEMOBJ_F_MEM_WB (1U << 4)
+
+#define PMEMOBJ_F_MEM_NOFLUSH (1U << 5)
+
+/*
+ * pmemobj_mem*, pmemobj_xflush & pmemobj_xpersist flags
+ */
+#define PMEMOBJ_F_RELAXED (1U << 31)
+
+/*
+ * Persistent memory object
+ */
+
+/*
+ * Object handle
+ */
+typedef struct pmemoid {
+	uint64_t pool_uuid_lo;
+	uint64_t off;
+} PMEMoid;
+
+static const PMEMoid OID_NULL = { 0, 0 };
+#define OID_IS_NULL(o)	((o).off == 0)
+#define OID_EQUALS(lhs, rhs)\
+((lhs).off == (rhs).off &&\
+ (lhs).pool_uuid_lo == (rhs).pool_uuid_lo)
+
+PMEMobjpool *pmemobj_pool_by_ptr(const void *addr);
+PMEMobjpool *pmemobj_pool_by_oid(PMEMoid oid);
+
+#ifndef _WIN32
+
+extern int _pobj_cache_invalidate;
+extern __thread struct _pobj_pcache {
+	PMEMobjpool *pop;
+	uint64_t uuid_lo;
+	int invalidate;
+} _pobj_cached_pool;
+
+/*
+ * Returns the direct pointer to an object.
+ */
+static inline void *
+pmemobj_direct_inline(PMEMoid oid)
+{
+	if (oid.off == 0 || oid.pool_uuid_lo == 0)
+		return NULL;
+
+	struct _pobj_pcache *cache = &_pobj_cached_pool;
+	if (_pobj_cache_invalidate != cache->invalidate ||
+			cache->uuid_lo != oid.pool_uuid_lo) {
+		cache->invalidate = _pobj_cache_invalidate;
+
+		if (!(cache->pop = pmemobj_pool_by_oid(oid))) {
+			cache->uuid_lo = 0;
+			return NULL;
+		}
+
+		cache->uuid_lo = oid.pool_uuid_lo;
+	}
+
+	return (void *)((uintptr_t)cache->pop + oid.off);
+}
+
+#endif /* _WIN32 */
+
+/*
+ * Returns the direct pointer to an object.
+ */
+#if defined(_WIN32) || defined(_PMEMOBJ_INTRNL) ||\
+	defined(PMEMOBJ_DIRECT_NON_INLINE)
+void *pmemobj_direct(PMEMoid oid);
+#else
+#define pmemobj_direct pmemobj_direct_inline
+#endif
+
+struct pmemvlt {
+	uint64_t runid;
+};
+
+#define PMEMvlt(T)\
+struct {\
+	struct pmemvlt vlt;\
+	T value;\
+}
+
+/*
+ * Returns a lazily initialized volatile variable. (EXPERIMENTAL)
+ */
+void *pmemobj_volatile(PMEMobjpool *pop, struct pmemvlt *vlt,
+	void *ptr, size_t size,
+	int (*constr)(void *ptr, void *arg), void *arg);
+
+/*
+ * Returns the OID of the object pointed to by addr.
+ */
+PMEMoid pmemobj_oid(const void *addr);
+
+/*
+ * Returns the number of usable bytes in the object. May be greater than
+ * the requested size of the object because of internal alignment.
+ *
+ * Can be used with objects allocated by any of the available methods.
+ */
+size_t pmemobj_alloc_usable_size(PMEMoid oid);
+
+/*
+ * Returns the type number of the object.
+ */
+uint64_t pmemobj_type_num(PMEMoid oid);
+
+/*
+ * Pmemobj-specific low-level memory manipulation functions.
+ *
+ * These functions are meant to be used with pmemobj pools, because they
+ * provide additional functionality specific to this type of pool. This may
+ * include, for example, replication support. They also take advantage of the
+ * knowledge of the type of memory in the pool (pmem/non-pmem) to ensure
+ * persistence.
+ */
+
+/*
+ * Pmemobj version of memcpy. Data copied is made persistent.
+ */
+void *pmemobj_memcpy_persist(PMEMobjpool *pop, void *dest, const void *src,
+	size_t len);
+
+/*
+ * Pmemobj version of memset. Data range set is made persistent.
+ */
+void *pmemobj_memset_persist(PMEMobjpool *pop, void *dest, int c, size_t len);
+
+/*
+ * Pmemobj version of memcpy. Data copied is made persistent (unless opted out
+ * using flags).
+ */
+void *pmemobj_memcpy(PMEMobjpool *pop, void *dest, const void *src, size_t len,
+	unsigned flags);
+
+/*
+ * Pmemobj version of memmove. Data copied is made persistent (unless opted out
+ * using flags).
+ */
+void *pmemobj_memmove(PMEMobjpool *pop, void *dest, const void *src, size_t len,
+	unsigned flags);
+
+/*
+ * Pmemobj version of memset. Data range set is made persistent (unless opted
+ * out using flags).
+ */
+void *pmemobj_memset(PMEMobjpool *pop, void *dest, int c, size_t len,
+	unsigned flags);
+
+/*
+ * Pmemobj version of pmem_persist.
+ */
+void pmemobj_persist(PMEMobjpool *pop, const void *addr, size_t len);
+
+/*
+ * Pmemobj version of pmem_persist with an additional flags argument.
+ */
+int pmemobj_xpersist(PMEMobjpool *pop, const void *addr, size_t len,
+	unsigned flags);
+
+/*
+ * Pmemobj version of pmem_flush.
+ */
+void pmemobj_flush(PMEMobjpool *pop, const void *addr, size_t len);
+
+/*
+ * Pmemobj version of pmem_flush with an additional flags argument.
+ */
+int pmemobj_xflush(PMEMobjpool *pop, const void *addr, size_t len,
+	unsigned flags);
+
+/*
+ * Pmemobj version of pmem_drain.
+ */
+void pmemobj_drain(PMEMobjpool *pop);
+
+/*
+ * Version checking.
+ */
+
+/*
+ * PMEMOBJ_MAJOR_VERSION and PMEMOBJ_MINOR_VERSION provide the current version
+ * of the libpmemobj API as provided by this header file. Applications can
+ * verify that the version available at run-time is compatible with the version
+ * used at compile-time by passing these defines to pmemobj_check_version().
+ */
+#define PMEMOBJ_MAJOR_VERSION 2
+#define PMEMOBJ_MINOR_VERSION 4
+
+#ifndef _WIN32
+const char *pmemobj_check_version(unsigned major_required,
+	unsigned minor_required);
+#else
+const char *pmemobj_check_versionU(unsigned major_required,
+	unsigned minor_required);
+const wchar_t *pmemobj_check_versionW(unsigned major_required,
+	unsigned minor_required);
+#endif
+
+/*
+ * Passing NULL to pmemobj_set_funcs() tells libpmemobj to continue to use the
+ * default for that function. The replacement functions must not make calls
+ * back into libpmemobj.
+ */
+void pmemobj_set_funcs(
+	void *(*malloc_func)(size_t size),
+	void (*free_func)(void *ptr),
+	void *(*realloc_func)(void *ptr, size_t size),
+	char *(*strdup_func)(const char *s));
+
+typedef int (*pmemobj_constr)(PMEMobjpool *pop, void *ptr, void *arg);
+
+/*
+ * (debug helper function) logs a notice message if used inside a transaction
+ */
+void _pobj_debug_notice(const char *func_name, const char *file, int line);
+
+#ifndef _WIN32
+const char *pmemobj_errormsg(void);
+#else
+const char *pmemobj_errormsgU(void);
+const wchar_t *pmemobj_errormsgW(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif	/* libpmemobj/base.h */
diff --git a/src/pmdk/src/include/libpmemobj/ctl.h b/src/pmdk/src/include/libpmemobj/ctl.h
new file mode 100644
index 000000000..12bbc5843
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/ctl.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+/*
+ * libpmemobj/ctl.h -- definitions of pmemobj_ctl related entry points
+ */
+
+#ifndef LIBPMEMOBJ_CTL_H
+#define LIBPMEMOBJ_CTL_H 1
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocation class interface
+ *
+ * When requesting an object from the allocator, the first step is to determine
+ * which allocation class best approximates the size of the object.
+ * Once found, the appropriate free list, called a bucket, for that
+ * class is selected in a fashion that minimizes contention between threads.
+ * Depending on the requested size and the allocation class, it might happen
+ * that the object size (including required metadata) would be bigger than the
+ * allocation class size - called the unit size. In those situations, the
+ * object is constructed from two or more units (up to 64).
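Editor's note (illustrative sketch, not part of the patch): the typical pattern with the entry points above is to translate a PMEMoid into a direct pointer, store through it, and then make the stores durable, either with a single combined call or with an explicit flush. A sketch, assuming "oid" refers to a valid object of at least 16 bytes in pool "pop":

    #include <string.h>
    #include <libpmemobj.h>

    static void
    update_record(PMEMobjpool *pop, PMEMoid oid)
    {
        void *dst = pmemobj_direct(oid);

        /* copy, flush and drain in a single call */
        pmemobj_memcpy_persist(pop, dst, "0123456789abcdef", 16);

        /* equivalent two-step form */
        memcpy(dst, "0123456789abcdef", 16);
        pmemobj_persist(pop, dst, 16);
    }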
+ *
+ * If the requested number of units cannot be retrieved from the selected
+ * bucket, the thread reaches out to the global, shared heap, which manages
+ * memory in 256 kilobyte chunks and gives it out in a best-fit fashion. This
+ * operation must be performed under an exclusive lock.
+ * Once the thread is in possession of a chunk, the lock is dropped, and the
+ * memory is split into units that repopulate the bucket.
+ *
+ * These are the CTL entry points that control allocation classes:
+ * - heap.alloc_class.[class_id].desc
+ *	Creates/retrieves allocation class information
+ *
+ * It's VERY important to remember that the allocation classes are a RUNTIME
+ * property of the allocator - they are NOT stored persistently in the pool.
+ * It's recommended to always create custom allocation classes immediately
+ * after creating or opening the pool, before any use.
+ * If there are existing objects created using a class that is no longer stored
+ * in the runtime state of the allocator, they can be normally freed, but
+ * allocating equivalent objects will be done using the allocation class that
+ * is currently defined for that size.
+ *
+ * Please see the libpmemobj man page for more information about entry points.
+ */
+
+/*
+ * Persistent allocation header
+ */
+enum pobj_header_type {
+	/*
+	 * 64-byte header used up until the version 1.3 of the library,
+	 * functionally equivalent to the compact header.
+	 * It's not recommended to create any new classes with this header.
+	 */
+	POBJ_HEADER_LEGACY,
+	/*
+	 * 16-byte header used by the default allocation classes. All library
+	 * metadata is by default allocated using this header.
+	 * Supports type numbers and variably sized allocations.
+	 */
+	POBJ_HEADER_COMPACT,
+	/*
+	 * 0-byte header with metadata stored exclusively in a bitmap. This
+	 * ensures that objects are allocated in memory contiguously and
+	 * without attached headers.
+	 * This can be used to create very small allocation classes, but it
+	 * does not support type numbers.
+	 * Additionally, allocations with this header can only span a single
+	 * unit.
+	 * Objects allocated with this header do show up when iterating through
+	 * the heap using the pmemobj_first/pmemobj_next functions, but have a
+	 * type_num equal to 0.
+	 */
+	POBJ_HEADER_NONE,
+
+	MAX_POBJ_HEADER_TYPES
+};
+
+/*
+ * Description of allocation classes
+ */
+struct pobj_alloc_class_desc {
+	/*
+	 * The number of bytes in a single unit of allocation. A single
+	 * allocation can span up to 64 units (or 1 in the case of no header).
+	 * If one creates an allocation class with a certain unit size and
+	 * forces it to handle bigger sizes, more than one unit
+	 * will be used.
+	 * For example, an allocation class with a compact header and a
+	 * 128-byte unit size will, for a request of 200 bytes, create a memory
+	 * block containing 256 bytes that spans two units. The usable size of
+	 * that allocation will be 240 bytes: 2 * 128 - 16 (header).
+	 */
+	size_t unit_size;
+
+	/*
+	 * Desired alignment of objects from the allocation class.
+	 * If non-zero, must be a power of two and an even divisor of the unit
+	 * size.
+	 *
+	 * All allocation classes have a default alignment of 64. User data
+	 * alignment is affected by the size of the header. For the compact
+	 * one, this means that the alignment is 48 bytes.
+	 */
+	size_t alignment;
+
+	/*
+	 * The minimum number of units that must be present in a
+	 * single, contiguous, memory block.
+	 * Those blocks (internally called runs) are fetched on demand from
+	 * the heap. Accessing that global state is a serialization point for
+	 * the allocator and thus it is imperative for performance and
+	 * scalability that a reasonable amount of memory is fetched in a
+	 * single call.
+	 * Threads generally do not share memory blocks from which they
+	 * allocate, but blocks do go back to the global heap if they are no
+	 * longer actively used for allocation.
+	 */
+	unsigned units_per_block;
+
+	/*
+	 * The header of allocations that originate from this allocation class.
+	 */
+	enum pobj_header_type header_type;
+
+	/*
+	 * The identifier of this allocation class.
+	 */
+	unsigned class_id;
+};
+
+enum pobj_stats_enabled {
+	POBJ_STATS_ENABLED_TRANSIENT,
+	POBJ_STATS_ENABLED_BOTH,
+	POBJ_STATS_ENABLED_PERSISTENT,
+	POBJ_STATS_DISABLED,
+};
+
+#ifndef _WIN32
+/* EXPERIMENTAL */
+int pmemobj_ctl_get(PMEMobjpool *pop, const char *name, void *arg);
+int pmemobj_ctl_set(PMEMobjpool *pop, const char *name, void *arg);
+int pmemobj_ctl_exec(PMEMobjpool *pop, const char *name, void *arg);
+#else
+int pmemobj_ctl_getU(PMEMobjpool *pop, const char *name, void *arg);
+int pmemobj_ctl_getW(PMEMobjpool *pop, const wchar_t *name, void *arg);
+int pmemobj_ctl_setU(PMEMobjpool *pop, const char *name, void *arg);
+int pmemobj_ctl_setW(PMEMobjpool *pop, const wchar_t *name, void *arg);
+int pmemobj_ctl_execU(PMEMobjpool *pop, const char *name, void *arg);
+int pmemobj_ctl_execW(PMEMobjpool *pop, const wchar_t *name, void *arg);
+
+#ifndef PMDK_UTF8_API
+#define pmemobj_ctl_get pmemobj_ctl_getW
+#define pmemobj_ctl_set pmemobj_ctl_setW
+#define pmemobj_ctl_exec pmemobj_ctl_execW
+#else
+#define pmemobj_ctl_get pmemobj_ctl_getU
+#define pmemobj_ctl_set pmemobj_ctl_setU
+#define pmemobj_ctl_exec pmemobj_ctl_execU
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif	/* libpmemobj/ctl.h */
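Editor's note (illustrative sketch, not part of the patch): a custom allocation class is registered by filling in struct pobj_alloc_class_desc and writing it through the ctl interface; the "heap.alloc_class.new.desc" entry point picks a free class_id and stores it back into the descriptor (see the pmemobj_ctl man page). A sketch with arbitrary sizes, assuming an open pool "pop":

    #include <libpmemobj.h>

    static int
    register_class(PMEMobjpool *pop)
    {
        struct pobj_alloc_class_desc desc = {
            .unit_size = 128,
            .alignment = 0, /* no additional alignment requested */
            .units_per_block = 1000,
            .header_type = POBJ_HEADER_COMPACT,
        };

        if (pmemobj_ctl_set(pop, "heap.alloc_class.new.desc", &desc))
            return -1;

        /*
         * desc.class_id now holds the assigned id; pass it to allocations
         * via POBJ_CLASS_ID(desc.class_id) in the flags argument.
         */
        return 0;
    }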
diff --git a/src/pmdk/src/include/libpmemobj/iterator.h b/src/pmdk/src/include/libpmemobj/iterator.h
new file mode 100644
index 000000000..901e3dda8
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/iterator.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/iterator.h -- definitions of libpmemobj iterator macros
+ */
+
+#ifndef LIBPMEMOBJ_ITERATOR_H
+#define LIBPMEMOBJ_ITERATOR_H 1
+
+#include <libpmemobj/iterator_base.h>
+#include <libpmemobj/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline PMEMoid
+POBJ_FIRST_TYPE_NUM(PMEMobjpool *pop, uint64_t type_num)
+{
+	PMEMoid _pobj_ret = pmemobj_first(pop);
+
+	while (!OID_IS_NULL(_pobj_ret) &&
+			pmemobj_type_num(_pobj_ret) != type_num) {
+		_pobj_ret = pmemobj_next(_pobj_ret);
+	}
+	return _pobj_ret;
+}
+
+static inline PMEMoid
+POBJ_NEXT_TYPE_NUM(PMEMoid o)
+{
+	PMEMoid _pobj_ret = o;
+
+	do {
+		_pobj_ret = pmemobj_next(_pobj_ret);
+	} while (!OID_IS_NULL(_pobj_ret) &&
+			pmemobj_type_num(_pobj_ret) != pmemobj_type_num(o));
+	return _pobj_ret;
+}
+
+#define POBJ_FIRST(pop, t) ((TOID(t))POBJ_FIRST_TYPE_NUM(pop, TOID_TYPE_NUM(t)))
+
+#define POBJ_NEXT(o) ((__typeof__(o))POBJ_NEXT_TYPE_NUM((o).oid))
+
+/*
+ * Iterates through every existing allocated object.
+ */
+#define POBJ_FOREACH(pop, varoid)\
+for (_pobj_debug_notice("POBJ_FOREACH", __FILE__, __LINE__),\
+	varoid = pmemobj_first(pop);\
+	(varoid).off != 0; varoid = pmemobj_next(varoid))
+
+/*
+ * Safe variant of POBJ_FOREACH in which pmemobj_free on varoid is allowed
+ */
+#define POBJ_FOREACH_SAFE(pop, varoid, nvaroid)\
+for (_pobj_debug_notice("POBJ_FOREACH_SAFE", __FILE__, __LINE__),\
+	varoid = pmemobj_first(pop);\
+	(varoid).off != 0 && (nvaroid = pmemobj_next(varoid), 1);\
+	varoid = nvaroid)
+
+/*
+ * Iterates through every object of the specified type.
+ */
+#define POBJ_FOREACH_TYPE(pop, var)\
+POBJ_FOREACH(pop, (var).oid)\
+if (pmemobj_type_num((var).oid) == TOID_TYPE_NUM_OF(var))
+
+/*
+ * Safe variant of POBJ_FOREACH_TYPE in which pmemobj_free on var
+ * is allowed.
+ */
+#define POBJ_FOREACH_SAFE_TYPE(pop, var, nvar)\
+POBJ_FOREACH_SAFE(pop, (var).oid, (nvar).oid)\
+if (pmemobj_type_num((var).oid) == TOID_TYPE_NUM_OF(var))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/iterator.h */
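Editor's note (illustrative sketch, not part of the patch): the iterator macros above treat the pool as a flat collection of allocated objects. A sketch that counts every live object, and a variant that frees objects while iterating (which requires the _SAFE form):

    #include <libpmemobj.h>

    static size_t
    count_objects(PMEMobjpool *pop)
    {
        size_t n = 0;
        PMEMoid oid;

        POBJ_FOREACH(pop, oid)
            n++;
        return n;
    }

    static void
    free_all(PMEMobjpool *pop)
    {
        PMEMoid oid, next;

        POBJ_FOREACH_SAFE(pop, oid, next)
            pmemobj_free(&oid);
    }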
diff --git a/src/pmdk/src/include/libpmemobj/iterator_base.h b/src/pmdk/src/include/libpmemobj/iterator_base.h
new file mode 100644
index 000000000..76076bc86
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/iterator_base.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/iterator_base.h -- definitions of libpmemobj iterator entry points
+ */
+
+#ifndef LIBPMEMOBJ_ITERATOR_BASE_H
+#define LIBPMEMOBJ_ITERATOR_BASE_H 1
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The following functions allow access to the entire collection of objects.
+ *
+ * Use in conjunction with non-transactional allocations. A pmemobj pool acts
+ * as a generic container (list) of objects that are not assigned to any
+ * user-defined data structures.
+ */
+
+/*
+ * Returns the first object in the pool.
+ */
+PMEMoid pmemobj_first(PMEMobjpool *pop);
+
+/*
+ * Returns the object following oid, regardless of its type.
+ */
+PMEMoid pmemobj_next(PMEMoid oid);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/iterator_base.h */
diff --git a/src/pmdk/src/include/libpmemobj/lists_atomic.h b/src/pmdk/src/include/libpmemobj/lists_atomic.h
new file mode 100644
index 000000000..8b58f23d1
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/lists_atomic.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/lists_atomic.h -- definitions of libpmemobj atomic lists macros
+ */
+
+#ifndef LIBPMEMOBJ_LISTS_ATOMIC_H
+#define LIBPMEMOBJ_LISTS_ATOMIC_H 1
+
+#include <libpmemobj/lists_atomic_base.h>
+#include <libpmemobj/thread.h>
+#include <libpmemobj/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Non-transactional persistent atomic circular doubly-linked list
+ */
+#define POBJ_LIST_ENTRY(type)\
+struct {\
+	TOID(type) pe_next;\
+	TOID(type) pe_prev;\
+}
+
+#define POBJ_LIST_HEAD(name, type)\
+struct name {\
+	TOID(type) pe_first;\
+	PMEMmutex lock;\
+}
+
+#define POBJ_LIST_FIRST(head) ((head)->pe_first)
+#define POBJ_LIST_LAST(head, field) (\
+TOID_IS_NULL((head)->pe_first) ?\
+(head)->pe_first :\
+D_RO((head)->pe_first)->field.pe_prev)
+
+#define POBJ_LIST_EMPTY(head) (TOID_IS_NULL((head)->pe_first))
+#define POBJ_LIST_NEXT(elm, field) (D_RO(elm)->field.pe_next)
+#define POBJ_LIST_PREV(elm, field) (D_RO(elm)->field.pe_prev)
+#define POBJ_LIST_DEST_HEAD 1
+#define POBJ_LIST_DEST_TAIL 0
+#define POBJ_LIST_DEST_BEFORE 1
+#define POBJ_LIST_DEST_AFTER 0
+
+#define POBJ_LIST_FOREACH(var, head, field)\
+for (_pobj_debug_notice("POBJ_LIST_FOREACH", __FILE__, __LINE__),\
+	(var) = POBJ_LIST_FIRST((head));\
+	TOID_IS_NULL((var)) == 0;\
+	TOID_EQUALS(POBJ_LIST_NEXT((var), field),\
+	POBJ_LIST_FIRST((head))) ?\
+	TOID_ASSIGN((var), OID_NULL) :\
+	((var) = POBJ_LIST_NEXT((var), field)))
+
+#define POBJ_LIST_FOREACH_REVERSE(var, head, field)\
+for (_pobj_debug_notice("POBJ_LIST_FOREACH_REVERSE", __FILE__, __LINE__),\
+	(var) = POBJ_LIST_LAST((head), field);\
+	TOID_IS_NULL((var)) == 0;\
+	TOID_EQUALS(POBJ_LIST_PREV((var), field),\
+	POBJ_LIST_LAST((head), field)) ?\
+	TOID_ASSIGN((var), OID_NULL) :\
+	((var) = POBJ_LIST_PREV((var), field)))
+
+#define POBJ_LIST_INSERT_HEAD(pop, head, elm, field)\
+pmemobj_list_insert((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), OID_NULL,\
+	POBJ_LIST_DEST_HEAD, (elm).oid)
+
+#define POBJ_LIST_INSERT_TAIL(pop, head, elm, field)\
+pmemobj_list_insert((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), OID_NULL,\
+	POBJ_LIST_DEST_TAIL, (elm).oid)
+
+#define POBJ_LIST_INSERT_AFTER(pop, head, listelm, elm, field)\
+pmemobj_list_insert((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), (listelm).oid,\
+	0 /* after */, (elm).oid)
+
+#define POBJ_LIST_INSERT_BEFORE(pop, head, listelm, elm, field)\
+pmemobj_list_insert((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), (listelm).oid,\
+	1 /* before */, (elm).oid)
+
+#define POBJ_LIST_INSERT_NEW_HEAD(pop, head, field, size, constr, arg)\
+pmemobj_list_insert_new((pop),\
+	TOID_OFFSETOF((head)->pe_first, field),\
+	(head), OID_NULL, POBJ_LIST_DEST_HEAD, (size),\
+	TOID_TYPE_NUM_OF((head)->pe_first), (constr), (arg))
+
+#define POBJ_LIST_INSERT_NEW_TAIL(pop, head, field, size, constr, arg)\
+pmemobj_list_insert_new((pop),\
+	TOID_OFFSETOF((head)->pe_first, field),\
+	(head), OID_NULL, POBJ_LIST_DEST_TAIL, (size),\
+	TOID_TYPE_NUM_OF((head)->pe_first), (constr), (arg))
+
+#define POBJ_LIST_INSERT_NEW_AFTER(pop, head, listelm, field, size,\
+	constr, arg)\
+pmemobj_list_insert_new((pop),\
+	TOID_OFFSETOF((head)->pe_first, field),\
+	(head), (listelm).oid, 0 /* after */, (size),\
+	TOID_TYPE_NUM_OF((head)->pe_first), (constr), (arg))
+
+#define POBJ_LIST_INSERT_NEW_BEFORE(pop, head, listelm, field, size,\
+	constr, arg)\
+pmemobj_list_insert_new((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), (listelm).oid, 1 /* before */, (size),\
+	TOID_TYPE_NUM_OF((head)->pe_first), (constr), (arg))
+
+#define POBJ_LIST_REMOVE(pop, head, elm, field)\
+pmemobj_list_remove((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), (elm).oid, 0 /* no free */)
+
+#define POBJ_LIST_REMOVE_FREE(pop, head, elm, field)\
+pmemobj_list_remove((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head), (elm).oid, 1 /* free */)
+
+#define POBJ_LIST_MOVE_ELEMENT_HEAD(pop, head, head_new, elm, field, field_new)\
+pmemobj_list_move((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head_new), field_new),\
+	(head_new), OID_NULL, POBJ_LIST_DEST_HEAD, (elm).oid)
+
+#define POBJ_LIST_MOVE_ELEMENT_TAIL(pop, head, head_new, elm, field, field_new)\
+pmemobj_list_move((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head_new), field_new),\
+	(head_new), OID_NULL, POBJ_LIST_DEST_TAIL, (elm).oid)
+
+#define POBJ_LIST_MOVE_ELEMENT_AFTER(pop,\
+	head, head_new, listelm, elm, field, field_new)\
+pmemobj_list_move((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head_new), field_new),\
+	(head_new),\
+	(listelm).oid,\
+	0 /* after */, (elm).oid)
+
+#define POBJ_LIST_MOVE_ELEMENT_BEFORE(pop,\
+	head, head_new, listelm, elm, field, field_new)\
+pmemobj_list_move((pop),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head), field),\
+	(head),\
+	TOID_OFFSETOF(POBJ_LIST_FIRST(head_new), field_new),\
+	(head_new),\
+	(listelm).oid,\
+	1 /* before */, (elm).oid)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/lists_atomic.h */
diff --git a/src/pmdk/src/include/libpmemobj/lists_atomic_base.h b/src/pmdk/src/include/libpmemobj/lists_atomic_base.h
new file mode 100644
index 000000000..be80d1db5
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/lists_atomic_base.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/lists_atomic_base.h -- definitions of libpmemobj atomic lists
+ */
+
+#ifndef LIBPMEMOBJ_LISTS_ATOMIC_BASE_H
+#define LIBPMEMOBJ_LISTS_ATOMIC_BASE_H 1
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Non-transactional persistent atomic circular doubly-linked list
+ */
+
+int pmemobj_list_insert(PMEMobjpool *pop, size_t pe_offset, void *head,
+	PMEMoid dest, int before, PMEMoid oid);
+
+PMEMoid pmemobj_list_insert_new(PMEMobjpool *pop, size_t pe_offset, void *head,
+	PMEMoid dest, int before, size_t size, uint64_t type_num,
+	pmemobj_constr constructor, void *arg);
+
+int pmemobj_list_remove(PMEMobjpool *pop, size_t pe_offset, void *head,
+	PMEMoid oid, int free);
+
+int pmemobj_list_move(PMEMobjpool *pop, size_t pe_old_offset,
+	void *head_old, size_t pe_new_offset, void *head_new,
+	PMEMoid dest, int before, PMEMoid oid);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/lists_atomic_base.h */
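Editor's note (illustrative sketch, not part of the patch): a sketch of the atomic list macros with a hypothetical node type. The list head (which embeds a PMEMmutex) must itself reside in the pool, e.g. inside the root object; POBJ_LIST_INSERT_NEW_HEAD allocates and links the node in one failure-atomic step:

    #include <libpmemobj.h>

    struct node;
    TOID_DECLARE(struct node, 1);

    POBJ_LIST_HEAD(plist, struct node);

    struct node {
        POBJ_LIST_ENTRY(struct node) entries;
        uint64_t value;
    };

    static PMEMoid
    push_node(PMEMobjpool *pop, struct plist *head)
    {
        return POBJ_LIST_INSERT_NEW_HEAD(pop, head, entries,
                sizeof(struct node), NULL /* optional constructor */, NULL);
    }

    static uint64_t
    sum_values(struct plist *head)
    {
        uint64_t sum = 0;
        TOID(struct node) it;

        POBJ_LIST_FOREACH(it, head, entries)
            sum += D_RO(it)->value;
        return sum;
    }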
diff --git a/src/pmdk/src/include/libpmemobj/pool.h b/src/pmdk/src/include/libpmemobj/pool.h
new file mode 100644
index 000000000..95d396848
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/pool.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/pool.h -- definitions of libpmemobj pool macros
+ */
+
+#ifndef LIBPMEMOBJ_POOL_H
+#define LIBPMEMOBJ_POOL_H 1
+
+#include <libpmemobj/pool_base.h>
+#include <libpmemobj/types.h>
+
+#define POBJ_ROOT(pop, t) (\
+(TOID(t))pmemobj_root((pop), sizeof(t)))
+
+#endif	/* libpmemobj/pool.h */
diff --git a/src/pmdk/src/include/libpmemobj/pool_base.h b/src/pmdk/src/include/libpmemobj/pool_base.h
new file mode 100644
index 000000000..303e69f34
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/pool_base.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/pool_base.h -- definitions of libpmemobj pool entry points
+ */
+
+#ifndef LIBPMEMOBJ_POOL_BASE_H
+#define LIBPMEMOBJ_POOL_BASE_H 1
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PMEMOBJ_MIN_POOL ((size_t)(1024 * 1024 * 8)) /* 8 MiB */
+
+/*
+ * This limit is set arbitrarily to incorporate a pool header and required
+ * alignment plus supply.
+ */
+#define PMEMOBJ_MIN_PART ((size_t)(1024 * 1024 * 2)) /* 2 MiB */
+
+/*
+ * Pool management.
+ */
+#ifdef _WIN32
+#ifndef PMDK_UTF8_API
+#define pmemobj_open pmemobj_openW
+#define pmemobj_create pmemobj_createW
+#define pmemobj_check pmemobj_checkW
+#else
+#define pmemobj_open pmemobj_openU
+#define pmemobj_create pmemobj_createU
+#define pmemobj_check pmemobj_checkU
+#endif
+#endif
+
+#ifndef _WIN32
+PMEMobjpool *pmemobj_open(const char *path, const char *layout);
+#else
+PMEMobjpool *pmemobj_openU(const char *path, const char *layout);
+PMEMobjpool *pmemobj_openW(const wchar_t *path, const wchar_t *layout);
+#endif
+
+#ifndef _WIN32
+PMEMobjpool *pmemobj_create(const char *path, const char *layout,
+	size_t poolsize, mode_t mode);
+#else
+PMEMobjpool *pmemobj_createU(const char *path, const char *layout,
+	size_t poolsize, mode_t mode);
+PMEMobjpool *pmemobj_createW(const wchar_t *path, const wchar_t *layout,
+	size_t poolsize, mode_t mode);
+#endif
+
+#ifndef _WIN32
+int pmemobj_check(const char *path, const char *layout);
+#else
+int pmemobj_checkU(const char *path, const char *layout);
+int pmemobj_checkW(const wchar_t *path, const wchar_t *layout);
+#endif
+
+void pmemobj_close(PMEMobjpool *pop);
+/*
+ * If called for the first time on a newly created pool, the root object
+ * of the given size is allocated. Otherwise, it returns the existing root
+ * object. In such a case, the size must not be less than the actual root
+ * object size stored in the pool. If it's larger, the root object is
+ * automatically resized.
+ *
+ * This function is thread-safe.
+ */
+PMEMoid pmemobj_root(PMEMobjpool *pop, size_t size);
+
+/*
+ * Same as above, but calls the constructor function when the object is first
+ * created and on all subsequent reallocations.
+ */
+PMEMoid pmemobj_root_construct(PMEMobjpool *pop, size_t size,
+	pmemobj_constr constructor, void *arg);
+
+/*
+ * Returns the size in bytes of the root object. Always equal to the requested
+ * size.
+ */
+size_t pmemobj_root_size(PMEMobjpool *pop);
+
+/*
+ * Sets a volatile pointer to the user data for the specified pool.
+ */
+void pmemobj_set_user_data(PMEMobjpool *pop, void *data);
+
+/*
+ * Gets the volatile pointer to the user data associated with the specified
+ * pool.
+ */
+void *pmemobj_get_user_data(PMEMobjpool *pop);
+
+#ifdef __cplusplus
+}
+#endif
+#endif	/* libpmemobj/pool_base.h */
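Editor's note (illustrative sketch, not part of the patch): a typical pool lifecycle using the entry points above. The layout string and root structure are made up for the example; the root object is allocated on the first pmemobj_root call and merely retrieved afterwards:

    #include <stdio.h>
    #include <libpmemobj.h>

    struct my_root {
        uint64_t counter;
    };

    static PMEMobjpool *
    open_or_create(const char *path)
    {
        PMEMobjpool *pop = pmemobj_create(path, "example_layout",
                PMEMOBJ_MIN_POOL, 0666);
        if (pop == NULL)
            pop = pmemobj_open(path, "example_layout");
        if (pop == NULL) {
            fprintf(stderr, "%s\n", pmemobj_errormsg());
            return NULL;
        }

        PMEMoid root = pmemobj_root(pop, sizeof(struct my_root));
        (void) root; /* OID_NULL only if the allocation failed */
        return pop;
    }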
diff --git a/src/pmdk/src/include/libpmemobj/thread.h b/src/pmdk/src/include/libpmemobj/thread.h
new file mode 100644
index 000000000..23bc4701b
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/thread.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/thread.h -- definitions of libpmemobj thread/locking entry points
+ */
+
+#ifndef LIBPMEMOBJ_THREAD_H
+#define LIBPMEMOBJ_THREAD_H 1
+
+#include <time.h>
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Locking.
+ */
+#define _POBJ_CL_SIZE 64 /* cache line size */
+
+typedef union {
+	long long align;
+	char padding[_POBJ_CL_SIZE];
+} PMEMmutex;
+
+typedef union {
+	long long align;
+	char padding[_POBJ_CL_SIZE];
+} PMEMrwlock;
+
+typedef union {
+	long long align;
+	char padding[_POBJ_CL_SIZE];
+} PMEMcond;
+
+void pmemobj_mutex_zero(PMEMobjpool *pop, PMEMmutex *mutexp);
+int pmemobj_mutex_lock(PMEMobjpool *pop, PMEMmutex *mutexp);
+int pmemobj_mutex_timedlock(PMEMobjpool *pop, PMEMmutex *__restrict mutexp,
+	const struct timespec *__restrict abs_timeout);
+int pmemobj_mutex_trylock(PMEMobjpool *pop, PMEMmutex *mutexp);
+int pmemobj_mutex_unlock(PMEMobjpool *pop, PMEMmutex *mutexp);
+
+void pmemobj_rwlock_zero(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+int pmemobj_rwlock_rdlock(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+int pmemobj_rwlock_wrlock(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+int pmemobj_rwlock_timedrdlock(PMEMobjpool *pop,
+	PMEMrwlock *__restrict rwlockp,
+	const struct timespec *__restrict abs_timeout);
+int pmemobj_rwlock_timedwrlock(PMEMobjpool *pop,
+	PMEMrwlock *__restrict rwlockp,
+	const struct timespec *__restrict abs_timeout);
+int pmemobj_rwlock_tryrdlock(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+int pmemobj_rwlock_trywrlock(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+int pmemobj_rwlock_unlock(PMEMobjpool *pop, PMEMrwlock *rwlockp);
+
+void pmemobj_cond_zero(PMEMobjpool *pop, PMEMcond *condp);
+int pmemobj_cond_broadcast(PMEMobjpool *pop, PMEMcond *condp);
+int pmemobj_cond_signal(PMEMobjpool *pop, PMEMcond *condp);
+int pmemobj_cond_timedwait(PMEMobjpool *pop, PMEMcond *__restrict condp,
+	PMEMmutex *__restrict mutexp,
+	const struct timespec *__restrict abs_timeout);
+int pmemobj_cond_wait(PMEMobjpool *pop, PMEMcond *condp,
+	PMEMmutex *__restrict mutexp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/thread.h */
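Editor's note (illustrative sketch, not part of the patch): unlike plain pthread locks, the lock types above are meant to live inside the pool next to the data they guard, and libpmemobj revives them automatically when a pool is reopened after a crash. A sketch, assuming "c" points into an open pool "pop":

    #include <libpmemobj.h>

    struct counter {
        PMEMmutex lock; /* pmem-resident, no explicit init required */
        uint64_t value;
    };

    static int
    counter_bump(PMEMobjpool *pop, struct counter *c)
    {
        if (pmemobj_mutex_lock(pop, &c->lock))
            return -1;
        c->value++;
        pmemobj_persist(pop, &c->value, sizeof(c->value));
        return pmemobj_mutex_unlock(pop, &c->lock);
    }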
diff --git a/src/pmdk/src/include/libpmemobj/tx.h b/src/pmdk/src/include/libpmemobj/tx.h
new file mode 100644
index 000000000..ab9580338
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/tx.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/tx.h -- definitions of libpmemobj transactional macros
+ */
+
+#ifndef LIBPMEMOBJ_TX_H
+#define LIBPMEMOBJ_TX_H 1
+
+#include <errno.h>
+#include <string.h>
+
+#include <libpmemobj/tx_base.h>
+#include <libpmemobj/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef POBJ_TX_CRASH_ON_NO_ONABORT
+#define TX_ONABORT_CHECK do {\
+		if (_stage == TX_STAGE_ONABORT)\
+			abort();\
+	} while (0)
+#else
+#define TX_ONABORT_CHECK do {} while (0)
+#endif
+
+#define _POBJ_TX_BEGIN(pop, ...)\
+{\
+	jmp_buf _tx_env;\
+	enum pobj_tx_stage _stage;\
+	int _pobj_errno;\
+	if (setjmp(_tx_env)) {\
+		errno = pmemobj_tx_errno();\
+	} else {\
+		_pobj_errno = pmemobj_tx_begin(pop, _tx_env, __VA_ARGS__,\
+				TX_PARAM_NONE);\
+		if (_pobj_errno)\
+			errno = _pobj_errno;\
+	}\
+	while ((_stage = pmemobj_tx_stage()) != TX_STAGE_NONE) {\
+		switch (_stage) {\
+			case TX_STAGE_WORK:
+
+#define TX_BEGIN_PARAM(pop, ...)\
+_POBJ_TX_BEGIN(pop, ##__VA_ARGS__)
+
+#define TX_BEGIN_LOCK TX_BEGIN_PARAM
+
+/* Just to let the compiler warn when an incompatible function pointer is
+ * used */
+static inline pmemobj_tx_callback
+_pobj_validate_cb_sig(pmemobj_tx_callback cb)
+{
+	return cb;
+}
+
+#define TX_BEGIN_CB(pop, cb, arg, ...) _POBJ_TX_BEGIN(pop, TX_PARAM_CB,\
+		_pobj_validate_cb_sig(cb), arg, ##__VA_ARGS__)
+
+#define TX_BEGIN(pop) _POBJ_TX_BEGIN(pop, TX_PARAM_NONE)
+
+#define TX_ONABORT\
+		pmemobj_tx_process();\
+		break;\
+			case TX_STAGE_ONABORT:
+
+#define TX_ONCOMMIT\
+		pmemobj_tx_process();\
+		break;\
+			case TX_STAGE_ONCOMMIT:
+
+#define TX_FINALLY\
+		pmemobj_tx_process();\
+		break;\
+			case TX_STAGE_FINALLY:
+
+#define TX_END\
+		pmemobj_tx_process();\
+		break;\
+			default:\
+				TX_ONABORT_CHECK;\
+				pmemobj_tx_process();\
+				break;\
+		}\
+	}\
+	_pobj_errno = pmemobj_tx_end();\
+	if (_pobj_errno)\
+		errno = _pobj_errno;\
+}
+
+#define TX_ADD(o)\
+pmemobj_tx_add_range((o).oid, 0, sizeof(*(o)._type))
+
+#define TX_ADD_FIELD(o, field)\
+	TX_ADD_DIRECT(&(D_RO(o)->field))
+
+#define TX_ADD_DIRECT(p)\
+pmemobj_tx_add_range_direct(p, sizeof(*(p)))
+
+#define TX_ADD_FIELD_DIRECT(p, field)\
+pmemobj_tx_add_range_direct(&(p)->field, sizeof((p)->field))
+
+#define TX_XADD(o, flags)\
+pmemobj_tx_xadd_range((o).oid, 0, sizeof(*(o)._type), flags)
+
+#define TX_XADD_FIELD(o, field, flags)\
+	TX_XADD_DIRECT(&(D_RO(o)->field), flags)
+
+#define TX_XADD_DIRECT(p, flags)\
+pmemobj_tx_xadd_range_direct(p, sizeof(*(p)), flags)
+
+#define TX_XADD_FIELD_DIRECT(p, field, flags)\
+pmemobj_tx_xadd_range_direct(&(p)->field, sizeof((p)->field), flags)
+
+#define TX_NEW(t)\
+((TOID(t))pmemobj_tx_alloc(sizeof(t), TOID_TYPE_NUM(t)))
+
+#define TX_ALLOC(t, size)\
+((TOID(t))pmemobj_tx_alloc(size, TOID_TYPE_NUM(t)))
+
+#define TX_ZNEW(t)\
+((TOID(t))pmemobj_tx_zalloc(sizeof(t), TOID_TYPE_NUM(t)))
+
+#define TX_ZALLOC(t, size)\
+((TOID(t))pmemobj_tx_zalloc(size, TOID_TYPE_NUM(t)))
+
+#define TX_XALLOC(t, size, flags)\
+((TOID(t))pmemobj_tx_xalloc(size, TOID_TYPE_NUM(t), flags))
+
+/* XXX - not available when compiled with VC++ as C code (/TC) */
+#if !defined(_MSC_VER) || defined(__cplusplus)
+#define TX_REALLOC(o, size)\
+((__typeof__(o))pmemobj_tx_realloc((o).oid, size, TOID_TYPE_NUM_OF(o)))
+
+#define TX_ZREALLOC(o, size)\
+((__typeof__(o))pmemobj_tx_zrealloc((o).oid, size, TOID_TYPE_NUM_OF(o)))
+#endif /* !defined(_MSC_VER) || defined(__cplusplus) */
+
+#define TX_STRDUP(s, type_num)\
+pmemobj_tx_strdup(s, type_num)
+
+#define TX_XSTRDUP(s, type_num, flags)\
+pmemobj_tx_xstrdup(s, type_num, flags)
+
+#define TX_WCSDUP(s, type_num)\
+pmemobj_tx_wcsdup(s, type_num)
+
+#define TX_XWCSDUP(s, type_num, flags)\
+pmemobj_tx_xwcsdup(s, type_num, flags)
+
+#define TX_FREE(o)\
+pmemobj_tx_free((o).oid)
+
+#define TX_XFREE(o, flags)\
+pmemobj_tx_xfree((o).oid, flags)
+
+#define TX_SET(o, field, value) (\
+	TX_ADD_FIELD(o, field),\
+	D_RW(o)->field = (value))
+
+#define TX_SET_DIRECT(p, field, value) (\
+	TX_ADD_FIELD_DIRECT(p, field),\
+	(p)->field = (value))
+
+static inline void *
+TX_MEMCPY(void *dest, const void *src, size_t num)
+{
+	pmemobj_tx_add_range_direct(dest, num);
+	return memcpy(dest, src, num);
+}
+
+static inline void *
+TX_MEMSET(void *dest, int c, size_t num)
+{
+	pmemobj_tx_add_range_direct(dest, num);
+	return memset(dest, c, num);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/tx.h */
diff --git a/src/pmdk/src/include/libpmemobj/tx_base.h b/src/pmdk/src/include/libpmemobj/tx_base.h
new file mode 100644
index 000000000..3bb8032e5
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/tx_base.h
@@ -0,0 +1,450 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/tx_base.h -- definitions of libpmemobj transactional entry points
+ */
+
+#ifndef LIBPMEMOBJ_TX_BASE_H
+#define LIBPMEMOBJ_TX_BASE_H 1
+
+#include <setjmp.h>
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Transactions
+ *
+ * Stages are changed only by the pmemobj_tx_* functions; each transition
+ * to TX_STAGE_ONABORT is followed by a longjmp to the jmp_buf provided in
+ * the pmemobj_tx_begin function.
+ */
+enum pobj_tx_stage {
+	TX_STAGE_NONE,		/* no transaction in this thread */
+	TX_STAGE_WORK,		/* transaction in progress */
+	TX_STAGE_ONCOMMIT,	/* successfully committed */
+	TX_STAGE_ONABORT,	/* tx_begin failed or transaction aborted */
+	TX_STAGE_FINALLY,	/* always called */
+
+	MAX_TX_STAGE
+};
+
+/*
+ * Always returns the current transaction stage for a thread.
+ */
+enum pobj_tx_stage pmemobj_tx_stage(void);
+
+enum pobj_tx_param {
+	TX_PARAM_NONE,
+	TX_PARAM_MUTEX,	 /* PMEMmutex */
+	TX_PARAM_RWLOCK, /* PMEMrwlock */
+	TX_PARAM_CB,	 /* pmemobj_tx_callback cb, void *arg */
+};
+
+enum pobj_log_type {
+	TX_LOG_TYPE_SNAPSHOT,
+	TX_LOG_TYPE_INTENT,
+};
+
+enum pobj_tx_failure_behavior {
+	POBJ_TX_FAILURE_ABORT,
+	POBJ_TX_FAILURE_RETURN,
+};
+
+#if !defined(pmdk_use_attr_deprec_with_msg) && defined(__COVERITY__)
+#define pmdk_use_attr_deprec_with_msg 0
+#endif
+
+#if !defined(pmdk_use_attr_deprec_with_msg) && defined(__clang__)
+#if __has_extension(attribute_deprecated_with_message)
+#define pmdk_use_attr_deprec_with_msg 1
+#else
+#define pmdk_use_attr_deprec_with_msg 0
+#endif
+#endif
+
+#if !defined(pmdk_use_attr_deprec_with_msg) && \
+		defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if __GNUC__ * 100 + __GNUC_MINOR__ >= 601 /* 6.1 */
+#define pmdk_use_attr_deprec_with_msg 1
+#else
+#define pmdk_use_attr_deprec_with_msg 0
+#endif
+#endif
+
+#if !defined(pmdk_use_attr_deprec_with_msg)
+#define pmdk_use_attr_deprec_with_msg 0
+#endif
+
+#if pmdk_use_attr_deprec_with_msg
+#define tx_lock_deprecated __attribute__((deprecated(\
+		"enum pobj_tx_lock is deprecated, use enum pobj_tx_param")))
+#else
+#define tx_lock_deprecated
+#endif
+
+/* deprecated, do not use */
+enum tx_lock_deprecated pobj_tx_lock {
+	TX_LOCK_NONE	tx_lock_deprecated = TX_PARAM_NONE,
+	TX_LOCK_MUTEX	tx_lock_deprecated = TX_PARAM_MUTEX,
+	TX_LOCK_RWLOCK	tx_lock_deprecated = TX_PARAM_RWLOCK,
+};
+
+typedef void (*pmemobj_tx_callback)(PMEMobjpool *pop, enum pobj_tx_stage stage,
+	void *);
+
+#define POBJ_TX_XALLOC_VALID_FLAGS	(POBJ_XALLOC_ZERO |\
+	POBJ_XALLOC_NO_FLUSH |\
+	POBJ_XALLOC_ARENA_MASK |\
+	POBJ_XALLOC_CLASS_MASK |\
+	POBJ_XALLOC_NO_ABORT)
+
+#define POBJ_XADD_NO_FLUSH		POBJ_FLAG_NO_FLUSH
+#define POBJ_XADD_NO_SNAPSHOT		POBJ_FLAG_NO_SNAPSHOT
+#define POBJ_XADD_ASSUME_INITIALIZED	POBJ_FLAG_ASSUME_INITIALIZED
+#define POBJ_XADD_NO_ABORT		POBJ_FLAG_TX_NO_ABORT
+#define POBJ_XADD_VALID_FLAGS	(POBJ_XADD_NO_FLUSH |\
+	POBJ_XADD_NO_SNAPSHOT |\
+	POBJ_XADD_ASSUME_INITIALIZED |\
+	POBJ_XADD_NO_ABORT)
+
+#define POBJ_XLOCK_NO_ABORT		POBJ_FLAG_TX_NO_ABORT
+#define POBJ_XLOCK_VALID_FLAGS	(POBJ_XLOCK_NO_ABORT)
+
+#define POBJ_XFREE_NO_ABORT		POBJ_FLAG_TX_NO_ABORT
+#define POBJ_XFREE_VALID_FLAGS	(POBJ_XFREE_NO_ABORT)
+
+#define POBJ_XPUBLISH_NO_ABORT		POBJ_FLAG_TX_NO_ABORT
+#define POBJ_XPUBLISH_VALID_FLAGS	(POBJ_XPUBLISH_NO_ABORT)
+
+#define POBJ_XLOG_APPEND_BUFFER_NO_ABORT	POBJ_FLAG_TX_NO_ABORT
+#define POBJ_XLOG_APPEND_BUFFER_VALID_FLAGS	(POBJ_XLOG_APPEND_BUFFER_NO_ABORT)
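Editor's note (illustrative sketch, not part of the patch): the macros above expand into one flat code block, which is why TX_BEGIN must be paired with TX_END and the optional stage blocks in between. A minimal transaction that snapshots and updates a root counter; the root structure is made up for the example:

    #include <libpmemobj.h>

    struct my_root {
        uint64_t counter;
    };

    static int
    bump(PMEMobjpool *pop)
    {
        PMEMoid root = pmemobj_root(pop, sizeof(struct my_root));
        struct my_root *rp = pmemobj_direct(root);
        int ret = 0;

        TX_BEGIN(pop) {
            /* snapshot first; undone automatically on abort */
            pmemobj_tx_add_range(root, 0, sizeof(struct my_root));
            rp->counter++;
        } TX_ONABORT {
            ret = -1;
        } TX_END

        return ret;
    }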
+/*
+ * Starts a new transaction in the current thread.
+ * If called within an open transaction, starts a nested transaction.
+ *
+ * If successful, the transaction stage changes to TX_STAGE_WORK and the
+ * function returns zero. Otherwise, the stage changes to TX_STAGE_ONABORT
+ * and an error number is returned.
+ */
+int pmemobj_tx_begin(PMEMobjpool *pop, jmp_buf env, ...);
+
+/*
+ * Adds a lock of the given type to the current transaction.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XLOCK_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ */
+int pmemobj_tx_xlock(enum pobj_tx_param type, void *lockp, uint64_t flags);
+
+/*
+ * Adds a lock of the given type to the current transaction.
+ */
+int pmemobj_tx_lock(enum pobj_tx_param type, void *lockp);
+
+/*
+ * Aborts the current transaction
+ *
+ * Causes transition to TX_STAGE_ONABORT.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+void pmemobj_tx_abort(int errnum);
+
+/*
+ * Commits the current transaction
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+void pmemobj_tx_commit(void);
+
+/*
+ * Cleans up the current transaction. Must always be called after
+ * pmemobj_tx_begin, even if starting the transaction failed.
+ *
+ * If called during TX_STAGE_NONE, has no effect.
+ *
+ * Always causes transition to TX_STAGE_NONE.
+ *
+ * If the transaction was successful, returns 0. Otherwise returns the error
+ * code set by pmemobj_tx_abort.
+ *
+ * This function must *not* be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_end(void);
+
+/*
+ * Performs the actions associated with the current stage of the transaction,
+ * and makes the transition to the next stage. The current stage must always
+ * be obtained by calling pmemobj_tx_stage.
+ *
+ * This function must be called in a transaction.
+ */
+void pmemobj_tx_process(void);
+
+/*
+ * Returns the last transaction error code.
+ */
+int pmemobj_tx_errno(void);
+
+/*
+ * Takes a "snapshot" of the memory block of the given size, located at the
+ * given offset 'off' in the object 'oid', and saves it in the undo log.
+ * The application is then free to directly modify the object in that memory
+ * range. In case of a failure or abort, all the changes within this range
+ * will be rolled back automatically.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_add_range(PMEMoid oid, uint64_t off, size_t size);
+
+/*
+ * Takes a "snapshot" of the given memory region and saves it in the undo log.
+ * The application is then free to directly modify the object in that memory
+ * range. In case of a failure or abort, all the changes within this range
+ * will be rolled back automatically. The supplied block of memory has to be
+ * within the given pool.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_add_range_direct(const void *ptr, size_t size);
+/*
+ * Behaves exactly the same as pmemobj_tx_add_range when 'flags' equals 0.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XADD_NO_FLUSH - skips flush on commit
+ *  - POBJ_XADD_NO_SNAPSHOT - added range will not be snapshotted
+ *  - POBJ_XADD_ASSUME_INITIALIZED - added range is assumed to be initialized
+ *  - POBJ_XADD_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ */
+int pmemobj_tx_xadd_range(PMEMoid oid, uint64_t off, size_t size,
+	uint64_t flags);
+
+/*
+ * Behaves exactly the same as pmemobj_tx_add_range_direct when 'flags' equals
+ * 0. 'Flags' is a bitmask of the following values:
+ *  - POBJ_XADD_NO_FLUSH - skips flush on commit
+ *  - POBJ_XADD_NO_SNAPSHOT - added range will not be snapshotted
+ *  - POBJ_XADD_ASSUME_INITIALIZED - added range is assumed to be initialized
+ *  - POBJ_XADD_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ */
+int pmemobj_tx_xadd_range_direct(const void *ptr, size_t size, uint64_t flags);
+
+/*
+ * Transactionally allocates a new object.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_alloc(size_t size, uint64_t type_num);
+
+/*
+ * Transactionally allocates a new object.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XALLOC_ZERO - zero the allocated object
+ *  - POBJ_XALLOC_NO_FLUSH - skip flush on commit
+ *  - POBJ_XALLOC_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_xalloc(size_t size, uint64_t type_num, uint64_t flags);
+
+/*
+ * Transactionally allocates a new zeroed object.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_zalloc(size_t size, uint64_t type_num);
+
+/*
+ * Transactionally resizes an existing object.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_realloc(PMEMoid oid, size_t size, uint64_t type_num);
+
+/*
+ * Transactionally resizes an existing object; if extended, the new space is
+ * zeroed.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_zrealloc(PMEMoid oid, size_t size, uint64_t type_num);
+
+/*
+ * Transactionally allocates a new object containing a duplicate of the
+ * string s.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_strdup(const char *s, uint64_t type_num);
+/*
+ * Transactionally allocates a new object containing a duplicate of the
+ * string s.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XALLOC_ZERO - zero the allocated object
+ *  - POBJ_XALLOC_NO_FLUSH - skip flush on commit
+ *  - POBJ_XALLOC_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_xstrdup(const char *s, uint64_t type_num, uint64_t flags);
+
+/*
+ * Transactionally allocates a new object containing a duplicate of the wide
+ * character string s.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_wcsdup(const wchar_t *s, uint64_t type_num);
+
+/*
+ * Transactionally allocates a new object containing a duplicate of the wide
+ * character string s.
+ *
+ * If successful, returns PMEMoid.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and OID_NULL is returned.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XALLOC_ZERO - zero the allocated object
+ *  - POBJ_XALLOC_NO_FLUSH - skip flush on commit
+ *  - POBJ_XALLOC_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+PMEMoid pmemobj_tx_xwcsdup(const wchar_t *s, uint64_t type_num, uint64_t flags);
+
+/*
+ * Transactionally frees an existing object.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_free(PMEMoid oid);
+
+/*
+ * Transactionally frees an existing object.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and the error number is
+ * returned.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XFREE_NO_ABORT - if the function does not end successfully,
+ *  do not abort the transaction and return the error number.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_xfree(PMEMoid oid, uint64_t flags);
+
+/*
+ * Appends a user-allocated buffer to the ulog.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_log_append_buffer(enum pobj_log_type type,
+	void *addr, size_t size);
+
+/*
+ * Appends a user-allocated buffer to the ulog.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ * 'Flags' is a bitmask of the following values:
+ *  - POBJ_XLOG_APPEND_BUFFER_NO_ABORT - if the function does not end
+ *  successfully, do not abort the transaction and return the error number.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_xlog_append_buffer(enum pobj_log_type type,
+	void *addr, size_t size, uint64_t flags);
+
+/*
+ * Enables or disables automatic ulog allocations.
+ *
+ * If successful, returns zero.
+ * Otherwise, the stage changes to TX_STAGE_ONABORT and an error number is
+ * returned.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+int pmemobj_tx_log_auto_alloc(enum pobj_log_type type, int on_off);
+
+/*
+ * Calculates and returns the size of user buffers for snapshots.
+ */
+size_t pmemobj_tx_log_snapshots_max_size(size_t *sizes, size_t nsizes);
+
+/*
+ * Calculates and returns the size of user buffers for intents.
+ */
+size_t pmemobj_tx_log_intents_max_size(size_t nintents);
+
+/*
+ * Sets a volatile pointer to the user data for the current transaction.
+ */
+void pmemobj_tx_set_user_data(void *data);
+
+/*
+ * Gets the volatile pointer to the user data associated with the current
+ * transaction.
+ */
+void *pmemobj_tx_get_user_data(void);
+
+/*
+ * Sets the failure behavior of transactional functions.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+void pmemobj_tx_set_failure_behavior(enum pobj_tx_failure_behavior behavior);
+
+/*
+ * Returns the failure behavior for the current transaction.
+ *
+ * This function must be called during TX_STAGE_WORK.
+ */
+enum pobj_tx_failure_behavior pmemobj_tx_get_failure_behavior(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* libpmemobj/tx_base.h */
diff --git a/src/pmdk/src/include/libpmemobj/types.h b/src/pmdk/src/include/libpmemobj/types.h
new file mode 100644
index 000000000..76658c519
--- /dev/null
+++ b/src/pmdk/src/include/libpmemobj/types.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * libpmemobj/types.h -- definitions of libpmemobj type-safe macros
+ */
+#ifndef LIBPMEMOBJ_TYPES_H
+#define LIBPMEMOBJ_TYPES_H 1
+
+#include <libpmemobj/base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define TOID_NULL(t)	((TOID(t))OID_NULL)
+#define PMEMOBJ_MAX_LAYOUT ((size_t)1024)
+
+/*
+ * Type safety macros
+ */
+#if !(defined _MSC_VER || defined __clang__)
+
+#define TOID_ASSIGN(o, value)(\
+{\
+	(o).oid = value;\
+	(o); /* to avoid "error: statement with no effect" */\
+})
+
+#else /* _MSC_VER or __clang__ */
+
+#define TOID_ASSIGN(o, value) ((o).oid = value, (o))
+
+#endif
+
+#if (defined _MSC_VER && _MSC_VER < 1912)
+/*
+ * XXX - workaround for an offsetof issue in VS 15.3; it has been fixed
+ * since Visual Studio 2017 Version 15.5 (_MSC_VER == 1912)
+ */
+#ifdef PMEMOBJ_OFFSETOF_WA
+#ifdef _CRT_USE_BUILTIN_OFFSETOF
+#undef offsetof
+#define offsetof(s, m) ((size_t)&reinterpret_cast < char const volatile& >\
+	((((s *)0)->m)))
+#endif
+#else
+#ifdef _CRT_USE_BUILTIN_OFFSETOF
+#error "Invalid definition of offsetof() macro - see: \
+https://developercommunity.visualstudio.com/content/problem/96174/\
+offsetof-macro-is-broken-for-nested-objects.html \
+Please upgrade your VS, fix offsetof as described under the link or define \
+PMEMOBJ_OFFSETOF_WA to enable workaround in libpmemobj.h"
+#endif
+#endif
+
+#endif /* _MSC_VER */
+
+#define TOID_EQUALS(lhs, rhs)\
+((lhs).oid.off == (rhs).oid.off &&\
+ (lhs).oid.pool_uuid_lo == (rhs).oid.pool_uuid_lo)
+
+/* type number of root object */
+#define POBJ_ROOT_TYPE_NUM 0
+#define _toid_struct
+#define _toid_union
+#define _toid_enum
+#define _POBJ_LAYOUT_REF(name) (sizeof(_pobj_layout_##name##_ref))
+
+/*
+ * Typed OID
+ */
+#define TOID(t)\
+union _toid_##t##_toid
+
+#ifdef __cplusplus
+#define _TOID_CONSTR(t)\
+_toid_##t##_toid()\
+{ }\
+_toid_##t##_toid(PMEMoid _oid) : oid(_oid)\
+{ }
+#else
+#define _TOID_CONSTR(t)
+#endif
+
+/*
+ * Declaration of typed OID
+ */
+#define _TOID_DECLARE(t, i)\
+typedef uint8_t _toid_##t##_toid_type_num[(i) + 1];\
+TOID(t)\
+{\
+	_TOID_CONSTR(t)\
+	PMEMoid oid;\
+	t *_type;\
+	_toid_##t##_toid_type_num *_type_num;\
+}
+
+/*
+ * Declaration of typed OID of an object
+ */
+#define TOID_DECLARE(t, i) _TOID_DECLARE(t, i)
+
+/*
+ * Declaration of typed OID of a root object
+ */
+#define TOID_DECLARE_ROOT(t) _TOID_DECLARE(t, POBJ_ROOT_TYPE_NUM)
+
+/*
+ * Type number of specified type
+ */
+#define TOID_TYPE_NUM(t) (sizeof(_toid_##t##_toid_type_num) - 1)
+
+/*
+ * Type number of object read from typed OID
+ */
+#define TOID_TYPE_NUM_OF(o) (sizeof(*(o)._type_num) - 1)
+
+/*
+ * NULL check
+ */
+#define TOID_IS_NULL(o)	((o).oid.off == 0)
+
+/*
+ * Validates whether the type number stored in the typed OID is the same
+ * as the type number stored in the object's metadata
+ */
+#define TOID_VALID(o) (TOID_TYPE_NUM_OF(o) == pmemobj_type_num((o).oid))
+
+/*
+ * Checks whether the object is of a given type
+ */
+#define OID_INSTANCEOF(o, t) (TOID_TYPE_NUM(t) == pmemobj_type_num(o))
+
+/*
+ * Begin of layout declaration
+ */
+#define POBJ_LAYOUT_BEGIN(name)\
+typedef uint8_t _pobj_layout_##name##_ref[__COUNTER__ + 1]
+
+/*
+ * End of layout declaration
+ */
+#define POBJ_LAYOUT_END(name)\
+typedef char _pobj_layout_##name##_cnt[__COUNTER__ + 1 -\
+	_POBJ_LAYOUT_REF(name)];
+
+/*
+ * Number of types declared inside layout without the root object
+ */
+#define POBJ_LAYOUT_TYPES_NUM(name) (sizeof(_pobj_layout_##name##_cnt) - 1)
+
+/*
+ * Declaration of typed OID inside layout declaration
+ */
+#define POBJ_LAYOUT_TOID(name, t)\
+TOID_DECLARE(t, (__COUNTER__ + 1 - _POBJ_LAYOUT_REF(name)));
+
+/*
+ * Declaration of typed OID of root inside layout declaration
+ */
+#define POBJ_LAYOUT_ROOT(name, t)\
+TOID_DECLARE_ROOT(t);
+
+/*
+ * Name of declared layout
+ */
+#define POBJ_LAYOUT_NAME(name) #name
+
+#define TOID_TYPEOF(o) __typeof__(*(o)._type)
+
+#define TOID_OFFSETOF(o, field) offsetof(TOID_TYPEOF(o), field)
+
+/*
+ * XXX - DIRECT_RW and DIRECT_RO are not available when compiled using VC++
+ * as C code (/TC). Use the /TP option.
+ */
+#ifndef _MSC_VER
+
+#define DIRECT_RW(o) (\
+{__typeof__(o) _o; _o._type = NULL; (void)_o;\
+(__typeof__(*(o)._type) *)pmemobj_direct((o).oid); })
+#define DIRECT_RO(o) ((const __typeof__(*(o)._type) *)pmemobj_direct((o).oid))
+
+#elif defined(__cplusplus)
+
+/*
+ * XXX - On Windows, these macros do not behave exactly the same as on Linux.
+ */
+#define DIRECT_RW(o) \
+	(reinterpret_cast < __typeof__((o)._type) > (pmemobj_direct((o).oid)))
+#define DIRECT_RO(o) \
+	(reinterpret_cast < const __typeof__((o)._type) > \
+	(pmemobj_direct((o).oid)))
+
+#endif /* (defined(_MSC_VER) || defined(__cplusplus)) */
+
+#define D_RW DIRECT_RW
+#define D_RO DIRECT_RO
+
+#ifdef __cplusplus
+}
+#endif
+#endif	/* libpmemobj/types.h */
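Editor's note (illustrative sketch, not part of the patch): the layout macros above burn type numbers into the TOID types at compile time via __COUNTER__. A sketch of a complete layout declaration; all names are made up, and the layout name must match the one passed to pmemobj_create/pmemobj_open:

    #include <libpmemobj.h>

    POBJ_LAYOUT_BEGIN(example);
    POBJ_LAYOUT_ROOT(example, struct my_root);
    POBJ_LAYOUT_TOID(example, struct rec);
    POBJ_LAYOUT_END(example);

    struct rec {
        uint64_t payload;
    };

    struct my_root {
        TOID(struct rec) first;
    };

    static PMEMobjpool *
    create_pool(const char *path)
    {
        return pmemobj_create(path, POBJ_LAYOUT_NAME(example),
                PMEMOBJ_MIN_POOL, 0666);
    }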
diff --git a/src/pmdk/src/include/libpmempool.h b/src/pmdk/src/include/libpmempool.h
new file mode 100644
index 000000000..87f3b4eb4
--- /dev/null
+++ b/src/pmdk/src/include/libpmempool.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * libpmempool.h -- definitions of libpmempool entry points
+ *
+ * See libpmempool(7) for details.
+ */
+ */ + +#ifndef LIBPMEMPOOL_H +#define LIBPMEMPOOL_H 1 + +#include +#include +#include + +#ifdef _WIN32 +#include + +#ifndef PMDK_UTF8_API +#define pmempool_check_status pmempool_check_statusW +#define pmempool_check_args pmempool_check_argsW + +#define pmempool_check_init pmempool_check_initW +#define pmempool_check pmempool_checkW +#define pmempool_sync pmempool_syncW +#define pmempool_transform pmempool_transformW +#define pmempool_rm pmempool_rmW +#define pmempool_check_version pmempool_check_versionW +#define pmempool_errormsg pmempool_errormsgW +#define pmempool_feature_enable pmempool_feature_enableW +#define pmempool_feature_disable pmempool_feature_disableW +#define pmempool_feature_query pmempool_feature_queryW +#else +#define pmempool_check_status pmempool_check_statusU +#define pmempool_check_args pmempool_check_argsU + +#define pmempool_check_init pmempool_check_initU +#define pmempool_check pmempool_checkU +#define pmempool_sync pmempool_syncU +#define pmempool_transform pmempool_transformU +#define pmempool_rm pmempool_rmU +#define pmempool_check_version pmempool_check_versionU +#define pmempool_errormsg pmempool_errormsgU +#define pmempool_feature_enable pmempool_feature_enableU +#define pmempool_feature_disable pmempool_feature_disableU +#define pmempool_feature_query pmempool_feature_queryU +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* PMEMPOOL CHECK */ + +/* + * pool types + */ +enum pmempool_pool_type { + PMEMPOOL_POOL_TYPE_DETECT, + PMEMPOOL_POOL_TYPE_LOG, + PMEMPOOL_POOL_TYPE_BLK, + PMEMPOOL_POOL_TYPE_OBJ, + PMEMPOOL_POOL_TYPE_BTT, + PMEMPOOL_POOL_TYPE_RESERVED1, /* used to be cto */ +}; + +/* + * perform repairs + */ +#define PMEMPOOL_CHECK_REPAIR (1U << 0) +/* + * emulate repairs + */ +#define PMEMPOOL_CHECK_DRY_RUN (1U << 1) +/* + * perform hazardous repairs + */ +#define PMEMPOOL_CHECK_ADVANCED (1U << 2) +/* + * do not ask before repairs + */ +#define PMEMPOOL_CHECK_ALWAYS_YES (1U << 3) +/* + * generate info statuses + */ +#define PMEMPOOL_CHECK_VERBOSE (1U << 4) +/* + * generate string format statuses + */ +#define PMEMPOOL_CHECK_FORMAT_STR (1U << 5) + +/* + * types of check statuses + */ +enum pmempool_check_msg_type { + PMEMPOOL_CHECK_MSG_TYPE_INFO, + PMEMPOOL_CHECK_MSG_TYPE_ERROR, + PMEMPOOL_CHECK_MSG_TYPE_QUESTION, +}; + +/* + * check result types + */ +enum pmempool_check_result { + PMEMPOOL_CHECK_RESULT_CONSISTENT, + PMEMPOOL_CHECK_RESULT_NOT_CONSISTENT, + PMEMPOOL_CHECK_RESULT_REPAIRED, + PMEMPOOL_CHECK_RESULT_CANNOT_REPAIR, + PMEMPOOL_CHECK_RESULT_ERROR, + PMEMPOOL_CHECK_RESULT_SYNC_REQ, +}; + +/* + * check context + */ +typedef struct pmempool_check_ctx PMEMpoolcheck; + +/* + * finalize the check and get the result + */ +enum pmempool_check_result pmempool_check_end(PMEMpoolcheck *ppc); + +/* PMEMPOOL RM */ + +#define PMEMPOOL_RM_FORCE (1U << 0) /* ignore any errors */ +#define PMEMPOOL_RM_POOLSET_LOCAL (1U << 1) /* remove local poolsets */ +#define PMEMPOOL_RM_POOLSET_REMOTE (1U << 2) /* remove remote poolsets */ + +/* + * LIBPMEMPOOL SYNC + */ + +/* + * fix bad blocks - it requires creating or reading special recovery files + */ +#define PMEMPOOL_SYNC_FIX_BAD_BLOCKS (1U << 0) +/* + * do not apply changes, only check if operation is viable + */ +#define PMEMPOOL_SYNC_DRY_RUN (1U << 1) + +/* + * LIBPMEMPOOL TRANSFORM + */ + +/* + * do not apply changes, only check if operation is viable + */ +#define PMEMPOOL_TRANSFORM_DRY_RUN (1U << 1) + +/* + * PMEMPOOL_MAJOR_VERSION and PMEMPOOL_MINOR_VERSION provide the current version + * of 
the libpmempool API as provided by this header file. Applications can + * verify that the version available at run-time is compatible with the version + * used at compile-time by passing these defines to pmempool_check_version(). + */ +#define PMEMPOOL_MAJOR_VERSION 1 +#define PMEMPOOL_MINOR_VERSION 3 + +/* + * check status + */ +struct pmempool_check_statusU { + enum pmempool_check_msg_type type; + struct { + const char *msg; + const char *answer; + } str; +}; + +#ifndef _WIN32 +#define pmempool_check_status pmempool_check_statusU +#else +struct pmempool_check_statusW { + enum pmempool_check_msg_type type; + struct { + const wchar_t *msg; + const wchar_t *answer; + } str; +}; +#endif + +/* + * check context arguments + */ +struct pmempool_check_argsU { + const char *path; + const char *backup_path; + enum pmempool_pool_type pool_type; + unsigned flags; +}; + +#ifndef _WIN32 +#define pmempool_check_args pmempool_check_argsU +#else +struct pmempool_check_argsW { + const wchar_t *path; + const wchar_t *backup_path; + enum pmempool_pool_type pool_type; + unsigned flags; +}; +#endif + +/* + * initialize a check context + */ +#ifndef _WIN32 +PMEMpoolcheck * +pmempool_check_init(struct pmempool_check_args *args, size_t args_size); +#else +PMEMpoolcheck * +pmempool_check_initU(struct pmempool_check_argsU *args, size_t args_size); +PMEMpoolcheck * +pmempool_check_initW(struct pmempool_check_argsW *args, size_t args_size); +#endif + +/* + * start / resume the check + */ +#ifndef _WIN32 +struct pmempool_check_status *pmempool_check(PMEMpoolcheck *ppc); +#else +struct pmempool_check_statusU *pmempool_checkU(PMEMpoolcheck *ppc); +struct pmempool_check_statusW *pmempool_checkW(PMEMpoolcheck *ppc); +#endif + +/* + * LIBPMEMPOOL SYNC & TRANSFORM + */ + +/* + * Synchronize data between replicas within a poolset. + * + * EXPERIMENTAL + */ +#ifndef _WIN32 +int pmempool_sync(const char *poolset_file, unsigned flags); +#else +int pmempool_syncU(const char *poolset_file, unsigned flags); +int pmempool_syncW(const wchar_t *poolset_file, unsigned flags); +#endif + +/* + * Modify internal structure of a poolset. 
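+ *
+ * [Editor's note: an illustrative call, with hypothetical poolset files --
+ * a dry run that only validates whether the operation is viable:
+ *
+ *	int ret = pmempool_transform("old.set", "new.set",
+ *			PMEMPOOL_TRANSFORM_DRY_RUN);]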
+ * + * EXPERIMENTAL + */ +#ifndef _WIN32 +int pmempool_transform(const char *poolset_file_src, + const char *poolset_file_dst, unsigned flags); +#else +int pmempool_transformU(const char *poolset_file_src, + const char *poolset_file_dst, unsigned flags); +int pmempool_transformW(const wchar_t *poolset_file_src, + const wchar_t *poolset_file_dst, unsigned flags); +#endif + +/* PMEMPOOL feature enable, disable, query */ + +/* + * feature types + */ +enum pmempool_feature { + PMEMPOOL_FEAT_SINGLEHDR, + PMEMPOOL_FEAT_CKSUM_2K, + PMEMPOOL_FEAT_SHUTDOWN_STATE, + PMEMPOOL_FEAT_CHECK_BAD_BLOCKS, +}; + +/* PMEMPOOL FEATURE ENABLE */ +#ifndef _WIN32 +int pmempool_feature_enable(const char *path, enum pmempool_feature feature, + unsigned flags); +#else +int pmempool_feature_enableU(const char *path, enum pmempool_feature feature, + unsigned flags); +int pmempool_feature_enableW(const wchar_t *path, + enum pmempool_feature feature, unsigned flags); +#endif + +/* PMEMPOOL FEATURE DISABLE */ +#ifndef _WIN32 +int pmempool_feature_disable(const char *path, enum pmempool_feature feature, + unsigned flags); +#else +int pmempool_feature_disableU(const char *path, enum pmempool_feature feature, + unsigned flags); +int pmempool_feature_disableW(const wchar_t *path, + enum pmempool_feature feature, unsigned flags); +#endif + +/* PMEMPOOL FEATURE QUERY */ +#ifndef _WIN32 +int pmempool_feature_query(const char *path, enum pmempool_feature feature, + unsigned flags); +#else +int pmempool_feature_queryU(const char *path, enum pmempool_feature feature, + unsigned flags); +int pmempool_feature_queryW(const wchar_t *path, + enum pmempool_feature feature, unsigned flags); +#endif + +/* PMEMPOOL RM */ +#ifndef _WIN32 +int pmempool_rm(const char *path, unsigned flags); +#else +int pmempool_rmU(const char *path, unsigned flags); +int pmempool_rmW(const wchar_t *path, unsigned flags); +#endif + +#ifndef _WIN32 +const char *pmempool_check_version(unsigned major_required, + unsigned minor_required); +#else +const char *pmempool_check_versionU(unsigned major_required, + unsigned minor_required); +const wchar_t *pmempool_check_versionW(unsigned major_required, + unsigned minor_required); +#endif + +#ifndef _WIN32 +const char *pmempool_errormsg(void); +#else +const char *pmempool_errormsgU(void); +const wchar_t *pmempool_errormsgW(void); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* libpmempool.h */ diff --git a/src/pmdk/src/include/librpmem.h b/src/pmdk/src/include/librpmem.h new file mode 100644 index 000000000..ac0e34c7c --- /dev/null +++ b/src/pmdk/src/include/librpmem.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * librpmem.h -- definitions of librpmem entry points (EXPERIMENTAL) + * + * This library provides low-level support for remote access to persistent + * memory utilizing RDMA-capable RNICs. + * + * See librpmem(7) for details. 
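+ *
+ * [Editor's note: a hedged sketch of the remote-replication flow declared
+ * below; the target, pool-set name, pool_addr/pool_size, offset and length
+ * are placeholders, and error handling is elided:
+ *
+ *	unsigned nlanes = 4;
+ *	struct rpmem_pool_attr attr;
+ *	RPMEMpool *rpp = rpmem_open("user@host", "pool.set",
+ *			pool_addr, pool_size, &nlanes, &attr);
+ *	rpmem_persist(rpp, offset, length, 0, 0);
+ *	rpmem_close(rpp);
+ *
+ * The last two arguments of rpmem_persist() select lane 0 and pass no
+ * RPMEM_PERSIST_* flags.]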
+ */ + +#ifndef LIBRPMEM_H +#define LIBRPMEM_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct rpmem_pool RPMEMpool; + +#define RPMEM_POOL_HDR_SIG_LEN 8 +#define RPMEM_POOL_HDR_UUID_LEN 16 /* uuid byte length */ +#define RPMEM_POOL_USER_FLAGS_LEN 16 + +struct rpmem_pool_attr { + char signature[RPMEM_POOL_HDR_SIG_LEN]; /* pool signature */ + uint32_t major; /* format major version number */ + uint32_t compat_features; /* mask: compatible "may" features */ + uint32_t incompat_features; /* mask: "must support" features */ + uint32_t ro_compat_features; /* mask: force RO if unsupported */ + unsigned char poolset_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* pool uuid */ + unsigned char uuid[RPMEM_POOL_HDR_UUID_LEN]; /* first part uuid */ + unsigned char next_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* next pool uuid */ + unsigned char prev_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* prev pool uuid */ + unsigned char user_flags[RPMEM_POOL_USER_FLAGS_LEN]; /* user flags */ +}; + +RPMEMpool *rpmem_create(const char *target, const char *pool_set_name, + void *pool_addr, size_t pool_size, unsigned *nlanes, + const struct rpmem_pool_attr *create_attr); + +RPMEMpool *rpmem_open(const char *target, const char *pool_set_name, + void *pool_addr, size_t pool_size, unsigned *nlanes, + struct rpmem_pool_attr *open_attr); + +int rpmem_set_attr(RPMEMpool *rpp, const struct rpmem_pool_attr *attr); + +int rpmem_close(RPMEMpool *rpp); + +#define RPMEM_PERSIST_RELAXED (1U << 0) +#define RPMEM_FLUSH_RELAXED (1U << 0) + +int rpmem_flush(RPMEMpool *rpp, size_t offset, size_t length, unsigned lane, + unsigned flags); +int rpmem_drain(RPMEMpool *rpp, unsigned lane, unsigned flags); + +int rpmem_persist(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane, unsigned flags); +int rpmem_read(RPMEMpool *rpp, void *buff, size_t offset, size_t length, + unsigned lane); +int rpmem_deep_persist(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane); + +#define RPMEM_REMOVE_FORCE 0x1 +#define RPMEM_REMOVE_POOL_SET 0x2 + +int rpmem_remove(const char *target, const char *pool_set, int flags); + +/* + * RPMEM_MAJOR_VERSION and RPMEM_MINOR_VERSION provide the current version of + * the librpmem API as provided by this header file. Applications can verify + * that the version available at run-time is compatible with the version used + * at compile-time by passing these defines to rpmem_check_version(). + */ +#define RPMEM_MAJOR_VERSION 1 +#define RPMEM_MINOR_VERSION 3 +const char *rpmem_check_version(unsigned major_required, + unsigned minor_required); + +const char *rpmem_errormsg(void); + +/* minimum size of a pool */ +#define RPMEM_MIN_POOL ((size_t)(1024 * 8)) /* 8 KB */ + +/* + * This limit is set arbitrary to incorporate a pool header and required + * alignment plus supply. + */ +#define RPMEM_MIN_PART ((size_t)(1024 * 1024 * 2)) /* 2 MiB */ + +#ifdef __cplusplus +} +#endif +#endif /* librpmem.h */ diff --git a/src/pmdk/src/include/pmemcompat.h b/src/pmdk/src/include/pmemcompat.h new file mode 100644 index 000000000..a7730bfc5 --- /dev/null +++ b/src/pmdk/src/include/pmemcompat.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * pmemcompat.h -- compatibility layer for libpmem* libraries + */ + +#ifndef PMEMCOMPAT_H +#define PMEMCOMPAT_H +#include + +/* for backward compatibility */ +#ifdef NVML_UTF8_API +#pragma message( "NVML_UTF8_API macro is obsolete, please use PMDK_UTF8_API instead." 
) +#ifndef PMDK_UTF8_API +#define PMDK_UTF8_API +#endif +#endif + +struct iovec { + void *iov_base; + size_t iov_len; +}; + +typedef int mode_t; +/* + * XXX: this code will not work on windows if our library is included in + * an extern block. + */ +#if defined(__cplusplus) && defined(_MSC_VER) && !defined(__typeof__) +#include +/* + * These templates are used to remove a type reference(T&) which, in some + * cases, is returned by decltype + */ +namespace pmem { + +namespace detail { + +template +struct get_type { + using type = T; +}; + +template +struct get_type { + using type = T*; +}; + +template +struct get_type { + using type = T; +}; + +} /* namespace detail */ + +} /* namespace pmem */ + +#define __typeof__(p) pmem::detail::get_type::type + +#endif + +#endif diff --git a/src/pmdk/src/libpmem/Makefile b/src/pmdk/src/libpmem/Makefile new file mode 100644 index 000000000..d4d5a14f7 --- /dev/null +++ b/src/pmdk/src/libpmem/Makefile @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# src/libpmem/Makefile -- Makefile for libpmem +# + +include ../common.inc + +LIBRARY_NAME = pmem +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 +SOURCE =\ + $(CORE)/alloc.c\ + $(CORE)/fs_posix.c\ + $(CORE)/os_posix.c\ + $(CORE)/os_thread_posix.c\ + $(CORE)/out.c\ + $(CORE)/util.c\ + $(CORE)/util_posix.c\ + $(COMMON)/file.c\ + $(COMMON)/file_posix.c\ + $(COMMON)/mmap.c\ + $(COMMON)/mmap_posix.c\ + $(COMMON)/os_deep_linux.c\ + libpmem.c\ + memops_generic.c\ + pmem.c\ + pmem_posix.c\ + $(PMEM2)/pmem2_utils.c\ + $(PMEM2)/config.c\ + $(PMEM2)/persist_posix.c\ + $(PMEM2)/source.c\ + $(PMEM2)/source_posix.c + +ifeq ($(OS_KERNEL_NAME),Linux) +SOURCE +=\ + $(PMEM2)/pmem2_utils_linux.c\ + $(PMEM2)/pmem2_utils_$(OS_DIMM).c\ + $(PMEM2)/auto_flush_linux.c\ + $(PMEM2)/deep_flush_linux.c +else +SOURCE +=\ + $(PMEM2)/pmem2_utils_other.c\ + $(PMEM2)/auto_flush_none.c\ + $(PMEM2)/deep_flush_other.c +endif + +ifeq ($(OS_DIMM),ndctl) +SOURCE += region_namespace_ndctl.c +CFLAGS += $(LIBNDCTL_CFLAGS) +LIBS += $(LIBNDCTL_LIBS) +else +SOURCE += region_namespace_none.c +endif + +INCS += -I$(TOP)/src/libpmem2 + +include ../libpmem2/$(ARCH)/sources.inc + +SOURCE += $(LIBPMEM2_ARCH_SOURCE) + +include ../Makefile.inc + +include $(PMEM2)/$(ARCH)/flags.inc + +CFLAGS += -I. +LIBS += -pthread diff --git a/src/pmdk/src/libpmem/libpmem.c b/src/pmdk/src/libpmem/libpmem.c new file mode 100644 index 000000000..fbc2e5b60 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2017, Intel Corporation */ + +/* + * libpmem.c -- pmem entry points for libpmem + */ + +#include +#include + +#include "libpmem.h" + +#include "pmem.h" +#include "pmemcommon.h" + +/* + * libpmem_init -- load-time initialization for libpmem + * + * Called automatically by the run-time loader. + */ +ATTR_CONSTRUCTOR +void +libpmem_init(void) +{ + common_init(PMEM_LOG_PREFIX, PMEM_LOG_LEVEL_VAR, PMEM_LOG_FILE_VAR, + PMEM_MAJOR_VERSION, PMEM_MINOR_VERSION); + LOG(3, NULL); + pmem_init(); +} + +/* + * libpmem_fini -- libpmem cleanup routine + * + * Called automatically when the process terminates. 
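+ *
+ * [Editor's note: on Windows, where this constructor/destructor pair is not
+ * driven by the run-time loader, libpmem_init() and libpmem_fini() are
+ * invoked explicitly from DllMain() in libpmem_main.c.]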
+ */ +ATTR_DESTRUCTOR +void +libpmem_fini(void) +{ + LOG(3, NULL); + + common_fini(); +} + +/* + * pmem_check_versionU -- see if library meets application version requirements + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmem_check_versionU(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != PMEM_MAJOR_VERSION) { + ERR("libpmem major version mismatch (need %u, found %u)", + major_required, PMEM_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > PMEM_MINOR_VERSION) { + ERR("libpmem minor version mismatch (need %u, found %u)", + minor_required, PMEM_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +#ifndef _WIN32 +/* + * pmem_check_version -- see if library meets application version requirements + */ +const char * +pmem_check_version(unsigned major_required, unsigned minor_required) +{ + return pmem_check_versionU(major_required, minor_required); +} +#else +/* + * pmem_check_versionW -- see if library meets application version requirements + */ +const wchar_t * +pmem_check_versionW(unsigned major_required, unsigned minor_required) +{ + if (pmem_check_versionU(major_required, minor_required) != NULL) + return out_get_errormsgW(); + else + return NULL; +} +#endif + +/* + * pmem_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmem_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmem_errormsg -- return last error message + */ +const char * +pmem_errormsg(void) +{ + return pmem_errormsgU(); +} +#else +/* + * pmem_errormsgW -- return last error message as wchar_t + */ +const wchar_t * +pmem_errormsgW(void) +{ + return out_get_errormsgW(); +} +#endif diff --git a/src/pmdk/src/libpmem/libpmem.def b/src/pmdk/src/libpmem/libpmem.def new file mode 100644 index 000000000..8f82670f1 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.def @@ -0,0 +1,66 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2015-2018, Intel Corporation +;;;; End Copyright Notice + +; +; XXX - libpmem exports mmap/munmap/msync/mprotect functions +; +; This is a _temporary_ solution to make those function available for all +; the other PMDK libraries and to have only one instance of a file mapping +; list (owned by libpmem). Otherwise, each library would have its own +; instance of the file mapping list, resulting in libpmem being not able +; to find a file handle associated with the mapping address passed to +; pmem_msync(), pmem_memcpy(), etc. causing those functions to fail. +; +; The proposed target solution would include: +; - implementation of pmem_mmap, pmem_unmap, pmem_msync and +; pmem_mprotect functions in libpmem (pmem_unmap and pmem_msync are +; already there); +; - making sure that all the PMDK libraries never call mmap, munmap, +; msync and mprotect directly, but only through their libpmem counterparts; +; - new pmem_mmap() function must provide similar functionality to +; mmap(), i.e. it must take 'offset' argument, but should not take +; file descriptor argument. Perhaps it could be an opaque handle +; to the file, that is internally casted to a file descriptor +; or a HANDLE, depending on the OS. 
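+;
+; [Editor's note: under that proposal, the entry point might look like the
+; following -- an illustrative, hypothetical signature, not an exported
+; symbol of this library:
+;
+;     void *pmem_mmap(void *addr, size_t len, int prot, int flags,
+;                     PMEMfilehandle file, os_off_t offset);
+;
+; where PMEMfilehandle is the opaque file handle described above.]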
+; + +LIBRARY libpmem + +VERSION 1.0 + +EXPORTS + pmem_map_fileU + pmem_map_fileW + pmem_unmap + pmem_is_pmem + pmem_persist + pmem_msync + pmem_has_auto_flush + pmem_deep_persist + pmem_flush + pmem_deep_flush + pmem_deep_drain + pmem_drain + pmem_has_hw_drain + pmem_memmove_persist + pmem_memcpy_persist + pmem_memset_persist + pmem_memmove_nodrain + pmem_memcpy_nodrain + pmem_memset_nodrain + pmem_memmove + pmem_memcpy + pmem_memset + pmem_check_versionU + pmem_check_versionW + pmem_errormsgU + pmem_errormsgW + + mmap + munmap + msync + mprotect + + DllMain diff --git a/src/pmdk/src/libpmem/libpmem.link.in b/src/pmdk/src/libpmem/libpmem.link.in new file mode 100644 index 000000000..16673da95 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.link.in @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2019, Intel Corporation +# +# +# src/libpmem.link -- linker link file for libpmem +# +LIBPMEM_1.0 { + global: + pmem_map_file; + pmem_unmap; + pmem_is_pmem; + pmem_persist; + pmem_msync; + pmem_has_auto_flush; + pmem_deep_persist; + pmem_flush; + pmem_deep_flush; + pmem_deep_drain; + pmem_drain; + pmem_has_hw_drain; + pmem_check_version; + pmem_errormsg; + pmem_memmove_persist; + pmem_memcpy_persist; + pmem_memset_persist; + pmem_memmove_nodrain; + pmem_memcpy_nodrain; + pmem_memset_nodrain; + pmem_memmove; + pmem_memcpy; + pmem_memset; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/libpmem/libpmem.rc b/src/pmdk/src/libpmem/libpmem.rc new file mode 100644 index 000000000..82e6a5c95 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016, Intel Corporation */ + +/* + * libpmem.rc -- libpmem resource file + */ + +#include +#define FILE_NAME "libpmem.dll" +#define DESCRIPTION "libpmem - persistent memory support library" +#define TYPE VFT_DLL +#include diff --git a/src/pmdk/src/libpmem/libpmem.vcxproj b/src/pmdk/src/libpmem/libpmem.vcxproj new file mode 100644 index 000000000..e548abb8d --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.vcxproj @@ -0,0 +1,162 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AdvancedVectorExtensions + AdvancedVectorExtensions + + + + AdvancedVectorExtensions + AdvancedVectorExtensions + + + + AdvancedVectorExtensions + AdvancedVectorExtensions + + + + AdvancedVectorExtensions + AdvancedVectorExtensions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {901f04db-e1a5-4a41-8b81-9d31c19acd59} + + + + + + + {9e9e3d25-2139-4a5d-9200-18148ddead45} + DynamicLibrary + libpmem + libpmem + en-US + 14.0 + 10.0.10240.0 + 10.0.17134.0 + + + + DynamicLibrary + true + v140 + + + DynamicLibrary + false + false + v140 + + + + + + + + + + + + $(VC_IncludePath);$(WindowsSDK_IncludePath);..\..\src\libpmem2\x86_64\ + + + $(VC_IncludePath);$(WindowsSDK_IncludePath);..\..\src\libpmem2\x86_64\ + + + + $(SolutionDir)libpmem2;%(AdditionalIncludeDirectories) + + + + + $(SolutionDir)libpmem2;%(AdditionalIncludeDirectories) + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmem/libpmem.vcxproj.filters b/src/pmdk/src/libpmem/libpmem.vcxproj.filters new file mode 100644 index 000000000..1ce550731 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem.vcxproj.filters @@ -0,0 +1,243 @@ + + + + + {16473205-8f12-4d4c-b1e9-e14ea3013e70} + h + + + {17275273-f923-45ff-9b7e-b2ea76561168} + c;def + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + 
+ + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmem/libpmem_main.c b/src/pmdk/src/libpmem/libpmem_main.c new file mode 100644 index 000000000..8978229c9 --- /dev/null +++ b/src/pmdk/src/libpmem/libpmem_main.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2018, Intel Corporation */ + +/* + * libpmem_main.c -- entry point for libpmem.dll + * + * XXX - This is a placeholder. All the library initialization/cleanup + * that is done in library ctors/dtors, as well as TLS initialization + * should be moved here. + */ + +#include "win_mmap.h" + +void libpmem_init(void); +void libpmem_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmem_init(); + win_mmap_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + win_mmap_fini(); + libpmem_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmem/pmem.c b/src/pmdk/src/libpmem/pmem.c new file mode 100644 index 000000000..2730d02f0 --- /dev/null +++ b/src/pmdk/src/libpmem/pmem.c @@ -0,0 +1,957 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * pmem.c -- pmem entry points for libpmem + * + * + * PERSISTENT MEMORY INSTRUCTIONS ON X86 + * + * The primary feature of this library is to provide a way to flush + * changes to persistent memory as outlined below (note that many + * of the decisions below are made at initialization time, and not + * repeated every time a flush is requested). + * + * To flush a range to pmem when CLWB is available: + * + * CLWB for each cache line in the given range. + * + * SFENCE to ensure the CLWBs above have completed. + * + * To flush a range to pmem when CLFLUSHOPT is available and CLWB is not + * (same as above but issue CLFLUSHOPT instead of CLWB): + * + * CLFLUSHOPT for each cache line in the given range. + * + * SFENCE to ensure the CLWBs above have completed. 
+ * + * To flush a range to pmem when neither CLFLUSHOPT or CLWB are available + * (same as above but fences surrounding CLFLUSH are not required): + * + * CLFLUSH for each cache line in the given range. + * + * To memcpy a range of memory to pmem when MOVNT is available: + * + * Copy any non-64-byte portion of the destination using MOV. + * + * Use the flush flow above without the fence for the copied portion. + * + * Copy using MOVNTDQ, up to any non-64-byte aligned end portion. + * (The MOVNT instructions bypass the cache, so no flush is required.) + * + * Copy any unaligned end portion using MOV. + * + * Use the flush flow above for the copied portion (including fence). + * + * To memcpy a range of memory to pmem when MOVNT is not available: + * + * Just pass the call to the normal memcpy() followed by pmem_persist(). + * + * To memset a non-trivial sized range of memory to pmem: + * + * Same as the memcpy cases above but store the given value instead + * of reading values from the source. + * + * These features are supported for ARM AARCH64 using equivalent ARM + * assembly instruction. Please refer to (arm_cacheops.h) for more details. + * + * INTERFACES FOR FLUSHING TO PERSISTENT MEMORY + * + * Given the flows above, three interfaces are provided for flushing a range + * so that the caller has the ability to separate the steps when necessary, + * but otherwise leaves the detection of available instructions to the libpmem: + * + * pmem_persist(addr, len) + * + * This is the common case, which just calls the two other functions: + * + * pmem_flush(addr, len); + * pmem_drain(); + * + * pmem_flush(addr, len) + * + * CLWB or CLFLUSHOPT or CLFLUSH for each cache line + * + * pmem_drain() + * + * SFENCE unless using CLFLUSH + * + * + * INTERFACES FOR COPYING/SETTING RANGES OF MEMORY + * + * Given the flows above, the following interfaces are provided for the + * memmove/memcpy/memset operations to persistent memory: + * + * pmem_memmove_nodrain() + * + * Checks for overlapped ranges to determine whether to copy from + * the beginning of the range or from the end. If MOVNT instructions + * are available, uses the memory copy flow described above, otherwise + * calls the libc memmove() followed by pmem_flush(). Since no conditional + * compilation and/or architecture specific CFLAGS are in use at the + * moment, SSE2 ( thus movnt ) is just assumed to be available. + * + * pmem_memcpy_nodrain() + * + * Just calls pmem_memmove_nodrain(). + * + * pmem_memset_nodrain() + * + * If MOVNT instructions are available, uses the memset flow described + * above, otherwise calls the libc memset() followed by pmem_flush(). + * + * pmem_memmove_persist() + * pmem_memcpy_persist() + * pmem_memset_persist() + * + * Calls the appropriate _nodrain() function followed by pmem_drain(). + * + * + * DECISIONS MADE AT INITIALIZATION TIME + * + * As much as possible, all decisions described above are made at library + * initialization time. This is achieved using function pointers that are + * setup by pmem_init() when the library loads. 
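+ *
+ * [Editor's note: the dispatch pattern in miniature -- a simplified sketch,
+ * not the exact code below:
+ *
+ *	static struct pmem_funcs Funcs;
+ *
+ *	void pmem_flush(const void *addr, size_t len)
+ *	{
+ *		Funcs.flush(addr, len);
+ *	}
+ *
+ * Funcs is filled in once by pmem_init(), so the hot path dereferences a
+ * function pointer instead of re-testing CPU capabilities on every call.]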
+ * + * Func_fence is used by pmem_drain() to call one of: + * fence_empty() + * memory_barrier() + * + * Func_flush is used by pmem_flush() to call one of: + * flush_dcache() + * flush_dcache_invalidate_opt() + * flush_dcache_invalidate() + * + * Func_memmove_nodrain is used by memmove_nodrain() to call one of: + * memmove_nodrain_libc() + * memmove_nodrain_movnt() + * + * Func_memset_nodrain is used by memset_nodrain() to call one of: + * memset_nodrain_libc() + * memset_nodrain_movnt() + * + * DEBUG LOGGING + * + * Many of the functions here get called hundreds of times from loops + * iterating over ranges, making the usual LOG() calls at level 3 + * impractical. The call tracing log for those functions is set at 15. + */ + +#include +#include +#include +#include + +#include "libpmem.h" +#include "pmem.h" +#include "pmem2_arch.h" +#include "out.h" +#include "os.h" +#include "mmap.h" +#include "file.h" +#include "valgrind_internal.h" +#include "os_deep.h" +#include "auto_flush.h" + +struct pmem_funcs { + memmove_nodrain_func memmove_nodrain; + memset_nodrain_func memset_nodrain; + + flush_func deep_flush; + flush_func flush; + fence_func fence; +}; + +static struct pmem_funcs Funcs; +static is_pmem_func Is_pmem = NULL; + +/* + * pmem_has_hw_drain -- return whether or not HW drain was found + * + * Always false for x86: HW drain is done by HW with no SW involvement. + */ +int +pmem_has_hw_drain(void) +{ + LOG(3, NULL); + + return 0; +} + +/* + * pmem_drain -- wait for any PM stores to drain from HW buffers + */ +void +pmem_drain(void) +{ + LOG(15, NULL); + + Funcs.fence(); +} + +/* + * pmem_has_auto_flush -- check if platform supports eADR + */ +int +pmem_has_auto_flush() +{ + LOG(3, NULL); + + return pmem2_auto_flush(); +} + +/* + * pmem_deep_flush -- flush processor cache for the given range + * regardless of eADR support on platform + */ +void +pmem_deep_flush(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len); + + Funcs.deep_flush(addr, len); +} + +/* + * pmem_flush -- flush processor cache for the given range + */ +void +pmem_flush(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len); + + Funcs.flush(addr, len); +} + +/* + * pmem_persist -- make any cached changes to a range of pmem persistent + */ +void +pmem_persist(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + pmem_flush(addr, len); + pmem_drain(); +} + +/* + * pmem_msync -- flush to persistence via msync + * + * Using msync() means this routine is less optimal for pmem (but it + * still works) but it also works for any memory mapped file, unlike + * pmem_persist() which is only safe where pmem_is_pmem() returns true. + */ +int +pmem_msync(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len); + + /* + * msync requires addr to be a multiple of pagesize but there are no + * requirements for len. Align addr down and change len so that + * [addr, addr + len) still contains initial range. 
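+	 *
+	 * [Editor's note: e.g. with 4 KiB pages, addr = 0x1003 and len = 16
+	 * become uptr = 0x1000 and len = 19, still covering [0x1003, 0x1013).]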
+ */ + + /* increase len by the amount we gain when we round addr down */ + len += (uintptr_t)addr & (Pagesize - 1); + + /* round addr down to page boundary */ + uintptr_t uptr = (uintptr_t)addr & ~((uintptr_t)Pagesize - 1); + + /* + * msync accepts addresses aligned to page boundary, so we may sync + * more and part of it may have been marked as undefined/inaccessible + * Msyncing such memory is not a bug, so as a workaround temporarily + * disable error reporting. + */ + VALGRIND_DO_DISABLE_ERROR_REPORTING; + + int ret; + if ((ret = msync((void *)uptr, len, MS_SYNC)) < 0) + ERR("!msync"); + + VALGRIND_DO_ENABLE_ERROR_REPORTING; + + /* full flush */ + VALGRIND_DO_PERSIST(uptr, len); + + return ret; +} + +/* + * is_pmem_always -- (internal) always true (for meaningful parameters) version + * of pmem_is_pmem() + */ +static int +is_pmem_always(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + if (len == 0) + return 0; + + return 1; +} + +/* + * is_pmem_never -- (internal) never true version of pmem_is_pmem() + */ +static int +is_pmem_never(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + return 0; +} + +/* + * pmem_is_pmem_init -- (internal) initialize Func_is_pmem pointer + * + * This should be done only once - on the first call to pmem_is_pmem(). + * If PMEM_IS_PMEM_FORCE is set, it would override the default behavior + * of pmem_is_pmem(). + */ +static void +pmem_is_pmem_init(void) +{ + LOG(3, NULL); + + static volatile unsigned init; + + while (init != 2) { + if (!util_bool_compare_and_swap32(&init, 0, 1)) + continue; + + /* + * For debugging/testing, allow pmem_is_pmem() to be forced + * to always true or never true using environment variable + * PMEM_IS_PMEM_FORCE values of zero or one. + * + * This isn't #ifdef DEBUG because it has a trivial performance + * impact and it may turn out to be useful as a "chicken bit" + * for systems where pmem_is_pmem() isn't correctly detecting + * true persistent memory. + */ + char *ptr = os_getenv("PMEM_IS_PMEM_FORCE"); + if (ptr) { + int val = atoi(ptr); + + if (val == 0) + Is_pmem = is_pmem_never; + else if (val == 1) + Is_pmem = is_pmem_always; + + VALGRIND_ANNOTATE_HAPPENS_BEFORE(&Is_pmem); + + LOG(4, "PMEM_IS_PMEM_FORCE=%d", val); + } + + if (Funcs.deep_flush == NULL) + Is_pmem = is_pmem_never; + + if (!util_bool_compare_and_swap32(&init, 1, 2)) + FATAL("util_bool_compare_and_swap32"); + } +} + +/* + * pmem_is_pmem -- return true if entire range is persistent memory + */ +int +pmem_is_pmem(const void *addr, size_t len) +{ + LOG(10, "addr %p len %zu", addr, len); + + static int once; + + /* This is not thread-safe, but pmem_is_pmem_init() is. 
*/ + if (once == 0) { + pmem_is_pmem_init(); + util_fetch_and_add32(&once, 1); + } + + VALGRIND_ANNOTATE_HAPPENS_AFTER(&Is_pmem); + return Is_pmem(addr, len); +} + +#define PMEM_FILE_ALL_FLAGS\ + (PMEM_FILE_CREATE|PMEM_FILE_EXCL|PMEM_FILE_SPARSE|PMEM_FILE_TMPFILE) + +#define PMEM_DAX_VALID_FLAGS\ + (PMEM_FILE_CREATE|PMEM_FILE_SPARSE) + +/* + * pmem_map_fileU -- create or open the file and map it to memory + */ +#ifndef _WIN32 +static inline +#endif +void * +pmem_map_fileU(const char *path, size_t len, int flags, + mode_t mode, size_t *mapped_lenp, int *is_pmemp) +{ + LOG(3, "path \"%s\" size %zu flags %x mode %o mapped_lenp %p " + "is_pmemp %p", path, len, flags, mode, mapped_lenp, is_pmemp); + + int oerrno; + int fd; + int open_flags = O_RDWR; + int delete_on_err = 0; + int file_type = util_file_get_type(path); +#ifdef _WIN32 + open_flags |= O_BINARY; +#endif + + if (file_type == OTHER_ERROR) + return NULL; + + if (flags & ~(PMEM_FILE_ALL_FLAGS)) { + ERR("invalid flag specified %x", flags); + errno = EINVAL; + return NULL; + } + + if (file_type == TYPE_DEVDAX) { + if (flags & ~(PMEM_DAX_VALID_FLAGS)) { + ERR("flag unsupported for Device DAX %x", flags); + errno = EINVAL; + return NULL; + } else { + /* we are ignoring all of the flags */ + flags = 0; + ssize_t actual_len = util_file_get_size(path); + if (actual_len < 0) { + ERR("unable to read Device DAX size"); + errno = EINVAL; + return NULL; + } + if (len != 0 && len != (size_t)actual_len) { + ERR("Device DAX length must be either 0 or " + "the exact size of the device: %zu", + actual_len); + errno = EINVAL; + return NULL; + } + len = 0; + } + } + + if (flags & PMEM_FILE_CREATE) { + if ((os_off_t)len < 0) { + ERR("invalid file length %zu", len); + errno = EINVAL; + return NULL; + } + open_flags |= O_CREAT; + } + + if (flags & PMEM_FILE_EXCL) + open_flags |= O_EXCL; + + if ((len != 0) && !(flags & PMEM_FILE_CREATE)) { + ERR("non-zero 'len' not allowed without PMEM_FILE_CREATE"); + errno = EINVAL; + return NULL; + } + + if ((len == 0) && (flags & PMEM_FILE_CREATE)) { + ERR("zero 'len' not allowed with PMEM_FILE_CREATE"); + errno = EINVAL; + return NULL; + } + + if ((flags & PMEM_FILE_TMPFILE) && !(flags & PMEM_FILE_CREATE)) { + ERR("PMEM_FILE_TMPFILE not allowed without PMEM_FILE_CREATE"); + errno = EINVAL; + return NULL; + } + + if (flags & PMEM_FILE_TMPFILE) { + if ((fd = util_tmpfile(path, + OS_DIR_SEP_STR"pmem.XXXXXX", + open_flags & O_EXCL)) < 0) { + LOG(2, "failed to create temporary file at \"%s\"", + path); + return NULL; + } + } else { + if ((fd = os_open(path, open_flags, mode)) < 0) { + ERR("!open %s", path); + return NULL; + } + if ((flags & PMEM_FILE_CREATE) && (flags & PMEM_FILE_EXCL)) + delete_on_err = 1; + } + + if (flags & PMEM_FILE_CREATE) { + /* + * Always set length of file to 'len'. + * (May either extend or truncate existing file.) 
+ */ + if (os_ftruncate(fd, (os_off_t)len) != 0) { + ERR("!ftruncate"); + goto err; + } + if ((flags & PMEM_FILE_SPARSE) == 0) { + if ((errno = os_posix_fallocate(fd, 0, + (os_off_t)len)) != 0) { + ERR("!posix_fallocate"); + goto err; + } + } + } else { + ssize_t actual_size = util_fd_get_size(fd); + if (actual_size < 0) { + ERR("stat %s: negative size", path); + errno = EINVAL; + goto err; + } + + len = (size_t)actual_size; + } + + void *addr = pmem_map_register(fd, len, path, file_type == TYPE_DEVDAX); + if (addr == NULL) + goto err; + + if (mapped_lenp != NULL) + *mapped_lenp = len; + + if (is_pmemp != NULL) + *is_pmemp = pmem_is_pmem(addr, len); + + LOG(3, "returning %p", addr); + + VALGRIND_REGISTER_PMEM_MAPPING(addr, len); + VALGRIND_REGISTER_PMEM_FILE(fd, addr, len, 0); + + (void) os_close(fd); + + return addr; +err: + oerrno = errno; + (void) os_close(fd); + if (delete_on_err) + (void) os_unlink(path); + errno = oerrno; + return NULL; +} + +#ifndef _WIN32 +/* + * pmem_map_file -- create or open the file and map it to memory + */ +void * +pmem_map_file(const char *path, size_t len, int flags, + mode_t mode, size_t *mapped_lenp, int *is_pmemp) +{ + return pmem_map_fileU(path, len, flags, mode, mapped_lenp, is_pmemp); +} +#else +/* + * pmem_map_fileW -- create or open the file and map it to memory + */ +void * +pmem_map_fileW(const wchar_t *path, size_t len, int flags, mode_t mode, + size_t *mapped_lenp, int *is_pmemp) { + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + void *ret = pmem_map_fileU(upath, len, flags, mode, mapped_lenp, + is_pmemp); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmem_unmap -- unmap the specified region + */ +int +pmem_unmap(void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + +#ifndef _WIN32 + util_range_unregister(addr, len); +#endif + VALGRIND_REMOVE_PMEM_MAPPING(addr, len); + return util_unmap(addr, len); +} + +/* + * pmem_memmove -- memmove to pmem + */ +void * +pmem_memmove(void *pmemdest, const void *src, size_t len, unsigned flags) +{ + LOG(15, "pmemdest %p src %p len %zu flags 0x%x", + pmemdest, src, len, flags); + +#ifdef DEBUG + if (flags & ~PMEM_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM_API_START(); + Funcs.memmove_nodrain(pmemdest, src, len, flags & ~PMEM_F_MEM_NODRAIN, + Funcs.flush); + + if ((flags & (PMEM_F_MEM_NODRAIN | PMEM_F_MEM_NOFLUSH)) == 0) + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memcpy -- memcpy to pmem + */ +void * +pmem_memcpy(void *pmemdest, const void *src, size_t len, unsigned flags) +{ + LOG(15, "pmemdest %p src %p len %zu flags 0x%x", + pmemdest, src, len, flags); + +#ifdef DEBUG + if (flags & ~PMEM_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM_API_START(); + Funcs.memmove_nodrain(pmemdest, src, len, flags & ~PMEM_F_MEM_NODRAIN, + Funcs.flush); + + if ((flags & (PMEM_F_MEM_NODRAIN | PMEM_F_MEM_NOFLUSH)) == 0) + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memset -- memset to pmem + */ +void * +pmem_memset(void *pmemdest, int c, size_t len, unsigned flags) +{ + LOG(15, "pmemdest %p c 0x%x len %zu flags 0x%x", + pmemdest, c, len, flags); + +#ifdef DEBUG + if (flags & ~PMEM_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + + PMEM_API_START(); + Funcs.memset_nodrain(pmemdest, c, len, flags & ~PMEM_F_MEM_NODRAIN, + Funcs.flush); + + if ((flags & (PMEM_F_MEM_NODRAIN | PMEM_F_MEM_NOFLUSH)) == 0) + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} 
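+
+/*
+ * [Editor's note: how the interfaces above are typically consumed -- a
+ * hedged end-to-end sketch mirroring the flow described at the top of this
+ * file; the path and the src buffer are placeholders and error handling is
+ * elided:
+ *
+ *	size_t mapped_len;
+ *	int is_pmem;
+ *	char *dst = pmem_map_file("/mnt/pmem/file", 4096, PMEM_FILE_CREATE,
+ *			0666, &mapped_len, &is_pmem);
+ *	memcpy(dst, src, 64);
+ *	if (is_pmem)
+ *		pmem_persist(dst, 64);
+ *	else
+ *		pmem_msync(dst, 64);
+ *	pmem_unmap(dst, mapped_len);]
+ */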
+ +/* + * pmem_memmove_nodrain -- memmove to pmem without hw drain + */ +void * +pmem_memmove_nodrain(void *pmemdest, const void *src, size_t len) +{ + LOG(15, "pmemdest %p src %p len %zu", pmemdest, src, len); + + PMEM_API_START(); + + Funcs.memmove_nodrain(pmemdest, src, len, 0, Funcs.flush); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memcpy_nodrain -- memcpy to pmem without hw drain + */ +void * +pmem_memcpy_nodrain(void *pmemdest, const void *src, size_t len) +{ + LOG(15, "pmemdest %p src %p len %zu", pmemdest, src, len); + + PMEM_API_START(); + + Funcs.memmove_nodrain(pmemdest, src, len, 0, Funcs.flush); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memmove_persist -- memmove to pmem + */ +void * +pmem_memmove_persist(void *pmemdest, const void *src, size_t len) +{ + LOG(15, "pmemdest %p src %p len %zu", pmemdest, src, len); + + PMEM_API_START(); + + Funcs.memmove_nodrain(pmemdest, src, len, 0, Funcs.flush); + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memcpy_persist -- memcpy to pmem + */ +void * +pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) +{ + LOG(15, "pmemdest %p src %p len %zu", pmemdest, src, len); + + PMEM_API_START(); + + Funcs.memmove_nodrain(pmemdest, src, len, 0, Funcs.flush); + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memset_nodrain -- memset to pmem without hw drain + */ +void * +pmem_memset_nodrain(void *pmemdest, int c, size_t len) +{ + LOG(15, "pmemdest %p c %d len %zu", pmemdest, c, len); + + PMEM_API_START(); + + Funcs.memset_nodrain(pmemdest, c, len, 0, Funcs.flush); + + PMEM_API_END(); + return pmemdest; +} + +/* + * pmem_memset_persist -- memset to pmem + */ +void * +pmem_memset_persist(void *pmemdest, int c, size_t len) +{ + LOG(15, "pmemdest %p c %d len %zu", pmemdest, c, len); + + PMEM_API_START(); + + Funcs.memset_nodrain(pmemdest, c, len, 0, Funcs.flush); + pmem_drain(); + + PMEM_API_END(); + return pmemdest; +} + +/* + * memmove_nodrain_libc -- (internal) memmove to pmem using libc + */ +static void * +memmove_nodrain_libc(void *pmemdest, const void *src, size_t len, + unsigned flags, flush_func flush) +{ + LOG(15, "pmemdest %p src %p len %zu flags 0x%x", pmemdest, src, len, + flags); + + memmove(pmemdest, src, len); + + if (!(flags & PMEM_F_MEM_NOFLUSH)) + flush(pmemdest, len); + + return pmemdest; +} + +/* + * memset_nodrain_libc -- (internal) memset to pmem using libc + */ +static void * +memset_nodrain_libc(void *pmemdest, int c, size_t len, unsigned flags, + flush_func flush) +{ + LOG(15, "pmemdest %p c 0x%x len %zu flags 0x%x", pmemdest, c, len, + flags); + + memset(pmemdest, c, len); + + if (!(flags & PMEM_F_MEM_NOFLUSH)) + flush(pmemdest, len); + + return pmemdest; +} + +/* + * flush_empty -- (internal) do not flush the CPU cache + */ +static void +flush_empty(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + flush_empty_nolog(addr, len); +} + +/* + * fence_empty -- (internal) issue the fence instruction + */ +static void +fence_empty(void) +{ + LOG(15, NULL); + + VALGRIND_DO_FENCE; +} + +/* + * pmem_init -- load-time initialization for pmem.c + */ +void +pmem_init(void) +{ + LOG(3, NULL); + + struct pmem2_arch_info info; + info.memmove_nodrain = NULL; + info.memset_nodrain = NULL; + info.flush = NULL; + info.fence = NULL; + info.flush_has_builtin_fence = 0; + + pmem2_arch_init(&info); + + int flush; + char *e = os_getenv("PMEM_NO_FLUSH"); + if (e && (strcmp(e, "1") == 0)) { + flush = 0; + LOG(3, "Forced not flushing 
CPU_cache"); + } else if (e && (strcmp(e, "0") == 0)) { + flush = 1; + LOG(3, "Forced flushing CPU_cache"); + } else if (pmem2_auto_flush() == 1) { + flush = 0; + LOG(3, "Not flushing CPU_cache, eADR detected"); + } else { + flush = 1; + LOG(3, "Flushing CPU cache"); + } + + Funcs.deep_flush = info.flush; + if (flush) { + Funcs.flush = info.flush; + Funcs.memmove_nodrain = info.memmove_nodrain; + Funcs.memset_nodrain = info.memset_nodrain; + if (info.flush_has_builtin_fence) + Funcs.fence = fence_empty; + else + Funcs.fence = info.fence; + } else { + Funcs.memmove_nodrain = info.memmove_nodrain_eadr; + Funcs.memset_nodrain = info.memset_nodrain_eadr; + Funcs.flush = flush_empty; + Funcs.fence = info.fence; + } + + char *ptr = os_getenv("PMEM_NO_GENERIC_MEMCPY"); + long long no_generic = 0; + if (ptr) + no_generic = atoll(ptr); + + if (info.memmove_nodrain == NULL) { + if (no_generic) { + Funcs.memmove_nodrain = memmove_nodrain_libc; + LOG(3, "using libc memmove"); + } else { + Funcs.memmove_nodrain = memmove_nodrain_generic; + LOG(3, "using generic memmove"); + } + } else { + Funcs.memmove_nodrain = info.memmove_nodrain; + } + + if (info.memset_nodrain == NULL) { + if (no_generic) { + Funcs.memset_nodrain = memset_nodrain_libc; + LOG(3, "using libc memset"); + } else { + Funcs.memset_nodrain = memset_nodrain_generic; + LOG(3, "using generic memset"); + } + } else { + Funcs.memset_nodrain = info.memset_nodrain; + } + + if (Funcs.flush == flush_empty) + LOG(3, "not flushing CPU cache"); + else if (Funcs.flush != Funcs.deep_flush) + FATAL("invalid flush function address"); + + pmem_os_init(&Is_pmem); +} + +/* + * pmem_deep_persist -- perform deep persist on a memory range + * + * It merely acts as wrapper around an msync call in most cases, the only + * exception is the case of an mmap'ed DAX device on Linux. 
+ */ +int +pmem_deep_persist(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + pmem_deep_flush(addr, len); + return pmem_deep_drain(addr, len); +} + +/* + * pmem_deep_drain -- perform deep drain on a memory range + */ +int +pmem_deep_drain(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + return os_range_deep_common((uintptr_t)addr, len); +} + +#if VG_PMEMCHECK_ENABLED +/* + * pmem_emit_log -- logs library and function names to pmemcheck store log + */ +void +pmem_emit_log(const char *func, int order) +{ + util_emit_log("libpmem", func, order); +} +#endif + +#if FAULT_INJECTION +void +pmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + core_inject_fault_at(type, nth, at); +} + +int +pmem_fault_injection_enabled(void) +{ + return core_fault_injection_enabled(); +} +#endif diff --git a/src/pmdk/src/libpmem/pmem.h b/src/pmdk/src/libpmem/pmem.h new file mode 100644 index 000000000..7cf6bf13c --- /dev/null +++ b/src/pmdk/src/libpmem/pmem.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * pmem.h -- internal definitions for libpmem + */ +#ifndef PMEM_H +#define PMEM_H + +#include +#include "alloc.h" +#include "fault_injection.h" +#include "util.h" +#include "valgrind_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PMEM_LOG_PREFIX "libpmem" +#define PMEM_LOG_LEVEL_VAR "PMEM_LOG_LEVEL" +#define PMEM_LOG_FILE_VAR "PMEM_LOG_FILE" + +typedef int (*is_pmem_func)(const void *addr, size_t len); + +void pmem_init(void); +void pmem_os_init(is_pmem_func *func); + +int is_pmem_detect(const void *addr, size_t len); +void *pmem_map_register(int fd, size_t len, const char *path, int is_dev_dax); + +#if FAULT_INJECTION +void +pmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +pmem_fault_injection_enabled(void); +#else +static inline void +pmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +pmem_fault_injection_enabled(void) +{ + return 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem/pmem_posix.c b/src/pmdk/src/libpmem/pmem_posix.c new file mode 100644 index 000000000..d295b13c7 --- /dev/null +++ b/src/pmdk/src/libpmem/pmem_posix.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * pmem_posix.c -- pmem utilities with Posix implementation + */ + +#include +#include + +#include "pmem.h" +#include "out.h" +#include "mmap.h" + +/* + * is_pmem_detect -- implement pmem_is_pmem() + * + * This function returns true only if the entire range can be confirmed + * as being direct access persistent memory. Finding any part of the + * range is not direct access, or failing to look up the information + * because it is unmapped or because any sort of error happens, just + * results in returning false. 
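+ *
+ * [Editor's note: callers normally obtain this answer once, at mapping time,
+ * through the is_pmemp out-parameter of pmem_map_file(), rather than
+ * re-querying it on every operation.]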
+ */ +int +is_pmem_detect(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + if (len == 0) + return 0; + + int retval = util_range_is_pmem(addr, len); + + LOG(4, "returning %d", retval); + return retval; +} + +/* + * pmem_map_register -- memory map file and register mapping + */ +void * +pmem_map_register(int fd, size_t len, const char *path, int is_dev_dax) +{ + LOG(3, "fd %d len %zu path %s id_dev_dax %d", + fd, len, path, is_dev_dax); + + void *addr; + int map_sync; + addr = util_map(fd, 0, len, MAP_SHARED, 0, 0, &map_sync); + if (!addr) + return NULL; + + enum pmem_map_type type = MAX_PMEM_TYPE; + if (is_dev_dax) + type = PMEM_DEV_DAX; + else if (map_sync) + type = PMEM_MAP_SYNC; + + if (type != MAX_PMEM_TYPE) { + if (util_range_register(addr, len, path, type)) { + LOG(1, "can't track mapped region"); + goto err_unmap; + } + } + + return addr; +err_unmap: + util_unmap(addr, len); + return NULL; +} + +/* + * pmem_os_init -- os-dependent part of pmem initialization + */ +void +pmem_os_init(is_pmem_func *func) +{ + LOG(3, NULL); + + *func = is_pmem_detect; +} diff --git a/src/pmdk/src/libpmem/pmem_windows.c b/src/pmdk/src/libpmem/pmem_windows.c new file mode 100644 index 000000000..f8a94eb01 --- /dev/null +++ b/src/pmdk/src/libpmem/pmem_windows.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * pmem_windows.c -- pmem utilities with OS-specific implementation + */ + +#include +#include "pmem.h" +#include "out.h" +#include "mmap.h" +#include "win_mmap.h" +#include "sys/mman.h" + +#if (NTDDI_VERSION >= NTDDI_WIN10_RS1) +typedef BOOL (WINAPI *PQVM)( + HANDLE, const void *, + enum WIN32_MEMORY_INFORMATION_CLASS, PVOID, + SIZE_T, PSIZE_T); + +static PQVM Func_qvmi = NULL; +#endif + +/* + * is_direct_mapped -- (internal) for each page in the given region + * checks with MM, if it's direct mapped. 
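+ *
+ * [Editor's note: on Windows builds older than 10 RS1, where
+ * QueryVirtualMemoryInformation() is unavailable, this conservatively
+ * reports false, i.e. non-DAX.]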
+ */ +static int +is_direct_mapped(const void *begin, const void *end) +{ + LOG(3, "begin %p end %p", begin, end); + +#if (NTDDI_VERSION >= NTDDI_WIN10_RS1) + int retval = 1; + WIN32_MEMORY_REGION_INFORMATION region_info; + SIZE_T bytes_returned; + + if (Func_qvmi == NULL) { + LOG(4, "QueryVirtualMemoryInformation not supported, " + "assuming non-DAX."); + return 0; + } + + const void *begin_aligned = (const void *)rounddown((intptr_t)begin, + Pagesize); + const void *end_aligned = (const void *)roundup((intptr_t)end, + Pagesize); + + for (const void *page = begin_aligned; + page < end_aligned; + page = (const void *)((char *)page + Pagesize)) { + if (Func_qvmi(GetCurrentProcess(), page, + MemoryRegionInfo, ®ion_info, + sizeof(region_info), &bytes_returned)) { + retval = region_info.DirectMapped; + } else { + LOG(4, "QueryVirtualMemoryInformation failed, assuming " + "non-DAX. Last error: %08x", GetLastError()); + retval = 0; + } + + if (retval == 0) { + LOG(4, "page %p is not direct mapped", page); + break; + } + } + + return retval; +#else + /* if the MM API is not available the safest answer is NO */ + return 0; +#endif /* NTDDI_VERSION >= NTDDI_WIN10_RS1 */ + +} + +/* + * is_pmem_detect -- implement pmem_is_pmem() + * + * This function returns true only if the entire range can be confirmed + * as being direct access persistent memory. Finding any part of the + * range is not direct access, or failing to look up the information + * because it is unmapped or because any sort of error happens, just + * results in returning false. + */ +int +is_pmem_detect(const void *addr, size_t len) +{ + LOG(3, "addr %p len %zu", addr, len); + + if (len == 0) + return 0; + + if (len > UINTPTR_MAX - (uintptr_t)addr) { + len = UINTPTR_MAX - (uintptr_t)addr; + LOG(4, "limit len to %zu to not get beyond address space", len); + } + + int retval = 1; + const void *begin = addr; + const void *end = (const void *)((char *)addr + len); + + LOG(4, "begin %p end %p", begin, end); + + AcquireSRWLockShared(&FileMappingQLock); + + PFILE_MAPPING_TRACKER mt; + PMDK_SORTEDQ_FOREACH(mt, &FileMappingQHead, ListEntry) { + if (mt->BaseAddress >= end) { + LOG(4, "ignoring all mapped ranges beyond given range"); + break; + } + if (mt->EndAddress <= begin) { + LOG(4, "skipping all mapped ranges before given range"); + continue; + } + + if (!(mt->Flags & FILE_MAPPING_TRACKER_FLAG_DIRECT_MAPPED)) { + LOG(4, "tracked range [%p, %p) is not direct mapped", + mt->BaseAddress, mt->EndAddress); + retval = 0; + break; + } + + /* + * If there is a gap between the given region that we process + * currently and the mapped region in our tracking list, we + * need to process the gap by taking the long route of asking + * MM for each page in that range. + */ + if (begin < mt->BaseAddress && + !is_direct_mapped(begin, mt->BaseAddress)) { + LOG(4, "untracked range [%p, %p) is not direct mapped", + begin, mt->BaseAddress); + retval = 0; + break; + } + + /* push our begin to reflect what we have already processed */ + begin = mt->EndAddress; + } + + /* + * If we still have a range to verify, check with MM if the entire + * region is direct mapped. 
+ */ + if (begin < end && !is_direct_mapped(begin, end)) { + LOG(4, "untracked end range [%p, %p) is not direct mapped", + begin, end); + retval = 0; + } + + ReleaseSRWLockShared(&FileMappingQLock); + + LOG(4, "returning %d", retval); + return retval; +} + +/* + * pmem_map_register -- memory map file and register mapping + */ +void * +pmem_map_register(int fd, size_t len, const char *path, int is_dev_dax) +{ + /* there is no device dax on windows */ + ASSERTeq(is_dev_dax, 0); + + return util_map(fd, 0, len, MAP_SHARED, 0, 0, NULL); +} + +/* + * pmem_os_init -- os-dependent part of pmem initialization + */ +void +pmem_os_init(is_pmem_func *func) +{ + LOG(3, NULL); + + *func = is_pmem_detect; +#if NTDDI_VERSION >= NTDDI_WIN10_RS1 + Func_qvmi = (PQVM)GetProcAddress( + GetModuleHandle(TEXT("KernelBase.dll")), + "QueryVirtualMemoryInformation"); +#endif +} diff --git a/src/pmdk/src/libpmem2/Makefile b/src/pmdk/src/libpmem2/Makefile new file mode 100644 index 000000000..4816aa364 --- /dev/null +++ b/src/pmdk/src/libpmem2/Makefile @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019-2020, Intel Corporation + +# +# src/libpmem2/Makefile -- Makefile for libpmem2 +# + +include ../common.inc + +LIBRARY_NAME = pmem2 +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 +SOURCE =\ + $(COMMON)/ravl.c\ + libpmem2.c\ + badblocks.c\ + badblocks_$(OS_DIMM).c\ + config.c\ + deep_flush.c\ + errormsg.c\ + memops_generic.c\ + map.c\ + map_posix.c\ + persist.c\ + persist_posix.c\ + pmem2_utils.c\ + usc_$(OS_DIMM).c\ + source.c\ + source_posix.c\ + vm_reservation.c\ + vm_reservation_posix.c\ + ravl_interval.c + +ifeq ($(OS_KERNEL_NAME),Linux) +SOURCE +=\ + auto_flush_linux.c\ + deep_flush_linux.c\ + extent_linux.c\ + pmem2_utils_linux.c\ + pmem2_utils_$(OS_DIMM).c +else +SOURCE +=\ + auto_flush_none.c\ + deep_flush_other.c\ + extent_none.c\ + pmem2_utils_other.c +endif + +ifeq ($(OS_DIMM),ndctl) +SOURCE += region_namespace_ndctl.c +else +SOURCE += region_namespace_none.c +endif + +include $(ARCH)/sources.inc + +include ../core/pmemcore.inc + +SOURCE += $(LIBPMEM2_ARCH_SOURCE) + +include ../Makefile.inc + +include $(PMEM2)/$(ARCH)/flags.inc + +CFLAGS += -I. $(LIBNDCTL_CFLAGS) +LIBS += -pthread $(LIBNDCTL_LIBS) diff --git a/src/pmdk/src/libpmem2/aarch64/arm_cacheops.h b/src/pmdk/src/libpmem2/aarch64/arm_cacheops.h new file mode 100644 index 000000000..5eab9995d --- /dev/null +++ b/src/pmdk/src/libpmem2/aarch64/arm_cacheops.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ +/* + * ARM inline assembly to flush and invalidate caches + * clwb => dc cvac + * clflushopt => dc civac + * fence => dmb ish + * sfence => dmb ishst + */ + +/* + * Cache instructions on ARM: + * ARMv8.0-a DC CVAC - cache clean to Point of Coherency + * Meant for thread synchronization, usually implies + * real memory flush but may mean less. + * ARMv8.2-a DC CVAP - cache clean to Point of Persistency + * Meant exactly for our use. + * ARMv8.5-a DC CVADP - cache clean to Point of Deep Persistency + * As of mid-2019 not on any commercially available CPU. + * Any of the above may be disabled for EL0, but it's probably safe to consider + * that a system configuration error. + * Other flags include I (like "DC CIVAC") that invalidates the cache line, but + * we don't want that. 
+ * + * Memory fences: + * * DMB [ISH] MFENCE + * * DMB [ISH]ST SFENCE + * * DMB [ISH]LD LFENCE + * + * Memory domains (cache coherency): + * * non-shareable - local to a single core + * * inner shareable (ISH) - a group of CPU clusters/sockets/other hardware + * Linux requires that anything within one operating system/hypervisor + * is within the same Inner Shareable domain. + * * outer shareable (OSH) - one or more separate ISH domains + * * full system (SY) - anything that can possibly access memory + * Docs: ARM DDI 0487E.a page B2-144. + * + * Exception (privilege) levels: + * * EL0 - userspace (ring 3) + * * EL1 - kernel (ring 0) + * * EL2 - hypervisor (ring -1) + * * EL3 - "secure world" (ring -3) + */ + +#ifndef AARCH64_CACHEOPS_H +#define AARCH64_CACHEOPS_H + +#include + +static inline void +arm_clean_va_to_poc(void const *p __attribute__((unused))) +{ + asm volatile("dc cvac, %0" : : "r" (p) : "memory"); +} + +static inline void +arm_store_memory_barrier(void) +{ + asm volatile("dmb ishst" : : : "memory"); +} +#endif diff --git a/src/pmdk/src/libpmem2/aarch64/flags.inc b/src/pmdk/src/libpmem2/aarch64/flags.inc new file mode 100644 index 000000000..e54a244ed --- /dev/null +++ b/src/pmdk/src/libpmem2/aarch64/flags.inc @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +# +# src/libpmem2/aarch64/flags.inc -- flags for libpmem2/arm64 +# + +vpath %.c $(TOP)/src/libpmem2/aarch64 +vpath %.h $(TOP)/src/libpmem2/aarch64 + +CFLAGS += -Iaarch64 diff --git a/src/pmdk/src/libpmem2/aarch64/flush.h b/src/pmdk/src/libpmem2/aarch64/flush.h new file mode 100644 index 000000000..880d15b7d --- /dev/null +++ b/src/pmdk/src/libpmem2/aarch64/flush.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +#ifndef ARM64_FLUSH_H +#define ARM64_FLUSH_H + +#include +#include "arm_cacheops.h" +#include "util.h" + +#define FLUSH_ALIGN ((uintptr_t)64) + +/* + * flush_dcache_nolog -- flush the CPU cache, using DC CVAC + */ +static force_inline void +flush_dcache_nolog(const void *addr, size_t len) +{ + uintptr_t uptr; + + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. 
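+ * + * For example (hypothetical values), addr == 0x1038 and len == 16 make + * uptr start at 0x1000 and take one more step to 0x1040, i.e. two + * DC CVAC operations covering the two 64B lines intersecting + * [0x1038, 0x1048).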
+ */ + for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) { + arm_clean_va_to_poc((char *)uptr); + } +} + +#endif diff --git a/src/pmdk/src/libpmem2/aarch64/init.c b/src/pmdk/src/libpmem2/aarch64/init.c new file mode 100644 index 000000000..fe5b8c4ac --- /dev/null +++ b/src/pmdk/src/libpmem2/aarch64/init.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +#include + +#include "auto_flush.h" +#include "flush.h" +#include "out.h" +#include "pmem2_arch.h" + +/* + * memory_barrier -- (internal) issue the fence instruction + */ +static void +memory_barrier(void) +{ + LOG(15, NULL); + arm_store_memory_barrier(); +} + +/* + * flush_dcache -- (internal) flush the CPU cache + */ +static void +flush_dcache(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + flush_dcache_nolog(addr, len); +} + +/* + * pmem2_arch_init -- initialize architecture-specific list of pmem operations + */ +void +pmem2_arch_init(struct pmem2_arch_info *info) +{ + LOG(3, NULL); + + info->fence = memory_barrier; + info->flush = flush_dcache; + + if (info->flush == flush_dcache) + LOG(3, "Synchronize VA to poc for ARM"); + else + FATAL("invalid deep flush function address"); +} diff --git a/src/pmdk/src/libpmem2/aarch64/sources.inc b/src/pmdk/src/libpmem2/aarch64/sources.inc new file mode 100644 index 000000000..22f79eea8 --- /dev/null +++ b/src/pmdk/src/libpmem2/aarch64/sources.inc @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +# +# src/libpmem2/aarch64/sources.inc -- list of files for libpmem2/arm64 +# + +LIBPMEM2_ARCH_SOURCE = init.c diff --git a/src/pmdk/src/libpmem2/auto_flush.h b/src/pmdk/src/libpmem2/auto_flush.h new file mode 100644 index 000000000..8865c13f1 --- /dev/null +++ b/src/pmdk/src/libpmem2/auto_flush.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * auto_flush.h -- auto flush detection functionality + */ + +#ifndef PMEM2_AUTO_FLUSH_H +#define PMEM2_AUTO_FLUSH_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +int pmem2_auto_flush(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/auto_flush_linux.c b/src/pmdk/src/libpmem2/auto_flush_linux.c new file mode 100644 index 000000000..ca12e348b --- /dev/null +++ b/src/pmdk/src/libpmem2/auto_flush_linux.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * auto_flush_linux.c -- Linux auto flush detection + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include "out.h" +#include "os.h" +#include "fs.h" +#include "auto_flush.h" + +#define BUS_DEVICE_PATH "/sys/bus/nd/devices" +#define PERSISTENCE_DOMAIN "persistence_domain" +#define DOMAIN_VALUE_LEN 32 + +/* + * check_cpu_cache -- (internal) check if file contains "cpu_cache" entry + */ +static int +check_cpu_cache(const char *domain_path) +{ + LOG(3, "domain_path: %s", domain_path); + + char domain_value[DOMAIN_VALUE_LEN]; + int domain_fd; + int cpu_cache = 0; + + if ((domain_fd = os_open(domain_path, O_RDONLY)) < 0) { + LOG(1, "!open(\"%s\", O_RDONLY)", domain_path); + goto end; + } + ssize_t len = read(domain_fd, domain_value, + DOMAIN_VALUE_LEN); + + if (len < 0) { + ERR("!read(%d, %p, %d)", domain_fd, + domain_value, DOMAIN_VALUE_LEN); + cpu_cache = -1; + goto end; + } else if (len == 0) { + errno = EIO; + ERR("read(%d, %p, %d) empty 
string", + domain_fd, domain_value, + DOMAIN_VALUE_LEN); + cpu_cache = -1; + goto end; + } else if (domain_value[len - 1] != '\n') { + ERR("!read(%d, %p, %d) invalid format", + domain_fd, domain_value, + DOMAIN_VALUE_LEN); + cpu_cache = -1; + goto end; + } + + domain_value[len - 1] = '\0'; + LOG(15, "detected persistent_domain: %s", domain_value); + if (strcmp(domain_value, "cpu_cache") == 0) { + LOG(15, "cpu_cache in persistent_domain: %s", domain_path); + cpu_cache = 1; + } else { + LOG(15, "cpu_cache not in persistent_domain: %s", domain_path); + cpu_cache = 0; + } + +end: + if (domain_fd >= 0) + os_close(domain_fd); + return cpu_cache; +} + +/* + * check_domain_in_region -- (internal) check if region + * contains persistence_domain file + */ +static int +check_domain_in_region(const char *region_path) +{ + LOG(3, "region_path: %s", region_path); + + struct fs *reg = NULL; + struct fs_entry *reg_entry; + char domain_path[PATH_MAX]; + int cpu_cache = 0; + + reg = fs_new(region_path); + if (reg == NULL) { + ERR("!fs_new: \"%s\"", region_path); + cpu_cache = -1; + goto end; + } + + while ((reg_entry = fs_read(reg)) != NULL) { + /* + * persistence_domain has to be a file type entry + * and it has to be first level child for region; + * there is no need to run into deeper levels + */ + if (reg_entry->type != FS_ENTRY_FILE || + strcmp(reg_entry->name, + PERSISTENCE_DOMAIN) != 0 || + reg_entry->level != 1) + continue; + + int ret = util_snprintf(domain_path, PATH_MAX, + "%s/"PERSISTENCE_DOMAIN, region_path); + if (ret < 0) { + ERR("!snprintf"); + cpu_cache = -1; + goto end; + } + cpu_cache = check_cpu_cache(domain_path); + } + +end: + if (reg) + fs_delete(reg); + return cpu_cache; +} + +/* + * pmem2_auto_flush -- check if platform supports auto flush for all regions + * + * Traverse "/sys/bus/nd/devices" path to find all the nvdimm regions, + * then for each region checks if "persistence_domain" file exists and + * contains "cpu_cache" string. + * If for any region "persistence_domain" entry does not exists, or its + * context is not as expected, assume eADR is not available on this platform. + */ +int +pmem2_auto_flush(void) +{ + LOG(15, NULL); + + char *device_path; + int cpu_cache = 0; + + device_path = BUS_DEVICE_PATH; + + os_stat_t sdev; + if (os_stat(device_path, &sdev) != 0 || + S_ISDIR(sdev.st_mode) == 0) { + LOG(3, "eADR not supported"); + return cpu_cache; + } + + struct fs *dev = fs_new(device_path); + if (dev == NULL) { + ERR("!fs_new: \"%s\"", device_path); + return -1; + } + + struct fs_entry *dev_entry; + + while ((dev_entry = fs_read(dev)) != NULL) { + /* + * Skip if not a symlink, because we expect that + * region on sysfs path is a symlink. + * Skip if depth is different than 1, because region + * we are interested in should be the first level + * child for device. 
+ */ + if ((dev_entry->type != FS_ENTRY_SYMLINK) || + !strstr(dev_entry->name, "region") || + dev_entry->level != 1) + continue; + + LOG(15, "Start traversing region: %s", dev_entry->path); + cpu_cache = check_domain_in_region(dev_entry->path); + if (cpu_cache != 1) + goto end; + } + +end: + fs_delete(dev); + return cpu_cache; +} diff --git a/src/pmdk/src/libpmem2/auto_flush_none.c b/src/pmdk/src/libpmem2/auto_flush_none.c new file mode 100644 index 000000000..77e930e49 --- /dev/null +++ b/src/pmdk/src/libpmem2/auto_flush_none.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019, Intel Corporation */ + +#include "auto_flush.h" +#include "out.h" + +/* + * pmem2_auto_flush -- check if platform supports auto flush for all regions + */ +int +pmem2_auto_flush(void) +{ + LOG(15, NULL); + + return 0; +} diff --git a/src/pmdk/src/libpmem2/auto_flush_windows.c b/src/pmdk/src/libpmem2/auto_flush_windows.c new file mode 100644 index 000000000..fe56e2ffa --- /dev/null +++ b/src/pmdk/src/libpmem2/auto_flush_windows.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * auto_flush_windows.c -- Windows auto flush detection + */ + +#include +#include + +#include "alloc.h" +#include "out.h" +#include "os.h" +#include "endian.h" +#include "auto_flush_windows.h" + +/* + * is_nfit_available -- (internal) check if platform supports NFIT table. + */ +static int +is_nfit_available() +{ + LOG(3, "is_nfit_available()"); + + DWORD signatures_size; + char *signatures = NULL; + int is_nfit = 0; + DWORD offset = 0; + + signatures_size = EnumSystemFirmwareTables(ACPI_SIGNATURE, NULL, 0); + if (signatures_size == 0) { + ERR("!EnumSystemFirmwareTables"); + return -1; + } + signatures = (char *)Malloc(signatures_size + 1); + if (signatures == NULL) { + ERR("!malloc"); + return -1; + } + int ret = EnumSystemFirmwareTables(ACPI_SIGNATURE, + signatures, signatures_size); + signatures[signatures_size] = '\0'; + if (ret != signatures_size) { + ERR("!EnumSystemFirmwareTables"); + goto err; + } + + while (offset <= signatures_size) { + int nfit_sig = strncmp(signatures + offset, + NFIT_STR_SIGNATURE, NFIT_SIGNATURE_LEN); + if (nfit_sig == 0) { + is_nfit = 1; + break; + } + offset += NFIT_SIGNATURE_LEN; + } + + Free(signatures); + return is_nfit; + +err: + Free(signatures); + return -1; +} + +/* + * is_auto_flush_cap_set -- (internal) check if specific + * capabilities bits are set. + * + * ACPI 6.2A Specification: + * Bit[0] - CPU Cache Flush to NVDIMM Durability on + * Power Loss Capable. If set to 1, indicates that platform + * ensures the entire CPU store data path is flushed to + * persistent memory on system power loss. + * Bit[1] - Memory Controller Flush to NVDIMM Durability on Power Loss Capable. + * If set to 1, indicates that platform provides mechanisms to automatically + * flush outstanding write data from the memory controller to persistent memory + * in the event of platform power loss. Note: If bit 0 is set to 1 then this bit + * shall be set to 1 as well. 
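+ * + * Example: capabilities == 0x3 has both bits set, so eADR is reported; + * capabilities == 0x2 (memory controller flush only, i.e. plain ADR) + * makes is_auto_flush_cap_set() return 0, because the CPU cache flush + * capability (bit 0) is required as well.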
+ */ +static int +is_auto_flush_cap_set(uint32_t capabilities) +{ + LOG(3, "is_auto_flush_cap_set capabilities 0x%" PRIx32, capabilities); + + int CPU_cache_flush = CHECK_BIT(capabilities, 0); + int memory_controller_flush = CHECK_BIT(capabilities, 1); + + LOG(15, "CPU_cache_flush %d, memory_controller_flush %d", + CPU_cache_flush, memory_controller_flush); + if (memory_controller_flush == 1 && CPU_cache_flush == 1) + return 1; + + return 0; +} + +/* + * parse_nfit_buffer -- (internal) parse nfit buffer + * if platform_capabilities struct is available return pcs structure. + */ +static struct platform_capabilities +parse_nfit_buffer(const unsigned char *nfit_buffer, unsigned long buffer_size) +{ + LOG(3, "parse_nfit_buffer nfit_buffer %s, buffer_size %lu", + nfit_buffer, buffer_size); + + uint16_t type; + uint16_t length; + size_t offset = sizeof(struct nfit_header); + struct platform_capabilities pcs = {0}; + + while (offset < buffer_size) { + type = *(nfit_buffer + offset); + length = *(nfit_buffer + offset + 2); + if (type == PCS_TYPE_NUMBER) { + if (length == sizeof(struct platform_capabilities)) { + memmove(&pcs, nfit_buffer + offset, length); + return pcs; + } + } + offset += length; + } + return pcs; +} + +/* + * pmem2_auto_flush -- check if platform supports auto flush. + */ +int +pmem2_auto_flush(void) +{ + LOG(3, NULL); + + DWORD nfit_buffer_size = 0; + DWORD nfit_written = 0; + PVOID nfit_buffer = NULL; + struct nfit_header *nfit_data; + struct platform_capabilities *pc = NULL; + + int eADR = 0; + int is_nfit = is_nfit_available(); + if (is_nfit == 0) { + LOG(15, "ACPI NFIT table not available"); + return 0; + } + if (is_nfit < 0 || is_nfit != 1) { + LOG(1, "!is_nfit_available"); + return -1; + } + + /* get the entire nfit size */ + nfit_buffer_size = GetSystemFirmwareTable( + (DWORD)ACPI_SIGNATURE, (DWORD)NFIT_REV_SIGNATURE, NULL, 0); + if (nfit_buffer_size == 0) { + ERR("!GetSystemFirmwareTable"); + return -1; + } + /* reserve buffer */ + nfit_buffer = (unsigned char *)Malloc(nfit_buffer_size); + if (nfit_buffer == NULL) { + ERR("!malloc"); + goto err; + } + /* write actual nfit to buffer */ + nfit_written = GetSystemFirmwareTable( + (DWORD)ACPI_SIGNATURE, (DWORD)NFIT_REV_SIGNATURE, + nfit_buffer, nfit_buffer_size); + if (nfit_written == 0) { + ERR("!GetSystemFirmwareTable"); + goto err; + } + + if (nfit_buffer_size != nfit_written) { + errno = ERROR_INVALID_DATA; + ERR("!GetSystemFirmwareTable invalid data"); + goto err; + } + + nfit_data = (struct nfit_header *)nfit_buffer; + int nfit_sig = strncmp(nfit_data->signature, + NFIT_STR_SIGNATURE, NFIT_SIGNATURE_LEN); + if (nfit_sig != 0) { + ERR("!NFIT buffer has invalid data"); + goto err; + } + + struct platform_capabilities pcs = parse_nfit_buffer( + nfit_buffer, nfit_buffer_size); + eADR = is_auto_flush_cap_set(pcs.capabilities); + + Free(nfit_buffer); + return eADR; + +err: + Free(nfit_buffer); + return -1; +} diff --git a/src/pmdk/src/libpmem2/auto_flush_windows.h b/src/pmdk/src/libpmem2/auto_flush_windows.h new file mode 100644 index 000000000..4153d84d6 --- /dev/null +++ b/src/pmdk/src/libpmem2/auto_flush_windows.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +#ifndef PMEM2_AUTO_FLUSH_WINDOWS_H +#define PMEM2_AUTO_FLUSH_WINDOWS_H 1 + +#define ACPI_SIGNATURE 0x41435049 /* hex value of ACPI signature */ +#define NFIT_REV_SIGNATURE 0x5449464e /* hex value of htonl(NFIT) signature */ +#define NFIT_STR_SIGNATURE "NFIT" + +#define NFIT_SIGNATURE_LEN 4 +#define 
NFIT_OEM_ID_LEN 6 +#define NFIT_OEM_TABLE_ID_LEN 8 +#define NFIT_MAX_STRUCTURES 8 + +#define PCS_RESERVED 3 +#define PCS_RESERVED_2 4 +#define PCS_TYPE_NUMBER 7 + +/* check if bit on 'bit' position in number 'num' is set */ +#define CHECK_BIT(num, bit) (((num) >> (bit)) & 1) +/* + * sets alignment of members of structure + */ +#pragma pack(1) +struct platform_capabilities +{ + uint16_t type; + uint16_t length; + uint8_t highest_valid; + uint8_t reserved[PCS_RESERVED]; + uint32_t capabilities; + uint8_t reserved2[PCS_RESERVED_2]; +}; + +struct nfit_header +{ + uint8_t signature[NFIT_SIGNATURE_LEN]; + uint32_t length; + uint8_t revision; + uint8_t checksum; + uint8_t oem_id[NFIT_OEM_ID_LEN]; + uint8_t oem_table_id[NFIT_OEM_TABLE_ID_LEN]; + uint32_t oem_revision; + uint8_t creator_id[4]; + uint32_t creator_revision; + uint32_t reserved; +}; +#pragma pack() +#endif diff --git a/src/pmdk/src/libpmem2/badblocks.c b/src/pmdk/src/libpmem2/badblocks.c new file mode 100644 index 000000000..e126f3e29 --- /dev/null +++ b/src/pmdk/src/libpmem2/badblocks.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * badblocks.c -- implementation of common bad blocks API + */ + +#include "badblocks.h" +#include "alloc.h" +#include "out.h" + +/* + * badblocks_new -- zalloc bad blocks structure + */ +struct badblocks * +badblocks_new(void) +{ + LOG(3, " "); + + struct badblocks *bbs = Zalloc(sizeof(struct badblocks)); + if (bbs == NULL) { + ERR("!Zalloc"); + } + + return bbs; +} + +/* + * badblocks_delete -- free bad blocks structure + */ +void +badblocks_delete(struct badblocks *bbs) +{ + LOG(3, "badblocks %p", bbs); + + if (bbs == NULL) + return; + + Free(bbs->bbv); + Free(bbs); +} diff --git a/src/pmdk/src/libpmem2/badblocks_ndctl.c b/src/pmdk/src/libpmem2/badblocks_ndctl.c new file mode 100644 index 000000000..76f724c12 --- /dev/null +++ b/src/pmdk/src/libpmem2/badblocks_ndctl.c @@ -0,0 +1,771 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +/* + * badblocks_ndctl.c -- implementation of DIMMs API based on the ndctl library + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libpmem2.h" +#include "pmem2_utils.h" +#include "source.h" +#include "region_namespace_ndctl.h" + +#include "file.h" +#include "out.h" +#include "badblocks.h" +#include "set_badblocks.h" +#include "extent.h" + +typedef int pmem2_badblock_next_type( + struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb); + +typedef void *pmem2_badblock_get_next_type( + struct pmem2_badblock_context *bbctx); + +struct pmem2_badblock_context { + /* file descriptor */ + int fd; + + /* pmem2 file type */ + enum pmem2_file_type file_type; + + /* ndctl context */ + struct ndctl_ctx *ctx; + + /* + * Function pointer to: + * - pmem2_badblock_next_namespace() or + * - pmem2_badblock_next_region() + */ + pmem2_badblock_next_type *pmem2_badblock_next_func; + + /* + * Function pointer to: + * - pmem2_namespace_get_first_badblock() or + * - pmem2_namespace_get_next_badblock() or + * - pmem2_region_get_first_badblock() or + * - pmem2_region_get_next_badblock() + */ + pmem2_badblock_get_next_type *pmem2_badblock_get_next_func; + + /* needed only by the ndctl namespace badblock iterator */ + struct ndctl_namespace *ndns; + + /* needed only by the ndctl region badblock iterator */ + struct { + struct ndctl_bus *bus; + struct ndctl_region *region; + unsigned 
long long ns_res; /* address of the namespace */ + unsigned long long ns_beg; /* the beginning of the namespace */ + unsigned long long ns_end; /* the end of the namespace */ + } rgn; + + /* file's extents */ + struct extents *exts; + unsigned first_extent; + struct pmem2_badblock last_bb; +}; + +/* forward declarations */ +static int pmem2_badblock_next_namespace( + struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb); +static int pmem2_badblock_next_region( + struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb); +static void *pmem2_namespace_get_first_badblock( + struct pmem2_badblock_context *bbctx); +static void *pmem2_region_get_first_badblock( + struct pmem2_badblock_context *bbctx); + +/* + * badblocks_get_namespace_bounds -- (internal) returns the bounds + * (offset and size) of the given namespace + * relative to the beginning of its region + */ +static int +badblocks_get_namespace_bounds(struct ndctl_region *region, + struct ndctl_namespace *ndns, + unsigned long long *ns_offset, + unsigned long long *ns_size) +{ + LOG(3, "region %p namespace %p ns_offset %p ns_size %p", + region, ndns, ns_offset, ns_size); + + struct ndctl_pfn *pfn = ndctl_namespace_get_pfn(ndns); + struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns); + + ASSERTne(ns_offset, NULL); + ASSERTne(ns_size, NULL); + + if (pfn) { + *ns_offset = ndctl_pfn_get_resource(pfn); + if (*ns_offset == ULLONG_MAX) { + ERR("(pfn) cannot read offset of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + *ns_size = ndctl_pfn_get_size(pfn); + if (*ns_size == ULLONG_MAX) { + ERR("(pfn) cannot read size of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + LOG(10, "(pfn) ns_offset 0x%llx ns_size %llu", + *ns_offset, *ns_size); + } else if (dax) { + *ns_offset = ndctl_dax_get_resource(dax); + if (*ns_offset == ULLONG_MAX) { + ERR("(dax) cannot read offset of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + *ns_size = ndctl_dax_get_size(dax); + if (*ns_size == ULLONG_MAX) { + ERR("(dax) cannot read size of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + LOG(10, "(dax) ns_offset 0x%llx ns_size %llu", + *ns_offset, *ns_size); + } else { /* raw or btt */ + *ns_offset = ndctl_namespace_get_resource(ndns); + if (*ns_offset == ULLONG_MAX) { + ERR("(raw/btt) cannot read offset of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + *ns_size = ndctl_namespace_get_size(ndns); + if (*ns_size == ULLONG_MAX) { + ERR("(raw/btt) cannot read size of the namespace"); + return PMEM2_E_CANNOT_READ_BOUNDS; + } + + LOG(10, "(raw/btt) ns_offset 0x%llx ns_size %llu", + *ns_offset, *ns_size); + } + + unsigned long long region_offset = ndctl_region_get_resource(region); + if (region_offset == ULLONG_MAX) { + ERR("!cannot read offset of the region"); + return PMEM2_E_ERRNO; + } + + LOG(10, "region_offset 0x%llx", region_offset); + *ns_offset -= region_offset; + + return 0; +} + +/* + * badblocks_devdax_clear_one_badblock -- (internal) clear one bad block + * in the dax device + */ +static int +badblocks_devdax_clear_one_badblock(struct ndctl_bus *bus, + unsigned long long address, + unsigned long long length) +{ + LOG(3, "bus %p address 0x%llx length %llu (bytes)", + bus, address, length); + + int ret; + + struct ndctl_cmd *cmd_ars_cap = ndctl_bus_cmd_new_ars_cap(bus, + address, length); + if (cmd_ars_cap == NULL) { + ERR("ndctl_bus_cmd_new_ars_cap() failed (bus '%s')", + ndctl_bus_get_provider(bus)); + return PMEM2_E_ERRNO; + } + + ret = ndctl_cmd_submit(cmd_ars_cap); + if
(ret) { + ERR("ndctl_cmd_submit() failed (bus '%s')", + ndctl_bus_get_provider(bus)); + /* ndctl_cmd_submit() returns -errno */ + goto out_ars_cap; + } + + struct ndctl_range range; + ret = ndctl_cmd_ars_cap_get_range(cmd_ars_cap, &range); + if (ret) { + ERR("ndctl_cmd_ars_cap_get_range() failed"); + /* ndctl_cmd_ars_cap_get_range() returns -errno */ + goto out_ars_cap; + } + + struct ndctl_cmd *cmd_clear_error = ndctl_bus_cmd_new_clear_error( + range.address, range.length, cmd_ars_cap); + + ret = ndctl_cmd_submit(cmd_clear_error); + if (ret) { + ERR("ndctl_cmd_submit() failed (bus '%s')", + ndctl_bus_get_provider(bus)); + /* ndctl_cmd_submit() returns -errno */ + goto out_clear_error; + } + + size_t cleared = ndctl_cmd_clear_error_get_cleared(cmd_clear_error); + + LOG(4, "cleared %zu out of %llu bad blocks", cleared, length); + + ASSERT(cleared <= length); + + if (cleared < length) { + ERR("failed to clear %llu out of %llu bad blocks", + length - cleared, length); + errno = ENXIO; /* ndctl handles such error in this way */ + ret = PMEM2_E_ERRNO; + } else { + ret = 0; + } + +out_clear_error: + ndctl_cmd_unref(cmd_clear_error); +out_ars_cap: + ndctl_cmd_unref(cmd_ars_cap); + + return ret; +} + +/* + * pmem2_badblock_context_new -- allocate and create a new bad block context + */ +int +pmem2_badblock_context_new(struct pmem2_badblock_context **bbctx, + const struct pmem2_source *src) +{ + LOG(3, "src %p bbctx %p", src, bbctx); + PMEM2_ERR_CLR(); + + ASSERTne(bbctx, NULL); + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not support bad blocks"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_FD); + + struct ndctl_ctx *ctx; + struct ndctl_region *region; + struct ndctl_namespace *ndns; + struct pmem2_badblock_context *tbbctx = NULL; + enum pmem2_file_type pmem2_type; + int ret = PMEM2_E_UNKNOWN; + *bbctx = NULL; + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + pmem2_type = src->value.ftype; + + ret = pmem2_region_namespace(ctx, src, &region, &ndns); + if (ret) { + LOG(1, "getting region and namespace failed"); + goto exit_ndctl_unref; + } + + tbbctx = pmem2_zalloc(sizeof(struct pmem2_badblock_context), &ret); + if (ret) + goto exit_ndctl_unref; + + tbbctx->fd = src->value.fd; + tbbctx->file_type = pmem2_type; + tbbctx->ctx = ctx; + + if (region == NULL || ndns == NULL) { + /* did not find any matching device */ + *bbctx = tbbctx; + return 0; + } + + if (ndctl_namespace_get_mode(ndns) == NDCTL_NS_MODE_FSDAX) { + tbbctx->ndns = ndns; + tbbctx->pmem2_badblock_next_func = + pmem2_badblock_next_namespace; + tbbctx->pmem2_badblock_get_next_func = + pmem2_namespace_get_first_badblock; + } else { + unsigned long long ns_beg, ns_size, ns_end; + ret = badblocks_get_namespace_bounds( + region, ndns, + &ns_beg, &ns_size); + if (ret) { + LOG(1, "cannot read namespace's bounds"); + goto error_free_all; + } + + ns_end = ns_beg + ns_size - 1; + + LOG(10, + "namespace: begin %llu, end %llu size %llu (in 512B sectors)", + B2SEC(ns_beg), B2SEC(ns_end + 1) - 1, B2SEC(ns_size)); + + tbbctx->rgn.bus = ndctl_region_get_bus(region); + tbbctx->rgn.region = region; + tbbctx->rgn.ns_beg = ns_beg; + tbbctx->rgn.ns_end = ns_end; + tbbctx->rgn.ns_res = ns_beg + ndctl_region_get_resource(region); + tbbctx->pmem2_badblock_next_func = + pmem2_badblock_next_region; + tbbctx->pmem2_badblock_get_next_func = + pmem2_region_get_first_badblock; + } + + if (pmem2_type == PMEM2_FTYPE_REG) { + /* only regular files have extents */ + ret =
pmem2_extents_create_get(src->value.fd, &tbbctx->exts); + if (ret) { + LOG(1, "getting extents of fd %i failed", + src->value.fd); + goto error_free_all; + } + } + + /* set the context */ + *bbctx = tbbctx; + + return 0; + +error_free_all: + pmem2_extents_destroy(&tbbctx->exts); + Free(tbbctx); + +exit_ndctl_unref: + ndctl_unref(ctx); + + return ret; +} + +/* + * pmem2_badblock_context_delete -- delete and free the bad block context + */ +void +pmem2_badblock_context_delete(struct pmem2_badblock_context **bbctx) +{ + LOG(3, "bbctx %p", bbctx); + PMEM2_ERR_CLR(); + + ASSERTne(bbctx, NULL); + + if (*bbctx == NULL) + return; + + struct pmem2_badblock_context *tbbctx = *bbctx; + + pmem2_extents_destroy(&tbbctx->exts); + ndctl_unref(tbbctx->ctx); + Free(tbbctx); + + *bbctx = NULL; +} + +/* + * pmem2_namespace_get_next_badblock -- (internal) wrapper for + * ndctl_namespace_get_next_badblock + */ +static void * +pmem2_namespace_get_next_badblock(struct pmem2_badblock_context *bbctx) +{ + LOG(3, "bbctx %p", bbctx); + + return ndctl_namespace_get_next_badblock(bbctx->ndns); +} + +/* + * pmem2_namespace_get_first_badblock -- (internal) wrapper for + * ndctl_namespace_get_first_badblock + */ +static void * +pmem2_namespace_get_first_badblock(struct pmem2_badblock_context *bbctx) +{ + LOG(3, "bbctx %p", bbctx); + + bbctx->pmem2_badblock_get_next_func = pmem2_namespace_get_next_badblock; + return ndctl_namespace_get_first_badblock(bbctx->ndns); +} + +/* + * pmem2_region_get_next_badblock -- (internal) wrapper for + * ndctl_region_get_next_badblock + */ +static void * +pmem2_region_get_next_badblock(struct pmem2_badblock_context *bbctx) +{ + LOG(3, "bbctx %p", bbctx); + + return ndctl_region_get_next_badblock(bbctx->rgn.region); +} + +/* + * pmem2_region_get_first_badblock -- (internal) wrapper for + * ndctl_region_get_first_badblock + */ +static void * +pmem2_region_get_first_badblock(struct pmem2_badblock_context *bbctx) +{ + LOG(3, "bbctx %p", bbctx); + + bbctx->pmem2_badblock_get_next_func = pmem2_region_get_next_badblock; + return ndctl_region_get_first_badblock(bbctx->rgn.region); +} + +/* + * pmem2_badblock_next_namespace -- (internal) version of pmem2_badblock_next() + * called for ndctl with namespace badblock + * iterator + * + * This function works only for fsdax, but does not require any special + * permissions. + */ +static int +pmem2_badblock_next_namespace(struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb) +{ + LOG(3, "bbctx %p bb %p", bbctx, bb); + + ASSERTne(bbctx, NULL); + ASSERTne(bb, NULL); + + struct badblock *bbn; + + bbn = bbctx->pmem2_badblock_get_next_func(bbctx); + if (bbn == NULL) + return PMEM2_E_NO_BAD_BLOCK_FOUND; + + /* + * libndctl returns offset and length of a bad block + * both expressed in 512B sectors. Offset is relative + * to the beginning of the namespace. + */ + bb->offset = SEC2B(bbn->offset); + bb->length = SEC2B(bbn->len); + + return 0; +} + +/* + * pmem2_badblock_next_region -- (internal) version of pmem2_badblock_next() + * called for ndctl with region badblock iterator + * + * This function works for all types of namespaces, but requires read access to + * privileged device information. 
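+ * + * Worked example (illustrative byte offsets): for a namespace occupying + * region-relative bytes [1000, 2000), i.e. ns_beg == 1000 and + * ns_end == 1999, a region bad block covering [1990, 2010) is clipped + * to [1990, 2000) and reported as offset 990, length 10, both relative + * to the namespace start.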
+ */ +static int +pmem2_badblock_next_region(struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb) +{ + LOG(3, "bbctx %p bb %p", bbctx, bb); + + ASSERTne(bbctx, NULL); + ASSERTne(bb, NULL); + + unsigned long long bb_beg, bb_end; + unsigned long long beg, end; + struct badblock *bbn; + + unsigned long long ns_beg = bbctx->rgn.ns_beg; + unsigned long long ns_end = bbctx->rgn.ns_end; + + do { + bbn = bbctx->pmem2_badblock_get_next_func(bbctx); + if (bbn == NULL) + return PMEM2_E_NO_BAD_BLOCK_FOUND; + + LOG(10, + "region bad block: begin %llu end %llu length %u (in 512B sectors)", + bbn->offset, bbn->offset + bbn->len - 1, bbn->len); + + /* + * libndctl returns offset and length of a bad block + * both expressed in 512B sectors. Offset is relative + * to the beginning of the region. + */ + bb_beg = SEC2B(bbn->offset); + bb_end = bb_beg + SEC2B(bbn->len) - 1; + + } while (bb_beg > ns_end || ns_beg > bb_end); + + beg = (bb_beg > ns_beg) ? bb_beg : ns_beg; + end = (bb_end < ns_end) ? bb_end : ns_end; + + /* + * Form a new bad block structure with offset and length + * expressed in bytes and offset relative to the beginning + * of the namespace. + */ + bb->offset = beg - ns_beg; + bb->length = end - beg + 1; + + LOG(4, + "namespace bad block: begin %llu end %llu length %llu (in 512B sectors)", + B2SEC(beg - ns_beg), B2SEC(end - ns_beg), B2SEC(end - beg) + 1); + + return 0; +} + +/* + * pmem2_badblock_next -- get the next bad block + */ +int +pmem2_badblock_next(struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb) +{ + LOG(3, "bbctx %p bb %p", bbctx, bb); + PMEM2_ERR_CLR(); + + ASSERTne(bbctx, NULL); + ASSERTne(bb, NULL); + + struct pmem2_badblock bbn; + unsigned long long bb_beg; + unsigned long long bb_end; + unsigned long long bb_len; + unsigned long long bb_off; + unsigned long long ext_beg = 0; /* placate compiler warnings */ + unsigned long long ext_end = -1ULL; + unsigned e; + int ret; + + if (bbctx->rgn.region == NULL && bbctx->ndns == NULL) { + ERR("Cannot find any matching device, no bad blocks found"); + return PMEM2_E_NO_BAD_BLOCK_FOUND; + } + + struct extents *exts = bbctx->exts; + + /* DAX devices have no extents */ + if (!exts) { + ret = bbctx->pmem2_badblock_next_func(bbctx, &bbn); + *bb = bbn; + return ret; + } + + /* + * There is at least one extent. + * Loop until: + * 1) a bad block overlaps with an extent or + * 2) there are no more bad blocks. + */ + int bb_overlaps_with_extent = 0; + do { + if (bbctx->last_bb.length) { + /* + * We have saved the last bad block to check it + * with the next extent saved + * in bbctx->first_extent. + */ + ASSERTne(bbctx->first_extent, 0); + bbn = bbctx->last_bb; + bbctx->last_bb.offset = 0; + bbctx->last_bb.length = 0; + } else { + ASSERTeq(bbctx->first_extent, 0); + /* look for the next bad block */ + ret = bbctx->pmem2_badblock_next_func(bbctx, &bbn); + if (ret) + return ret; + } + + bb_beg = bbn.offset; + bb_end = bb_beg + bbn.length - 1; + + for (e = bbctx->first_extent; + e < exts->extents_count; + e++) { + + ext_beg = exts->extents[e].offset_physical; + ext_end = ext_beg + exts->extents[e].length - 1; + + /* check if the bad block overlaps with the extent */ + if (bb_beg <= ext_end && ext_beg <= bb_end) { + /* bad block overlaps with the extent */ + bb_overlaps_with_extent = 1; + + if (bb_end > ext_end && + e + 1 < exts->extents_count) { + /* + * The bad block is longer than + * the extent and there are + * more extents. + * Save the current bad block + * to check it with the next extent. 
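+ * + * Example (illustrative): a bad block spanning [4096, 12288) checked + * against extents [0, 8192) and [8192, 16384) is reported in two calls: + * the first call clips it to extent 0 and stashes it in last_bb, the + * next call resumes at first_extent == 1 and reports the remainder + * against extent 1.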
+ */ + bbctx->first_extent = e + 1; + bbctx->last_bb = bbn; + } else { + /* + * All extents were checked + * with the current bad block. + */ + bbctx->first_extent = 0; + bbctx->last_bb.length = 0; + bbctx->last_bb.offset = 0; + } + break; + } + } + + /* check all extents with the next bad block */ + if (bb_overlaps_with_extent == 0) { + bbctx->first_extent = 0; + bbctx->last_bb.length = 0; + bbctx->last_bb.offset = 0; + } + + } while (bb_overlaps_with_extent == 0); + + /* bad block overlaps with an extent */ + + bb_beg = (bb_beg > ext_beg) ? bb_beg : ext_beg; + bb_end = (bb_end < ext_end) ? bb_end : ext_end; + bb_len = bb_end - bb_beg + 1; + bb_off = bb_beg + exts->extents[e].offset_logical + - exts->extents[e].offset_physical; + + LOG(10, "bad block found: physical offset: %llu, length: %llu", + bb_beg, bb_len); + + /* make sure the offset is block-aligned */ + unsigned long long not_block_aligned = bb_off & (exts->blksize - 1); + if (not_block_aligned) { + bb_off -= not_block_aligned; + bb_len += not_block_aligned; + } + + /* make sure the length is block-aligned */ + bb_len = ALIGN_UP(bb_len, exts->blksize); + + LOG(4, "bad block found: logical offset: %llu, length: %llu", + bb_off, bb_len); + + /* + * Return the bad block with offset and length + * expressed in bytes and offset relative + * to the beginning of the file. + */ + bb->offset = bb_off; + bb->length = bb_len; + + return 0; +} + +/* + * pmem2_badblock_clear_fsdax -- (internal) clear one bad block + * in a FSDAX device + */ +static int +pmem2_badblock_clear_fsdax(int fd, const struct pmem2_badblock *bb) +{ + LOG(3, "fd %i badblock %p", fd, bb); + PMEM2_ERR_CLR(); + + ASSERTne(bb, NULL); + + LOG(10, + "clearing a bad block: fd %i logical offset %zu length %zu (in 512B sectors)", + fd, B2SEC(bb->offset), B2SEC(bb->length)); + + /* fallocate() takes offset as the off_t type */ + if (bb->offset > (size_t)INT64_MAX) { + ERR("bad block's offset is greater than INT64_MAX"); + return PMEM2_E_OFFSET_OUT_OF_RANGE; + } + + /* fallocate() takes length as the off_t type */ + if (bb->length > (size_t)INT64_MAX) { + ERR("bad block's length is greater than INT64_MAX"); + return PMEM2_E_LENGTH_OUT_OF_RANGE; + } + + off_t offset = (off_t)bb->offset; + off_t length = (off_t)bb->length; + + /* deallocate bad blocks */ + if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, length)) { + ERR("!fallocate"); + return PMEM2_E_ERRNO; + } + + /* allocate new blocks */ + if (fallocate(fd, FALLOC_FL_KEEP_SIZE, offset, length)) { + ERR("!fallocate"); + return PMEM2_E_ERRNO; + } + + return 0; +} + +/* + * pmem2_badblock_clear_devdax -- (internal) clear one bad block + * in a DAX device + */ +static int +pmem2_badblock_clear_devdax(const struct pmem2_badblock_context *bbctx, + const struct pmem2_badblock *bb) +{ + LOG(3, "bbctx %p bb %p", bbctx, bb); + + ASSERTne(bb, NULL); + ASSERTne(bbctx, NULL); + ASSERTne(bbctx->rgn.bus, NULL); + ASSERTne(bbctx->rgn.ns_res, 0); + + LOG(4, + "clearing a bad block: offset %zu length %zu (in 512B sectors)", + B2SEC(bb->offset), B2SEC(bb->length)); + + int ret = badblocks_devdax_clear_one_badblock(bbctx->rgn.bus, + bb->offset + bbctx->rgn.ns_res, + bb->length); + if (ret) { + LOG(1, + "failed to clear a bad block: offset %zu length %zu (in 512B sectors)", + B2SEC(bb->offset), + B2SEC(bb->length)); + return ret; + } + + return 0; +} + +/* + * pmem2_badblock_clear -- clear one bad block + */ +int +pmem2_badblock_clear(struct pmem2_badblock_context *bbctx, + const struct pmem2_badblock *bb) +{ + LOG(3, "bbctx 
%p badblock %p", bbctx, bb); + PMEM2_ERR_CLR(); + + ASSERTne(bbctx, NULL); + ASSERTne(bb, NULL); + + if (bbctx->file_type == PMEM2_FTYPE_DEVDAX) + return pmem2_badblock_clear_devdax(bbctx, bb); + + ASSERTeq(bbctx->file_type, PMEM2_FTYPE_REG); + + return pmem2_badblock_clear_fsdax(bbctx->fd, bb); +} diff --git a/src/pmdk/src/libpmem2/badblocks_none.c b/src/pmdk/src/libpmem2/badblocks_none.c new file mode 100644 index 000000000..84ee79da0 --- /dev/null +++ b/src/pmdk/src/libpmem2/badblocks_none.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * badblocks_none.c -- fake bad blocks functions + */ + +#include + +#include "libpmem2.h" +#include "out.h" + +/* + * pmem2_badblock_context_new -- allocate and create a new bad block context + */ +int +pmem2_badblock_context_new(struct pmem2_badblock_context **bbctx, + const struct pmem2_source *src) +{ + return PMEM2_E_NOSUPP; +} + +/* + * pmem2_badblock_context_delete -- delete and free the bad block context + */ +void +pmem2_badblock_context_delete( + struct pmem2_badblock_context **bbctx) +{ +} + +/* + * pmem2_badblock_next -- get the next bad block + */ +int +pmem2_badblock_next(struct pmem2_badblock_context *bbctx, + struct pmem2_badblock *bb) +{ + return PMEM2_E_NOSUPP; +} + +/* + * pmem2_badblock_clear -- clear one bad block + */ +int +pmem2_badblock_clear(struct pmem2_badblock_context *bbctx, + const struct pmem2_badblock *bb) +{ + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/config.c b/src/pmdk/src/libpmem2/config.c new file mode 100644 index 000000000..9274788aa --- /dev/null +++ b/src/pmdk/src/libpmem2/config.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * config.c -- pmem2_config implementation + */ + +#include +#include "alloc.h" +#include "config.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2.h" +#include "pmem2_utils.h" + +/* + * pmem2_config_init -- initialize cfg structure. + */ +void +pmem2_config_init(struct pmem2_config *cfg) +{ + cfg->offset = 0; + cfg->length = 0; + cfg->requested_max_granularity = PMEM2_GRANULARITY_INVALID; + cfg->sharing = PMEM2_SHARED; + cfg->protection_flag = PMEM2_PROT_READ | PMEM2_PROT_WRITE; + cfg->reserv = NULL; + cfg->reserv_offset = 0; +} + +/* + * pmem2_config_new -- allocates and initialize cfg structure. + */ +int +pmem2_config_new(struct pmem2_config **cfg) +{ + PMEM2_ERR_CLR(); + + int ret; + *cfg = pmem2_malloc(sizeof(**cfg), &ret); + + if (ret) + return ret; + + ASSERTne(cfg, NULL); + + pmem2_config_init(*cfg); + return 0; +} + +/* + * pmem2_config_delete -- deallocate cfg structure. 
+ */ +int +pmem2_config_delete(struct pmem2_config **cfg) +{ + /* we do not need to clear err because this function cannot fail */ + + Free(*cfg); + *cfg = NULL; + return 0; +} + +/* + * pmem2_config_set_required_store_granularity -- set granularity + * requested by user in the pmem2_config structure + */ +int +pmem2_config_set_required_store_granularity(struct pmem2_config *cfg, + enum pmem2_granularity g) +{ + PMEM2_ERR_CLR(); + + switch (g) { + case PMEM2_GRANULARITY_BYTE: + case PMEM2_GRANULARITY_CACHE_LINE: + case PMEM2_GRANULARITY_PAGE: + break; + default: + ERR("unknown granularity value %d", g); + return PMEM2_E_GRANULARITY_NOT_SUPPORTED; + } + + cfg->requested_max_granularity = g; + + return 0; +} + +/* + * pmem2_config_set_offset -- set offset in the pmem2_config structure + */ +int +pmem2_config_set_offset(struct pmem2_config *cfg, size_t offset) +{ + PMEM2_ERR_CLR(); + + /* mmap func takes offset as a type of off_t */ + if (offset > (size_t)INT64_MAX) { + ERR("offset is greater than INT64_MAX"); + return PMEM2_E_OFFSET_OUT_OF_RANGE; + } + + cfg->offset = offset; + + return 0; +} + +/* + * pmem2_config_set_length -- set length in the pmem2_config structure + */ +int +pmem2_config_set_length(struct pmem2_config *cfg, size_t length) +{ + PMEM2_ERR_CLR(); + + cfg->length = length; + + return 0; +} + +/* + * pmem2_config_validate_length -- validate that length in the pmem2_config + * structure is consistent with the file length + */ +int +pmem2_config_validate_length(const struct pmem2_config *cfg, + size_t file_len, size_t alignment) +{ + ASSERTne(alignment, 0); + + if (file_len == 0) { + ERR("file length is equal 0"); + return PMEM2_E_SOURCE_EMPTY; + } + + if (cfg->length % alignment) { + ERR("length is not a multiple of %lu", alignment); + return PMEM2_E_LENGTH_UNALIGNED; + } + + /* overflow check */ + const size_t end = cfg->offset + cfg->length; + if (end < cfg->offset) { + ERR("overflow of offset and length"); + return PMEM2_E_MAP_RANGE; + } + + /* let's align the file size */ + size_t aligned_file_len = file_len; + if (file_len % alignment) + aligned_file_len = ALIGN_UP(file_len, alignment); + + /* validate mapping fit into the file */ + if (end > aligned_file_len) { + ERR("mapping larger than file size"); + return PMEM2_E_MAP_RANGE; + } + + return 0; +} + +/* + * pmem2_config_set_sharing -- set the way pmem2_map_new will map the file + */ +int +pmem2_config_set_sharing(struct pmem2_config *cfg, enum pmem2_sharing_type type) +{ + PMEM2_ERR_CLR(); + + switch (type) { + case PMEM2_SHARED: + case PMEM2_PRIVATE: + cfg->sharing = type; + break; + default: + ERR("unknown sharing value %d", type); + return PMEM2_E_INVALID_SHARING_VALUE; + } + + return 0; +} + +/* + * pmem2_config_set_vm_reservation -- set vm_reservation in the + * pmem2_config structure + */ +int +pmem2_config_set_vm_reservation(struct pmem2_config *cfg, + struct pmem2_vm_reservation *rsv, size_t offset) +{ + PMEM2_ERR_CLR(); + + cfg->reserv = rsv; + cfg->reserv_offset = offset; + + return 0; +} + +/* + * pmem2_config_set_protection -- set protection flags + * in the config struct + */ +int +pmem2_config_set_protection(struct pmem2_config *cfg, + unsigned prot) +{ + PMEM2_ERR_CLR(); + + unsigned unknown_prot = prot & ~(PMEM2_PROT_READ | PMEM2_PROT_WRITE | + PMEM2_PROT_EXEC | PMEM2_PROT_NONE); + if (unknown_prot) { + ERR("invalid flag %u", prot); + return PMEM2_E_INVALID_PROT_FLAG; + } + + cfg->protection_flag = prot; + return 0; +} diff --git a/src/pmdk/src/libpmem2/config.h b/src/pmdk/src/libpmem2/config.h new file 
mode 100644 index 000000000..beb693c43 --- /dev/null +++ b/src/pmdk/src/libpmem2/config.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * config.h -- internal definitions for pmem2_config + */ +#ifndef PMEM2_CONFIG_H +#define PMEM2_CONFIG_H + +#include "libpmem2.h" + +#define PMEM2_GRANULARITY_INVALID ((enum pmem2_granularity) (-1)) +#define PMEM2_ADDRESS_ANY 0 /* default value of the address request type */ + +struct pmem2_config { + /* offset from the beginning of the file */ + size_t offset; + size_t length; /* length of the mapping */ + void *addr; /* address of the mapping */ + int addr_request; /* address request type */ + /* persistence granularity requested by user */ + enum pmem2_granularity requested_max_granularity; + enum pmem2_sharing_type sharing; /* the way the file will be mapped */ + unsigned protection_flag; + struct pmem2_vm_reservation *reserv; + size_t reserv_offset; +}; + +void pmem2_config_init(struct pmem2_config *cfg); + +int pmem2_config_validate_length(const struct pmem2_config *cfg, + size_t file_len, size_t alignment); + +#endif /* PMEM2_CONFIG_H */ diff --git a/src/pmdk/src/libpmem2/deep_flush.c b/src/pmdk/src/libpmem2/deep_flush.c new file mode 100644 index 000000000..585772ae1 --- /dev/null +++ b/src/pmdk/src/libpmem2/deep_flush.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * deep_flush.c -- pmem2_deep_flush implementation + */ + +#include + +#include "libpmem2.h" +#include "deep_flush.h" +#include "out.h" +#include "pmem2_utils.h" + +/* + * pmem2_deep_flush -- performs deep flush operation + */ +int +pmem2_deep_flush(struct pmem2_map *map, void *ptr, size_t size) +{ + LOG(3, "map %p ptr %p size %zu", map, ptr, size); + PMEM2_ERR_CLR(); + + uintptr_t map_addr = (uintptr_t)map->addr; + uintptr_t map_end = map_addr + map->content_length; + uintptr_t flush_addr = (uintptr_t)ptr; + uintptr_t flush_end = flush_addr + size; + + if (flush_addr < map_addr || flush_end > map_end) { + ERR("requested deep flush range ptr %p size %zu " + "exceeds map range %p", ptr, size, map); + return PMEM2_E_DEEP_FLUSH_RANGE; + } + + int ret = map->deep_flush_fn(map, ptr, size); + if (ret) { + LOG(1, "cannot perform deep flush operation for map %p", map); + return ret; + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/deep_flush.h b/src/pmdk/src/libpmem2/deep_flush.h new file mode 100644 index 000000000..258f6703e --- /dev/null +++ b/src/pmdk/src/libpmem2/deep_flush.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +/* + * deep_flush.h -- functions for deep flush functionality + */ + +#ifndef PMEM2_DEEP_FLUSH_H +#define PMEM2_DEEP_FLUSH_H 1 + +#include "map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int pmem2_deep_flush_write(unsigned region_id); +int pmem2_deep_flush_dax(struct pmem2_map *map, void *ptr, size_t size); +int pmem2_deep_flush_page(struct pmem2_map *map, void *ptr, size_t size); +int pmem2_deep_flush_cache(struct pmem2_map *map, void *ptr, size_t size); +int pmem2_deep_flush_byte(struct pmem2_map *map, void *ptr, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/deep_flush_linux.c b/src/pmdk/src/libpmem2/deep_flush_linux.c new file mode 100644 index 000000000..567174b42 --- /dev/null +++ b/src/pmdk/src/libpmem2/deep_flush_linux.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + 
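+/* + * On Linux, deep flush is backed by the nvdimm sysfs attribute + * /sys/bus/nd/devices/region<N>/deep_flush: reading "0\n" from it means + * the platform does not need a deep flush, while writing "1" asks the + * kernel to flush the write-pending queues (WPQ) of that region. + */ + 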
+/* + * deep_flush_linux.c -- deep_flush functionality + */ + +#include +#include +#include +#include + +#include "deep_flush.h" +#include "libpmem2.h" +#include "map.h" +#include "os.h" +#include "out.h" +#include "persist.h" +#include "pmem2_utils.h" +#include "region_namespace.h" + +/* + * pmem2_deep_flush_write -- perform write to deep_flush file + * on given region_id + */ +int +pmem2_deep_flush_write(unsigned region_id) +{ + LOG(3, "region_id %u", region_id); + + char deep_flush_path[PATH_MAX]; + int deep_flush_fd; + char rbuf[2]; + + if (util_snprintf(deep_flush_path, PATH_MAX, + "/sys/bus/nd/devices/region%u/deep_flush", region_id) < 0) { + ERR("!snprintf"); + return PMEM2_E_ERRNO; + } + + if ((deep_flush_fd = os_open(deep_flush_path, O_RDONLY)) < 0) { + LOG(1, "!os_open(\"%s\", O_RDONLY)", deep_flush_path); + return 0; + } + + if (read(deep_flush_fd, rbuf, sizeof(rbuf)) != 2) { + LOG(1, "!read(%d)", deep_flush_fd); + goto end; + } + + if (rbuf[0] == '0' && rbuf[1] == '\n') { + LOG(3, "Deep flushing not needed"); + goto end; + } + + os_close(deep_flush_fd); + + if ((deep_flush_fd = os_open(deep_flush_path, O_WRONLY)) < 0) { + LOG(1, "Cannot open deep_flush file %s to write", + deep_flush_path); + return 0; + } + + if (write(deep_flush_fd, "1", 1) != 1) { + LOG(1, "Cannot write to deep_flush file %d", deep_flush_fd); + goto end; + } + +end: + os_close(deep_flush_fd); + return 0; +} + +/* + * pmem2_deep_flush_dax -- read the file type of the mapping and, depending + * on whether it is a device DAX or a regular file, perform the proper + * flush operation + */ +int +pmem2_deep_flush_dax(struct pmem2_map *map, void *ptr, size_t size) +{ + int ret; + enum pmem2_file_type type = map->source.value.ftype; + + if (type == PMEM2_FTYPE_REG) { + /* + * Flushing using OS-provided mechanisms requires that + * the address be a multiple of the page size. + * Align address down and change len so that [addr, addr + len) + * still contains the initial range.
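+ * + * E.g. (hypothetical values) with 4 KiB pages, ptr == 0x10010 and + * size == 0x20 become ptr == 0x10000 and size == 0x30, so the flushed + * window still covers the original [0x10010, 0x10030) range.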
 + */ + + /* round address down to page boundary */ + uintptr_t new_addr = ALIGN_DOWN((uintptr_t)ptr, Pagesize); + /* increase len by the amount we gain when we round addr down */ + size += (uintptr_t)ptr - new_addr; + ptr = (void *)new_addr; + + ret = pmem2_flush_file_buffers_os(map, ptr, size, 0); + if (ret) { + LOG(1, "cannot flush buffers addr %p len %zu", + ptr, size); + return ret; + } + } else if (type == PMEM2_FTYPE_DEVDAX) { + unsigned region_id; + int ret = pmem2_get_region_id(&map->source, &region_id); + if (ret < 0) { + LOG(1, "cannot find region id for dev %lu", + map->source.value.st_rdev); + return ret; + } + ret = pmem2_deep_flush_write(region_id); + if (ret) { + LOG(1, "cannot write to deep_flush file for region %u", + region_id); + return ret; + } + } else { + ASSERT(0); + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/deep_flush_other.c b/src/pmdk/src/libpmem2/deep_flush_other.c new file mode 100644 index 000000000..dd1d89a45 --- /dev/null +++ b/src/pmdk/src/libpmem2/deep_flush_other.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * deep_flush_other.c -- deep_flush functionality + */ + +#include +#include +#include + +#include "deep_flush.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "persist.h" + +/* + * pmem2_deep_flush_dax -- performs flush buffer operation + */ +int +pmem2_deep_flush_dax(struct pmem2_map *map, void *ptr, size_t size) +{ + int ret = pmem2_flush_file_buffers_os(map, ptr, size, 0); + if (ret) { + LOG(1, "cannot flush buffers addr %p len %zu", ptr, size); + return ret; + } + + return 0; +} + +/* + * pmem2_deep_flush_write -- perform write to deep_flush file + * on given region_id (Device Dax only) + */ +int +pmem2_deep_flush_write(unsigned region_id) +{ + const char *err = + "BUG: pmem2_deep_flush_write should never be called on this OS"; + ERR("%s", err); + ASSERTinfo(0, err); + + /* not supported */ + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/deep_flush_windows.c b/src/pmdk/src/libpmem2/deep_flush_windows.c new file mode 100644 index 000000000..01a1d0959 --- /dev/null +++ b/src/pmdk/src/libpmem2/deep_flush_windows.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * deep_flush_windows.c -- deep_flush functionality + */ + +#include +#include +#include + +#include "deep_flush.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "persist.h" + +/* + * pmem2_deep_flush_dax -- performs flush buffer operation + */ +int +pmem2_deep_flush_dax(struct pmem2_map *map, void *ptr, size_t size) +{ + int ret = pmem2_flush_file_buffers_os(map, ptr, size, 0); + if (ret) { + LOG(1, "cannot flush buffers addr %p len %zu", ptr, size); + return ret; + } + + return 0; +} + +/* + * pmem2_deep_flush_write -- perform write to deep_flush file + * on given region_id (Device Dax only) + */ +int +pmem2_deep_flush_write(unsigned region_id) +{ + const char *err = + "BUG: pmem2_deep_flush_write should never be called on this OS"; + ERR("%s", err); + ASSERTinfo(0, err); + + /* not supported */ + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/errormsg.c b/src/pmdk/src/libpmem2/errormsg.c new file mode 100644 index 000000000..9c7b090f6 --- /dev/null +++ b/src/pmdk/src/libpmem2/errormsg.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * errormsg.c -- pmem2_errormsg* implementation + */ + +#include
"libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" + +/* + * pmem2_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmem2_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmem2_errormsg -- return last error message + */ +const char * +pmem2_errormsg(void) +{ + return pmem2_errormsgU(); +} +#else +/* + * pmem2_errormsgW -- return last error message as wchar_t + */ +const wchar_t * +pmem2_errormsgW(void) +{ + return out_get_errormsgW(); +} +#endif + +/* + * pmem2_perrorU -- prints a descriptive error message to the stderr + */ +#ifndef _WIN32 +static inline void +pmem2_perrorU(const char *format, va_list args) +{ + vfprintf(stderr, format, args); + fprintf(stderr, ": %s\n", pmem2_errormsg()); +} +#else +void +pmem2_perrorU(const char *format, ...) +{ + va_list args; + va_start(args, format); + + vfprintf(stderr, format, args); + fprintf(stderr, ": %s\n", pmem2_errormsg()); + + va_end(args); +} +#endif + +#ifndef _WIN32 +/* + * pmem2_perror -- prints a descriptive error message to the stderr + */ +void +pmem2_perror(const char *format, ...) +{ + va_list args; + va_start(args, format); + + pmem2_perrorU(format, args); + + va_end(args); +} +#else +/* + * pmem2_perrorW -- prints a descriptive error message to the stderr + */ +void +pmem2_perrorW(const wchar_t *format, ...) +{ + va_list args; + va_start(args, format); + + vfwprintf(stderr, format, args); + fwprintf(stderr, L": %s\n", pmem2_errormsgW()); + + va_end(args); +} +#endif diff --git a/src/pmdk/src/libpmem2/extent.h b/src/pmdk/src/libpmem2/extent.h new file mode 100644 index 000000000..8c8c68986 --- /dev/null +++ b/src/pmdk/src/libpmem2/extent.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * extent.h -- fs extent query API + */ + +#ifndef PMDK_EXTENT_H +#define PMDK_EXTENT_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct extent { + uint64_t offset_physical; + uint64_t offset_logical; + uint64_t length; +}; + +struct extents { + uint64_t blksize; + uint32_t extents_count; + struct extent *extents; +}; + +int pmem2_extents_create_get(int fd, struct extents **exts); +void pmem2_extents_destroy(struct extents **exts); + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_EXTENT_H */ diff --git a/src/pmdk/src/libpmem2/extent_linux.c b/src/pmdk/src/libpmem2/extent_linux.c new file mode 100644 index 000000000..6491e67b6 --- /dev/null +++ b/src/pmdk/src/libpmem2/extent_linux.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * extent_linux.c - implementation of the linux fs extent query API + */ + +#include +#include +#include +#include +#include + +#include "libpmem2.h" +#include "pmem2_utils.h" + +#include "file.h" +#include "out.h" +#include "extent.h" +#include "alloc.h" + +/* + * pmem2_extents_create_get -- allocate extents structure and get extents + * of the given file + */ +int +pmem2_extents_create_get(int fd, struct extents **exts) +{ + LOG(3, "fd %i extents %p", fd, exts); + + ASSERT(fd > 2); + ASSERTne(exts, NULL); + + enum pmem2_file_type pmem2_type; + struct extents *pexts = NULL; + struct fiemap *fmap = NULL; + os_stat_t st; + + if (os_fstat(fd, &st) < 0) { + ERR("!fstat %d", fd); + return PMEM2_E_ERRNO; + } + + int ret = pmem2_get_type_from_stat(&st, &pmem2_type); + if (ret) + return ret; + + /* directories do not have any extents */ + if (pmem2_type == PMEM2_FTYPE_DIR) { + ERR( + 
"checking extents does not make sense in case of directories"); + return PMEM2_E_INVALID_FILE_TYPE; + } + + /* allocate extents structure */ + pexts = pmem2_zalloc(sizeof(struct extents), &ret); + if (ret) + return ret; + + /* save block size */ + LOG(10, "fd %i: block size: %li", fd, (long int)st.st_blksize); + pexts->blksize = (uint64_t)st.st_blksize; + + /* DAX device does not have any extents */ + if (pmem2_type == PMEM2_FTYPE_DEVDAX) { + *exts = pexts; + return 0; + } + + ASSERTeq(pmem2_type, PMEM2_FTYPE_REG); + + fmap = pmem2_zalloc(sizeof(struct fiemap), &ret); + if (ret) + goto error_free; + + fmap->fm_start = 0; + fmap->fm_length = (size_t)st.st_size; + fmap->fm_flags = 0; + fmap->fm_extent_count = 0; + fmap->fm_mapped_extents = 0; + + if (ioctl(fd, FS_IOC_FIEMAP, fmap) != 0) { + ERR("!fiemap ioctl() for fd=%d failed", fd); + ret = PMEM2_E_ERRNO; + goto error_free; + } + + size_t newsize = sizeof(struct fiemap) + + fmap->fm_mapped_extents * sizeof(struct fiemap_extent); + + struct fiemap *newfmap = pmem2_realloc(fmap, newsize, &ret); + if (ret) + goto error_free; + + fmap = newfmap; + memset(fmap->fm_extents, 0, fmap->fm_mapped_extents * + sizeof(struct fiemap_extent)); + fmap->fm_extent_count = fmap->fm_mapped_extents; + fmap->fm_mapped_extents = 0; + + if (ioctl(fd, FS_IOC_FIEMAP, fmap) != 0) { + ERR("!fiemap ioctl() for fd=%d failed", fd); + ret = PMEM2_E_ERRNO; + goto error_free; + } + + LOG(4, "file with fd=%i has %u extents:", fd, fmap->fm_mapped_extents); + + /* save number of extents */ + pexts->extents_count = fmap->fm_mapped_extents; + + pexts->extents = pmem2_malloc( + pexts->extents_count * sizeof(struct extent), + &ret); + if (ret) + goto error_free; + + /* save extents */ + unsigned e; + for (e = 0; e < fmap->fm_mapped_extents; e++) { + pexts->extents[e].offset_physical = + fmap->fm_extents[e].fe_physical; + pexts->extents[e].offset_logical = + fmap->fm_extents[e].fe_logical; + pexts->extents[e].length = + fmap->fm_extents[e].fe_length; + + LOG(10, " #%u: off_phy: %lu off_log: %lu len: %lu", + e, + pexts->extents[e].offset_physical, + pexts->extents[e].offset_logical, + pexts->extents[e].length); + } + + *exts = pexts; + + Free(fmap); + + return 0; + +error_free: + Free(pexts->extents); + Free(pexts); + Free(fmap); + + return ret; +} + +/* + * pmem2_extents_destroy -- free extents structure + */ +void +pmem2_extents_destroy(struct extents **exts) +{ + LOG(3, "extents %p", exts); + + ASSERTne(exts, NULL); + + if (*exts) { + Free((*exts)->extents); + Free(*exts); + *exts = NULL; + } +} diff --git a/src/pmdk/src/libpmem2/extent_none.c b/src/pmdk/src/libpmem2/extent_none.c new file mode 100644 index 000000000..9cd8876d1 --- /dev/null +++ b/src/pmdk/src/libpmem2/extent_none.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * extent_none.c - fake implementation of the FS extent query API + */ + +#include "libpmem2.h" +#include "out.h" +#include "extent.h" + +/* + * pmem2_extents_create_get -- allocate extents structure and get extents + * of the given file + */ +int +pmem2_extents_create_get(int fd, struct extents **exts) +{ + LOG(3, "fd %i extents %p", fd, exts); + + return PMEM2_E_NOSUPP; +} + +/* + * pmem2_extents_destroy -- free extents structure + */ +void +pmem2_extents_destroy(struct extents **exts) +{ + LOG(3, "extents %p", exts); +} diff --git a/src/pmdk/src/libpmem2/libpmem2.c b/src/pmdk/src/libpmem2/libpmem2.c new file mode 100644 index 000000000..e768e2ffc --- /dev/null +++ 
b/src/pmdk/src/libpmem2/libpmem2.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * libpmem2.c -- pmem2 library constructor & destructor + */ + +#include "libpmem2.h" + +#include "map.h" +#include "out.h" +#include "persist.h" +#include "pmem2.h" +#include "util.h" + +/* + * libpmem2_init -- load-time initialization for libpmem2 + * + * Called automatically by the run-time loader. + */ +ATTR_CONSTRUCTOR +void +libpmem2_init(void) +{ + util_init(); + out_init(PMEM2_LOG_PREFIX, PMEM2_LOG_LEVEL_VAR, PMEM2_LOG_FILE_VAR, + PMEM2_MAJOR_VERSION, PMEM2_MINOR_VERSION); + + LOG(3, NULL); + + pmem2_map_init(); + pmem2_persist_init(); +} + +/* + * libpmem2_fini -- libpmem2 cleanup routine + * + * Called automatically when the process terminates. + */ +ATTR_DESTRUCTOR +void +libpmem2_fini(void) +{ + LOG(3, NULL); + + pmem2_map_fini(); + out_fini(); +} diff --git a/src/pmdk/src/libpmem2/libpmem2.def b/src/pmdk/src/libpmem2/libpmem2.def new file mode 100644 index 000000000..ccda52c72 --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2.def @@ -0,0 +1,55 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2019-2020, Intel Corporation +;;;; End Copyright Notice + +LIBRARY libpmem2 + +VERSION 1.0 + +EXPORTS + pmem2_badblock_clear + pmem2_badblock_context_delete + pmem2_badblock_context_new + pmem2_badblock_next + pmem2_config_delete + pmem2_config_new + pmem2_config_set_length + pmem2_config_set_offset + pmem2_config_set_protection + pmem2_config_set_required_store_granularity + pmem2_config_set_sharing + pmem2_config_set_vm_reservation + pmem2_deep_flush + pmem2_errormsgU + pmem2_errormsgW + pmem2_get_drain_fn + pmem2_get_flush_fn + pmem2_get_memcpy_fn + pmem2_get_memmove_fn + pmem2_get_memset_fn + pmem2_get_persist_fn + pmem2_map_delete + pmem2_map_get_address + pmem2_map_get_size + pmem2_map_get_store_granularity + pmem2_map_new + pmem2_map_from_existing + pmem2_perrorU + pmem2_perrorW + pmem2_source_alignment + pmem2_source_delete + pmem2_source_device_idU + pmem2_source_device_idW + pmem2_source_device_usc + pmem2_source_from_anon + pmem2_source_from_fd + pmem2_source_from_handle + pmem2_source_get_handle + pmem2_source_size + pmem2_vm_reservation_delete + pmem2_vm_reservation_get_address + pmem2_vm_reservation_get_size + pmem2_vm_reservation_new + + DllMain diff --git a/src/pmdk/src/libpmem2/libpmem2.link.in b/src/pmdk/src/libpmem2/libpmem2.link.in new file mode 100644 index 000000000..320b7e7e3 --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2.link.in @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019-2020, Intel Corporation +# +# +# src/libpmem2.link -- linker link file for libpmem2 +# +LIBPMEM2_1.0 { + global: + pmem2_badblock_clear; + pmem2_badblock_context_delete; + pmem2_badblock_context_new; + pmem2_badblock_next; + pmem2_config_delete; + pmem2_config_new; + pmem2_config_set_length; + pmem2_config_set_offset; + pmem2_config_set_protection; + pmem2_config_set_required_store_granularity; + pmem2_config_set_sharing; + pmem2_config_set_vm_reservation; + pmem2_deep_flush; + pmem2_errormsg; + pmem2_get_drain_fn; + pmem2_get_flush_fn; + pmem2_get_memcpy_fn; + pmem2_get_memmove_fn; + pmem2_get_memset_fn; + pmem2_get_persist_fn; + pmem2_map_delete; + pmem2_map_get_address; + pmem2_map_get_size; + pmem2_map_get_store_granularity; + pmem2_map_new; + pmem2_map_from_existing; + pmem2_perror; + pmem2_source_alignment; + pmem2_source_delete; + pmem2_source_device_id; + 
pmem2_source_device_usc; + pmem2_source_from_anon; + pmem2_source_from_fd; + pmem2_source_from_handle; + pmem2_source_get_fd; + pmem2_source_size; + pmem2_vm_reservation_delete; + pmem2_vm_reservation_get_address; + pmem2_vm_reservation_get_size; + pmem2_vm_reservation_new; + local: + *; +}; diff --git a/src/pmdk/src/libpmem2/libpmem2.rc b/src/pmdk/src/libpmem2/libpmem2.rc new file mode 100644 index 000000000..d077a0a14 --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019, Intel Corporation */ + +/* + * libpmem2.rc -- libpmem2 resource file + */ + +#include <windows.h> +#define FILE_NAME "libpmem2.dll" +#define DESCRIPTION "libpmem2 - persistent memory support library v2" +#define TYPE VFT_DLL +#include <common.rc> diff --git a/src/pmdk/src/libpmem2/libpmem2.vcxproj b/src/pmdk/src/libpmem2/libpmem2.vcxproj new file mode 100644 index 000000000..6492e6065 --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2.vcxproj @@ -0,0 +1,144 @@ [144 lines of MSBuild XML whose element markup did not survive extraction; the recoverable settings: Debug|x64 and Release|x64 project configurations, AdvancedVectorExtensions code generation, a DynamicLibrary target named libpmem2 (en-US, ToolsVersion 14.0, Windows SDK 10.0.10240.0/10.0.17134.0, PlatformToolset v140), project GUID {F596C36C-5C96-4F08-B420-8908AF500954}, a reference to project {901f04db-e1a5-4a41-8b81-9d31c19acd59}, and an IncludePath extended with x86_64\] \ No newline at end of file diff --git a/src/pmdk/src/libpmem2/libpmem2.vcxproj.filters b/src/pmdk/src/libpmem2/libpmem2.vcxproj.filters new file mode 100644 index 000000000..1d2b5f0cb --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2.vcxproj.filters @@ -0,0 +1,223 @@ [223 lines of MSBuild XML whose element markup likewise did not survive extraction; it declares the filter groups Source Files (extensions c;def), Source Files\common, Source Files\x86_64, Header Files (extension h) and Header Files\x86_64 with GUIDs {16473205-8f12-4d4c-b1e9-e14ea3013e70}, {16473205-8f12-4d4c-b1e9-e14ea3013e71}, {17275273-f923-45ff-9b7e-b2ea76561168}, {6c8580b3-4650-42ca-9589-ec45a8f4278c} and {6c8580b3-4650-42ca-9589-ec45a8f4278d}, and assigns every compiled source and header to one of them] \ No newline at end of file diff --git a/src/pmdk/src/libpmem2/libpmem2_main.c b/src/pmdk/src/libpmem2/libpmem2_main.c new file mode 100644 index 000000000..8807aa1ed --- /dev/null +++ b/src/pmdk/src/libpmem2/libpmem2_main.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019, Intel Corporation */ + +/* + * libpmem2_main.c -- entry point for libpmem2.dll + */ + +void libpmem2_init(void); +void libpmem2_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmem2_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + libpmem2_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmem2/map.c b/src/pmdk/src/libpmem2/map.c new file mode 100644 index 000000000..cce1e74bc --- /dev/null +++ b/src/pmdk/src/libpmem2/map.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * map.c -- pmem2_map (common) + */ + +#include "out.h" + +#include "alloc.h" +#include "config.h" +#include "map.h" +#include "ravl_interval.h" +#include "os.h" +#include "os_thread.h" +#include "persist.h" +#include "pmem2.h" +#include "pmem2_utils.h" +#include "ravl.h" +#include "sys_util.h" +#include "valgrind_internal.h" + +#include <ctype.h> + +/* + * pmem2_map_get_address -- get mapping address + */ +void * +pmem2_map_get_address(struct pmem2_map *map) +{ + LOG(3, "map %p", map); + + /* we do not need to clear err because this function cannot fail */ + return map->addr; +} + +/* + * pmem2_map_get_size -- get mapping size + */ +size_t +pmem2_map_get_size(struct pmem2_map *map) +{ + LOG(3, "map %p", map); + + /* we do not need to clear err because this function cannot fail */ + return map->content_length; +} + +/* + * pmem2_map_get_store_granularity -- returns granularity of the mapped + * file + */ +enum pmem2_granularity +pmem2_map_get_store_granularity(struct pmem2_map *map) +{ + LOG(3, "map %p", map); + + /* we do not need to clear err because this function cannot fail */ + return map->effective_granularity; +} + +/* + * parse_force_granularity -- parse the PMEM2_FORCE_GRANULARITY environment + * variable + */ +static enum pmem2_granularity +parse_force_granularity() +{ + char *ptr = os_getenv("PMEM2_FORCE_GRANULARITY"); + if (ptr) { + char str[11]; /* strlen("CACHE_LINE") + 1 */ + + if (util_safe_strcpy(str, ptr, sizeof(str))) { + LOG(1, "Invalid value of PMEM2_FORCE_GRANULARITY"); + return PMEM2_GRANULARITY_INVALID; + } + + char *s = str; + while (*s) { + *s = (char)toupper((char)*s); + s++; + } + + if (strcmp(str, "BYTE") == 0) { + return PMEM2_GRANULARITY_BYTE; + } else if (strcmp(str, "CACHE_LINE") == 0) { + return PMEM2_GRANULARITY_CACHE_LINE; + } else if (strcmp(str, "CACHELINE") == 0) { + return PMEM2_GRANULARITY_CACHE_LINE; + } else if (strcmp(str, "PAGE") == 0) { + return PMEM2_GRANULARITY_PAGE; + } + + LOG(1, "Invalid value of PMEM2_FORCE_GRANULARITY"); + } + return PMEM2_GRANULARITY_INVALID; +} + +/* + * get_min_granularity -- checks min available granularity + */ +enum pmem2_granularity +get_min_granularity(bool eADR, bool is_pmem, enum pmem2_sharing_type sharing) +{ + enum pmem2_granularity force = parse_force_granularity(); + /* PMEM2_PRIVATE sharing does not require
data flushing */ + if (sharing == PMEM2_PRIVATE) + return PMEM2_GRANULARITY_BYTE; + if (force != PMEM2_GRANULARITY_INVALID) + return force; + if (!is_pmem) + return PMEM2_GRANULARITY_PAGE; + if (!eADR) + return PMEM2_GRANULARITY_CACHE_LINE; + + return PMEM2_GRANULARITY_BYTE; +} + +/* + * pmem2_validate_offset -- verify if the offset is a multiple of + * the alignment required for the config + */ +int +pmem2_validate_offset(const struct pmem2_config *cfg, size_t *offset, + size_t alignment) +{ + ASSERTne(alignment, 0); + if (cfg->offset % alignment) { + ERR("offset is not a multiple of %lu", alignment); + return PMEM2_E_OFFSET_UNALIGNED; + } + + *offset = cfg->offset; + + return 0; +} + +/* + * mapping_min - return min boundary for mapping + */ +static size_t +mapping_min(void *addr) +{ + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr; +} + +/* + * mapping_max - return max boundary for mapping + */ +static size_t +mapping_max(void *addr) +{ + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr + map->content_length; +} + +static struct pmem2_state { + struct ravl_interval *range_map; + os_rwlock_t range_map_lock; +} State; + +/* + * pmem2_map_init -- initialize the map module + */ +void +pmem2_map_init() +{ + util_rwlock_init(&State.range_map_lock); + + util_rwlock_wrlock(&State.range_map_lock); + State.range_map = ravl_interval_new(mapping_min, mapping_max); + util_rwlock_unlock(&State.range_map_lock); + + if (!State.range_map) + abort(); +} + +/* + * pmem2_map_fini -- finalize the map module + */ +void +pmem2_map_fini(void) +{ + util_rwlock_wrlock(&State.range_map_lock); + ravl_interval_delete(State.range_map); + util_rwlock_unlock(&State.range_map_lock); +} + +/* + * pmem2_register_mapping -- register mapping in the mappings tree + */ +int +pmem2_register_mapping(struct pmem2_map *map) +{ + util_rwlock_wrlock(&State.range_map_lock); + int ret = ravl_interval_insert(State.range_map, map); + util_rwlock_unlock(&State.range_map_lock); + + return ret; +} + +/* + * pmem2_unregister_mapping -- unregister mapping from the mappings tree + */ +int +pmem2_unregister_mapping(struct pmem2_map *map) +{ + int ret = 0; + struct ravl_interval_node *node; + + util_rwlock_wrlock(&State.range_map_lock); + node = ravl_interval_find_equal(State.range_map, map); + if (node) { + ret = ravl_interval_remove(State.range_map, node); + } else { + ERR("Cannot find mapping %p to delete", map); + ret = PMEM2_E_MAPPING_NOT_FOUND; + } + util_rwlock_unlock(&State.range_map_lock); + + return ret; +} + +/* + * pmem2_map_find -- find the earliest mapping overlapping with the + * (addr, addr+len) range + */ +struct pmem2_map * +pmem2_map_find(const void *addr, size_t len) +{ + struct pmem2_map map; + map.addr = (void *)addr; + map.content_length = len; + + struct ravl_interval_node *node; + + util_rwlock_rdlock(&State.range_map_lock); + node = ravl_interval_find(State.range_map, &map); + util_rwlock_unlock(&State.range_map_lock); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} + +/* + * pmem2_map_from_existing -- create map object for existing mapping + */ +int +pmem2_map_from_existing(struct pmem2_map **map_ptr, + const struct pmem2_source *src, void *addr, size_t len, + enum pmem2_granularity gran) +{ + int ret; + struct pmem2_map *map = + (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); + + if (!map) + return ret; + + map->reserv = NULL; + map->addr = addr; + map->reserved_length = 0; + map->content_length = len; +
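The registry above keys each mapping by its [addr, addr + content_length) interval, so pmem2_map_find() returns a mapping whose range intersects the queried one. The test the interval tree ultimately performs reduces to the classic half-open-range overlap predicate; a simplified stand-in for what ravl_interval evaluates, shown only to make the semantics concrete:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* two half-open ranges overlap iff each starts before the other ends */
static bool
ranges_overlap(uintptr_t a, size_t alen, uintptr_t b, size_t blen)
{
	return a < b + blen && b < a + alen;
}
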
map->effective_granularity = gran; + pmem2_set_flush_fns(map); + pmem2_set_mem_fns(map); + map->source = *src; + +#ifndef _WIN32 + /* fd should not be used after map */ + map->source.value.fd = INVALID_FD; +#endif + ret = pmem2_register_mapping(map); + if (ret) { + Free(map); + if (ret == -EEXIST) { + ERR( + "Provided mapping (addr %p len %zu) is already registered by libpmem2", + addr, len); + return PMEM2_E_MAP_EXISTS; + } + return ret; + } +#ifndef _WIN32 + if (src->type == PMEM2_SOURCE_FD) { + VALGRIND_REGISTER_PMEM_MAPPING(map->addr, + map->content_length); + } +#endif + *map_ptr = map; + return 0; +} diff --git a/src/pmdk/src/libpmem2/map.h b/src/pmdk/src/libpmem2/map.h new file mode 100644 index 000000000..9cc172905 --- /dev/null +++ b/src/pmdk/src/libpmem2/map.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * map.h -- internal definitions for libpmem2 + */ +#ifndef PMEM2_MAP_H +#define PMEM2_MAP_H + +#include <stdbool.h> +#include <stddef.h> +#include "libpmem2.h" +#include "os.h" +#include "source.h" +#include "vm_reservation.h" + +#ifdef _WIN32 +#include <windows.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*pmem2_deep_flush_fn)(struct pmem2_map *map, + void *ptr, size_t size); + +struct pmem2_map { + void *addr; /* base address */ + size_t reserved_length; /* length of the mapping reservation */ + size_t content_length; /* length of the mapped content */ + /* effective persistence granularity */ + enum pmem2_granularity effective_granularity; + + pmem2_persist_fn persist_fn; + pmem2_flush_fn flush_fn; + pmem2_drain_fn drain_fn; + pmem2_deep_flush_fn deep_flush_fn; + + pmem2_memmove_fn memmove_fn; + pmem2_memcpy_fn memcpy_fn; + pmem2_memset_fn memset_fn; + + struct pmem2_source source; + struct pmem2_vm_reservation *reserv; +}; + +enum pmem2_granularity get_min_granularity(bool eADR, bool is_pmem, + enum pmem2_sharing_type sharing); +struct pmem2_map *pmem2_map_find(const void *addr, size_t len); +int pmem2_register_mapping(struct pmem2_map *map); +int pmem2_unregister_mapping(struct pmem2_map *map); +void pmem2_map_init(void); +void pmem2_map_fini(void); + +int pmem2_validate_offset(const struct pmem2_config *cfg, + size_t *offset, size_t alignment); + +#ifdef __cplusplus +} +#endif + +#endif /* PMEM2_MAP_H */ diff --git a/src/pmdk/src/libpmem2/map_posix.c b/src/pmdk/src/libpmem2/map_posix.c new file mode 100644 index 000000000..0b042aa68 --- /dev/null +++ b/src/pmdk/src/libpmem2/map_posix.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * map_posix.c -- pmem2_map (POSIX) + */ + +#include <errno.h> +#include <stdbool.h> +#include <string.h> +#include <sys/mman.h> + +#include "libpmem2.h" + +#include "alloc.h" +#include "auto_flush.h" +#include "config.h" +#include "file.h" +#include "map.h" +#include "out.h" +#include "persist.h" +#include "pmem2_utils.h" +#include "source.h" +#include "sys_util.h" +#include "valgrind_internal.h" + +#ifndef MAP_SYNC +#define MAP_SYNC 0x80000 +#endif + +#ifndef MAP_SHARED_VALIDATE +#define MAP_SHARED_VALIDATE 0x03 +#endif + +#define MEGABYTE ((uintptr_t)1 << 20) +#define GIGABYTE ((uintptr_t)1 << 30) + +/* indicates the cases in which the error cannot occur */ +#define GRAN_IMPOSSIBLE "impossible" +#ifdef __linux__ + /* requested CACHE_LINE, available PAGE */ +#define REQ_CL_AVAIL_PG \ + "requested granularity not available because fd doesn't point to DAX-enabled file " \ + "or kernel doesn't support MAP_SYNC flag (Linux >= 4.15)" + +/* requested BYTE, available PAGE */
+#define REQ_BY_AVAIL_PG REQ_CL_AVAIL_PG + +/* requested BYTE, available CACHE_LINE */ +#define REQ_BY_AVAIL_CL \ + "requested granularity not available because the platform doesn't support eADR" + +static const char *granularity_err_msg[3][3] = { +/* requested granularity / available granularity */ +/* -------------------------------------------------------------------- */ +/* BYTE CACHE_LINE PAGE */ +/* -------------------------------------------------------------------- */ +/* BYTE */ {GRAN_IMPOSSIBLE, REQ_BY_AVAIL_CL, REQ_BY_AVAIL_PG}, +/* CL */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, REQ_CL_AVAIL_PG}, +/* PAGE */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE}}; +#else +/* requested CACHE_LINE, available PAGE */ +#define REQ_CL_AVAIL_PG \ + "the operating system doesn't provide a method of detecting granularity" + +/* requested BYTE, available PAGE */ +#define REQ_BY_AVAIL_PG \ + "the operating system doesn't provide a method of detecting whether the platform supports eADR" + +static const char *granularity_err_msg[3][3] = { +/* requested granularity / available granularity */ +/* -------------------------------------------------------------------- */ +/* BYTE CACHE_LINE PAGE */ +/* -------------------------------------------------------------------- */ +/* BYTE */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, REQ_BY_AVAIL_PG}, +/* CL */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, REQ_CL_AVAIL_PG}, +/* PAGE */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE}}; +#endif + +/* + * get_map_alignment -- (internal) choose the desired mapping alignment + * + * The smallest supported alignment is 2 megabytes because of the object + * alignment requirements. Changing this value to 4 kilobytes constitutes a + * layout change. + * + * Use 1GB page alignment only if the mapping length is at least + * twice as big as the 1GB page. + */ +static inline size_t +get_map_alignment(size_t len, size_t req_align) +{ + size_t align = 2 * MEGABYTE; + if (req_align) + align = req_align; + else if (len >= 2 * GIGABYTE) + align = GIGABYTE; + + return align; +} + +/* + * map_reserve -- (internal) reserve an address for mmap() + * + * ASLR in the 64-bit Linux kernel uses 28 bits of randomness for mmap + * (bit positions 12-39), which means the base mapping address is randomized + * within the [0..1024GB] range, with 4KB granularity. Assuming additional + * 1GB alignment, it results in 1024 possible locations. + */ +static int +map_reserve(size_t len, size_t alignment, void **reserv, size_t *reslen, + const struct pmem2_config *cfg) +{ + ASSERTne(reserv, NULL); + + size_t dlength = len + alignment; /* dummy length */ + + /* + * Create dummy mapping to find an unused region of given size. + * Request for increased size for later address alignment. + * Use MAP_PRIVATE with read-only access to simulate + * zero cost for overcommit accounting. Note: MAP_NORESERVE + * flag is ignored if overcommit is disabled (mode 2). + */ + char *daddr = mmap(NULL, dlength, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (daddr == MAP_FAILED) { + if (errno == EEXIST) { + ERR("!mmap MAP_FIXED_NOREPLACE"); + return PMEM2_E_MAPPING_EXISTS; + } + ERR("!mmap MAP_ANONYMOUS"); + return PMEM2_E_ERRNO; + } + + LOG(4, "system choice %p", daddr); + *reserv = (void *)roundup((uintptr_t)daddr, alignment); + /* + * since the last part of the reservation from (reserv + reslen == end) + * will be unmapped, the 'end' address has to be page-aligned.
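The over-allocate-and-trim technique that map_reserve() implements is worth seeing in isolation. A condensed sketch, under the assumptions that align is a power of two and Linux mmap semantics apply (reserve_aligned is a hypothetical name):

#include <stdint.h>
#include <sys/mman.h>

static void *
reserve_aligned(size_t len, size_t align)
{
	size_t dlen = len + align;	/* over-allocate by the alignment */
	char *d = mmap(NULL, dlen, PROT_READ,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (d == MAP_FAILED)
		return NULL;

	/* round the base up to the requested alignment... */
	uintptr_t a = ((uintptr_t)d + align - 1) & ~((uintptr_t)align - 1);
	/* ...and give the unused head back to the kernel */
	if (a != (uintptr_t)d)
		munmap(d, a - (uintptr_t)d);

	/* the tail beyond the page-rounded length is trimmed the same way */
	return (void *)a;
}
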
+ * 'reserv' is already page-aligned (or even aligned to a multiple of page + * size) so it is enough to page-align the 'reslen' value. + */ + *reslen = roundup(len, Pagesize); + LOG(4, "hint %p", *reserv); + + /* + * The placeholder mapping is divided into a few parts: + * + * 1 2 3 4 5 + * |......|uuuuuuuuu|rrr|.................| + * + * Addresses: + * 1 == daddr + * 2 == reserv + * 3 == reserv + len + * 4 == reserv + reslen == end (has to be page-aligned) + * 5 == daddr + dlength + * + * Key: + * - '.' is an unused part of the placeholder + * - 'u' is where the actual mapping lies + * - 'r' is what is reserved as padding + */ + + /* unmap the placeholder before the actual mapping */ + const size_t before = (uintptr_t)(*reserv) - (uintptr_t)daddr; + if (before) { + if (munmap(daddr, before)) { + ERR("!munmap"); + return PMEM2_E_ERRNO; + } + } + + /* unmap the placeholder after the actual mapping */ + const size_t after = dlength - *reslen - before; + void *end = (void *)((uintptr_t)(*reserv) + (uintptr_t)*reslen); + if (after) + if (munmap(end, after)) { + ERR("!munmap"); + return PMEM2_E_ERRNO; + } + + return 0; +} + +/* + * file_map -- (internal) memory map the given file into memory. + * If (flags & MAP_PRIVATE), it uses just mmap. Otherwise, it tries to mmap + * with (flags | MAP_SHARED_VALIDATE | MAP_SYNC), which allows flushing from + * user space. If MAP_SYNC fails and the user did not request it explicitly, + * it falls back to mmap with the user-provided flags. + */ +static int +file_map(void *reserv, size_t len, int proto, int flags, + int fd, off_t offset, bool *map_sync, void **base) +{ + LOG(15, "reserve %p len %zu proto %x flags %x fd %d offset %ld " + "map_sync %p", reserv, len, proto, flags, fd, offset, + map_sync); + + ASSERTne(map_sync, NULL); + ASSERTne(base, NULL); + + /* + * MAP_PRIVATE and MAP_SHARED are mutually exclusive, therefore mmap + * with MAP_PRIVATE is executed separately.
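The map_sync result that file_map() reports back is what later reaches get_min_granularity() as its is_pmem argument, so the whole granularity decision chain can be summarized in a few lines; a sketch of the combined logic, not library code:

#include <stdbool.h>
#include "libpmem2.h"

static enum pmem2_granularity
effective_granularity(bool map_sync, bool eADR)
{
	if (!map_sync)			/* not DAX: OS-level flush required */
		return PMEM2_GRANULARITY_PAGE;
	if (!eADR)			/* DAX: CPU cache flushes needed */
		return PMEM2_GRANULARITY_CACHE_LINE;
	return PMEM2_GRANULARITY_BYTE;	/* eADR: stores durable at once */
}
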
+ */ + if (flags & MAP_PRIVATE) { + *base = mmap(reserv, len, proto, flags, fd, offset); + if (*base == MAP_FAILED) { + ERR("!mmap"); + return PMEM2_E_ERRNO; + } + LOG(4, "mmap with MAP_PRIVATE succeeded"); + *map_sync = false; + return 0; + } + + /* try to mmap with MAP_SYNC flag */ + const int sync_flags = MAP_SHARED_VALIDATE | MAP_SYNC; + *base = mmap(reserv, len, proto, flags | sync_flags, fd, offset); + if (*base != MAP_FAILED) { + LOG(4, "mmap with MAP_SYNC succeeded"); + *map_sync = true; + return 0; + } + + /* try to mmap with MAP_SHARED flag (without MAP_SYNC) */ + if (errno == EINVAL || errno == ENOTSUP) { + LOG(4, "mmap with MAP_SYNC not supported"); + *base = mmap(reserv, len, proto, flags | MAP_SHARED, fd, + offset); + if (*base != MAP_FAILED) { + *map_sync = false; + return 0; + } + } + + ERR("!mmap"); + return PMEM2_E_ERRNO; +} + +/* + * unmap -- (internal) unmap a memory range + */ +static int +unmap(void *addr, size_t len) +{ + int retval = munmap(addr, len); + if (retval < 0) { + ERR("!munmap"); + return PMEM2_E_ERRNO; + } + + return 0; +} + +/* + * vm_reservation_mend -- replaces the given mapping with anonymous + * reservation, mending the reservation area + */ +static int +vm_reservation_mend(struct pmem2_vm_reservation *rsv, void *addr, size_t size) +{ + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + + ASSERT((char *)addr >= (char *)rsv_addr && + (char *)addr + size <= (char *)rsv_addr + rsv_size); + + char *daddr = mmap(addr, size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (daddr == MAP_FAILED) { + ERR("!mmap MAP_ANONYMOUS"); + return PMEM2_E_ERRNO; + } + + return 0; +} + +/* + * pmem2_map_new -- map memory according to provided config + */ +int +pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, + const struct pmem2_source *src) +{ + LOG(3, "cfg %p src %p map_ptr %p", cfg, src, map_ptr); + PMEM2_ERR_CLR(); + + int ret = 0; + struct pmem2_map *map; + size_t file_len; + *map_ptr = NULL; + + if (cfg->requested_max_granularity == PMEM2_GRANULARITY_INVALID) { + ERR( + "please define the max granularity requested for the mapping"); + + return PMEM2_E_GRANULARITY_NOT_SET; + } + + size_t src_alignment; + ret = pmem2_source_alignment(src, &src_alignment); + if (ret) + return ret; + + /* get file size */ + ret = pmem2_source_size(src, &file_len); + if (ret) + return ret; + + /* get offset */ + size_t effective_offset; + ret = pmem2_validate_offset(cfg, &effective_offset, src_alignment); + if (ret) + return ret; + ASSERTeq(effective_offset, cfg->offset); + + if (src->type == PMEM2_SOURCE_ANON) + effective_offset = 0; + + os_off_t off = (os_off_t)effective_offset; + + /* map input and output variables */ + bool map_sync = false; + /* + * MAP_SHARED - is required to mmap directly the underlying hardware + * MAP_FIXED - is required to mmap at exact address pointed by hint + */ + int flags = MAP_FIXED; + void *addr; + + /* "translate" pmem2 protection flags into linux flags */ + int proto = 0; + if (cfg->protection_flag == PMEM2_PROT_NONE) + proto = PROT_NONE; + if (cfg->protection_flag & PMEM2_PROT_EXEC) + proto |= PROT_EXEC; + if (cfg->protection_flag & PMEM2_PROT_READ) + proto |= PROT_READ; + if (cfg->protection_flag & PMEM2_PROT_WRITE) + proto |= PROT_WRITE; + + if (src->type == PMEM2_SOURCE_FD) { + if (src->value.ftype == PMEM2_FTYPE_DIR) { + ERR("the directory is not a supported file type"); + return PMEM2_E_INVALID_FILE_TYPE; + } + + ASSERT(src->value.ftype == 
PMEM2_FTYPE_REG || + src->value.ftype == PMEM2_FTYPE_DEVDAX); + + if (cfg->sharing == PMEM2_PRIVATE && + src->value.ftype == PMEM2_FTYPE_DEVDAX) { + ERR( + "device DAX does not support mapping with MAP_PRIVATE"); + return PMEM2_E_SRC_DEVDAX_PRIVATE; + } + } + + size_t content_length, reserved_length = 0; + ret = pmem2_config_validate_length(cfg, file_len, src_alignment); + if (ret) + return ret; + + /* without user-provided length, map to the end of the file */ + if (cfg->length) + content_length = cfg->length; + else + content_length = file_len - effective_offset; + + size_t alignment = get_map_alignment(content_length, + src_alignment); + + void *reserv_region = NULL; + void *rsv = cfg->reserv; + if (rsv) { + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + size_t rsv_offset = cfg->reserv_offset; + + reserved_length = roundup(content_length, Pagesize); + + if (rsv_offset % Mmap_align) { + ret = PMEM2_E_OFFSET_UNALIGNED; + ERR( + "virtual memory reservation offset %zu is not a multiple of %llu", + rsv_offset, Mmap_align); + return ret; + } + + if (rsv_offset + reserved_length > rsv_size) { + ret = PMEM2_E_LENGTH_OUT_OF_RANGE; + ERR( + "Reservation %p does not have enough space for the intended content", + rsv); + return ret; + } + + reserv_region = (char *)rsv_addr + rsv_offset; + if ((size_t)reserv_region % alignment) { + ret = PMEM2_E_ADDRESS_UNALIGNED; + ERR( + "base mapping address %p (virtual memory reservation address + offset)" \ + " is not a multiple of %zu required by device DAX", + reserv_region, alignment); + return ret; + } + + /* check if the region in the reservation is occupied */ + if (vm_reservation_map_find_acquire(rsv, rsv_offset, + reserved_length)) { + ret = PMEM2_E_MAPPING_EXISTS; + ERR( + "region of the reservation %p at the offset %zu and " + "length %zu is at least partly occupied by another mapping", + rsv, rsv_offset, reserved_length); + goto err_reservation_release; + } + } else { + /* find a hint for the mapping */ + ret = map_reserve(content_length, alignment, &reserv_region, + &reserved_length, cfg); + if (ret != 0) { + if (ret == PMEM2_E_MAPPING_EXISTS) + LOG(1, + "given mapping region is already occupied"); + else + LOG(1, + "cannot find a contiguous region of given size"); + return ret; + } + } + + ASSERTne(reserv_region, NULL); + + if (cfg->sharing == PMEM2_PRIVATE) { + flags |= MAP_PRIVATE; + } + + int map_fd = INVALID_FD; + if (src->type == PMEM2_SOURCE_FD) { + map_fd = src->value.fd; + } else if (src->type == PMEM2_SOURCE_ANON) { + flags |= MAP_ANONYMOUS; + } else { + ASSERT(0); + } + + ret = file_map(reserv_region, content_length, proto, flags, map_fd, off, + &map_sync, &addr); + if (ret) { + /* + * unmap the reservation mapping only + * if it wasn't provided by the config + */ + if (!rsv) + munmap(reserv_region, reserved_length); + + if (ret == -EACCES) + ret = PMEM2_E_NO_ACCESS; + else if (ret == -ENOTSUP) + ret = PMEM2_E_NOSUPP; + else if (ret == -EEXIST) + ret = PMEM2_E_MAPPING_EXISTS; + goto err_reservation_release; + } + + LOG(3, "mapped at %p", addr); + + bool eADR = (pmem2_auto_flush() == 1); + enum pmem2_granularity available_min_granularity = + src->type == PMEM2_SOURCE_ANON ?
PMEM2_GRANULARITY_BYTE : + get_min_granularity(eADR, map_sync, cfg->sharing); + + if (available_min_granularity > cfg->requested_max_granularity) { + const char *err = granularity_err_msg + [cfg->requested_max_granularity] + [available_min_granularity]; + if (strcmp(err, GRAN_IMPOSSIBLE) == 0) + FATAL( + "unhandled granularity error: available_min_granularity: %d" \ + "requested_max_granularity: %d", + available_min_granularity, + cfg->requested_max_granularity); + ERR("%s", err); + ret = PMEM2_E_GRANULARITY_NOT_SUPPORTED; + goto err_undo_mapping; + } + + /* prepare pmem2_map structure */ + map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); + if (!map) + goto err_undo_mapping; + + map->addr = addr; + map->reserved_length = reserved_length; + map->content_length = content_length; + map->effective_granularity = available_min_granularity; + pmem2_set_flush_fns(map); + pmem2_set_mem_fns(map); + map->reserv = rsv; + map->source = *src; + map->source.value.fd = INVALID_FD; /* fd should not be used after map */ + + ret = pmem2_register_mapping(map); + if (ret) { + goto err_free_map_struct; + } + + if (rsv) { + ret = vm_reservation_map_register_release(rsv, map); + if (ret) + goto err_unregister_map; + } + + *map_ptr = map; + + if (src->type == PMEM2_SOURCE_FD) { + VALGRIND_REGISTER_PMEM_MAPPING(map->addr, map->content_length); + VALGRIND_REGISTER_PMEM_FILE(src->value.fd, + map->addr, map->content_length, 0); + } + + return 0; + +err_unregister_map: + pmem2_unregister_mapping(map); +err_free_map_struct: + Free(map); +err_undo_mapping: + /* + * if the reservation was given by pmem2_config, instead of unmapping, + * we will need to mend the reservation + */ + if (rsv) + vm_reservation_mend(rsv, addr, reserved_length); + else + unmap(addr, reserved_length); +err_reservation_release: + if (rsv) + vm_reservation_release(rsv); + return ret; +} + +/* + * pmem2_map_delete -- unmap the specified mapping + */ +int +pmem2_map_delete(struct pmem2_map **map_ptr) +{ + LOG(3, "map_ptr %p", map_ptr); + PMEM2_ERR_CLR(); + + int ret = 0; + struct pmem2_map *map = *map_ptr; + size_t map_len = map->content_length; + void *map_addr = map->addr; + struct pmem2_vm_reservation *rsv = map->reserv; + + ret = pmem2_unregister_mapping(map); + if (ret) + return ret; + + /* + * when reserved_length==0 mapping is created by pmem2_map_from_existing + * such mappings are provided by the users and shouldn't be unmapped + * by pmem2. 
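From the caller's side, all of the above is reached through a handful of public calls; a typical sequence for mapping a whole file looks like this (teardown on the early error paths is elided for brevity, and map_whole_file is a hypothetical helper):

#include <fcntl.h>
#include <unistd.h>
#include "libpmem2.h"

static int
map_whole_file(const char *path, struct pmem2_map **map)
{
	int fd = open(path, O_RDWR);
	if (fd < 0)
		return -1;

	struct pmem2_config *cfg;
	struct pmem2_source *src;
	if (pmem2_config_new(&cfg) || pmem2_source_from_fd(&src, fd))
		return -1;

	/* accept anything up to page granularity */
	pmem2_config_set_required_store_granularity(cfg,
			PMEM2_GRANULARITY_PAGE);

	int ret = pmem2_map_new(map, cfg, src);

	pmem2_source_delete(&src);
	pmem2_config_delete(&cfg);
	close(fd);
	return ret;
}
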
+ */ + if (map->reserved_length) { + VALGRIND_REMOVE_PMEM_MAPPING(map_addr, map_len); + + if (rsv) { + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_offset = (size_t)map_addr - (size_t)rsv_addr; + if (!vm_reservation_map_find_acquire(rsv, rsv_offset, + map_len)) { + ret = PMEM2_E_MAPPING_NOT_FOUND; + goto err_reservation_release; + } + + ret = vm_reservation_mend(rsv, map_addr, map_len); + if (ret) + goto err_reservation_release; + + ret = vm_reservation_map_unregister_release(rsv, map); + if (ret) + goto err_register_map; + } else { + ret = unmap(map_addr, map_len); + if (ret) + goto err_register_map; + } + } + + Free(map); + *map_ptr = NULL; + + return 0; + +err_reservation_release: + vm_reservation_release(rsv); +err_register_map: + VALGRIND_REGISTER_PMEM_MAPPING(map_addr, map_len); + pmem2_register_mapping(map); + return ret; +} diff --git a/src/pmdk/src/libpmem2/map_windows.c b/src/pmdk/src/libpmem2/map_windows.c new file mode 100644 index 000000000..67901eed3 --- /dev/null +++ b/src/pmdk/src/libpmem2/map_windows.c @@ -0,0 +1,590 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * map_windows.c -- pmem2_map (Windows) + */ + +#include + +#include "libpmem2.h" + +#include "alloc.h" +#include "auto_flush.h" +#include "config.h" +#include "map.h" +#include "os_thread.h" +#include "out.h" +#include "persist.h" +#include "pmem2_utils.h" +#include "source.h" +#include "sys_util.h" +#include "util.h" + +#define HIDWORD(x) ((DWORD)((x) >> 32)) +#define LODWORD(x) ((DWORD)((x) & 0xFFFFFFFF)) + +/* requested CACHE_LINE, available PAGE */ +#define REQ_CL_AVAIL_PG \ + "requested granularity not available because specified volume is not a direct access (DAX) volume" + +/* requested BYTE, available PAGE */ +#define REQ_BY_AVAIL_PG REQ_CL_AVAIL_PG + +/* requested BYTE, available CACHE_LINE */ +#define REQ_BY_AVAIL_CL \ + "requested granularity not available because the platform doesn't support eADR" + +/* indicates the cases in which the error cannot occur */ +#define GRAN_IMPOSSIBLE "impossible" +static const char *granularity_err_msg[3][3] = { +/* requested granularity / available granularity */ +/* -------------------------------------------------------------------- */ +/* BYTE CACHE_LINE PAGE */ +/* -------------------------------------------------------------------- */ +/* BYTE */ {GRAN_IMPOSSIBLE, REQ_BY_AVAIL_CL, REQ_BY_AVAIL_PG}, +/* CL */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, REQ_CL_AVAIL_PG}, +/* PAGE */ {GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE, GRAN_IMPOSSIBLE}}; + +/* + * create_mapping -- creates file mapping object for a file + */ +static HANDLE +create_mapping(HANDLE hfile, size_t offset, size_t length, DWORD protect, + unsigned long *err) +{ + size_t max_size = length + offset; + SetLastError(0); + HANDLE mh = CreateFileMapping(hfile, + NULL, /* security attributes */ + protect, + HIDWORD(max_size), + LODWORD(max_size), + NULL); + + *err = GetLastError(); + if (!mh) { + ERR("!!CreateFileMapping"); + return NULL; + } + + if (*err == ERROR_ALREADY_EXISTS) { + ERR("!!CreateFileMapping"); + CloseHandle(mh); + return NULL; + } + + /* if the handle is valid the last error is undefined */ + *err = 0; + return mh; +} + +/* + * is_direct_access -- check if the specified volume is a + * direct access (DAX) volume + */ +static int +is_direct_access(HANDLE fh) +{ + DWORD filesystemFlags; + + if (!GetVolumeInformationByHandleW(fh, NULL, 0, NULL, + NULL, &filesystemFlags, NULL, 0)) { + ERR("!!GetVolumeInformationByHandleW"); + /* always 
return a negative value */ + return pmem2_lasterror_to_err(); + } + + if (filesystemFlags & FILE_DAX_VOLUME) + return 1; + + return 0; +} + +struct pmem2_map *vm_reservation_map_find_closest_prior( + struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len); +struct pmem2_map *vm_reservation_map_find_closest_later( + struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len); + +/* + * vm_reservation_unmap -- unmaps given region of the reservation, + * preserves the placeholder + */ +static int +vm_reservation_unmap(struct pmem2_vm_reservation *rsv, void *addr, + size_t length) +{ + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + + if (addr < rsv_addr || + (char *)addr + length > (char *)rsv_addr + rsv_size) + return PMEM2_E_LENGTH_OUT_OF_RANGE; + + int ret = UnmapViewOfFile2(GetCurrentProcess(), + addr, + MEM_PRESERVE_PLACEHOLDER); + if (!ret) { + ERR("!!UnmapViewOfFile2"); + return pmem2_lasterror_to_err(); + } + + return 0; +} + +/* + * vm_reservation_merge -- merges given placeholder region with its + * neighbouring placeholders + */ +static int +vm_reservation_merge(struct pmem2_vm_reservation *rsv, void *addr, + size_t length) +{ + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + size_t rsv_offset = (size_t)addr - (size_t)rsv_addr; + + /* + * After unmapping from the reservation, it is necessary to merge + * the unoccupied neighbours so that the placeholders will be available + * for splitting to the required size of the mapping. + */ + void *merge_addr = addr; + size_t merge_size = length; + struct pmem2_map *map = NULL; + + if (rsv_offset > 0) { + map = vm_reservation_map_find_closest_prior(rsv, rsv_offset, + length); + if (map) { + merge_addr = (char *)map->addr + map->reserved_length; + merge_size += (char *)addr - (char *)merge_addr; + } else { + merge_addr = rsv_addr; + merge_size += rsv_offset; + } + } + + if (rsv_offset + length < rsv_size) { + map = vm_reservation_map_find_closest_later(rsv, rsv_offset, + length); + if (map) + merge_size += (char *)map->addr - (char *)addr - length; + else + merge_size += rsv_size - rsv_offset - length; + } + + if ((addr != merge_addr) || (length != merge_size)) { + int ret = VirtualFree(merge_addr, + merge_size, + MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS); + if (!ret) { + ERR("!!VirtualFree"); + return pmem2_lasterror_to_err(); + + } + } + + return 0; +} + +/* + * vm_reservation_split - splits the virtual memory reservation into + * separate regions + */ +int +vm_reservation_split(struct pmem2_vm_reservation *rsv, size_t rsv_offset, + size_t length) +{ + LOG(3, "rsv %p rsv_offset %zu length %zu", rsv, rsv_offset, length); + + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + + LOG(3, "rsv_addr %p rsv_size %zu", rsv_addr, rsv_size); + + if ((rsv_offset > 0 && !vm_reservation_map_find(rsv, + rsv_offset - 1, 1)) || + (rsv_offset + length < rsv_size && + !vm_reservation_map_find(rsv, + rsv_offset + length, 1))) { + /* split the placeholder */ + int ret = VirtualFree((char *)rsv_addr + rsv_offset, + length, + MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER); + if (!ret) { + ERR("!!VirtualFree"); + ret = pmem2_lasterror_to_err(); + return ret; + } + } + + return 0; +} + +/* + * pmem2_map_new -- map memory according to provided config + */ +int +pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, + const struct
pmem2_source *src) +{ + LOG(3, "cfg %p src %p map_ptr %p", cfg, src, map_ptr); + PMEM2_ERR_CLR(); + + int ret = 0; + unsigned long err = 0; + size_t file_size; + *map_ptr = NULL; + + if ((int)cfg->requested_max_granularity == PMEM2_GRANULARITY_INVALID) { + ERR( + "please define the max granularity requested for the mapping"); + + return PMEM2_E_GRANULARITY_NOT_SET; + } + + ret = pmem2_source_size(src, &file_size); + if (ret) + return ret; + + size_t src_alignment; + ret = pmem2_source_alignment(src, &src_alignment); + if (ret) + return ret; + + size_t length; + ret = pmem2_config_validate_length(cfg, file_size, src_alignment); + if (ret) + return ret; + + size_t effective_offset; + ret = pmem2_validate_offset(cfg, &effective_offset, src_alignment); + if (ret) + return ret; + + if (src->type == PMEM2_SOURCE_ANON) + effective_offset = 0; + + /* without user-provided length, map to the end of the file */ + if (cfg->length) + length = cfg->length; + else + length = file_size - effective_offset; + + HANDLE map_handle = INVALID_HANDLE_VALUE; + if (src->type == PMEM2_SOURCE_HANDLE) { + map_handle = src->value.handle; + } else if (src->type == PMEM2_SOURCE_ANON) { + /* no extra settings */ + } else { + ASSERT(0); + } + + DWORD proto = PAGE_READWRITE; + DWORD access = FILE_MAP_ALL_ACCESS; + + /* Unsupported flag combinations */ + if ((cfg->protection_flag == PMEM2_PROT_NONE) || + (cfg->protection_flag == PMEM2_PROT_WRITE) || + (cfg->protection_flag == PMEM2_PROT_EXEC) || + (cfg->protection_flag == (PMEM2_PROT_WRITE | + PMEM2_PROT_EXEC))) { + ERR("Windows does not support " + "this protection flag combination."); + return PMEM2_E_NOSUPP; + } + + /* Translate protection flags into Windows flags */ + if (cfg->protection_flag & PMEM2_PROT_WRITE) { + if (cfg->protection_flag & PMEM2_PROT_EXEC) { + proto = PAGE_EXECUTE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE | + FILE_MAP_EXECUTE; + } else { + /* + * Because incorrect combinations were + * excluded above, PROT_WRITE + * implies PROT_READ + */ + proto = PAGE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE; + } + } else if (cfg->protection_flag & PMEM2_PROT_READ) { + if (cfg->protection_flag & PMEM2_PROT_EXEC) { + proto = PAGE_EXECUTE_READ; + access = FILE_MAP_READ | FILE_MAP_EXECUTE; + } else { + proto = PAGE_READONLY; + access = FILE_MAP_READ; + } + } + + if (cfg->sharing == PMEM2_PRIVATE) { + if (cfg->protection_flag & PMEM2_PROT_EXEC) { + proto = PAGE_EXECUTE_WRITECOPY; + access = FILE_MAP_EXECUTE | FILE_MAP_COPY; + } else { + /* + * If FILE_MAP_COPY is set, + * protection is changed to read/write + */ + proto = PAGE_READONLY; + access = FILE_MAP_COPY; + } + } + + /* create a file mapping handle */ + HANDLE mh = create_mapping(map_handle, effective_offset, length, + proto, &err); + + if (!mh) { + if (err == ERROR_ALREADY_EXISTS) { + ERR("mapping already exists"); + return PMEM2_E_MAPPING_EXISTS; + } else if (err == ERROR_ACCESS_DENIED) { + return PMEM2_E_NO_ACCESS; + } + return pmem2_lasterror_to_err(); + } + + void *base; + void *rsv = cfg->reserv; + if (rsv) { + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + size_t rsv_offset = cfg->reserv_offset; + + if (rsv_offset % Mmap_align) { + ret = PMEM2_E_OFFSET_UNALIGNED; + ERR( + "offset from the beginning of virtual memory " + "reservation %zu is not a multiple of %llu", + rsv_offset, Mmap_align); + goto err_close_mapping_handle; + } + + if (rsv_offset + length > rsv_size) { + ret = PMEM2_E_LENGTH_OUT_OF_RANGE; +
ERR( + "length of the mapping %zu combined with the " + "offset into the reservation %zu exceeds virtual " + "memory reservation size %zu", + length, rsv_offset, rsv_size); + goto err_close_mapping_handle; + } + + if (vm_reservation_map_find_acquire(rsv, rsv_offset, length)) { + ret = PMEM2_E_MAPPING_EXISTS; + ERR( + "region of the reservation %p at the offset %zu and " + "length %zu is at least partly occupied by another mapping", + rsv, rsv_offset, length); + goto err_reservation_release; + } + + void *addr = (char *)rsv_addr + rsv_offset; + /* + * Before mapping to the reservation, it is necessary to split + * the unoccupied region into separate placeholders, + * so that the size to be mapped and the cut out placeholder + * size will be the same. + */ + ret = vm_reservation_split(rsv, rsv_offset, length); + if (ret) + goto err_reservation_release; + + /* replace placeholder with a regular mapping */ + base = MapViewOfFile3(mh, + NULL, + addr, /* addr in reservation */ + effective_offset, + length, + MEM_REPLACE_PLACEHOLDER, + proto, + NULL, + 0); + + if (base == NULL) { + ERR("!!MapViewOfFile3"); + DWORD ret_windows = GetLastError(); + if (ret_windows == ERROR_INVALID_ADDRESS) + ret = PMEM2_E_MAPPING_EXISTS; + else + ret = pmem2_lasterror_to_err(); + goto err_merge_reservation_regions; + } + + ASSERTeq(base, addr); + } else { + /* obtain a pointer to the mapping view */ + base = MapViewOfFile(mh, + access, + HIDWORD(effective_offset), + LODWORD(effective_offset), + length); + + if (base == NULL) { + ERR("!!MapViewOfFile"); + ret = pmem2_lasterror_to_err(); + goto err_close_mapping_handle; + } + } + + if (!CloseHandle(mh)) { + ERR("!!CloseHandle"); + ret = pmem2_lasterror_to_err(); + goto err_undo_mapping; + } + + enum pmem2_granularity available_min_granularity = + PMEM2_GRANULARITY_PAGE; + if (src->type == PMEM2_SOURCE_HANDLE) { + int direct_access = is_direct_access(src->value.handle); + if (direct_access < 0) { + ret = direct_access; + goto err_undo_mapping; + } + + bool eADR = (pmem2_auto_flush() == 1); + available_min_granularity = + get_min_granularity(eADR, direct_access, cfg->sharing); + } else if (src->type == PMEM2_SOURCE_ANON) { + available_min_granularity = PMEM2_GRANULARITY_BYTE; + } else { + ASSERT(0); + } + + if (available_min_granularity > cfg->requested_max_granularity) { + const char *err = granularity_err_msg + [cfg->requested_max_granularity] + [available_min_granularity]; + if (strcmp(err, GRAN_IMPOSSIBLE) == 0) + FATAL( + "unhandled granularity error: available_min_granularity: %d " \ + "requested_max_granularity: %d", + available_min_granularity, + cfg->requested_max_granularity); + ERR("%s", err); + ret = PMEM2_E_GRANULARITY_NOT_SUPPORTED; + goto err_undo_mapping; + } + + /* prepare pmem2_map structure */ + struct pmem2_map *map; + map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); + if (!map) + goto err_undo_mapping; + + map->addr = base; + /* + * XXX probably in some cases the reserved length > the content length. + * It may be worth investigating.
+ */ + map->reserved_length = length; + map->content_length = length; + map->effective_granularity = available_min_granularity; + map->reserv = rsv; + map->source = *src; + pmem2_set_flush_fns(map); + pmem2_set_mem_fns(map); + + ret = pmem2_register_mapping(map); + if (ret) { + goto err_free_map_struct; + } + + if (rsv) { + ret = vm_reservation_map_register_release(rsv, map); + if (ret) + goto err_unregister_map; + } + + /* return a pointer to the pmem2_map structure */ + *map_ptr = map; + + return ret; + +err_unregister_map: + pmem2_unregister_mapping(map); +err_free_map_struct: + free(map); +err_undo_mapping: + if (rsv) + vm_reservation_unmap(rsv, base, length); + else + UnmapViewOfFile(base); +err_merge_reservation_regions: + if (rsv) + vm_reservation_merge(rsv, base, length); +err_reservation_release: + if (rsv) + vm_reservation_release(rsv); +err_close_mapping_handle: + CloseHandle(mh); + return ret; +} + +/* + * pmem2_map_delete -- unmap the specified region + */ +int +pmem2_map_delete(struct pmem2_map **map_ptr) +{ + LOG(3, "map_ptr %p", map_ptr); + PMEM2_ERR_CLR(); + + struct pmem2_map *map = *map_ptr; + size_t map_len = map->content_length; + void *map_addr = map->addr; + struct pmem2_vm_reservation *rsv = map->reserv; + + int ret = pmem2_unregister_mapping(map); + if (ret) + return ret; + + if (map->reserved_length != 0) { + if (rsv) { + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); + size_t rsv_offset = (size_t)map_addr - (size_t)rsv_addr; + if (!vm_reservation_map_find_acquire(rsv, rsv_offset, + map_len)) { + ret = PMEM2_E_MAPPING_NOT_FOUND; + goto err_reservation_release; + } + + ret = vm_reservation_unmap(rsv, map->addr, + map->reserved_length); + if (ret) + goto err_reservation_release; + + ret = vm_reservation_merge(rsv, map->addr, + map->reserved_length); + if (ret) + goto err_reservation_release; + + ret = vm_reservation_map_unregister_release(rsv, map); + if (ret) + goto err_register_map; + } else { + if (!UnmapViewOfFile(map->addr)) { + ERR("!!UnmapViewOfFile"); + ret = pmem2_lasterror_to_err(); + goto err_register_map; + } + } + } + + Free(map); + *map_ptr = NULL; + + return 0; + +err_reservation_release: + vm_reservation_release(rsv); +err_register_map: + pmem2_register_mapping(map); + return ret; +} diff --git a/src/pmdk/src/libpmem2/memops_generic.c b/src/pmdk/src/libpmem2/memops_generic.c new file mode 100644 index 000000000..fccb0bdc6 --- /dev/null +++ b/src/pmdk/src/libpmem2/memops_generic.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * memops_generic.c -- architecture-independent memmove & memset fallback + * + * This fallback is needed to fulfill guarantee that pmem_mem[cpy|set|move] + * will use at least 8-byte stores (for 8-byte aligned buffers and sizes), + * even when accelerated implementation is missing or disabled. + * This guarantee is needed to maintain correctness eg in pmemobj. + * Libc may do the same, but this behavior is not documented, so we can't rely + * on that. 
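The 8-byte-store guarantee described in this header comment hinges on the compiler never tearing an aligned 64-bit access; in plain C11 terms, the util_atomic_*_explicit64 wrappers used below amount to the following (a sketch of the idea, not the project's actual portability shim):

#include <stdatomic.h>
#include <stdint.h>

static inline void
copy8_no_tear(uint64_t *dst, uint64_t *src)
{
	/* relaxed atomics forbid splitting the 8-byte access,
	 * but impose no ordering and emit no fences */
	uint64_t v = atomic_load_explicit((_Atomic uint64_t *)src,
			memory_order_relaxed);
	atomic_store_explicit((_Atomic uint64_t *)dst, v,
			memory_order_relaxed);
}
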
+ */ + +#include + +#include "out.h" +#include "pmem2_arch.h" +#include "util.h" + +/* + * pmem2_flush_flags -- internal wrapper around pmem_flush + */ +static inline void +pmem2_flush_flags(const void *addr, size_t len, unsigned flags, + flush_func flush) +{ + if (!(flags & PMEM2_F_MEM_NOFLUSH)) + flush(addr, len); +} + +/* + * cpy128 -- (internal) copy 128 bytes from src to dst + */ +static force_inline void +cpy128(uint64_t *dst, const uint64_t *src) +{ + /* + * We use atomics here just to be sure compiler will not split stores. + * Order of stores doesn't matter. + */ + uint64_t tmp[16]; + util_atomic_load_explicit64(&src[0], &tmp[0], memory_order_relaxed); + util_atomic_load_explicit64(&src[1], &tmp[1], memory_order_relaxed); + util_atomic_load_explicit64(&src[2], &tmp[2], memory_order_relaxed); + util_atomic_load_explicit64(&src[3], &tmp[3], memory_order_relaxed); + util_atomic_load_explicit64(&src[4], &tmp[4], memory_order_relaxed); + util_atomic_load_explicit64(&src[5], &tmp[5], memory_order_relaxed); + util_atomic_load_explicit64(&src[6], &tmp[6], memory_order_relaxed); + util_atomic_load_explicit64(&src[7], &tmp[7], memory_order_relaxed); + util_atomic_load_explicit64(&src[8], &tmp[8], memory_order_relaxed); + util_atomic_load_explicit64(&src[9], &tmp[9], memory_order_relaxed); + util_atomic_load_explicit64(&src[10], &tmp[10], memory_order_relaxed); + util_atomic_load_explicit64(&src[11], &tmp[11], memory_order_relaxed); + util_atomic_load_explicit64(&src[12], &tmp[12], memory_order_relaxed); + util_atomic_load_explicit64(&src[13], &tmp[13], memory_order_relaxed); + util_atomic_load_explicit64(&src[14], &tmp[14], memory_order_relaxed); + util_atomic_load_explicit64(&src[15], &tmp[15], memory_order_relaxed); + + util_atomic_store_explicit64(&dst[0], tmp[0], memory_order_relaxed); + util_atomic_store_explicit64(&dst[1], tmp[1], memory_order_relaxed); + util_atomic_store_explicit64(&dst[2], tmp[2], memory_order_relaxed); + util_atomic_store_explicit64(&dst[3], tmp[3], memory_order_relaxed); + util_atomic_store_explicit64(&dst[4], tmp[4], memory_order_relaxed); + util_atomic_store_explicit64(&dst[5], tmp[5], memory_order_relaxed); + util_atomic_store_explicit64(&dst[6], tmp[6], memory_order_relaxed); + util_atomic_store_explicit64(&dst[7], tmp[7], memory_order_relaxed); + util_atomic_store_explicit64(&dst[8], tmp[8], memory_order_relaxed); + util_atomic_store_explicit64(&dst[9], tmp[9], memory_order_relaxed); + util_atomic_store_explicit64(&dst[10], tmp[10], memory_order_relaxed); + util_atomic_store_explicit64(&dst[11], tmp[11], memory_order_relaxed); + util_atomic_store_explicit64(&dst[12], tmp[12], memory_order_relaxed); + util_atomic_store_explicit64(&dst[13], tmp[13], memory_order_relaxed); + util_atomic_store_explicit64(&dst[14], tmp[14], memory_order_relaxed); + util_atomic_store_explicit64(&dst[15], tmp[15], memory_order_relaxed); +} + +/* + * cpy64 -- (internal) copy 64 bytes from src to dst + */ +static force_inline void +cpy64(uint64_t *dst, const uint64_t *src) +{ + /* + * We use atomics here just to be sure compiler will not split stores. + * Order of stores doesn't matter. 
+ */ + uint64_t tmp[8]; + util_atomic_load_explicit64(&src[0], &tmp[0], memory_order_relaxed); + util_atomic_load_explicit64(&src[1], &tmp[1], memory_order_relaxed); + util_atomic_load_explicit64(&src[2], &tmp[2], memory_order_relaxed); + util_atomic_load_explicit64(&src[3], &tmp[3], memory_order_relaxed); + util_atomic_load_explicit64(&src[4], &tmp[4], memory_order_relaxed); + util_atomic_load_explicit64(&src[5], &tmp[5], memory_order_relaxed); + util_atomic_load_explicit64(&src[6], &tmp[6], memory_order_relaxed); + util_atomic_load_explicit64(&src[7], &tmp[7], memory_order_relaxed); + + util_atomic_store_explicit64(&dst[0], tmp[0], memory_order_relaxed); + util_atomic_store_explicit64(&dst[1], tmp[1], memory_order_relaxed); + util_atomic_store_explicit64(&dst[2], tmp[2], memory_order_relaxed); + util_atomic_store_explicit64(&dst[3], tmp[3], memory_order_relaxed); + util_atomic_store_explicit64(&dst[4], tmp[4], memory_order_relaxed); + util_atomic_store_explicit64(&dst[5], tmp[5], memory_order_relaxed); + util_atomic_store_explicit64(&dst[6], tmp[6], memory_order_relaxed); + util_atomic_store_explicit64(&dst[7], tmp[7], memory_order_relaxed); +} + +/* + * cpy8 -- (internal) copy 8 bytes from src to dst + */ +static force_inline void +cpy8(uint64_t *dst, const uint64_t *src) +{ + uint64_t tmp; + util_atomic_load_explicit64(src, &tmp, memory_order_relaxed); + util_atomic_store_explicit64(dst, tmp, memory_order_relaxed); +} + +/* + * store8 -- (internal) store 8 bytes + */ +static force_inline void +store8(uint64_t *dst, uint64_t c) +{ + util_atomic_store_explicit64(dst, c, memory_order_relaxed); +} + +/* + * memmove_nodrain_generic -- generic memmove to pmem without hw drain + */ +void * +memmove_nodrain_generic(void *dst, const void *src, size_t len, + unsigned flags, flush_func flush) +{ + LOG(15, "pmemdest %p src %p len %zu flags 0x%x", dst, src, len, + flags); + + char *cdst = dst; + const char *csrc = src; + size_t remaining; + (void) flags; + + if ((uintptr_t)cdst - (uintptr_t)csrc >= len) { + size_t cnt = (uint64_t)cdst & 7; + if (cnt > 0) { + cnt = 8 - cnt; + + if (cnt > len) + cnt = len; + + for (size_t i = 0; i < cnt; ++i) + cdst[i] = csrc[i]; + + pmem2_flush_flags(cdst, cnt, flags, flush); + + cdst += cnt; + csrc += cnt; + len -= cnt; + } + + uint64_t *dst8 = (uint64_t *)cdst; + const uint64_t *src8 = (const uint64_t *)csrc; + + while (len >= 128 && CACHELINE_SIZE == 128) { + cpy128(dst8, src8); + pmem2_flush_flags(dst8, 128, flags, flush); + len -= 128; + dst8 += 16; + src8 += 16; + } + + while (len >= 64) { + cpy64(dst8, src8); + pmem2_flush_flags(dst8, 64, flags, flush); + len -= 64; + dst8 += 8; + src8 += 8; + } + + remaining = len; + while (len >= 8) { + cpy8(dst8, src8); + len -= 8; + dst8++; + src8++; + } + + cdst = (char *)dst8; + csrc = (const char *)src8; + + for (size_t i = 0; i < len; ++i) + *cdst++ = *csrc++; + + if (remaining) + pmem2_flush_flags(cdst - remaining, remaining, flags, + flush); + } else { + cdst += len; + csrc += len; + + size_t cnt = (uint64_t)cdst & 7; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + cdst -= cnt; + csrc -= cnt; + len -= cnt; + + for (size_t i = cnt; i > 0; --i) + cdst[i - 1] = csrc[i - 1]; + pmem2_flush_flags(cdst, cnt, flags, flush); + } + + uint64_t *dst8 = (uint64_t *)cdst; + const uint64_t *src8 = (const uint64_t *)csrc; + + while (len >= 128 && CACHELINE_SIZE == 128) { + dst8 -= 16; + src8 -= 16; + cpy128(dst8, src8); + pmem2_flush_flags(dst8, 128, flags, flush); + len -= 128; + } + + while (len >= 64) { + dst8 -= 8; + src8 -= 
8; + cpy64(dst8, src8); + pmem2_flush_flags(dst8, 64, flags, flush); + len -= 64; + } + + remaining = len; + while (len >= 8) { + --dst8; + --src8; + cpy8(dst8, src8); + len -= 8; + } + + cdst = (char *)dst8; + csrc = (const char *)src8; + + for (size_t i = len; i > 0; --i) + *--cdst = *--csrc; + + if (remaining) + pmem2_flush_flags(cdst, remaining, flags, flush); + } + + return dst; +} + +/* + * memset_nodrain_generic -- generic memset to pmem without hw drain + */ +void * +memset_nodrain_generic(void *dst, int c, size_t len, unsigned flags, + flush_func flush) +{ + LOG(15, "pmemdest %p c 0x%x len %zu flags 0x%x", dst, c, len, + flags); + (void) flags; + + char *cdst = dst; + size_t cnt = (uint64_t)cdst & 7; + if (cnt > 0) { + cnt = 8 - cnt; + + if (cnt > len) + cnt = len; + + for (size_t i = 0; i < cnt; ++i) + cdst[i] = (char)c; + pmem2_flush_flags(cdst, cnt, flags, flush); + + cdst += cnt; + len -= cnt; + } + + uint64_t *dst8 = (uint64_t *)cdst; + + uint64_t u = (unsigned char)c; + uint64_t tmp = (u << 56) | (u << 48) | (u << 40) | (u << 32) | + (u << 24) | (u << 16) | (u << 8) | u; + + while (len >= 128 && CACHELINE_SIZE == 128) { + store8(&dst8[0], tmp); + store8(&dst8[1], tmp); + store8(&dst8[2], tmp); + store8(&dst8[3], tmp); + store8(&dst8[4], tmp); + store8(&dst8[5], tmp); + store8(&dst8[6], tmp); + store8(&dst8[7], tmp); + store8(&dst8[8], tmp); + store8(&dst8[9], tmp); + store8(&dst8[10], tmp); + store8(&dst8[11], tmp); + store8(&dst8[12], tmp); + store8(&dst8[13], tmp); + store8(&dst8[14], tmp); + store8(&dst8[15], tmp); + pmem2_flush_flags(dst8, 128, flags, flush); + len -= 128; + dst8 += 16; + } + + while (len >= 64) { + store8(&dst8[0], tmp); + store8(&dst8[1], tmp); + store8(&dst8[2], tmp); + store8(&dst8[3], tmp); + store8(&dst8[4], tmp); + store8(&dst8[5], tmp); + store8(&dst8[6], tmp); + store8(&dst8[7], tmp); + pmem2_flush_flags(dst8, 64, flags, flush); + len -= 64; + dst8 += 8; + } + + size_t remaining = len; + while (len >= 8) { + store8(dst8, tmp); + len -= 8; + dst8++; + } + + cdst = (char *)dst8; + + for (size_t i = 0; i < len; ++i) + *cdst++ = (char)c; + + if (remaining) + pmem2_flush_flags(cdst - remaining, remaining, flags, flush); + return dst; +} diff --git a/src/pmdk/src/libpmem2/persist.c b/src/pmdk/src/libpmem2/persist.c new file mode 100644 index 000000000..a7ec0fc4c --- /dev/null +++ b/src/pmdk/src/libpmem2/persist.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * persist.c -- pmem2_get_[persist|flush|drain]_fn + */ + +#include +#include + +#include "libpmem2.h" +#include "map.h" +#include "out.h" +#include "os.h" +#include "persist.h" +#include "deep_flush.h" +#include "pmem2_arch.h" +#include "pmem2_utils.h" +#include "valgrind_internal.h" + +static struct pmem2_arch_info Info; + +/* + * memmove_nodrain_libc -- (internal) memmove to pmem using libc + */ +static void * +memmove_nodrain_libc(void *pmemdest, const void *src, size_t len, + unsigned flags, flush_func flush) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + LOG(15, "pmemdest %p src %p len %zu flags 0x%x", pmemdest, src, len, + flags); + + memmove(pmemdest, src, len); + + if (!(flags & PMEM2_F_MEM_NOFLUSH)) + flush(pmemdest, len); + + return pmemdest; +} + +/* + * memset_nodrain_libc -- (internal) memset to pmem using libc + */ +static void * +memset_nodrain_libc(void *pmemdest, int c, size_t len, unsigned flags, + flush_func flush) +{ +#ifdef DEBUG + if (flags & 
~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + LOG(15, "pmemdest %p c 0x%x len %zu flags 0x%x", pmemdest, c, len, + flags); + + memset(pmemdest, c, len); + + if (!(flags & PMEM2_F_MEM_NOFLUSH)) + flush(pmemdest, len); + + return pmemdest; +} + +/* + * pmem2_persist_init -- initialize persist module + */ +void +pmem2_persist_init(void) +{ + Info.memmove_nodrain = NULL; + Info.memset_nodrain = NULL; + Info.memmove_nodrain_eadr = NULL; + Info.memset_nodrain_eadr = NULL; + Info.flush = NULL; + Info.fence = NULL; + Info.flush_has_builtin_fence = 0; + + pmem2_arch_init(&Info); + + char *ptr = os_getenv("PMEM_NO_GENERIC_MEMCPY"); + long long no_generic = 0; + if (ptr) + no_generic = atoll(ptr); + + if (Info.memmove_nodrain == NULL) { + if (no_generic) { + Info.memmove_nodrain = memmove_nodrain_libc; + Info.memmove_nodrain_eadr = memmove_nodrain_libc; + LOG(3, "using libc memmove"); + } else { + Info.memmove_nodrain = memmove_nodrain_generic; + Info.memmove_nodrain_eadr = memmove_nodrain_generic; + LOG(3, "using generic memmove"); + } + } + + if (Info.memset_nodrain == NULL) { + if (no_generic) { + Info.memset_nodrain = memset_nodrain_libc; + Info.memset_nodrain_eadr = memset_nodrain_libc; + LOG(3, "using libc memset"); + } else { + Info.memset_nodrain = memset_nodrain_generic; + Info.memset_nodrain_eadr = memset_nodrain_generic; + LOG(3, "using generic memset"); + } + } +} + +/* + * pmem2_drain -- wait for any PM stores to drain from HW buffers + */ +static void +pmem2_drain(void) +{ + LOG(15, NULL); + + Info.fence(); +} + +/* + * pmem2_log_flush -- log the flush attempt for the given range + */ +static inline void +pmem2_log_flush(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len); +} + +/* + * pmem2_flush_nop -- NOP version of the flush routine, used in cases where + * memory behind the mapping is already in persistence domain + */ +static void +pmem2_flush_nop(const void *addr, size_t len) +{ + pmem2_log_flush(addr, len); + + /* nothing more to do, other than telling pmemcheck about it */ + VALGRIND_DO_FLUSH(addr, len); +} + +/* + * pmem2_flush_cpu_cache -- flush processor cache for the given range + */ +static void +pmem2_flush_cpu_cache(const void *addr, size_t len) +{ + pmem2_log_flush(addr, len); + + Info.flush(addr, len); +} + +/* + * pmem2_persist_noflush -- make all changes to a range of pmem persistent + */ +static void +pmem2_persist_noflush(const void *addr, size_t len) +{ + pmem2_flush_nop(addr, len); + pmem2_drain(); +} + +/* + * pmem2_persist_cpu_cache -- make all changes to a range of pmem persistent + */ +static void +pmem2_persist_cpu_cache(const void *addr, size_t len) +{ + pmem2_flush_cpu_cache(addr, len); + pmem2_drain(); +} + +/* + * pmem2_flush_file_buffers -- flush CPU and OS caches for the given range + */ +static int +pmem2_flush_file_buffers(const void *addr, size_t len, int autorestart) +{ + int olderrno = errno; + + pmem2_log_flush(addr, len); + + /* + * Flushing using OS-provided mechanisms requires that the address + * be a multiple of the page size. + * Align address down and change len so that [addr, addr + len) still + * contains the initial range. 
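A worked example of the rounding the comment above describes, the same computation ALIGN_DOWN performs (standalone illustration with assumed values, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uintptr_t pagesize = 4096;	/* assumed page size */
	uintptr_t addr = 0x10002a;		/* hypothetical unaligned flush request */
	size_t len = 100;

	uintptr_t new_addr = addr & ~(pagesize - 1);	/* 0x100000 */
	len += addr - new_addr;				/* 100 + 0x2a = 142 */

	/* [new_addr, new_addr + len) still covers the original range */
	printf("flush 0x%lx..0x%lx\n", (unsigned long)new_addr,
		(unsigned long)(new_addr + len));
	return 0;
}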
+ */ + + /* round address down to page boundary */ + uintptr_t new_addr = ALIGN_DOWN((uintptr_t)addr, Pagesize); + + /* increase len by the amount we gain when we round addr down */ + len += (uintptr_t)addr - new_addr; + + addr = (const void *)new_addr; + + int ret = 0; + + /* + * Find all the mappings overlapping with the [addr, addr + len) range + * and flush them, one by one. + */ + do { + struct pmem2_map *map = pmem2_map_find(addr, len); + if (!map) + break; + + size_t flush; + size_t remaining = map->reserved_length; + if (map->addr < addr) { + /* + * Addr is inside of the mapping, so we have to decrease + * the remaining length by an offset from the start + * of our mapping. + */ + remaining -= (uintptr_t)addr - (uintptr_t)map->addr; + } else if (map->addr == addr) { + /* perfect match, there's nothing to do in this case */ + } else { + /* + * map->addr > addr, so we have to skip the hole + * between addr and map->addr. + */ + len -= (uintptr_t)map->addr - (uintptr_t)addr; + addr = map->addr; + } + + if (len > remaining) + flush = remaining; + else + flush = len; + + int ret1 = pmem2_flush_file_buffers_os(map, addr, flush, + autorestart); + if (ret1 != 0) + ret = ret1; + + addr = ((const char *)addr) + flush; + len -= flush; + } while (len > 0); + + errno = olderrno; + + return ret; +} + +/* + * pmem2_persist_pages -- flush processor cache for the given range + */ +static void +pmem2_persist_pages(const void *addr, size_t len) +{ + /* + * Restarting on EINTR in general is a bad idea, but we don't have + * any way to communicate the failure outside. + */ + const int autorestart = 1; + + int ret = pmem2_flush_file_buffers(addr, len, autorestart); + if (ret) { + /* + * 1) There's no way to propagate this error. Silently ignoring + * it would lead to data corruption. + * 2) non-pmem code path shouldn't be used in production. + * + * The only sane thing to do is to crash the application. Sorry. 
+ */ + abort(); + } +} + +/* + * pmem2_drain_nop -- variant of pmem2_drain for page granularity; + * it is a NOP because the flush part has built-in drain + */ +static void +pmem2_drain_nop(void) +{ + LOG(15, NULL); +} + +/* + * pmem2_deep_flush_page -- do nothing - pmem2_persist_fn already did msync + */ +int +pmem2_deep_flush_page(struct pmem2_map *map, void *ptr, size_t size) +{ + LOG(3, "map %p ptr %p size %zu", map, ptr, size); + return 0; +} + +/* + * pmem2_deep_flush_cache -- flush buffers for fsdax or write + * to deep_flush for DevDax + */ +int +pmem2_deep_flush_cache(struct pmem2_map *map, void *ptr, size_t size) +{ + LOG(3, "map %p ptr %p size %zu", map, ptr, size); + + enum pmem2_file_type type = map->source.value.ftype; + + /* + * XXX: this should be moved to pmem2_deep_flush_dax + * while refactoring abstraction + */ + if (type == PMEM2_FTYPE_DEVDAX) + pmem2_persist_cpu_cache(ptr, size); + + int ret = pmem2_deep_flush_dax(map, ptr, size); + if (ret < 0) { + LOG(1, "cannot perform deep flush cache for map %p", map); + return ret; + } + + return 0; +} + +/* + * pmem2_deep_flush_byte -- flush cpu cache and perform deep flush for dax + */ +int +pmem2_deep_flush_byte(struct pmem2_map *map, void *ptr, size_t size) +{ + LOG(3, "map %p ptr %p size %zu", map, ptr, size); + + if (map->source.type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not support deep flush"); + return PMEM2_E_NOSUPP; + } + + ASSERT(map->source.type == PMEM2_SOURCE_FD || + map->source.type == PMEM2_SOURCE_HANDLE); + + enum pmem2_file_type type = map->source.value.ftype; + + /* + * XXX: this should be moved to pmem2_deep_flush_dax + * while refactoring abstraction + */ + if (type == PMEM2_FTYPE_DEVDAX) + pmem2_persist_cpu_cache(ptr, size); + + int ret = pmem2_deep_flush_dax(map, ptr, size); + if (ret < 0) { + LOG(1, "cannot perform deep flush byte for map %p", map); + return ret; + } + + return 0; +} + +/* + * pmem2_set_flush_fns -- set function pointers related to flushing + */ +void +pmem2_set_flush_fns(struct pmem2_map *map) +{ + switch (map->effective_granularity) { + case PMEM2_GRANULARITY_PAGE: + map->persist_fn = pmem2_persist_pages; + map->flush_fn = pmem2_persist_pages; + map->drain_fn = pmem2_drain_nop; + map->deep_flush_fn = pmem2_deep_flush_page; + break; + case PMEM2_GRANULARITY_CACHE_LINE: + map->persist_fn = pmem2_persist_cpu_cache; + map->flush_fn = pmem2_flush_cpu_cache; + map->drain_fn = pmem2_drain; + map->deep_flush_fn = pmem2_deep_flush_cache; + break; + case PMEM2_GRANULARITY_BYTE: + map->persist_fn = pmem2_persist_noflush; + map->flush_fn = pmem2_flush_nop; + map->drain_fn = pmem2_drain; + map->deep_flush_fn = pmem2_deep_flush_byte; + break; + default: + abort(); + } + +} + +/* + * pmem2_get_persist_fn - return a pointer to a function responsible for + * persisting data in range owned by pmem2_map + */ +pmem2_persist_fn +pmem2_get_persist_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this function cannot fail */ + return map->persist_fn; +} + +/* + * pmem2_get_flush_fn - return a pointer to a function responsible for + * flushing data in range owned by pmem2_map + */ +pmem2_flush_fn +pmem2_get_flush_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this function cannot fail */ + return map->flush_fn; +} + +/* + * pmem2_get_drain_fn - return a pointer to a function responsible for + * draining flushes in range owned by pmem2_map + */ +pmem2_drain_fn +pmem2_get_drain_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this 
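The getters above simply hand out the pointers installed by pmem2_set_flush_fns(), so the granularity dispatch is paid once per mapping. A minimal usage sketch against the public API (assumes a map already created elsewhere with pmem2_map_new; illustration only):

#include <libpmem2.h>
#include <string.h>

static void
write_and_persist(struct pmem2_map *map)
{
	char *addr = pmem2_map_get_address(map);
	pmem2_persist_fn persist = pmem2_get_persist_fn(map);

	strcpy(addr, "hello");
	persist(addr, sizeof("hello"));	/* flush + drain for this granularity */
}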
function cannot fail */ + return map->drain_fn; +} + +/* + * pmem2_memmove_nonpmem -- mem[move|cpy] followed by an msync + */ +static void * +pmem2_memmove_nonpmem(void *pmemdest, const void *src, size_t len, + unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memmove"); + Info.memmove_nodrain(pmemdest, src, len, flags & ~PMEM2_F_MEM_NODRAIN, + Info.flush); + + pmem2_persist_pages(pmemdest, len); + + PMEM2_API_END("pmem2_memmove"); + return pmemdest; +} + +/* + * pmem2_memset_nonpmem -- memset followed by an msync + */ +static void * +pmem2_memset_nonpmem(void *pmemdest, int c, size_t len, unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memset"); + Info.memset_nodrain(pmemdest, c, len, flags & ~PMEM2_F_MEM_NODRAIN, + Info.flush); + + pmem2_persist_pages(pmemdest, len); + + PMEM2_API_END("pmem2_memset"); + return pmemdest; +} + +/* + * pmem2_memmove -- mem[move|cpy] to pmem + */ +static void * +pmem2_memmove(void *pmemdest, const void *src, size_t len, + unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memmove"); + Info.memmove_nodrain(pmemdest, src, len, flags, Info.flush); + if ((flags & (PMEM2_F_MEM_NODRAIN | PMEM2_F_MEM_NOFLUSH)) == 0) + pmem2_drain(); + + PMEM2_API_END("pmem2_memmove"); + return pmemdest; +} + +/* + * pmem2_memset -- memset to pmem + */ +static void * +pmem2_memset(void *pmemdest, int c, size_t len, unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memset"); + Info.memset_nodrain(pmemdest, c, len, flags, Info.flush); + if ((flags & (PMEM2_F_MEM_NODRAIN | PMEM2_F_MEM_NOFLUSH)) == 0) + pmem2_drain(); + + PMEM2_API_END("pmem2_memset"); + return pmemdest; +} + +/* + * pmem2_memmove_eadr -- mem[move|cpy] to pmem, platform supports eADR + */ +static void * +pmem2_memmove_eadr(void *pmemdest, const void *src, size_t len, + unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memmove"); + Info.memmove_nodrain_eadr(pmemdest, src, len, flags, Info.flush); + if ((flags & (PMEM2_F_MEM_NODRAIN | PMEM2_F_MEM_NOFLUSH)) == 0) + pmem2_drain(); + + PMEM2_API_END("pmem2_memmove"); + return pmemdest; +} + +/* + * pmem2_memset_eadr -- memset to pmem, platform supports eADR + */ +static void * +pmem2_memset_eadr(void *pmemdest, int c, size_t len, unsigned flags) +{ +#ifdef DEBUG + if (flags & ~PMEM2_F_MEM_VALID_FLAGS) + ERR("invalid flags 0x%x", flags); +#endif + PMEM2_API_START("pmem2_memset"); + Info.memset_nodrain_eadr(pmemdest, c, len, flags, Info.flush); + if ((flags & (PMEM2_F_MEM_NODRAIN | PMEM2_F_MEM_NOFLUSH)) == 0) + pmem2_drain(); + + PMEM2_API_END("pmem2_memset"); + return pmemdest; +} + +/* + * pmem2_set_mem_fns -- set function pointers related to mem[move|cpy|set] + */ +void +pmem2_set_mem_fns(struct pmem2_map *map) +{ + switch (map->effective_granularity) { + case PMEM2_GRANULARITY_PAGE: + map->memmove_fn = pmem2_memmove_nonpmem; + map->memcpy_fn = pmem2_memmove_nonpmem; + map->memset_fn = pmem2_memset_nonpmem; + break; + case PMEM2_GRANULARITY_CACHE_LINE: + map->memmove_fn = pmem2_memmove; + map->memcpy_fn = pmem2_memmove; + map->memset_fn = pmem2_memset; + break; + case PMEM2_GRANULARITY_BYTE: + map->memmove_fn = 
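In the wrappers above, a drain is issued only when neither PMEM2_F_MEM_NODRAIN nor PMEM2_F_MEM_NOFLUSH is set, which is what makes batched copies possible. An illustrative sketch (assumes an existing cache-line-granularity mapping; not part of the patch):

#include <stddef.h>
#include <libpmem2.h>

static void
copy_two_then_drain(struct pmem2_map *map, char *dst,
		const char *a, const char *b, size_t n)
{
	pmem2_memcpy_fn memcpy_fn = pmem2_get_memcpy_fn(map);
	pmem2_drain_fn drain = pmem2_get_drain_fn(map);

	memcpy_fn(dst, a, n, PMEM2_F_MEM_NODRAIN);	/* flushed, not drained */
	memcpy_fn(dst + n, b, n, PMEM2_F_MEM_NODRAIN);
	drain();	/* one fence makes both copies durable */
}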
pmem2_memmove_eadr; + map->memcpy_fn = pmem2_memmove_eadr; + map->memset_fn = pmem2_memset_eadr; + break; + default: + abort(); + } + +} + +/* + * pmem2_get_memmove_fn - return a pointer to a function + */ +pmem2_memmove_fn +pmem2_get_memmove_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this function cannot fail */ + return map->memmove_fn; +} + +/* + * pmem2_get_memcpy_fn - return a pointer to a function + */ +pmem2_memcpy_fn +pmem2_get_memcpy_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this function cannot fail */ + return map->memcpy_fn; +} + +/* + * pmem2_get_memset_fn - return a pointer to a function + */ +pmem2_memset_fn +pmem2_get_memset_fn(struct pmem2_map *map) +{ + /* we do not need to clear err because this function cannot fail */ + return map->memset_fn; +} + +#if VG_PMEMCHECK_ENABLED +/* + * pmem2_emit_log -- logs library and function names to pmemcheck store log + */ +void +pmem2_emit_log(const char *func, int order) +{ + util_emit_log("libpmem2", func, order); +} +#endif diff --git a/src/pmdk/src/libpmem2/persist.h b/src/pmdk/src/libpmem2/persist.h new file mode 100644 index 000000000..525508c7e --- /dev/null +++ b/src/pmdk/src/libpmem2/persist.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * persist.h -- internal definitions for libpmem2 persist module + */ +#ifndef PMEM2_PERSIST_H +#define PMEM2_PERSIST_H + +#include + +#include "map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void pmem2_persist_init(void); + +int pmem2_flush_file_buffers_os(struct pmem2_map *map, const void *addr, + size_t len, int autorestart); +void pmem2_set_flush_fns(struct pmem2_map *map); +void pmem2_set_mem_fns(struct pmem2_map *map); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/persist_posix.c b/src/pmdk/src/libpmem2/persist_posix.c new file mode 100644 index 000000000..34c0ae6d8 --- /dev/null +++ b/src/pmdk/src/libpmem2/persist_posix.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * persist_posix.c -- POSIX-specific part of persist implementation + */ + +#include +#include +#include + +#include "out.h" +#include "persist.h" +#include "pmem2_utils.h" +#include "valgrind_internal.h" + +/* + * pmem2_flush_file_buffers_os -- flush CPU and OS file caches for the given + * range + */ +int +pmem2_flush_file_buffers_os(struct pmem2_map *map, const void *addr, size_t len, + int autorestart) +{ + /* + * msync accepts addresses aligned to the page boundary, so we may sync + * more and part of it may have been marked as undefined/inaccessible. + * Msyncing such memory is not a bug, so as a workaround temporarily + * disable error reporting. 
+ */ + VALGRIND_DO_DISABLE_ERROR_REPORTING; + int ret; + do { + ret = msync((void *)addr, len, MS_SYNC); + + if (ret < 0) { + ERR("!msync"); + } else { + /* full flush */ + VALGRIND_DO_PERSIST((uintptr_t)addr, len); + } + } while (autorestart && ret < 0 && errno == EINTR); + + VALGRIND_DO_ENABLE_ERROR_REPORTING; + + if (ret) + return PMEM2_E_ERRNO; + + return 0; +} diff --git a/src/pmdk/src/libpmem2/persist_windows.c b/src/pmdk/src/libpmem2/persist_windows.c new file mode 100644 index 000000000..a169d1df5 --- /dev/null +++ b/src/pmdk/src/libpmem2/persist_windows.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * persist_windows.c -- Windows-specific part of persist implementation + */ + +#include +#include + +#include "out.h" +#include "persist.h" +#include "pmem2_utils.h" + +/* + * pmem2_flush_file_buffers_os -- flush CPU and OS file caches for the given + * range + */ +int +pmem2_flush_file_buffers_os(struct pmem2_map *map, const void *addr, size_t len, + int autorestart) +{ + ASSERTeq(map->source.type, PMEM2_SOURCE_HANDLE); + + if (FlushViewOfFile(addr, len) == FALSE) { + ERR("!!FlushViewOfFile"); + return pmem2_lasterror_to_err(); + } + + if (FlushFileBuffers(map->source.value.handle) == FALSE) { + ERR("!!FlushFileBuffers"); + return pmem2_lasterror_to_err(); + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/pmem2.h b/src/pmdk/src/libpmem2/pmem2.h new file mode 100644 index 000000000..415695ace --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * pmem2.h -- internal definitions for libpmem2 + */ +#ifndef PMEM2_H +#define PMEM2_H + +#include "libpmem2.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PMEM2_MAJOR_VERSION 0 +#define PMEM2_MINOR_VERSION 0 + +#define PMEM2_LOG_PREFIX "libpmem2" +#define PMEM2_LOG_LEVEL_VAR "PMEM2_LOG_LEVEL" +#define PMEM2_LOG_FILE_VAR "PMEM2_LOG_FILE" + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/pmem2_arch.h b/src/pmdk/src/libpmem2/pmem2_arch.h new file mode 100644 index 000000000..80be875f4 --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_arch.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * pmem2_arch.h -- core-arch interface + */ +#ifndef PMEM2_ARCH_H +#define PMEM2_ARCH_H + +#include +#include "libpmem2.h" +#include "util.h" +#include "valgrind_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct pmem2_arch_info; + +typedef void (*fence_func)(void); +typedef void (*flush_func)(const void *, size_t); +typedef void *(*memmove_nodrain_func)(void *pmemdest, const void *src, + size_t len, unsigned flags, flush_func flush); +typedef void *(*memset_nodrain_func)(void *pmemdest, int c, size_t len, + unsigned flags, flush_func flush); + +struct pmem2_arch_info { + memmove_nodrain_func memmove_nodrain; + memmove_nodrain_func memmove_nodrain_eadr; + memset_nodrain_func memset_nodrain; + memset_nodrain_func memset_nodrain_eadr; + flush_func flush; + fence_func fence; + int flush_has_builtin_fence; +}; + +void pmem2_arch_init(struct pmem2_arch_info *info); + +/* + * flush_empty_nolog -- (internal) do not flush the CPU cache + */ +static force_inline void +flush_empty_nolog(const void *addr, size_t len) +{ + /* NOP, but tell pmemcheck about it */ + VALGRIND_DO_FLUSH(addr, len); +} + +void *memmove_nodrain_generic(void *pmemdest, const void *src, size_t len, + 
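struct pmem2_arch_info is the whole contract between persist.c and a platform backend: the backend fills in what it has, and anything left NULL falls back to the generic or libc routines chosen in pmem2_persist_init(). A hypothetical backend, shaped like the ppc64 one later in this patch (sketch only; names and the barrier choice are invented):

#include <stddef.h>
#include "pmem2_arch.h"

static void
my_fence(void)
{
	__atomic_thread_fence(__ATOMIC_SEQ_CST);	/* stand-in store barrier */
}

static void
my_flush(const void *addr, size_t len)
{
	/* would write back [addr, addr + len) cache line by cache line */
	(void) addr;
	(void) len;
}

void
pmem2_arch_init(struct pmem2_arch_info *info)
{
	info->fence = my_fence;
	info->flush = my_flush;
	/*
	 * memmove_nodrain/memset_nodrain stay NULL on purpose:
	 * pmem2_persist_init() then falls back to the generic versions.
	 */
}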
unsigned flags, flush_func flush); +void *memset_nodrain_generic(void *pmemdest, int c, size_t len, unsigned flags, + flush_func flush); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/pmem2_utils.c b/src/pmdk/src/libpmem2/pmem2_utils.c new file mode 100644 index 000000000..23c258df6 --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * pmem2_utils.c -- libpmem2 utilities functions + */ + +#include +#include "alloc.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "util.h" + +/* + * pmem2_malloc -- allocate a buffer and handle an error + */ +void * +pmem2_malloc(size_t size, int *err) +{ + void *ptr = Malloc(size); + *err = 0; + + if (ptr == NULL) { + ERR("!malloc(%zu)", size); + *err = PMEM2_E_ERRNO; + } + + return ptr; +} + +/* + * pmem2_zalloc -- allocate a buffer, zero it and handle an error + */ +void * +pmem2_zalloc(size_t size, int *err) +{ + void *ptr = Zalloc(size); + *err = 0; + + if (ptr == NULL) { + ERR("!malloc(%zu)", size); + *err = PMEM2_E_ERRNO; + } + + return ptr; +} + +/* + * pmem2_realloc -- reallocate a buffer and handle an error + */ +void * +pmem2_realloc(void *ptr, size_t size, int *err) +{ + void *newptr = Realloc(ptr, size); + *err = 0; + + if (newptr == NULL) { + ERR("!realloc(%zu)", size); + *err = PMEM2_E_ERRNO; + } + + return newptr; +} + +int +pmem2_err_to_errno(int err) +{ + if (err > 0) + FATAL("positive error code is a bug in libpmem2"); + + if (err == PMEM2_E_NOSUPP) + return ENOTSUP; + + if (err <= PMEM2_E_UNKNOWN) + return EINVAL; + + return -err; +} + +#ifdef _WIN32 +/* + * converts windows error codes to pmem2 error + */ +int +pmem2_lasterror_to_err() +{ + int err = util_lasterror_to_errno(GetLastError()); + + if (err == -1) + return PMEM2_E_UNKNOWN; + + return -err; +} +#endif diff --git a/src/pmdk/src/libpmem2/pmem2_utils.h b/src/pmdk/src/libpmem2/pmem2_utils.h new file mode 100644 index 000000000..84464f47a --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2019-2020, Intel Corporation */ + +/* + * pmem2_utils.h -- libpmem2 utilities functions + */ + +#ifndef PMEM2_UTILS_H +#define PMEM2_UTILS_H 1 + +#include + +#include "os.h" +#include "out.h" +#include "source.h" + +static inline int +pmem2_assert_errno(void) +{ + if (!errno) { + ERR("errno is not set"); + ASSERTinfo(0, "errno is not set"); + return -EINVAL; + } + + return -errno; +} + +#define PMEM2_E_ERRNO (pmem2_assert_errno()) + +#ifdef DEBUG +#define PMEM2_ERR_CLR() \ +{\ + errno = 0;\ + char *errormsg = (char *)out_get_errormsg();\ + strcpy(errormsg, "\0");\ +} +#else +#define PMEM2_ERR_CLR() +#endif + +void *pmem2_malloc(size_t size, int *err); +void *pmem2_zalloc(size_t size, int *err); +void *pmem2_realloc(void *ptr, size_t size, int *err); + +#ifdef _WIN32 +int pmem2_lasterror_to_err(); +#endif + +int pmem2_get_type_from_stat(const os_stat_t *st, enum pmem2_file_type *type); +int pmem2_device_dax_size(const struct pmem2_source *src, size_t *size); +int pmem2_device_dax_alignment(const struct pmem2_source *src, + size_t *alignment); + +#endif /* PMEM2_UTILS_H */ diff --git a/src/pmdk/src/libpmem2/pmem2_utils_linux.c b/src/pmdk/src/libpmem2/pmem2_utils_linux.c new file mode 100644 index 000000000..57cc432ff --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils_linux.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* 
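pmem2_malloc() and friends above report failure through the out-parameter rather than the return value alone, and the stored code is already a negative PMEM2_E_* value. A short sketch of the calling pattern ('struct thing' is hypothetical; assumes pmem2_utils.h is in scope):

#include <stddef.h>

struct thing { int x; };

static int
thing_new(struct thing **out)
{
	int err;
	struct thing *t = pmem2_malloc(sizeof(*t), &err);
	if (err)
		return err;	/* already a negative PMEM2_E_* code */

	t->x = 0;
	*out = t;
	return 0;
}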
Copyright 2014-2020, Intel Corporation */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "region_namespace.h" +#include "source.h" + +/* + * pmem2_get_type_from_stat -- determine type of file based on output of stat + * syscall + */ +int +pmem2_get_type_from_stat(const os_stat_t *st, enum pmem2_file_type *type) +{ + if (S_ISREG(st->st_mode)) { + *type = PMEM2_FTYPE_REG; + return 0; + } + + if (S_ISDIR(st->st_mode)) { + *type = PMEM2_FTYPE_DIR; + return 0; + } + + if (!S_ISCHR(st->st_mode)) { + ERR("file type 0%o not supported", st->st_mode & S_IFMT); + return PMEM2_E_INVALID_FILE_TYPE; + } + + char spath[PATH_MAX]; + int ret = util_snprintf(spath, PATH_MAX, + "/sys/dev/char/%u:%u/subsystem", + os_major(st->st_rdev), os_minor(st->st_rdev)); + + if (ret < 0) { + /* impossible */ + ERR("!snprintf"); + ASSERTinfo(0, "snprintf failed"); + return PMEM2_E_ERRNO; + } + + LOG(4, "device subsystem path \"%s\"", spath); + + char npath[PATH_MAX]; + char *rpath = realpath(spath, npath); + if (rpath == NULL) { + ERR("!realpath \"%s\"", spath); + return PMEM2_E_ERRNO; + } + + char *basename = strrchr(rpath, '/'); + if (!basename || strcmp("dax", basename + 1) != 0) { + LOG(3, "%s path does not match device dax prefix path", rpath); + return PMEM2_E_INVALID_FILE_TYPE; + } + + *type = PMEM2_FTYPE_DEVDAX; + + return 0; +} diff --git a/src/pmdk/src/libpmem2/pmem2_utils_ndctl.c b/src/pmdk/src/libpmem2/pmem2_utils_ndctl.c new file mode 100644 index 000000000..fed823063 --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils_ndctl.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +#include +#include + +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "region_namespace_ndctl.h" +#include "source.h" + +/* + * pmem2_device_dax_alignment -- checks the alignment of a given + * dax device from given source + */ +int +pmem2_device_dax_alignment(const struct pmem2_source *src, size_t *alignment) +{ + int ret = 0; + size_t size = 0; + struct ndctl_ctx *ctx; + struct ndctl_namespace *ndns; + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + ret = pmem2_region_namespace(ctx, src, NULL, &ndns); + if (ret) { + LOG(1, "getting region and namespace failed"); + goto end; + } + + struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns); + + if (dax) + size = ndctl_dax_get_align(dax); + else + ret = PMEM2_E_INVALID_ALIGNMENT_FORMAT; + +end: + ndctl_unref(ctx); + + *alignment = size; + LOG(4, "device alignment %zu", *alignment); + + return ret; +} + +/* + * pmem2_device_dax_size -- checks the size of a given + * dax device from given source structure + */ +int +pmem2_device_dax_size(const struct pmem2_source *src, size_t *size) +{ + int ret = 0; + struct ndctl_ctx *ctx; + struct ndctl_namespace *ndns; + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + ret = pmem2_region_namespace(ctx, src, NULL, &ndns); + if (ret) { + LOG(1, "getting region and namespace failed"); + goto end; + } + + struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns); + + if (dax) { + *size = ndctl_dax_get_size(dax); + } else { + ret = PMEM2_E_DAX_REGION_NOT_FOUND; + ERR("Issue while reading Device Dax size - cannot " + "find dax region"); + } + +end: + ndctl_unref(ctx); + LOG(4, "device size %zu", *size); + + return ret; +} diff --git a/src/pmdk/src/libpmem2/pmem2_utils_none.c 
b/src/pmdk/src/libpmem2/pmem2_utils_none.c new file mode 100644 index 000000000..d24a9d45e --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils_none.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +#include + +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" +#include "source.h" + +/* + * pmem2_device_dax_alignment -- checks the alignment of a given + * dax device from given source + */ +int +pmem2_device_dax_alignment(const struct pmem2_source *src, size_t *alignment) +{ + ERR("Cannot read Device Dax alignment - ndctl is not available"); + + return PMEM2_E_NOSUPP; +} + +/* + * pmem2_device_dax_size -- checks the size of a given dax device from + * given source + */ +int +pmem2_device_dax_size(const struct pmem2_source *src, size_t *size) +{ + ERR("Cannot read Device Dax size - ndctl is not available"); + + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/pmem2_utils_other.c b/src/pmdk/src/libpmem2/pmem2_utils_other.c new file mode 100644 index 000000000..3df5e6f07 --- /dev/null +++ b/src/pmdk/src/libpmem2/pmem2_utils_other.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +#include +#include + +#include "libpmem2.h" +#include "out.h" +#include "pmem2_utils.h" + +#ifdef _WIN32 +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif + +int +pmem2_get_type_from_stat(const os_stat_t *st, enum pmem2_file_type *type) +{ + if (S_ISREG(st->st_mode)) { + *type = PMEM2_FTYPE_REG; + return 0; + } + + if (S_ISDIR(st->st_mode)) { + *type = PMEM2_FTYPE_DIR; + return 0; + } + + ERR("file type 0%o not supported", st->st_mode & S_IFMT); + return PMEM2_E_INVALID_FILE_TYPE; +} + +/* + * pmem2_device_dax_size -- checks the size of a given + * dax device from given source structure + */ +int +pmem2_device_dax_size(const struct pmem2_source *src, size_t *size) +{ + const char *err = + "BUG: pmem2_device_dax_size should never be called on this OS"; + ERR("%s", err); + ASSERTinfo(0, err); + return PMEM2_E_NOSUPP; +} + +/* + * pmem2_device_dax_alignment -- checks the alignment of a given + * dax device from given source + */ +int +pmem2_device_dax_alignment(const struct pmem2_source *src, size_t *alignment) +{ + const char *err = + "BUG: pmem2_device_dax_alignment should never be called on this OS"; + ERR("%s", err); + ASSERTinfo(0, err); + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/ppc64/.cstyleignore b/src/pmdk/src/libpmem2/ppc64/.cstyleignore new file mode 100644 index 000000000..27bb1279e --- /dev/null +++ b/src/pmdk/src/libpmem2/ppc64/.cstyleignore @@ -0,0 +1 @@ +init.c diff --git a/src/pmdk/src/libpmem2/ppc64/flags.inc b/src/pmdk/src/libpmem2/ppc64/flags.inc new file mode 100644 index 000000000..35148f57c --- /dev/null +++ b/src/pmdk/src/libpmem2/ppc64/flags.inc @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019, IBM Corporation + +# Add ppc64 directory to the gnu-make search path +vpath %.c $(TOP)/src/libpmem2/ppc64 +vpath %.h $(TOP)/src/libpmem2/ppc64 + +# Include the ppc64 directory in gcc include search path +CFLAGS += -Ippc64 diff --git a/src/pmdk/src/libpmem2/ppc64/init.c b/src/pmdk/src/libpmem2/ppc64/init.c new file mode 100644 index 000000000..9fef99d86 --- /dev/null +++ b/src/pmdk/src/libpmem2/ppc64/init.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019, IBM Corporation */ +/* Copyright 2019-2020, Intel Corporation */ + +#include 
+#include + +#include "out.h" +#include "pmem2_arch.h" +#include "util.h" + +/* + * Older assemblers versions do not support the latest versions of L, e.g. + * Binutils 2.34. + * Workaround this by using longs. + */ +#define __SYNC(l) ".long (0x7c0004AC | ((" #l ") << 21))" +#define __DCBF(ra, rb, l) ".long (0x7c0000AC | ((" #l ") << 21)" \ + " | ((" #ra ") << 16) | ((" #rb ") << 11))" + +static void +ppc_fence(void) +{ + LOG(15, NULL); + + /* + * Force a memory barrier to flush out all cache lines. + * Uses a heavyweight sync in order to guarantee the memory ordering + * even with a data cache flush. + * According to the POWER ISA 3.1, phwsync (aka. sync (L=4)) is treated + * as a hwsync by processors compatible with previous versions of the + * POWER ISA. + */ + asm volatile(__SYNC(4) : : : "memory"); +} + +static void +ppc_flush(const void *addr, size_t size) +{ + LOG(15, "addr %p size %zu", addr, size); + + uintptr_t uptr = (uintptr_t)addr; + uintptr_t end = uptr + size; + + /* round down the address */ + uptr &= ~(CACHELINE_SIZE - 1); + while (uptr < end) { + /* + * Flush the data cache block. + * According to the POWER ISA 3.1, dcbstps (aka. dcbf (L=6)) + * behaves as dcbf (L=0) on previous processors. + */ + asm volatile(__DCBF(0, %0, 6) : :"r"(uptr) : "memory"); + + uptr += CACHELINE_SIZE; + } +} + +void +pmem2_arch_init(struct pmem2_arch_info *info) +{ + LOG(3, "libpmem*: PPC64 support"); + + info->fence = ppc_fence; + info->flush = ppc_flush; +} diff --git a/src/pmdk/src/libpmem2/ppc64/sources.inc b/src/pmdk/src/libpmem2/ppc64/sources.inc new file mode 100644 index 000000000..601ae0318 --- /dev/null +++ b/src/pmdk/src/libpmem2/ppc64/sources.inc @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019, IBM Corporation + +LIBPMEM2_ARCH_SOURCE += init.c diff --git a/src/pmdk/src/libpmem2/ravl_interval.c b/src/pmdk/src/libpmem2/ravl_interval.c new file mode 100644 index 000000000..fe4debe7c --- /dev/null +++ b/src/pmdk/src/libpmem2/ravl_interval.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * ravl_interval.c -- ravl_interval implementation + */ + +#include "alloc.h" +#include "map.h" +#include "ravl_interval.h" +#include "pmem2_utils.h" +#include "sys_util.h" +#include "os_thread.h" +#include "ravl.h" + +/* + * ravl_interval - structure representing two points + * on the number line + */ +struct ravl_interval { + struct ravl *tree; + ravl_interval_min *get_min; + ravl_interval_max *get_max; +}; + +/* + * ravl_interval_node - structure holding min, max functions and address + */ +struct ravl_interval_node { + void *addr; + ravl_interval_min *get_min; + ravl_interval_max *get_max; +}; + +/* + * ravl_interval_compare -- compare intervals by its boundaries, + * no overlapping allowed + */ +static int +ravl_interval_compare(const void *lhs, const void *rhs) +{ + const struct ravl_interval_node *left = lhs; + const struct ravl_interval_node *right = rhs; + + if (left->get_max(left->addr) <= right->get_min(right->addr)) + return -1; + if (left->get_min(left->addr) >= right->get_max(right->addr)) + return 1; + return 0; +} + +/* + * ravl_interval_delete - finalize the ravl interval module + */ +void +ravl_interval_delete(struct ravl_interval *ri) +{ + ravl_delete(ri->tree); + ri->tree = NULL; + Free(ri); +} + +/* + * ravl_interval_new -- initialize the ravl interval module + */ +struct ravl_interval * +ravl_interval_new(ravl_interval_min *get_min, ravl_interval_max *get_max) +{ + int ret; + struct 
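ravl_interval_compare() above deliberately reports any overlap as equality, which is what lets ravl_find() with RAVL_PREDICATE_EQUAL answer "does anything overlap this range?". A standalone rendering of the rule with assumed half-open intervals (illustration only):

#include <stddef.h>
#include <stdio.h>

struct range { size_t min, max; };	/* stand-in for an interval node */

static int
cmp(struct range l, struct range r)
{
	if (l.max <= r.min)
		return -1;	/* strictly to the left */
	if (l.min >= r.max)
		return 1;	/* strictly to the right */
	return 0;		/* any overlap is reported as "equal" */
}

int
main(void)
{
	struct range a = {0, 10}, b = {10, 20}, c = {5, 15};

	/* prints "-1 1 0": touching ranges don't overlap, [0,10) and [5,15) do */
	printf("%d %d %d\n", cmp(a, b), cmp(b, a), cmp(a, c));
	return 0;
}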
ravl_interval *interval = pmem2_malloc(sizeof(*interval), &ret); + if (ret) + goto ret_null; + + interval->tree = ravl_new_sized(ravl_interval_compare, + sizeof(struct ravl_interval_node)); + if (!(interval->tree)) + goto free_alloc; + + interval->get_min = get_min; + interval->get_max = get_max; + + return interval; + +free_alloc: + Free(interval); +ret_null: + return NULL; +} + +/* + * ravl_interval_insert -- insert interval entry into the tree + */ +int +ravl_interval_insert(struct ravl_interval *ri, void *addr) +{ + struct ravl_interval_node rin; + rin.addr = addr; + rin.get_min = ri->get_min; + rin.get_max = ri->get_max; + + if (ravl_emplace_copy(ri->tree, &rin)) + return PMEM2_E_ERRNO; + + return 0; +} + +/* + * ravl_interval_remove -- remove interval entry from the tree + */ +int +ravl_interval_remove(struct ravl_interval *ri, struct ravl_interval_node *rin) +{ + struct ravl_node *node = ravl_find(ri->tree, rin, + RAVL_PREDICATE_EQUAL); + if (!node) + return PMEM2_E_MAPPING_NOT_FOUND; + + ravl_remove(ri->tree, node); + + return 0; +} + +/* + * ravl_interval_find_prior_or_eq -- find overlapping interval starting prior to + * the current one or at the same place + */ +static struct ravl_interval_node * +ravl_interval_find_prior_or_eq(struct ravl *tree, + struct ravl_interval_node *rin) +{ + struct ravl_node *node; + struct ravl_interval_node *cur; + + node = ravl_find(tree, rin, RAVL_PREDICATE_LESS_EQUAL); + if (!node) + return NULL; + + cur = ravl_data(node); + /* + * If the end of the found interval is below the searched boundary, then + * this is not our interval. + */ + if (cur->get_max(cur->addr) <= rin->get_min(rin->addr)) + return NULL; + + return cur; +} + +/* + * ravl_interval_find_later -- find overlapping interval starting later than + * the current one + */ +static struct ravl_interval_node * +ravl_interval_find_later(struct ravl *tree, struct ravl_interval_node *rin) +{ + struct ravl_node *node; + struct ravl_interval_node *cur; + + node = ravl_find(tree, rin, RAVL_PREDICATE_GREATER); + if (!node) + return NULL; + + cur = ravl_data(node); + + /* + * If the beginning of the found interval is above the end of + * the searched range, then this is not our interval. 
+ */ + if (cur->get_min(cur->addr) >= rin->get_max(rin->addr)) + return NULL; + + return cur; +} + +/* + * ravl_interval_find_equal -- find the interval with exact (min, max) range + */ +struct ravl_interval_node * +ravl_interval_find_equal(struct ravl_interval *ri, void *addr) +{ + struct ravl_interval_node range; + range.addr = addr; + range.get_min = ri->get_min; + range.get_max = ri->get_max; + + struct ravl_node *node; + node = ravl_find(ri->tree, &range, RAVL_PREDICATE_EQUAL); + if (!node) + return NULL; + + return ravl_data(node); +} + +/* + * ravl_interval_find -- find the earliest interval within (min, max) range + */ +struct ravl_interval_node * +ravl_interval_find(struct ravl_interval *ri, void *addr) +{ + struct ravl_interval_node range; + range.addr = addr; + range.get_min = ri->get_min; + range.get_max = ri->get_max; + + struct ravl_interval_node *cur; + cur = ravl_interval_find_prior_or_eq(ri->tree, &range); + if (!cur) + cur = ravl_interval_find_later(ri->tree, &range); + + return cur; +} + +/* + * ravl_interval_find_closest_prior -- find the closest interval + * neighbor prior to the current one + */ +struct ravl_interval_node * +ravl_interval_find_closest_prior(struct ravl_interval *ri, void *addr) +{ + struct ravl_interval_node range; + range.addr = addr; + range.get_min = ri->get_min; + range.get_max = ri->get_max; + + struct ravl_node *node; + node = ravl_find(ri->tree, &range, RAVL_PREDICATE_LESS); + if (!node) + return NULL; + + return ravl_data(node); +} + +/* + * ravl_interval_find_closest_later -- find the closest interval neighbor + * that occurs after the current one + */ +struct ravl_interval_node * +ravl_interval_find_closest_later(struct ravl_interval *ri, void *addr) +{ + struct ravl_interval_node range; + range.addr = addr; + range.get_min = ri->get_min; + range.get_max = ri->get_max; + + struct ravl_node *node; + node = ravl_find(ri->tree, &range, RAVL_PREDICATE_GREATER); + if (!node) + return NULL; + + return ravl_data(node); +} + +/* + * ravl_interval_data -- returns the data contained within an interval node + */ +void * +ravl_interval_data(struct ravl_interval_node *rin) +{ + return (void *)rin->addr; +} diff --git a/src/pmdk/src/libpmem2/ravl_interval.h b/src/pmdk/src/libpmem2/ravl_interval.h new file mode 100644 index 000000000..5dd8b592a --- /dev/null +++ b/src/pmdk/src/libpmem2/ravl_interval.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +/* + * ravl_interval.h -- internal definitions for ravl_interval + */ + +#ifndef RAVL_INTERVAL_H +#define RAVL_INTERVAL_H + +#include "libpmem2.h" +#include "os_thread.h" +#include "ravl.h" + +struct ravl_interval; +struct ravl_interval_node; + +typedef size_t ravl_interval_min(void *addr); +typedef size_t ravl_interval_max(void *addr); + +struct ravl_interval *ravl_interval_new(ravl_interval_min *min, + ravl_interval_min *max); +void ravl_interval_delete(struct ravl_interval *ri); +int ravl_interval_insert(struct ravl_interval *ri, void *addr); +int ravl_interval_remove(struct ravl_interval *ri, + struct ravl_interval_node *rin); +struct ravl_interval_node *ravl_interval_find_equal(struct ravl_interval *ri, + void *addr); +struct ravl_interval_node *ravl_interval_find(struct ravl_interval *ri, + void *addr); +struct ravl_interval_node *ravl_interval_find_closest_prior( + struct ravl_interval *ri, void *addr); +struct ravl_interval_node *ravl_interval_find_closest_later( + struct ravl_interval *ri, void *addr); +void *ravl_interval_data(struct 
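The get_min/get_max callbacks declared in the header above are how a caller teaches the tree about its own node layout. A hypothetical wiring over an invented type (sketch only; pmem2's map code uses the same shape):

#include <stddef.h>
#include "ravl_interval.h"

struct region { size_t start; size_t len; };	/* hypothetical user type */

static size_t
region_min(void *addr)
{
	return ((struct region *)addr)->start;
}

static size_t
region_max(void *addr)
{
	struct region *r = addr;
	return r->start + r->len;	/* exclusive end, as the comparator expects */
}

static struct ravl_interval *
region_tree_new(void)
{
	return ravl_interval_new(region_min, region_max);
}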
ravl_interval_node *rin); +#endif diff --git a/src/pmdk/src/libpmem2/region_namespace.h b/src/pmdk/src/libpmem2/region_namespace.h new file mode 100644 index 000000000..2957ea68b --- /dev/null +++ b/src/pmdk/src/libpmem2/region_namespace.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +/* + * region_namespace.h -- internal definitions for libpmem2 + * common region related functions + */ + +#ifndef PMDK_REGION_NAMESPACE_H +#define PMDK_REGION_NAMESPACE_H 1 + +#include "os.h" +#include "pmem2_utils.h" +#include "source.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int pmem2_get_region_id(const struct pmem2_source *src, unsigned *region_id); + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_REGION_NAMESPACE_H */ diff --git a/src/pmdk/src/libpmem2/region_namespace_ndctl.c b/src/pmdk/src/libpmem2/region_namespace_ndctl.c new file mode 100644 index 000000000..1383796a9 --- /dev/null +++ b/src/pmdk/src/libpmem2/region_namespace_ndctl.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * region_namespace_ndctl.c -- common ndctl functions + */ + +#include +#include +#include +#include + +#include "libpmem2.h" +#include "pmem2_utils.h" + +#include "region_namespace_ndctl.h" +#include "region_namespace.h" +#include "out.h" + +/* + * ndctl_match_devdax -- (internal) returns 0 if the devdax matches + * with the given file, 1 if it doesn't match, + * and a negative value in case of an error. + */ +static int +ndctl_match_devdax(dev_t st_rdev, const char *devname) +{ + LOG(3, "st_rdev %lu devname %s", st_rdev, devname); + + if (*devname == '\0') + return 1; + + char path[PATH_MAX]; + os_stat_t stat; + + if (util_snprintf(path, PATH_MAX, "/dev/%s", devname) < 0) { + ERR("!snprintf"); + return PMEM2_E_ERRNO; + } + + if (os_stat(path, &stat)) { + ERR("!stat %s", path); + return PMEM2_E_ERRNO; + } + + if (st_rdev != stat.st_rdev) { + LOG(10, "skipping not matching device: %s", path); + return 1; + } + + LOG(4, "found matching device: %s", path); + + return 0; +} + +#define BUFF_LENGTH 64 + +/* + * ndctl_match_fsdax -- (internal) returns 0 if the device matches + * with the given file, 1 if it doesn't match, + * and a negative value in case of an error. 
+ */
+static int
+ndctl_match_fsdax(dev_t st_dev, const char *devname)
+{
+	LOG(3, "st_dev %lu devname %s", st_dev, devname);
+
+	if (*devname == '\0')
+		return 1;
+
+	char path[PATH_MAX];
+	char dev_id[BUFF_LENGTH];
+
+	if (util_snprintf(path, PATH_MAX, "/sys/block/%s/dev", devname) < 0) {
+		ERR("!snprintf");
+		return PMEM2_E_ERRNO;
+	}
+
+	if (util_snprintf(dev_id, BUFF_LENGTH, "%d:%d",
+			major(st_dev), minor(st_dev)) < 0) {
+		ERR("!snprintf");
+		return PMEM2_E_ERRNO;
+	}
+
+	int fd = os_open(path, O_RDONLY);
+	if (fd < 0) {
+		ERR("!open \"%s\"", path);
+		return PMEM2_E_ERRNO;
+	}
+
+	char buff[BUFF_LENGTH];
+	ssize_t nread = read(fd, buff, BUFF_LENGTH);
+	if (nread < 0) {
+		ERR("!read");
+		int oerrno = errno; /* save the errno */
+		os_close(fd);
+		errno = oerrno;
+		return PMEM2_E_ERRNO;
+	}
+
+	os_close(fd);
+
+	if (nread == 0) {
+		ERR("%s is empty", path);
+		return PMEM2_E_INVALID_DEV_FORMAT;
+	}
+
+	if (buff[nread - 1] != '\n') {
+		ERR("%s doesn't end with a newline", path);
+		return PMEM2_E_INVALID_DEV_FORMAT;
+	}
+
+	buff[nread - 1] = '\0';
+
+	if (strcmp(buff, dev_id) != 0) {
+		LOG(10, "skipping not matching device: %s", path);
+		return 1;
+	}
+
+	LOG(4, "found matching device: %s", path);
+
+	return 0;
+}
+
+/*
+ * pmem2_region_namespace -- returns the region
+ * (and optionally the namespace)
+ * where the given file is located
+ */
+int
+pmem2_region_namespace(struct ndctl_ctx *ctx,
+		const struct pmem2_source *src,
+		struct ndctl_region **pregion,
+		struct ndctl_namespace **pndns)
+{
+	LOG(3, "ctx %p src %p pregion %p pnamespace %p",
+			ctx, src, pregion, pndns);
+
+	struct ndctl_bus *bus;
+	struct ndctl_region *region;
+	struct ndctl_namespace *ndns;
+
+	if (pregion)
+		*pregion = NULL;
+
+	if (pndns)
+		*pndns = NULL;
+
+	if (src->value.ftype == PMEM2_FTYPE_DIR) {
+		ERR("cannot check region or namespace of a directory");
+		return PMEM2_E_INVALID_FILE_TYPE;
+	}
+
+	FOREACH_BUS_REGION_NAMESPACE(ctx, bus, region, ndns) {
+		struct ndctl_btt *btt;
+		struct ndctl_dax *dax = NULL;
+		struct ndctl_pfn *pfn;
+		const char *devname;
+
+		if ((dax = ndctl_namespace_get_dax(ndns))) {
+			if (src->value.ftype == PMEM2_FTYPE_REG)
+				continue;
+			ASSERTeq(src->value.ftype, PMEM2_FTYPE_DEVDAX);
+
+			struct daxctl_region *dax_region;
+			dax_region = ndctl_dax_get_daxctl_region(dax);
+			if (!dax_region) {
+				ERR("!cannot find dax region");
+				return PMEM2_E_DAX_REGION_NOT_FOUND;
+			}
+			struct daxctl_dev *dev;
+			daxctl_dev_foreach(dax_region, dev) {
+				devname = daxctl_dev_get_devname(dev);
+				int ret = ndctl_match_devdax(src->value.st_rdev,
+						devname);
+				if (ret < 0)
+					return ret;
+
+				if (ret == 0) {
+					if (pregion)
+						*pregion = region;
+					if (pndns)
+						*pndns = ndns;
+
+					return 0;
+				}
+			}
+
+		} else {
+			if (src->value.ftype == PMEM2_FTYPE_DEVDAX)
+				continue;
+			ASSERTeq(src->value.ftype, PMEM2_FTYPE_REG);
+
+			if ((btt = ndctl_namespace_get_btt(ndns))) {
+				devname = ndctl_btt_get_block_device(btt);
+			} else if ((pfn = ndctl_namespace_get_pfn(ndns))) {
+				devname = ndctl_pfn_get_block_device(pfn);
+			} else {
+				devname =
+					ndctl_namespace_get_block_device(ndns);
+			}
+
+			int ret = ndctl_match_fsdax(src->value.st_dev, devname);
+			if (ret < 0)
+				return ret;
+
+			if (ret == 0) {
+				if (pregion)
+					*pregion = region;
+				if (pndns)
+					*pndns = ndns;
+
+				return 0;
+			}
+		}
+	}
+
+	LOG(10, "did not find any matching device");
+
+	return 0;
+}
+
+/*
+ * pmem2_get_region_id -- returns the region id
+ */
+int
+pmem2_get_region_id(const struct pmem2_source *src, unsigned *region_id)
+{
+	LOG(3, "src %p region_id %p", src, region_id);
+
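ndctl_match_fsdax() above boils down to one string comparison between a rendered "major:minor" pair and the sysfs "dev" attribute. A standalone rendering with hypothetical device numbers (illustration only):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* hypothetical fsdax stat() result: major 259, minor 3 */
	char dev_id[64];
	snprintf(dev_id, sizeof(dev_id), "%d:%d", 259, 3);

	char buff[] = "259:3\n";	/* what /sys/block/pmem0/dev might hold */
	buff[strlen(buff) - 1] = '\0';	/* strip the trailing newline */

	/* prints "match" */
	printf("%s\n", strcmp(buff, dev_id) == 0 ? "match" : "no match");
	return 0;
}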
+ struct ndctl_region *region; + struct ndctl_namespace *ndns; + struct ndctl_ctx *ctx; + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + int rv = pmem2_region_namespace(ctx, src, ®ion, &ndns); + if (rv) { + LOG(1, "getting region and namespace failed"); + goto end; + } + + if (!region) { + ERR("unknown region"); + rv = PMEM2_E_DAX_REGION_NOT_FOUND; + goto end; + } + + *region_id = ndctl_region_get_id(region); + +end: + ndctl_unref(ctx); + return rv; +} diff --git a/src/pmdk/src/libpmem2/region_namespace_ndctl.h b/src/pmdk/src/libpmem2/region_namespace_ndctl.h new file mode 100644 index 000000000..8c89a16d9 --- /dev/null +++ b/src/pmdk/src/libpmem2/region_namespace_ndctl.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * region_namespace_ndctl.h -- internal definitions for libpmem2 + * common ndctl functions + */ + +#ifndef PMDK_REGION_NAMESPACE_NDCTL_H +#define PMDK_REGION_NAMESPACE_NDCTL_H 1 + +#include "os.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FOREACH_BUS_REGION_NAMESPACE(ctx, bus, region, ndns) \ + ndctl_bus_foreach(ctx, bus) \ + ndctl_region_foreach(bus, region) \ + ndctl_namespace_foreach(region, ndns) + +int pmem2_region_namespace(struct ndctl_ctx *ctx, + const struct pmem2_source *src, + struct ndctl_region **pregion, + struct ndctl_namespace **pndns); + +#ifdef __cplusplus +} +#endif + +#endif /* PMDK_REGION_NAMESPACE_NDCTL_H */ diff --git a/src/pmdk/src/libpmem2/region_namespace_none.c b/src/pmdk/src/libpmem2/region_namespace_none.c new file mode 100644 index 000000000..9fd934a64 --- /dev/null +++ b/src/pmdk/src/libpmem2/region_namespace_none.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +#include "region_namespace.h" +#include "out.h" + +/* + * pmem2_get_region_id -- define behavior without ndctl + */ +int +pmem2_get_region_id(const struct pmem2_source *src, unsigned *region_id) +{ + LOG(3, "Cannot read region id - ndctl is not available"); + + return 0; +} diff --git a/src/pmdk/src/libpmem2/source.c b/src/pmdk/src/libpmem2/source.c new file mode 100644 index 000000000..380d21a8c --- /dev/null +++ b/src/pmdk/src/libpmem2/source.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +#include "source.h" +#include "alloc.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2.h" +#include "pmem2_utils.h" + +int +pmem2_source_from_anon(struct pmem2_source **src, size_t size) +{ + PMEM2_ERR_CLR(); + + int ret; + struct pmem2_source *srcp = pmem2_malloc(sizeof(**src), &ret); + if (ret) + return ret; + + srcp->type = PMEM2_SOURCE_ANON; + srcp->value.size = size; + + *src = srcp; + + return 0; +} + +int +pmem2_source_delete(struct pmem2_source **src) +{ + /* we do not need to clear err because this function cannot fail */ + + Free(*src); + *src = NULL; + return 0; +} diff --git a/src/pmdk/src/libpmem2/source.h b/src/pmdk/src/libpmem2/source.h new file mode 100644 index 000000000..38ae32110 --- /dev/null +++ b/src/pmdk/src/libpmem2/source.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +#ifndef PMEM2_SOURCE_H +#define PMEM2_SOURCE_H + +#include "os.h" + +#define INVALID_FD (-1) + +enum pmem2_file_type { + PMEM2_FTYPE_REG = 1, + PMEM2_FTYPE_DEVDAX = 2, + PMEM2_FTYPE_DIR = 3, +}; + +enum pmem2_source_type { + PMEM2_SOURCE_UNSPECIFIED, + PMEM2_SOURCE_ANON, + PMEM2_SOURCE_FD, + 
PMEM2_SOURCE_HANDLE, + + MAX_PMEM2_SOURCE_TYPE +}; + +struct pmem2_source { + /* a source file descriptor / handle for the designed mapping */ + enum pmem2_source_type type; + struct { + enum pmem2_file_type ftype; + union { + /* PMEM2_SOURCE_ANON */ + size_t size; +#ifdef _WIN32 + /* PMEM2_SOURCE_HANDLE */ + HANDLE handle; +#else + /* PMEM2_SOURCE_FD */ + struct { + int fd; + dev_t st_rdev; + dev_t st_dev; + }; +#endif + }; + } value; +}; + +#endif /* PMEM2_SOURCE_H */ diff --git a/src/pmdk/src/libpmem2/source_posix.c b/src/pmdk/src/libpmem2/source_posix.c new file mode 100644 index 000000000..92f618609 --- /dev/null +++ b/src/pmdk/src/libpmem2/source_posix.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +#include +#include +#include "os.h" +#include "source.h" +#include "alloc.h" +#include "libpmem2.h" +#include "out.h" +#include "pmem2.h" +#include "pmem2_utils.h" +#include "util.h" + +/* + * pmem2_source_from_fd -- create a new data source instance + */ +int +pmem2_source_from_fd(struct pmem2_source **src, int fd) +{ + PMEM2_ERR_CLR(); + + *src = NULL; + + if (fd < 0) + return PMEM2_E_INVALID_FILE_HANDLE; + + int flags = fcntl(fd, F_GETFL); + + if (flags == -1) { + ERR("!fcntl"); + if (errno == EBADF) + return PMEM2_E_INVALID_FILE_HANDLE; + return PMEM2_E_ERRNO; + } + + if ((flags & O_ACCMODE) == O_WRONLY) { + ERR("fd must be open with O_RDONLY or O_RDWR"); + return PMEM2_E_INVALID_FILE_HANDLE; + } + + /* + * XXX Files with FS_APPEND_FL attribute should also generate an error. + * If it is possible to filter them out pmem2_map_new would not generate + * -EACCESS trying to map them. Please update pmem2_map_new.3 when it + * will be fixed. + * For details please see the ioctl_iflags(2) manual page. 
+ */ + + os_stat_t st; + + if (os_fstat(fd, &st) < 0) { + ERR("!fstat"); + if (errno == EBADF) + return PMEM2_E_INVALID_FILE_HANDLE; + return PMEM2_E_ERRNO; + } + + enum pmem2_file_type ftype; + int ret = pmem2_get_type_from_stat(&st, &ftype); + if (ret != 0) + return ret; + + if (ftype == PMEM2_FTYPE_DIR) { + ERR("cannot set fd to directory in pmem2_source_from_fd"); + return PMEM2_E_INVALID_FILE_TYPE; + } + + struct pmem2_source *srcp = pmem2_malloc(sizeof(**src), &ret); + if (ret) + return ret; + + ASSERTne(srcp, NULL); + + srcp->type = PMEM2_SOURCE_FD; + srcp->value.ftype = ftype; + srcp->value.fd = fd; + srcp->value.st_rdev = st.st_rdev; + srcp->value.st_dev = st.st_dev; + *src = srcp; + + return 0; +} + +/* + * pmem2_source_size -- get a size of the file descriptor stored in the provided + * source + */ +int +pmem2_source_size(const struct pmem2_source *src, size_t *size) +{ + LOG(3, "type %d", src->type); + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + *size = src->value.size; + return 0; + } + + ASSERT(src->type == PMEM2_SOURCE_FD); + + os_stat_t st; + + if (os_fstat(src->value.fd, &st) < 0) { + ERR("!fstat"); + if (errno == EBADF) + return PMEM2_E_INVALID_FILE_HANDLE; + return PMEM2_E_ERRNO; + } + + switch (src->value.ftype) { + case PMEM2_FTYPE_DEVDAX: { + int ret = pmem2_device_dax_size(src, size); + if (ret) + return ret; + break; + } + case PMEM2_FTYPE_REG: + if (st.st_size < 0) { + ERR( + "kernel says size of regular file is negative (%ld)", + st.st_size); + return PMEM2_E_INVALID_FILE_HANDLE; + } + *size = (size_t)st.st_size; + break; + default: + FATAL( + "BUG: unhandled file type in pmem2_source_size"); + } + + LOG(4, "file length %zu", *size); + return 0; +} + +/* + * pmem2_source_alignment -- get alignment from the file descriptor stored in + * the provided source + */ +int +pmem2_source_alignment(const struct pmem2_source *src, size_t *alignment) +{ + LOG(3, "type %d", src->type); + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + *alignment = Pagesize; + return 0; + } + + ASSERT(src->type == PMEM2_SOURCE_FD); + + switch (src->value.ftype) { + case PMEM2_FTYPE_DEVDAX: { + int ret = pmem2_device_dax_alignment(src, alignment); + if (ret) + return ret; + break; + } + case PMEM2_FTYPE_REG: + *alignment = Pagesize; + break; + default: + FATAL( + "BUG: unhandled file type in pmem2_source_alignment"); + } + + if (!util_is_pow2(*alignment)) { + ERR("alignment (%zu) has to be a power of two", *alignment); + return PMEM2_E_INVALID_ALIGNMENT_VALUE; + } + + LOG(4, "alignment %zu", *alignment); + + return 0; +} + +/* + * pmem2_source_get_fd -- get file descriptor from provided source + */ +int +pmem2_source_get_fd(const struct pmem2_source *src, int *fd) +{ + LOG(3, "src type %d", src->type); + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_FD) { + *fd = src->value.fd; + } else { + ERR( + "File descriptor is not set, source type does not support fd"); + return PMEM2_E_FILE_DESCRIPTOR_NOT_SET; + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/source_windows.c b/src/pmdk/src/libpmem2/source_windows.c new file mode 100644 index 000000000..fb813273e --- /dev/null +++ b/src/pmdk/src/libpmem2/source_windows.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2019-2020, Intel Corporation */ + +/* + * source_windows.c -- windows specific pmem2_source implementation + */ + +#include +#include "config.h" +#include "libpmem2.h" +#include "config.h" +#include "out.h" +#include "pmem2_utils.h" +#include "source.h" +#include "util.h" + 
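Together, pmem2_source_size() and pmem2_source_alignment() above give a caller everything needed to size a mapping request. A minimal usage sketch against the public API (error handling kept short; illustration only):

#include <stdio.h>
#include <libpmem2.h>

static int
describe_source(int fd)
{
	struct pmem2_source *src;
	size_t size, alignment;

	if (pmem2_source_from_fd(&src, fd))
		return 1;
	if (pmem2_source_size(src, &size) ||
			pmem2_source_alignment(src, &alignment)) {
		pmem2_source_delete(&src);
		return 1;
	}

	printf("size %zu, required mapping alignment %zu\n", size, alignment);
	pmem2_source_delete(&src);
	return 0;
}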
+/*
+ * pmem2_source_from_fd -- create a new data source instance
+ */
+int
+pmem2_source_from_fd(struct pmem2_source **src, int fd)
+{
+	PMEM2_ERR_CLR();
+
+	*src = NULL;
+
+	if (fd < 0) {
+		ERR("Invalid file descriptor value %d", fd);
+		return PMEM2_E_INVALID_FILE_HANDLE;
+	}
+
+	HANDLE handle = (HANDLE)_get_osfhandle(fd);
+
+	if (handle == INVALID_HANDLE_VALUE) {
+		/*
+		 * _get_osfhandle aborts in an error case, so technically
+		 * this is dead code. But according to MSDN it sets
+		 * errno on failure, so we can return it in case some
+		 * "windows magic" happens and this function "accidentally"
+		 * does not abort.
+		 */
+		ERR("!_get_osfhandle");
+		if (errno == EBADF)
+			return PMEM2_E_INVALID_FILE_HANDLE;
+		return PMEM2_E_ERRNO;
+	}
+
+	return pmem2_source_from_handle(src, handle);
+}
+
+/*
+ * pmem2_win_stat -- retrieve information about handle
+ */
+static int
+pmem2_win_stat(HANDLE handle, BY_HANDLE_FILE_INFORMATION *info)
+{
+	if (!GetFileInformationByHandle(handle, info)) {
+		ERR("!!GetFileInformationByHandle");
+		if (GetLastError() == ERROR_INVALID_HANDLE)
+			return PMEM2_E_INVALID_FILE_HANDLE;
+		else
+			return pmem2_lasterror_to_err();
+	}
+
+	if (info->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+		ERR(
+			"using a directory doesn't make any sense in the context of pmem2");
+		return PMEM2_E_INVALID_FILE_TYPE;
+	}
+
+	return 0;
+}
+
+/*
+ * pmem2_source_from_handle -- create a new data source instance from a handle
+ */
+int
+pmem2_source_from_handle(struct pmem2_source **src, HANDLE handle)
+{
+	PMEM2_ERR_CLR();
+
+	*src = NULL;
+	int ret;
+
+	if (handle == INVALID_HANDLE_VALUE) {
+		ERR("Invalid file handle has been passed");
+		return PMEM2_E_INVALID_FILE_HANDLE;
+	}
+
+	BY_HANDLE_FILE_INFORMATION file_info;
+	ret = pmem2_win_stat(handle, &file_info);
+	if (ret)
+		return ret;
+
+	/* XXX: winapi doesn't provide an option to get open flags from a HANDLE */
+
+	struct pmem2_source *srcp = pmem2_malloc(sizeof(**src), &ret);
+
+	if (ret)
+		return ret;
+
+	ASSERTne(srcp, NULL);
+
+	srcp->type = PMEM2_SOURCE_HANDLE;
+	srcp->value.handle = handle;
+	*src = srcp;
+
+	return 0;
+}
+
+/*
+ * pmem2_source_size -- get the size of the file handle stored in the provided
+ * source
+ */
+int
+pmem2_source_size(const struct pmem2_source *src, size_t *size)
+{
+	LOG(3, "type %d", src->type);
+	PMEM2_ERR_CLR();
+
+	int ret;
+
+	if (src->type == PMEM2_SOURCE_ANON) {
+		*size = src->value.size;
+		return 0;
+	}
+	ASSERTeq(src->type, PMEM2_SOURCE_HANDLE);
+
+	BY_HANDLE_FILE_INFORMATION info;
+	ret = pmem2_win_stat(src->value.handle, &info);
+	if (ret)
+		return ret;
+
+	*size = ((size_t)info.nFileSizeHigh << 32) | info.nFileSizeLow;
+
+	LOG(4, "file length %zu", *size);
+
+	return 0;
+}
+
+/*
+ * pmem2_source_alignment -- get alignment from the system info
+ */
+int
+pmem2_source_alignment(const struct pmem2_source *src, size_t *alignment)
+{
+	LOG(3, "type %d", src->type);
+	PMEM2_ERR_CLR();
+
+	SYSTEM_INFO info;
+	GetSystemInfo(&info);
+
+	*alignment = (size_t)info.dwAllocationGranularity;
+
+	if (!util_is_pow2(*alignment)) {
+		ERR("alignment (%zu) has to be a power of two", *alignment);
+		return PMEM2_E_INVALID_ALIGNMENT_VALUE;
+	}
+
+	LOG(4, "alignment %zu", *alignment);
+
+	return 0;
+}
+
+/*
+ * pmem2_source_get_handle -- get the file handle from the provided source
+ */
+int
+pmem2_source_get_handle(const struct pmem2_source *src, HANDLE *h)
+{
+	LOG(3, "src type %d", src->type);
+	PMEM2_ERR_CLR();
+
+	if (src->type == PMEM2_SOURCE_HANDLE) {
+		*h = src->value.handle;
+	} else {
+		ERR(
+			"File handle is not set, source type does not 
support file handles"); + return PMEM2_E_FILE_HANDLE_NOT_SET; + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/usc_ndctl.c b/src/pmdk/src/libpmem2/usc_ndctl.c new file mode 100644 index 000000000..be63e879b --- /dev/null +++ b/src/pmdk/src/libpmem2/usc_ndctl.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * usc_ndctl.c -- pmem2 usc function for platforms using ndctl + */ +#include +#include +#include +#include +#include + +#include "config.h" +#include "file.h" +#include "libpmem2.h" +#include "os.h" +#include "out.h" +#include "pmem2_utils.h" +#include "source.h" +#include "region_namespace_ndctl.h" + +int +pmem2_source_device_usc(const struct pmem2_source *src, uint64_t *usc) +{ + LOG(3, "type %d, uid %p", src->type, usc); + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not support unsafe shutdown count"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_FD); + + struct ndctl_ctx *ctx; + int ret = PMEM2_E_NOSUPP; + *usc = 0; + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + struct ndctl_region *region = NULL; + ret = pmem2_region_namespace(ctx, src, ®ion, NULL); + + if (ret < 0) + goto err; + + ret = PMEM2_E_NOSUPP; + + if (region == NULL) { + ERR( + "Unsafe shutdown count is not supported for this source"); + goto err; + } + + struct ndctl_dimm *dimm; + + ndctl_dimm_foreach_in_region(region, dimm) { + long long dimm_usc = ndctl_dimm_get_dirty_shutdown(dimm); + if (dimm_usc < 0) { + ret = PMEM2_E_NOSUPP; + ERR( + "Unsafe shutdown count is not supported for this source"); + goto err; + } + *usc += (unsigned long long)dimm_usc; + } + + ret = 0; + +err: + ndctl_unref(ctx); + return ret; +} + +int +pmem2_source_device_id(const struct pmem2_source *src, char *id, size_t *len) +{ + PMEM2_ERR_CLR(); + + struct ndctl_ctx *ctx; + struct ndctl_dimm *dimm; + int ret; + struct ndctl_region *region = NULL; + const char *dimm_uid; + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not have device id"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_FD); + + errno = ndctl_new(&ctx) * (-1); + if (errno) { + ERR("!ndctl_new"); + return PMEM2_E_ERRNO; + } + + size_t len_base = 1; /* '\0' */ + + ret = pmem2_region_namespace(ctx, src, ®ion, NULL); + + if (ret < 0) + goto err; + + if (region == NULL) { + ret = PMEM2_E_NOSUPP; + goto err; + } + + if (id == NULL) { + ndctl_dimm_foreach_in_region(region, dimm) { + dimm_uid = ndctl_dimm_get_unique_id(dimm); + if (dimm_uid == NULL) { + ret = PMEM2_E_NOSUPP; + goto err; + } + len_base += strlen(ndctl_dimm_get_unique_id(dimm)); + } + goto end; + } + + size_t count = 1; + ndctl_dimm_foreach_in_region(region, dimm) { + dimm_uid = ndctl_dimm_get_unique_id(dimm); + if (dimm_uid == NULL) { + ret = PMEM2_E_NOSUPP; + goto err; + } + count += strlen(dimm_uid); + if (count > *len) { + ret = PMEM2_E_BUFFER_TOO_SMALL; + goto err; + } + strncat(id, dimm_uid, *len); + } + +end: + ret = 0; + if (id == NULL) + *len = len_base; +err: + ndctl_unref(ctx); + return ret; +} diff --git a/src/pmdk/src/libpmem2/usc_none.c b/src/pmdk/src/libpmem2/usc_none.c new file mode 100644 index 000000000..9093f2bca --- /dev/null +++ b/src/pmdk/src/libpmem2/usc_none.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * usc_none.c -- pmem2 usc function for non supported platform + */ + +#include "libpmem2.h" +#include "out.h" 
+ +int +pmem2_source_device_id(const struct pmem2_source *src, char *id, size_t *len) +{ + ERR("Cannot read device id - ndctl is not available"); + return PMEM2_E_NOSUPP; +} + +int +pmem2_source_device_usc(const struct pmem2_source *src, uint64_t *usc) +{ + ERR("Cannot read device usc - ndctl is not available"); + return PMEM2_E_NOSUPP; +} diff --git a/src/pmdk/src/libpmem2/usc_windows.c b/src/pmdk/src/libpmem2/usc_windows.c new file mode 100644 index 000000000..1a949bc2e --- /dev/null +++ b/src/pmdk/src/libpmem2/usc_windows.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * usc_windows.c -- pmem2 usc functions for Windows + */ + +#include "alloc.h" +#include "source.h" +#include "out.h" +#include "libpmem2.h" +#include "pmem2_utils.h" + +#define GUID_SIZE sizeof("XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX") +#define VOLUME_PATH_SIZE (sizeof("\\\\?\\Volume{}") + (GUID_SIZE - 2 /* \0 */)) + +/* + * get_volume_handle -- returns volume handle + */ +static int +get_volume_handle(HANDLE handle, HANDLE *volume_handle) +{ + wchar_t *volume; + wchar_t tmp[10]; + DWORD len = + GetFinalPathNameByHandleW(handle, tmp, 10, VOLUME_NAME_GUID); + + if (len == 0) { + ERR("!!GetFinalPathNameByHandleW"); + return pmem2_lasterror_to_err(); + } + + len *= sizeof(wchar_t); + + int err; + volume = pmem2_malloc(len, &err); + if (volume == NULL) + return err; + + if (!GetFinalPathNameByHandleW(handle, volume, len, + VOLUME_NAME_GUID)) { + Free(volume); + ERR("!!GetFinalPathNameByHandleW"); + return pmem2_lasterror_to_err(); + } + + ASSERTeq(volume[VOLUME_PATH_SIZE], '\\'); + volume[VOLUME_PATH_SIZE] = '\0'; + + *volume_handle = CreateFileW(volume, /* path to the file */ + /* request access to send ioctl to the file */ + FILE_READ_ATTRIBUTES, + /* do not block access to the file */ + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, /* security attributes */ + OPEN_EXISTING, /* open only if it exists */ + FILE_ATTRIBUTE_NORMAL, /* no attributes */ + NULL); /* used only for new files */ + + Free(volume); + if (*volume_handle == INVALID_HANDLE_VALUE) { + ERR("!!CreateFileW"); + return pmem2_lasterror_to_err(); + } + + return 0; +} + +static int +get_device_guid(HANDLE handle, GUID *guid) +{ + HANDLE vHandle; + int ret = get_volume_handle(handle, &vHandle); + if (ret) + return ret; + + STORAGE_DEVICE_NUMBER_EX sdn; + sdn.DeviceNumber = -1; + DWORD dwBytesReturned = 0; + if (!DeviceIoControl(vHandle, + IOCTL_STORAGE_GET_DEVICE_NUMBER_EX, + NULL, 0, + &sdn, sizeof(sdn), + &dwBytesReturned, NULL)) { + /* + * IOCTL_STORAGE_GET_DEVICE_NUMBER_EX is not supported + * on this server + */ + ERR( + "Getting device id (IOCTL_STORAGE_GET_DEVICE_NUMBER_EX) is not supported on this system"); + CloseHandle(vHandle); + return PMEM2_E_NOSUPP; + } + *guid = sdn.DeviceGuid; + + CloseHandle(vHandle); + return 0; +} + +int +pmem2_source_device_idW(const struct pmem2_source *src, wchar_t *id, + size_t *len) +{ + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not have device id"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_HANDLE); + + if (id == NULL) { + *len = GUID_SIZE * sizeof(*id); + return 0; + } + + if (*len < GUID_SIZE * sizeof(*id)) { + ERR("id buffer is too small"); + return PMEM2_E_BUFFER_TOO_SMALL; + } + + GUID guid; + int ret = get_device_guid(src->value.handle, &guid); + if (ret) + return ret; + + _snwprintf(id, GUID_SIZE, +
L"%08lX-%04hX-%04hX-%02hhX%02hhX-%02hhX%02hhX%02hhX%02hhX%02hhX%02hhX", + guid.Data1, guid.Data2, guid.Data3, guid.Data4[0], + guid.Data4[1], guid.Data4[2], guid.Data4[3], + guid.Data4[4], guid.Data4[5], guid.Data4[6], + guid.Data4[7]); + return 0; +} + +int +pmem2_source_device_idU(const struct pmem2_source *src, char *id, size_t *len) +{ + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not have device id"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_HANDLE); + + if (id == NULL) { + *len = GUID_SIZE * sizeof(*id); + return 0; + } + if (*len < GUID_SIZE * sizeof(*id)) { + ERR("id buffer is to small"); + return PMEM2_E_BUFFER_TOO_SMALL; + } + + GUID guid; + int ret = get_device_guid(src->value.handle, &guid); + if (ret) + return ret; + + if (util_snprintf(id, GUID_SIZE, + "%08lX-%04hX-%04hX-%02hhX%02hhX-%02hhX%02hhX%02hhX%02hhX%02hhX%02hhX", + guid.Data1, guid.Data2, guid.Data3, guid.Data4[0], + guid.Data4[1], guid.Data4[2], guid.Data4[3], + guid.Data4[4], guid.Data4[5], guid.Data4[6], + guid.Data4[7]) < 0) { + ERR("!snprintf"); + return PMEM2_E_ERRNO; + } + + return 0; +} + +int +pmem2_source_device_usc(const struct pmem2_source *src, uint64_t *usc) +{ + LOG(3, "cfg %p, usc %p", src, usc); + PMEM2_ERR_CLR(); + + if (src->type == PMEM2_SOURCE_ANON) { + ERR("Anonymous source does not support unsafe shutdown count"); + return PMEM2_E_NOSUPP; + } + + ASSERTeq(src->type, PMEM2_SOURCE_HANDLE); + + *usc = 0; + + HANDLE vHandle; + int err = get_volume_handle(src->value.handle, &vHandle); + if (vHandle == INVALID_HANDLE_VALUE) + return err; + + STORAGE_PROPERTY_QUERY prop; + DWORD dwSize; + prop.PropertyId = StorageDeviceUnsafeShutdownCount; + prop.QueryType = PropertyExistsQuery; + prop.AdditionalParameters[0] = 0; + STORAGE_DEVICE_UNSAFE_SHUTDOWN_COUNT ret; + + BOOL bResult = DeviceIoControl(vHandle, + IOCTL_STORAGE_QUERY_PROPERTY, + &prop, sizeof(prop), + &ret, sizeof(ret), + (LPDWORD)&dwSize, (LPOVERLAPPED)NULL); + + if (!bResult) { + ERR( + "Getting unsafe shutdown count is not supported on this system"); + CloseHandle(vHandle); + return PMEM2_E_NOSUPP; + } + prop.QueryType = PropertyStandardQuery; + bResult = DeviceIoControl(vHandle, + IOCTL_STORAGE_QUERY_PROPERTY, + &prop, sizeof(prop), + &ret, sizeof(ret), + (LPDWORD)&dwSize, (LPOVERLAPPED)NULL); + + CloseHandle(vHandle); + if (!bResult) { + ERR("!!DeviceIoControl"); + return pmem2_lasterror_to_err(); + } + + *usc = ret.UnsafeShutdownCount; + + return 0; +} diff --git a/src/pmdk/src/libpmem2/vm_reservation.c b/src/pmdk/src/libpmem2/vm_reservation.c new file mode 100644 index 000000000..7ea400387 --- /dev/null +++ b/src/pmdk/src/libpmem2/vm_reservation.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * vm_reservation.c -- implementation of virtual memory allocation API + */ + +#include "alloc.h" +#include "map.h" +#include "pmem2_utils.h" +#include "ravl_interval.h" +#include "sys_util.h" +#include "vm_reservation.h" + +#ifdef _WIN32 +#include +#endif + +struct pmem2_vm_reservation { + struct ravl_interval *itree; + void *addr; + size_t size; + os_rwlock_t lock; +}; + +int vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, + size_t *rsize); +int vm_reservation_release_memory(void *addr, size_t size); +struct ravl_interval *vm_reservation_get_interval_tree( + struct pmem2_vm_reservation *rsv); + +/* + * pmem2_vm_reservation_get_address -- get reservation address + */ +void * 
+pmem2_vm_reservation_get_address(struct pmem2_vm_reservation *rsv) +{ + LOG(3, "reservation %p", rsv); + /* we do not need to clear err because this function cannot fail */ + + return rsv->addr; +} + +/* + * pmem2_vm_reservation_get_size -- get reservation size + */ +size_t +pmem2_vm_reservation_get_size(struct pmem2_vm_reservation *rsv) +{ + LOG(3, "reservation %p", rsv); + /* we do not need to clear err because this function cannot fail */ + + return rsv->size; +} + +/* + * mapping_min -- return min boundary for mapping + */ +static size_t +mapping_min(void *addr) +{ + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr; +} + +/* + * mapping_max -- return max boundary for mapping + */ +static size_t +mapping_max(void *addr) +{ + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr + map->content_length; +} + +/* + * vm_reservation_init -- initialize the reservation structure + */ +static int +vm_reservation_init(struct pmem2_vm_reservation *rsv) +{ + util_rwlock_init(&rsv->lock); + + rsv->itree = ravl_interval_new(mapping_min, mapping_max); + if (!rsv->itree) + return -1; + + return 0; +} + +/* + * vm_reservation_fini -- finalize the reservation structure + */ +static void +vm_reservation_fini(struct pmem2_vm_reservation *rsv) +{ + ravl_interval_delete(rsv->itree); + util_rwlock_destroy(&rsv->lock); +} + +/* + * pmem2_vm_reservation_new -- creates new virtual memory reservation + */ +int +pmem2_vm_reservation_new(struct pmem2_vm_reservation **rsv_ptr, + void *addr, size_t size) +{ + PMEM2_ERR_CLR(); + *rsv_ptr = NULL; + + /* + * base address has to be aligned to the allocation granularity + * on Windows, and to page size otherwise + */ + if (addr && (unsigned long long)addr % Mmap_align) { + ERR("address %p is not a multiple of 0x%llx", addr, + Mmap_align); + return PMEM2_E_ADDRESS_UNALIGNED; + } + + /* the size must always be a multiple of the page size */ + if (size % Pagesize) { + ERR("reservation size %zu is not a multiple of %llu", + size, Pagesize); + return PMEM2_E_LENGTH_UNALIGNED; + } + + int ret; + struct pmem2_vm_reservation *rsv = pmem2_malloc( + sizeof(struct pmem2_vm_reservation), &ret); + if (ret) + return ret; + + /* initialize the ravl interval tree */ + ret = vm_reservation_init(rsv); + if (ret) + goto err_rsv_init; + + void *raddr = NULL; + size_t rsize = 0; + ret = vm_reservation_reserve_memory(addr, size, &raddr, &rsize); + if (ret) + goto err_reserve; + + rsv->addr = raddr; + rsv->size = rsize; + + *rsv_ptr = rsv; + + return 0; + +err_reserve: + vm_reservation_fini(rsv); +err_rsv_init: + Free(rsv); + return ret; +} + +/* + * pmem2_vm_reservation_delete -- deletes reservation bound to + * the pmem2_vm_reservation structure + */ +int +pmem2_vm_reservation_delete(struct pmem2_vm_reservation **rsv_ptr) +{ + PMEM2_ERR_CLR(); + + struct pmem2_vm_reservation *rsv = *rsv_ptr; + + /* check if reservation contains any mapping */ + if (vm_reservation_map_find(rsv, 0, rsv->size)) { + ERR("vm reservation %p isn't empty", rsv); + return PMEM2_E_VM_RESERVATION_NOT_EMPTY; + } + + int ret = vm_reservation_release_memory(rsv->addr, rsv->size); + if (ret) + return ret; + + vm_reservation_fini(rsv); + Free(rsv); + + return 0; +} + +/* + * vm_reservation_map_register_release -- register the mapping in the mappings + * tree of the reservation structure and release the previously acquired lock, + * regardless of success or failure.
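+ * The caller is expected to already hold the reservation lock, normally
+ * taken by vm_reservation_map_find_acquire(); an -EEXIST result from
+ * the interval tree is translated to PMEM2_E_MAPPING_EXISTS.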
+ */ +int +vm_reservation_map_register_release(struct pmem2_vm_reservation *rsv, + struct pmem2_map *map) +{ + int ret = ravl_interval_insert(rsv->itree, map); + util_rwlock_unlock(&rsv->lock); + + if (ret == -EEXIST) { + ERR( + "mapping at the given region of the reservation already exists"); + return PMEM2_E_MAPPING_EXISTS; + } + + return ret; +} + +/* + * vm_reservation_map_unregister_release -- unregister the mapping from the + * mappings tree of the reservation structure and release the previously + * acquired lock, regardless of success or failure. + */ +int +vm_reservation_map_unregister_release(struct pmem2_vm_reservation *rsv, + struct pmem2_map *map) +{ + int ret = 0; + struct ravl_interval_node *node; + + node = ravl_interval_find_equal(rsv->itree, map); + if (node) { + ret = ravl_interval_remove(rsv->itree, node); + } else { + ERR("Cannot find mapping %p in the reservation %p", + map, rsv); + ret = PMEM2_E_MAPPING_NOT_FOUND; + } + util_rwlock_unlock(&rsv->lock); + + return ret; +} + +/* + * vm_reservation_map_find -- find the earliest mapping overlapping + * with the (addr, addr+size) range + */ +struct pmem2_map * +vm_reservation_map_find(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) +{ + struct pmem2_map map; + map.addr = (char *)rsv->addr + reserv_offset; + map.content_length = len; + + struct ravl_interval_node *node; + + node = ravl_interval_find(rsv->itree, &map); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} + +/* + * vm_reservation_map_find_acquire -- find the earliest mapping overlapping + * with the (addr, addr+size) range. This function acquires a lock and keeps + * it until the next release operation. + */ +struct pmem2_map * +vm_reservation_map_find_acquire(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) +{ + struct pmem2_map map; + map.addr = (char *)rsv->addr + reserv_offset; + map.content_length = len; + + struct ravl_interval_node *node; + + util_rwlock_wrlock(&rsv->lock); + node = ravl_interval_find(rsv->itree, &map); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} + +/* + * vm_reservation_release -- releases the previously acquired lock + */ +void +vm_reservation_release(struct pmem2_vm_reservation *rsv) +{ + util_rwlock_unlock(&rsv->lock); +} + +/* + * vm_reservation_get_interval_tree -- get interval tree + */ +struct ravl_interval * +vm_reservation_get_interval_tree(struct pmem2_vm_reservation *rsv) +{ + return rsv->itree; +} diff --git a/src/pmdk/src/libpmem2/vm_reservation.h b/src/pmdk/src/libpmem2/vm_reservation.h new file mode 100644 index 000000000..68e4273f3 --- /dev/null +++ b/src/pmdk/src/libpmem2/vm_reservation.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, Intel Corporation */ + +/* + * vm_reservation.h -- internal definitions for virtual memory reservation + */ +#ifndef PMEM2_VM_RESERVATION_H +#define PMEM2_VM_RESERVATION_H + +#include "ravl_interval.h" + +struct pmem2_vm_reservation; + +int vm_reservation_map_register_release(struct pmem2_vm_reservation *rsv, + struct pmem2_map *map); +int vm_reservation_map_unregister_release(struct pmem2_vm_reservation *rsv, + struct pmem2_map *map); +struct pmem2_map *vm_reservation_map_find(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len); +struct pmem2_map *vm_reservation_map_find_acquire( + struct pmem2_vm_reservation *rsv, size_t reserv_offset, + size_t len); +void vm_reservation_release(struct pmem2_vm_reservation *rsv); +
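+/*
+ * The find_acquire/release functions above are lock-coupled:
+ * vm_reservation_map_find_acquire() takes the reservation lock and the
+ * matching *_register_release()/*_unregister_release() call drops it.
+ * A sketch of the intended call sequence (hypothetical caller, for
+ * illustration only):
+ *
+ *	if (vm_reservation_map_find_acquire(rsv, offset, length)) {
+ *		vm_reservation_release(rsv);	// range already mapped
+ *		return PMEM2_E_MAPPING_EXISTS;
+ *	}
+ *	// ... create the mapping `map` in the free range ...
+ *	return vm_reservation_map_register_release(rsv, map);
+ */
+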
+#endif /* PMEM2_VM_RESERVATION_H */ diff --git a/src/pmdk/src/libpmem2/vm_reservation_posix.c b/src/pmdk/src/libpmem2/vm_reservation_posix.c new file mode 100644 index 000000000..ecbe3a96d --- /dev/null +++ b/src/pmdk/src/libpmem2/vm_reservation_posix.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * vm_reservation_posix.c -- implementation of virtual memory + * reservation API (POSIX) + */ + +#include <sys/mman.h> + +#include "alloc.h" +#include "map.h" +#include "out.h" +#include "pmem2_utils.h" + +int vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, + size_t *rsize); +int vm_reservation_release_memory(void *addr, size_t size); + +/* + * vm_reservation_reserve_memory -- create a blank virtual memory mapping + */ +int +vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, + size_t *rsize) +{ + int map_flag = 0; + if (addr) { +/* + * The Linux kernel started exposing the MAP_FIXED_NOREPLACE flag in + * version 4.17, but even if the flag is not supported, we can imitate + * its behavior + */ +#ifdef MAP_FIXED_NOREPLACE + map_flag = MAP_FIXED_NOREPLACE; +#endif + } + + /* + * Create a dummy mapping to find an unused region of the given size. + * If the flag is supported and the requested region is occupied, + * mmap will fail with EEXIST. + */ + char *daddr = mmap(addr, size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | map_flag, -1, 0); + if (daddr == MAP_FAILED) { + if (errno == EEXIST) { + ERR("!mmap MAP_FIXED_NOREPLACE"); + return PMEM2_E_MAPPING_EXISTS; + } + ERR("!mmap MAP_ANONYMOUS"); + return PMEM2_E_ERRNO; + } + + /* + * When the requested address is not specified, any returned address + * is acceptable. If the kernel does not support the flag and the + * given addr is occupied, the kernel chooses a new addr randomly and + * returns it. We do not want that behavior, so we validate the result + * and fail when the addresses do not match.
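+ *
+ * E.g. (illustrative addresses) with addr = 0x10000 already occupied,
+ * an older kernel silently ignores the unknown flag and may return
+ * some other address such as 0x7f24b9c00000; daddr != addr then takes
+ * the munmap() + PMEM2_E_MAPPING_EXISTS path below, which matches what
+ * MAP_FIXED_NOREPLACE itself would have reported.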
+ */ + if (addr && daddr != addr) { + munmap(daddr, size); + ERR("mapping exists at the given address"); + return PMEM2_E_MAPPING_EXISTS; + } + + *raddr = daddr; + *rsize = roundup(size, Pagesize); + + return 0; +} + +/* + * vm_reservation_release_memory -- releases blank virtual memory mapping + */ +int +vm_reservation_release_memory(void *addr, size_t size) +{ + if (munmap(addr, size)) { + ERR("!munmap"); + return PMEM2_E_ERRNO; + } + + return 0; +} diff --git a/src/pmdk/src/libpmem2/vm_reservation_windows.c b/src/pmdk/src/libpmem2/vm_reservation_windows.c new file mode 100644 index 000000000..6e8c2a776 --- /dev/null +++ b/src/pmdk/src/libpmem2/vm_reservation_windows.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2020, Intel Corporation */ + +/* + * vm_reservation_windows.c -- implementation of virtual memory + * reservation API (Windows) + */ + +#include "alloc.h" +#include "map.h" +#include "os_thread.h" +#include "out.h" +#include "pmem2_utils.h" +#include "sys_util.h" + +int vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, + size_t *rsize); +int vm_reservation_release_memory(void *addr, size_t size); +struct pmem2_map *vm_reservation_map_find_closest_prior( + struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len); +struct pmem2_map *vm_reservation_map_find_closest_later( + struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len); +struct ravl_interval *vm_reservation_get_interval_tree( + struct pmem2_vm_reservation *rsv); + +/* + * vm_reservation_reserve_memory -- create a blank virtual memory mapping + */ +int +vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, + size_t *rsize) +{ + void *daddr = VirtualAlloc2(GetCurrentProcess(), + addr, + size, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, + NULL, + 0); + + if (daddr == NULL) { + ERR("!!VirtualAlloc2"); + DWORD ret_windows = GetLastError(); + if (ret_windows == ERROR_INVALID_ADDRESS) + return PMEM2_E_MAPPING_EXISTS; + else + return pmem2_lasterror_to_err(); + } + + *raddr = daddr; + *rsize = size; + + return 0; +} + +/* + * vm_reservation_release_memory -- releases blank virtual memory mapping + */ +int +vm_reservation_release_memory(void *addr, size_t size) +{ + int ret = VirtualFree(addr, + 0, + MEM_RELEASE); + if (!ret) { + ERR("!!VirtualFree"); + return pmem2_lasterror_to_err(); + } + + return 0; +} + +/* + * vm_reservation_map_find_closest_prior -- find closest mapping neighbor + * prior to the provided mapping + */ +struct pmem2_map * +vm_reservation_map_find_closest_prior(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) +{ + struct pmem2_map map; + + map.addr = (char *)pmem2_vm_reservation_get_address(rsv) + + reserv_offset; + map.content_length = len; + + struct ravl_interval_node *node; + struct ravl_interval *itree = vm_reservation_get_interval_tree(rsv); + node = ravl_interval_find_closest_prior(itree, &map); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} + +/* + * vm_reservation_map_find_closest_later -- find closest mapping neighbor later + * than the mapping provided + */ +struct pmem2_map * +vm_reservation_map_find_closest_later(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) +{ + struct pmem2_map map; + map.addr = (char *)pmem2_vm_reservation_get_address(rsv) + + reserv_offset; + map.content_length = len; + + struct ravl_interval_node *node; + struct ravl_interval *itree = vm_reservation_get_interval_tree(rsv); + node =
ravl_interval_find_closest_later(itree, &map); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} diff --git a/src/pmdk/src/libpmem2/x86_64/avx.h b/src/pmdk/src/libpmem2/x86_64/avx.h new file mode 100644 index 000000000..8726d3ddc --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/avx.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +#ifndef PMEM_AVX_H +#define PMEM_AVX_H + +#include <immintrin.h> +#include "util.h" + +/* + * avx_zeroupper -- _mm256_zeroupper wrapper + * + * _mm256_zeroupper clears upper parts of avx registers. + * + * It's needed for 2 reasons: + * - it improves performance of non-avx code after avx + * - it works around a problem discovered by Valgrind + * + * In optimized builds gcc inserts VZEROUPPER automatically before + * calling non-avx code (or at the end of the function). But in debug + * builds it doesn't, so if we don't do this by ourselves, then when + * someone memcpy'ies uninitialized data, Valgrind complains whenever + * someone reads those registers. + * + * One notable example is the loader, which tries to detect whether it + * needs to save whole ymm registers by looking at their current + * (possibly uninitialized) value. + * + * Valgrind complains like this: + * Conditional jump or move depends on uninitialised value(s) + * at 0x4015CC9: _dl_runtime_resolve_avx_slow + * (in /lib/x86_64-linux-gnu/ld-2.24.so) + * by 0x10B531: test_realloc_api (obj_basic_integration.c:185) + * by 0x10F1EE: main (obj_basic_integration.c:594) + * + * Note: We have to be careful not to read AVX registers after this + * intrinsic, because of this gcc bug: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735 + */ +static force_inline void +avx_zeroupper(void) +{ + _mm256_zeroupper(); +} + +static force_inline __m128i +m256_get16b(__m256i ymm) +{ + return _mm256_extractf128_si256(ymm, 0); +} + +#ifdef _MSC_VER +static force_inline uint64_t +m256_get8b(__m256i ymm) +{ + return (uint64_t)_mm_extract_epi64(m256_get16b(ymm), 0); +} +static force_inline uint32_t +m256_get4b(__m256i ymm) +{ + return (uint32_t)m256_get8b(ymm); +} +static force_inline uint16_t +m256_get2b(__m256i ymm) +{ + return (uint16_t)m256_get8b(ymm); +} +#else +static force_inline uint64_t +m256_get8b(__m256i ymm) +{ + return (uint64_t)_mm256_extract_epi64(ymm, 0); +} +static force_inline uint32_t +m256_get4b(__m256i ymm) +{ + return (uint32_t)_mm256_extract_epi32(ymm, 0); +} +static force_inline uint16_t +m256_get2b(__m256i ymm) +{ + return (uint16_t)_mm256_extract_epi16(ymm, 0); +} +#endif + +#endif diff --git a/src/pmdk/src/libpmem2/x86_64/cpu.c b/src/pmdk/src/libpmem2/x86_64/cpu.c new file mode 100644 index 000000000..7c368c5d3 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/cpu.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * cpu.c -- CPU features detection + */ + +/* + * Reference: + * http://www.intel.com/content/www/us/en/processors/ + * architectures-software-developer-manuals.html + * + * https://support.amd.com/TechDocs/24594.pdf + */ + +#include <string.h> + +#include "out.h" +#include "cpu.h" + +#define EAX_IDX 0 +#define EBX_IDX 1 +#define ECX_IDX 2 +#define EDX_IDX 3 + +#if defined(__x86_64__) || defined(__amd64__) + +#include <cpuid.h> + +static inline void +cpuid(unsigned func, unsigned subfunc, unsigned cpuinfo[4]) +{ + __cpuid_count(func, subfunc, cpuinfo[EAX_IDX], cpuinfo[EBX_IDX], + cpuinfo[ECX_IDX], cpuinfo[EDX_IDX]); +} + +#elif defined(_M_X64) ||
defined(_M_AMD64) + +#include <intrin.h> + +static inline void +cpuid(unsigned func, unsigned subfunc, unsigned cpuinfo[4]) +{ + __cpuidex(cpuinfo, func, subfunc); +} + +#else + +#error unsupported compiler + +#endif + +#ifndef bit_CLFLUSH +#define bit_CLFLUSH (1 << 19) +#endif + +#ifndef bit_CLFLUSHOPT +#define bit_CLFLUSHOPT (1 << 23) +#endif + +#ifndef bit_CLWB +#define bit_CLWB (1 << 24) +#endif + +#ifndef bit_AVX +#define bit_AVX (1 << 28) +#endif + +#ifndef bit_AVX512F +#define bit_AVX512F (1 << 16) +#endif + +/* + * is_cpu_feature_present -- (internal) checks if CPU feature is supported + */ +static int +is_cpu_feature_present(unsigned func, unsigned reg, unsigned bit) +{ + unsigned cpuinfo[4] = { 0 }; + + /* check CPUID level first */ + cpuid(0x0, 0x0, cpuinfo); + if (cpuinfo[EAX_IDX] < func) + return 0; + + cpuid(func, 0x0, cpuinfo); + return (cpuinfo[reg] & bit) != 0; +} + +/* + * is_cpu_genuine_intel -- checks for genuine Intel CPU + */ +int +is_cpu_genuine_intel(void) +{ + unsigned cpuinfo[4] = { 0 }; + + union { + char name[0x20]; + unsigned cpuinfo[3]; + } vendor; + + memset(&vendor, 0, sizeof(vendor)); + + cpuid(0x0, 0x0, cpuinfo); + + vendor.cpuinfo[0] = cpuinfo[EBX_IDX]; + vendor.cpuinfo[1] = cpuinfo[EDX_IDX]; + vendor.cpuinfo[2] = cpuinfo[ECX_IDX]; + + LOG(4, "CPU vendor: %s", vendor.name); + return (strncmp(vendor.name, "GenuineIntel", + sizeof(vendor.name))) == 0; +} + +/* + * is_cpu_clflush_present -- checks if CLFLUSH instruction is supported + */ +int +is_cpu_clflush_present(void) +{ + int ret = is_cpu_feature_present(0x1, EDX_IDX, bit_CLFLUSH); + LOG(4, "CLFLUSH %ssupported", ret == 0 ? "not " : ""); + + return ret; +} + +/* + * is_cpu_clflushopt_present -- checks if CLFLUSHOPT instruction is supported + */ +int +is_cpu_clflushopt_present(void) +{ + int ret = is_cpu_feature_present(0x7, EBX_IDX, bit_CLFLUSHOPT); + LOG(4, "CLFLUSHOPT %ssupported", ret == 0 ? "not " : ""); + + return ret; +} + +/* + * is_cpu_clwb_present -- checks if CLWB instruction is supported + */ +int +is_cpu_clwb_present(void) +{ + int ret = is_cpu_feature_present(0x7, EBX_IDX, bit_CLWB); + LOG(4, "CLWB %ssupported", ret == 0 ? "not " : ""); + + return ret; +} + +/* + * is_cpu_avx_present -- checks if AVX instructions are supported + */ +int +is_cpu_avx_present(void) +{ + int ret = is_cpu_feature_present(0x1, ECX_IDX, bit_AVX); + LOG(4, "AVX %ssupported", ret == 0 ? "not " : ""); + + return ret; +} + +/* + * is_cpu_avx512f_present -- checks if AVX-512f instructions are supported + */ +int +is_cpu_avx512f_present(void) +{ + int ret = is_cpu_feature_present(0x7, EBX_IDX, bit_AVX512F); + LOG(4, "AVX512f %ssupported", ret == 0 ?
"not " : ""); + + return ret; +} diff --git a/src/pmdk/src/libpmem2/x86_64/cpu.h b/src/pmdk/src/libpmem2/x86_64/cpu.h new file mode 100644 index 000000000..f02df5c30 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/cpu.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +#ifndef PMDK_CPU_H +#define PMDK_CPU_H 1 + +/* + * cpu.h -- definitions for "cpu" module + */ + +int is_cpu_genuine_intel(void); +int is_cpu_clflush_present(void); +int is_cpu_clflushopt_present(void); +int is_cpu_clwb_present(void); +int is_cpu_avx_present(void); +int is_cpu_avx512f_present(void); + +#endif diff --git a/src/pmdk/src/libpmem2/x86_64/flags.inc b/src/pmdk/src/libpmem2/x86_64/flags.inc new file mode 100644 index 000000000..33b7d319a --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/flags.inc @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2020, Intel Corporation + +# +# src/libpmem2/x86_64/flags.inc -- flags for libpmem2/x86_64 +# + +vpath %.c $(TOP)/src/libpmem2/x86_64 +vpath %.h $(TOP)/src/libpmem2/x86_64 +vpath %.c $(TOP)/src/libpmem2/x86_64/memcpy +vpath %.c $(TOP)/src/libpmem2/x86_64/memset + +$(objdir)/memcpy_nt_avx512f.o: CFLAGS += -mavx512f +$(objdir)/memset_nt_avx512f.o: CFLAGS += -mavx512f + +$(objdir)/memcpy_nt_avx.o: CFLAGS += -mavx +$(objdir)/memset_nt_avx.o: CFLAGS += -mavx + +$(objdir)/memcpy_t_avx512f.o: CFLAGS += -mavx512f +$(objdir)/memset_t_avx512f.o: CFLAGS += -mavx512f + +$(objdir)/memcpy_t_avx.o: CFLAGS += -mavx +$(objdir)/memset_t_avx.o: CFLAGS += -mavx + +CFLAGS += -I$(TOP)/src/libpmem2/x86_64 + +ifeq ($(AVX512F_AVAILABLE), y) +CFLAGS += -DAVX512F_AVAILABLE=1 +else +CFLAGS += -DAVX512F_AVAILABLE=0 +endif diff --git a/src/pmdk/src/libpmem2/x86_64/flush.h b/src/pmdk/src/libpmem2/x86_64/flush.h new file mode 100644 index 000000000..f25450ba1 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/flush.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +#ifndef X86_64_FLUSH_H +#define X86_64_FLUSH_H + +#include +#include +#include +#include "util.h" +#include "valgrind_internal.h" + +#define FLUSH_ALIGN ((uintptr_t)64) + +static force_inline void +pmem_clflush(const void *addr) +{ + _mm_clflush(addr); +} + +#ifdef _MSC_VER +static force_inline void +pmem_clflushopt(const void *addr) +{ + _mm_clflushopt(addr); +} + +static force_inline void +pmem_clwb(const void *addr) +{ + _mm_clwb(addr); +} +#else +/* + * The x86 memory instructions are new enough that the compiler + * intrinsic functions are not always available. The intrinsic + * functions are defined here in terms of asm statements for now. + */ +static force_inline void +pmem_clflushopt(const void *addr) +{ + asm volatile(".byte 0x66; clflush %0" : "+m" \ + (*(volatile char *)(addr))); +} +static force_inline void +pmem_clwb(const void *addr) +{ + asm volatile(".byte 0x66; xsaveopt %0" : "+m" \ + (*(volatile char *)(addr))); +} +#endif /* _MSC_VER */ + +typedef void flush_fn(const void *, size_t); + +/* + * flush_clflush_nolog -- flush the CPU cache, using clflush + */ +static force_inline void +flush_clflush_nolog(const void *addr, size_t len) +{ + uintptr_t uptr; + + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. 
+ */ + for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) + _mm_clflush((char *)uptr); +} + +/* + * flush_clflushopt_nolog -- flush the CPU cache, using clflushopt + */ +static force_inline void +flush_clflushopt_nolog(const void *addr, size_t len) +{ + uintptr_t uptr; + + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. + */ + for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) { + pmem_clflushopt((char *)uptr); + } +} + +/* + * flush_clwb_nolog -- flush the CPU cache, using clwb + */ +static force_inline void +flush_clwb_nolog(const void *addr, size_t len) +{ + uintptr_t uptr; + + /* + * Loop through cache-line-size (typically 64B) aligned chunks + * covering the given range. + */ + for (uptr = (uintptr_t)addr & ~(FLUSH_ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += FLUSH_ALIGN) { + pmem_clwb((char *)uptr); + } +} + +/* + * flush64b_empty -- (internal) do not flush the CPU cache + */ +static force_inline void +flush64b_empty(const void *addr) +{ + /* NOP, but tell pmemcheck about it */ + VALGRIND_DO_FLUSH(addr, 64); +} + +#endif diff --git a/src/pmdk/src/libpmem2/x86_64/init.c b/src/pmdk/src/libpmem2/x86_64/init.c new file mode 100644 index 000000000..d0e383b0a --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/init.c @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +#include <stdlib.h> +#include <string.h> + +#include "auto_flush.h" +#include "cpu.h" +#include "flush.h" +#include "memcpy_memset.h" +#include "os.h" +#include "out.h" +#include "pmem2_arch.h" +#include "valgrind_internal.h" + +#define MOVNT_THRESHOLD 256 + +size_t Movnt_threshold = MOVNT_THRESHOLD; + +/* + * memory_barrier -- (internal) issue the fence instruction + */ +static void +memory_barrier(void) +{ + LOG(15, NULL); + _mm_sfence(); /* ensure CLWB or CLFLUSHOPT completes */ +} + +/* + * flush_clflush -- (internal) flush the CPU cache, using clflush + */ +static void +flush_clflush(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + flush_clflush_nolog(addr, len); +} + +/* + * flush_clflushopt -- (internal) flush the CPU cache, using clflushopt + */ +static void +flush_clflushopt(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + flush_clflushopt_nolog(addr, len); +} + +/* + * flush_clwb -- (internal) flush the CPU cache, using clwb + */ +static void +flush_clwb(const void *addr, size_t len) +{ + LOG(15, "addr %p len %zu", addr, len); + + flush_clwb_nolog(addr, len); +} + +#if SSE2_AVAILABLE || AVX_AVAILABLE || AVX512F_AVAILABLE +#define PMEM2_F_MEM_MOVNT (PMEM2_F_MEM_WC | PMEM2_F_MEM_NONTEMPORAL) +#define PMEM2_F_MEM_MOV (PMEM2_F_MEM_WB | PMEM2_F_MEM_TEMPORAL) + +#define MEMCPY_TEMPLATE(isa, flush, perfbarrier) \ +static void *\ +memmove_nodrain_##isa##_##flush##perfbarrier(void *dest, const void *src, \ + size_t len, unsigned flags, flush_func flushf)\ +{\ + if (len == 0 || src == dest)\ + return dest;\ +\ + if (flags & PMEM2_F_MEM_NOFLUSH) \ + memmove_mov_##isa##_noflush(dest, src, len); \ + else if (flags & PMEM2_F_MEM_MOVNT)\ + memmove_movnt_##isa ##_##flush##perfbarrier(dest, src, len);\ + else if (flags & PMEM2_F_MEM_MOV)\ + memmove_mov_##isa##_##flush(dest, src, len);\ + else if (len < Movnt_threshold)\ + memmove_mov_##isa##_##flush(dest, src, len);\ + else\ + memmove_movnt_##isa##_##flush##perfbarrier(dest, src, len);\ +\ + return dest;\ +} + +#define
MEMCPY_TEMPLATE_EADR(isa, perfbarrier) \ +static void *\ +memmove_nodrain_##isa##_eadr##perfbarrier(void *dest, const void *src, \ + size_t len, unsigned flags, flush_func flushf)\ +{\ + if (len == 0 || src == dest)\ + return dest;\ +\ + if (flags & PMEM2_F_MEM_NOFLUSH)\ + memmove_mov_##isa##_noflush(dest, src, len);\ + else if (flags & PMEM2_F_MEM_NONTEMPORAL)\ + memmove_movnt_##isa##_empty##perfbarrier(dest, src, len);\ + else\ + memmove_mov_##isa##_empty(dest, src, len);\ +\ + return dest;\ +} + +#define MEMSET_TEMPLATE(isa, flush, perfbarrier)\ +static void *\ +memset_nodrain_##isa##_##flush##perfbarrier(void *dest, int c, size_t len, \ + unsigned flags, flush_func flushf)\ +{\ + if (len == 0)\ + return dest;\ +\ + if (flags & PMEM2_F_MEM_NOFLUSH) \ + memset_mov_##isa##_noflush(dest, c, len); \ + else if (flags & PMEM2_F_MEM_MOVNT)\ + memset_movnt_##isa##_##flush##perfbarrier(dest, c, len);\ + else if (flags & PMEM2_F_MEM_MOV)\ + memset_mov_##isa##_##flush(dest, c, len);\ + else if (len < Movnt_threshold)\ + memset_mov_##isa##_##flush(dest, c, len);\ + else\ + memset_movnt_##isa##_##flush##perfbarrier(dest, c, len);\ +\ + return dest;\ +} + +#define MEMSET_TEMPLATE_EADR(isa, perfbarrier) \ +static void *\ +memset_nodrain_##isa##_eadr##perfbarrier(void *dest, int c, size_t len, \ + unsigned flags, flush_func flushf)\ +{\ + if (len == 0)\ + return dest;\ +\ + if (flags & PMEM2_F_MEM_NOFLUSH)\ + memset_mov_##isa##_noflush(dest, c, len);\ + else if (flags & PMEM2_F_MEM_NONTEMPORAL)\ + memset_movnt_##isa##_empty##perfbarrier(dest, c, len);\ + else\ + memset_mov_##isa##_empty(dest, c, len);\ +\ + return dest;\ +} +#endif + +#if SSE2_AVAILABLE +MEMCPY_TEMPLATE(sse2, clflush, _nobarrier) +MEMCPY_TEMPLATE(sse2, clflushopt, _nobarrier) +MEMCPY_TEMPLATE(sse2, clwb, _nobarrier) +MEMCPY_TEMPLATE_EADR(sse2, _nobarrier) + +MEMSET_TEMPLATE(sse2, clflush, _nobarrier) +MEMSET_TEMPLATE(sse2, clflushopt, _nobarrier) +MEMSET_TEMPLATE(sse2, clwb, _nobarrier) +MEMSET_TEMPLATE_EADR(sse2, _nobarrier) + +MEMCPY_TEMPLATE(sse2, clflush, _wcbarrier) +MEMCPY_TEMPLATE(sse2, clflushopt, _wcbarrier) +MEMCPY_TEMPLATE(sse2, clwb, _wcbarrier) +MEMCPY_TEMPLATE_EADR(sse2, _wcbarrier) + +MEMSET_TEMPLATE(sse2, clflush, _wcbarrier) +MEMSET_TEMPLATE(sse2, clflushopt, _wcbarrier) +MEMSET_TEMPLATE(sse2, clwb, _wcbarrier) +MEMSET_TEMPLATE_EADR(sse2, _wcbarrier) +#endif + +#if AVX_AVAILABLE +MEMCPY_TEMPLATE(avx, clflush, _nobarrier) +MEMCPY_TEMPLATE(avx, clflushopt, _nobarrier) +MEMCPY_TEMPLATE(avx, clwb, _nobarrier) +MEMCPY_TEMPLATE_EADR(avx, _nobarrier) + +MEMSET_TEMPLATE(avx, clflush, _nobarrier) +MEMSET_TEMPLATE(avx, clflushopt, _nobarrier) +MEMSET_TEMPLATE(avx, clwb, _nobarrier) +MEMSET_TEMPLATE_EADR(avx, _nobarrier) + +MEMCPY_TEMPLATE(avx, clflush, _wcbarrier) +MEMCPY_TEMPLATE(avx, clflushopt, _wcbarrier) +MEMCPY_TEMPLATE(avx, clwb, _wcbarrier) +MEMCPY_TEMPLATE_EADR(avx, _wcbarrier) + +MEMSET_TEMPLATE(avx, clflush, _wcbarrier) +MEMSET_TEMPLATE(avx, clflushopt, _wcbarrier) +MEMSET_TEMPLATE(avx, clwb, _wcbarrier) +MEMSET_TEMPLATE_EADR(avx, _wcbarrier) +#endif + +#if AVX512F_AVAILABLE +MEMCPY_TEMPLATE(avx512f, clflush, /* cstyle wa */) +MEMCPY_TEMPLATE(avx512f, clflushopt, /* */) +MEMCPY_TEMPLATE(avx512f, clwb, /* */) +MEMCPY_TEMPLATE_EADR(avx512f, /* */) + +MEMSET_TEMPLATE(avx512f, clflush, /* */) +MEMSET_TEMPLATE(avx512f, clflushopt, /* */) +MEMSET_TEMPLATE(avx512f, clwb, /* */) +MEMSET_TEMPLATE_EADR(avx512f, /* */) +#endif + +enum memcpy_impl { + MEMCPY_INVALID, + MEMCPY_SSE2, + MEMCPY_AVX, + MEMCPY_AVX512F +}; + +/* + * 
use_sse2_memcpy_memset -- (internal) SSE2 detected, use it if possible + */ +static void +use_sse2_memcpy_memset(struct pmem2_arch_info *info, enum memcpy_impl *impl, + int wc_workaround) +{ +#if SSE2_AVAILABLE + *impl = MEMCPY_SSE2; + if (wc_workaround) { + info->memmove_nodrain_eadr = + memmove_nodrain_sse2_eadr_wcbarrier; + if (info->flush == flush_clflush) + info->memmove_nodrain = + memmove_nodrain_sse2_clflush_wcbarrier; + else if (info->flush == flush_clflushopt) + info->memmove_nodrain = + memmove_nodrain_sse2_clflushopt_wcbarrier; + else if (info->flush == flush_clwb) + info->memmove_nodrain = + memmove_nodrain_sse2_clwb_wcbarrier; + else + ASSERT(0); + + info->memset_nodrain_eadr = memset_nodrain_sse2_eadr_wcbarrier; + if (info->flush == flush_clflush) + info->memset_nodrain = + memset_nodrain_sse2_clflush_wcbarrier; + else if (info->flush == flush_clflushopt) + info->memset_nodrain = + memset_nodrain_sse2_clflushopt_wcbarrier; + else if (info->flush == flush_clwb) + info->memset_nodrain = + memset_nodrain_sse2_clwb_wcbarrier; + else + ASSERT(0); + } else { + info->memmove_nodrain_eadr = + memmove_nodrain_sse2_eadr_nobarrier; + if (info->flush == flush_clflush) + info->memmove_nodrain = + memmove_nodrain_sse2_clflush_nobarrier; + else if (info->flush == flush_clflushopt) + info->memmove_nodrain = + memmove_nodrain_sse2_clflushopt_nobarrier; + else if (info->flush == flush_clwb) + info->memmove_nodrain = + memmove_nodrain_sse2_clwb_nobarrier; + else + ASSERT(0); + + info->memset_nodrain_eadr = + memset_nodrain_sse2_eadr_nobarrier; + if (info->flush == flush_clflush) + info->memset_nodrain = + memset_nodrain_sse2_clflush_nobarrier; + else if (info->flush == flush_clflushopt) + info->memset_nodrain = + memset_nodrain_sse2_clflushopt_nobarrier; + else if (info->flush == flush_clwb) + info->memset_nodrain = + memset_nodrain_sse2_clwb_nobarrier; + else + ASSERT(0); + } + +#else + LOG(3, "sse2 disabled at build time"); +#endif + +} + +/* + * use_avx_memcpy_memset -- (internal) AVX detected, use it if possible + */ +static void +use_avx_memcpy_memset(struct pmem2_arch_info *info, enum memcpy_impl *impl, + int wc_workaround) +{ +#if AVX_AVAILABLE + LOG(3, "avx supported"); + + char *e = os_getenv("PMEM_AVX"); + if (e != NULL && strcmp(e, "0") == 0) { + LOG(3, "PMEM_AVX set to 0"); + return; + } + + LOG(3, "PMEM_AVX enabled"); + *impl = MEMCPY_AVX; + + if (wc_workaround) { + info->memmove_nodrain_eadr = + memmove_nodrain_avx_eadr_wcbarrier; + if (info->flush == flush_clflush) + info->memmove_nodrain = + memmove_nodrain_avx_clflush_wcbarrier; + else if (info->flush == flush_clflushopt) + info->memmove_nodrain = + memmove_nodrain_avx_clflushopt_wcbarrier; + else if (info->flush == flush_clwb) + info->memmove_nodrain = + memmove_nodrain_avx_clwb_wcbarrier; + else + ASSERT(0); + + info->memset_nodrain_eadr = + memset_nodrain_avx_eadr_wcbarrier; + if (info->flush == flush_clflush) + info->memset_nodrain = + memset_nodrain_avx_clflush_wcbarrier; + else if (info->flush == flush_clflushopt) + info->memset_nodrain = + memset_nodrain_avx_clflushopt_wcbarrier; + else if (info->flush == flush_clwb) + info->memset_nodrain = + memset_nodrain_avx_clwb_wcbarrier; + else + ASSERT(0); + } else { + info->memmove_nodrain_eadr = + memmove_nodrain_avx_eadr_nobarrier; + if (info->flush == flush_clflush) + info->memmove_nodrain = + memmove_nodrain_avx_clflush_nobarrier; + else if (info->flush == flush_clflushopt) + info->memmove_nodrain = + memmove_nodrain_avx_clflushopt_nobarrier; + else if (info->flush == 
flush_clwb) + info->memmove_nodrain = + memmove_nodrain_avx_clwb_nobarrier; + else + ASSERT(0); + + info->memset_nodrain_eadr = + memset_nodrain_avx_eadr_nobarrier; + if (info->flush == flush_clflush) + info->memset_nodrain = + memset_nodrain_avx_clflush_nobarrier; + else if (info->flush == flush_clflushopt) + info->memset_nodrain = + memset_nodrain_avx_clflushopt_nobarrier; + else if (info->flush == flush_clwb) + info->memset_nodrain = + memset_nodrain_avx_clwb_nobarrier; + else + ASSERT(0); + } +#else + LOG(3, "avx supported, but disabled at build time"); +#endif +} + +/* + * use_avx512f_memcpy_memset -- (internal) AVX512F detected, use it if possible + */ +static void +use_avx512f_memcpy_memset(struct pmem2_arch_info *info, + enum memcpy_impl *impl) +{ +#if AVX512F_AVAILABLE + LOG(3, "avx512f supported"); + + char *e = os_getenv("PMEM_AVX512F"); + if (e != NULL && strcmp(e, "0") == 0) { + LOG(3, "PMEM_AVX512F set to 0"); + return; + } + + LOG(3, "PMEM_AVX512F enabled"); + *impl = MEMCPY_AVX512F; + + info->memmove_nodrain_eadr = memmove_nodrain_avx512f_eadr; + if (info->flush == flush_clflush) + info->memmove_nodrain = memmove_nodrain_avx512f_clflush; + else if (info->flush == flush_clflushopt) + info->memmove_nodrain = memmove_nodrain_avx512f_clflushopt; + else if (info->flush == flush_clwb) + info->memmove_nodrain = memmove_nodrain_avx512f_clwb; + else + ASSERT(0); + + info->memset_nodrain_eadr = memset_nodrain_avx512f_eadr; + if (info->flush == flush_clflush) + info->memset_nodrain = memset_nodrain_avx512f_clflush; + else if (info->flush == flush_clflushopt) + info->memset_nodrain = memset_nodrain_avx512f_clflushopt; + else if (info->flush == flush_clwb) + info->memset_nodrain = memset_nodrain_avx512f_clwb; + else + ASSERT(0); +#else + LOG(3, "avx512f supported, but disabled at build time"); +#endif +} + +/* + * pmem_cpuinfo_to_funcs -- (internal) configure libpmem2 based on CPUID + */ +static void +pmem_cpuinfo_to_funcs(struct pmem2_arch_info *info, enum memcpy_impl *impl) +{ + LOG(3, NULL); + + if (is_cpu_clflush_present()) { + LOG(3, "clflush supported"); + + info->flush = flush_clflush; + info->flush_has_builtin_fence = 1; + info->fence = memory_barrier; + } + + if (is_cpu_clflushopt_present()) { + LOG(3, "clflushopt supported"); + + char *e = os_getenv("PMEM_NO_CLFLUSHOPT"); + if (e && strcmp(e, "1") == 0) { + LOG(3, "PMEM_NO_CLFLUSHOPT forced no clflushopt"); + } else { + info->flush = flush_clflushopt; + info->flush_has_builtin_fence = 0; + info->fence = memory_barrier; + } + } + + if (is_cpu_clwb_present()) { + LOG(3, "clwb supported"); + + char *e = os_getenv("PMEM_NO_CLWB"); + if (e && strcmp(e, "1") == 0) { + LOG(3, "PMEM_NO_CLWB forced no clwb"); + } else { + info->flush = flush_clwb; + info->flush_has_builtin_fence = 0; + info->fence = memory_barrier; + } + } + + /* + * XXX Disable this workaround for Intel CPUs with optimized + * WC eviction.
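+ * When the workaround stays enabled, the _wcbarrier variants are
+ * selected below; those issue perf_barrier() after every
+ * PERF_BARRIER_SIZE ((8 + 4) * 64 bytes, see memmove_movnt_avx_fw())
+ * of non-temporal stores, so WC buffers are drained at a bounded
+ * interval.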
+ */ + int wc_workaround = is_cpu_genuine_intel(); + + char *ptr = os_getenv("PMEM_WC_WORKAROUND"); + if (ptr) { + if (strcmp(ptr, "1") == 0) { + LOG(3, "WC workaround forced to 1"); + wc_workaround = 1; + } else if (strcmp(ptr, "0") == 0) { + LOG(3, "WC workaround forced to 0"); + wc_workaround = 0; + } else { + LOG(3, "incorrect value of PMEM_WC_WORKAROUND (%s)", + ptr); + } + } + LOG(3, "WC workaround = %d", wc_workaround); + + ptr = os_getenv("PMEM_NO_MOVNT"); + if (ptr && strcmp(ptr, "1") == 0) { + LOG(3, "PMEM_NO_MOVNT forced no movnt"); + } else { + use_sse2_memcpy_memset(info, impl, wc_workaround); + + if (is_cpu_avx_present()) + use_avx_memcpy_memset(info, impl, wc_workaround); + + if (is_cpu_avx512f_present()) + use_avx512f_memcpy_memset(info, impl); + } +} + +/* + * pmem2_arch_init -- initialize architecture-specific list of pmem operations + */ +void +pmem2_arch_init(struct pmem2_arch_info *info) +{ + LOG(3, NULL); + enum memcpy_impl impl = MEMCPY_INVALID; + + pmem_cpuinfo_to_funcs(info, &impl); + + /* + * For testing, allow overriding the default threshold + * for using non-temporal stores in pmem_memcpy_*(), pmem_memmove_*() + * and pmem_memset_*(). + * It has no effect if movnt is not supported or disabled. + */ + const char *ptr = os_getenv("PMEM_MOVNT_THRESHOLD"); + if (ptr) { + long long val = atoll(ptr); + + if (val < 0) { + LOG(3, "Invalid PMEM_MOVNT_THRESHOLD"); + } else { + LOG(3, "PMEM_MOVNT_THRESHOLD set to %zu", (size_t)val); + Movnt_threshold = (size_t)val; + } + } + + if (info->flush == flush_clwb) + LOG(3, "using clwb"); + else if (info->flush == flush_clflushopt) + LOG(3, "using clflushopt"); + else if (info->flush == flush_clflush) + LOG(3, "using clflush"); + else + FATAL("invalid deep flush function address"); + + if (impl == MEMCPY_AVX512F) + LOG(3, "using movnt AVX512F"); + else if (impl == MEMCPY_AVX) + LOG(3, "using movnt AVX"); + else if (impl == MEMCPY_SSE2) + LOG(3, "using movnt SSE2"); +} diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx.h b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx.h new file mode 100644 index 000000000..9b952660a --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +#ifndef PMEM2_MEMCPY_AVX_H +#define PMEM2_MEMCPY_AVX_H + +#include <immintrin.h> +#include <stddef.h> +#include <stdint.h> + +#include "out.h" + +static force_inline void +memmove_small_avx_noflush(char *dest, const char *src, size_t len) +{ + ASSERT(len <= 64); + + if (len <= 8) + goto le8; + if (len <= 32) + goto le32; + + /* 33..64 */ + __m256i ymm0 = _mm256_loadu_si256((__m256i *)src); + __m256i ymm1 = _mm256_loadu_si256((__m256i *)(src + len - 32)); + + _mm256_storeu_si256((__m256i *)dest, ymm0); + _mm256_storeu_si256((__m256i *)(dest + len - 32), ymm1); + return; + +le32: + if (len > 16) { + /* 17..32 */ + __m128i xmm0 = _mm_loadu_si128((__m128i *)src); + __m128i xmm1 = _mm_loadu_si128((__m128i *)(src + len - 16)); + + _mm_storeu_si128((__m128i *)dest, xmm0); + _mm_storeu_si128((__m128i *)(dest + len - 16), xmm1); + return; + } + + /* 9..16 */ + ua_uint64_t d80 = *(ua_uint64_t *)src; + ua_uint64_t d81 = *(ua_uint64_t *)(src + len - 8); + + *(ua_uint64_t *)dest = d80; + *(ua_uint64_t *)(dest + len - 8) = d81; + return; + +le8: + if (len <= 2) + goto le2; + + if (len > 4) { + /* 5..8 */ + ua_uint32_t d40 = *(ua_uint32_t *)src; + ua_uint32_t d41 = *(ua_uint32_t *)(src + len - 4); + + *(ua_uint32_t *)dest = d40; + *(ua_uint32_t *)(dest + len - 4) = d41; + return;
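+
+ /*
+ * The pair of overlapping 4B accesses above covers any
+ * 5..8-byte copy without a per-size branch: e.g. for len = 6,
+ * d40 writes bytes 0-3 and d41 writes bytes 2-5, so the middle
+ * bytes are simply stored twice.
+ */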
+ } + + /* 3..4 */ + ua_uint16_t d20 = *(ua_uint16_t *)src; + ua_uint16_t d21 = *(ua_uint16_t *)(src + len - 2); + + *(ua_uint16_t *)dest = d20; + *(ua_uint16_t *)(dest + len - 2) = d21; + return; + +le2: + if (len == 2) { + *(ua_uint16_t *)dest = *(ua_uint16_t *)src; + return; + } + + *(uint8_t *)dest = *(uint8_t *)src; +} + +static force_inline void +memmove_small_avx(char *dest, const char *src, size_t len, flush_fn flush) +{ + /* + * pmemcheck complains about "overwritten stores before they were made + * persistent" for overlapping stores (last instruction in each code + * path) in the optimized version. + * libc's memcpy also does that, so we can't use it here. + */ + if (On_pmemcheck) { + memmove_nodrain_generic(dest, src, len, PMEM2_F_MEM_NOFLUSH, + NULL); + } else { + memmove_small_avx_noflush(dest, src, len); + } + + flush(dest, len); +} + +#endif diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx512f.h b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx512f.h new file mode 100644 index 000000000..15701e1e4 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_avx512f.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +#ifndef PMEM2_MEMCPY_AVX512F_H +#define PMEM2_MEMCPY_AVX512F_H + +#include <stddef.h> + +#include "memcpy_avx.h" + +static force_inline void +memmove_small_avx512f(char *dest, const char *src, size_t len, flush_fn flush) +{ + /* We can't do better than AVX here. */ + memmove_small_avx(dest, src, len, flush); +} + +#endif diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx.c new file mode 100644 index 000000000..ff007fb3c --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx.c @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +#include <immintrin.h> +#include <stddef.h> +#include <stdint.h> + +#include "pmem2_arch.h" +#include "avx.h" +#include "flush.h" +#include "memcpy_memset.h" +#include "memcpy_avx.h" +#include "valgrind_internal.h" + +static force_inline __m256i +mm256_loadu_si256(const char *src, unsigned idx) +{ + return _mm256_loadu_si256((const __m256i *)src + idx); +} + +static force_inline void +mm256_stream_si256(char *dest, unsigned idx, __m256i src) +{ + _mm256_stream_si256((__m256i *)dest + idx, src); + barrier(); +} + +static force_inline void +memmove_movnt8x64b(char *dest, const char *src) +{ + __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + __m256i ymm2 = mm256_loadu_si256(src, 2); + __m256i ymm3 = mm256_loadu_si256(src, 3); + __m256i ymm4 = mm256_loadu_si256(src, 4); + __m256i ymm5 = mm256_loadu_si256(src, 5); + __m256i ymm6 = mm256_loadu_si256(src, 6); + __m256i ymm7 = mm256_loadu_si256(src, 7); + __m256i ymm8 = mm256_loadu_si256(src, 8); + __m256i ymm9 = mm256_loadu_si256(src, 9); + __m256i ymm10 = mm256_loadu_si256(src, 10); + __m256i ymm11 = mm256_loadu_si256(src, 11); + __m256i ymm12 = mm256_loadu_si256(src, 12); + __m256i ymm13 = mm256_loadu_si256(src, 13); + __m256i ymm14 = mm256_loadu_si256(src, 14); + __m256i ymm15 = mm256_loadu_si256(src, 15); + + mm256_stream_si256(dest, 0, ymm0); + mm256_stream_si256(dest, 1, ymm1); + mm256_stream_si256(dest, 2, ymm2); + mm256_stream_si256(dest, 3, ymm3); + mm256_stream_si256(dest, 4, ymm4); + mm256_stream_si256(dest, 5, ymm5); + mm256_stream_si256(dest, 6, ymm6); + mm256_stream_si256(dest, 7, ymm7); + mm256_stream_si256(dest, 8, ymm8); + mm256_stream_si256(dest, 9, ymm9); +
mm256_stream_si256(dest, 10, ymm10); + mm256_stream_si256(dest, 11, ymm11); + mm256_stream_si256(dest, 12, ymm12); + mm256_stream_si256(dest, 13, ymm13); + mm256_stream_si256(dest, 14, ymm14); + mm256_stream_si256(dest, 15, ymm15); +} + +static force_inline void +memmove_movnt4x64b(char *dest, const char *src) +{ + __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + __m256i ymm2 = mm256_loadu_si256(src, 2); + __m256i ymm3 = mm256_loadu_si256(src, 3); + __m256i ymm4 = mm256_loadu_si256(src, 4); + __m256i ymm5 = mm256_loadu_si256(src, 5); + __m256i ymm6 = mm256_loadu_si256(src, 6); + __m256i ymm7 = mm256_loadu_si256(src, 7); + + mm256_stream_si256(dest, 0, ymm0); + mm256_stream_si256(dest, 1, ymm1); + mm256_stream_si256(dest, 2, ymm2); + mm256_stream_si256(dest, 3, ymm3); + mm256_stream_si256(dest, 4, ymm4); + mm256_stream_si256(dest, 5, ymm5); + mm256_stream_si256(dest, 6, ymm6); + mm256_stream_si256(dest, 7, ymm7); +} + +static force_inline void +memmove_movnt2x64b(char *dest, const char *src) +{ + __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + __m256i ymm2 = mm256_loadu_si256(src, 2); + __m256i ymm3 = mm256_loadu_si256(src, 3); + + mm256_stream_si256(dest, 0, ymm0); + mm256_stream_si256(dest, 1, ymm1); + mm256_stream_si256(dest, 2, ymm2); + mm256_stream_si256(dest, 3, ymm3); +} + +static force_inline void +memmove_movnt1x64b(char *dest, const char *src) +{ + __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + + mm256_stream_si256(dest, 0, ymm0); + mm256_stream_si256(dest, 1, ymm1); +} + +static force_inline void +memmove_movnt1x32b(char *dest, const char *src) +{ + __m256i ymm0 = _mm256_loadu_si256((__m256i *)src); + + mm256_stream_si256(dest, 0, ymm0); +} + +static force_inline void +memmove_movnt1x16b(char *dest, const char *src) +{ + __m128i xmm0 = _mm_loadu_si128((__m128i *)src); + + _mm_stream_si128((__m128i *)dest, xmm0); +} + +static force_inline void +memmove_movnt1x8b(char *dest, const char *src) +{ + _mm_stream_si64((long long *)dest, *(long long *)src); +} + +static force_inline void +memmove_movnt1x4b(char *dest, const char *src) +{ + _mm_stream_si32((int *)dest, *(int *)src); +} + +static force_inline void +memmove_movnt_avx_fw(char *dest, const char *src, size_t len, flush_fn flush, + perf_barrier_fn perf_barrier) +{ + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memmove_small_avx(dest, src, cnt, flush); + + dest += cnt; + src += cnt; + len -= cnt; + } + + const char *srcend = src + len; + prefetch_ini_fw(src, len); + + while (len >= PERF_BARRIER_SIZE) { + prefetch_next_fw(src, srcend); + + memmove_movnt8x64b(dest, src); + dest += 8 * 64; + src += 8 * 64; + len -= 8 * 64; + + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (8 + 4) * 64); + + if (len) + perf_barrier(); + } + + if (len >= 8 * 64) { + memmove_movnt8x64b(dest, src); + dest += 8 * 64; + src += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memmove_movnt2x64b(dest, src); + dest += 2 * 64; + src += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memmove_movnt1x64b(dest, src); + + dest += 1 * 64; + src += 1 * 64; + len -= 1 * 64; + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. 
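+ * A streaming store exists only for 4/8/16/32-byte power-of-two sizes,
+ * so those tails are written directly below and any other remainder
+ * falls back to memmove_small_avx() plus an explicit flush.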
*/ + if (util_is_pow2(len)) { + if (len == 32) + memmove_movnt1x32b(dest, src); + else if (len == 16) + memmove_movnt1x16b(dest, src); + else if (len == 8) + memmove_movnt1x8b(dest, src); + else if (len == 4) + memmove_movnt1x4b(dest, src); + else + goto nonnt; + + goto end; + } + +nonnt: + memmove_small_avx(dest, src, len, flush); +end: + avx_zeroupper(); +} + +static force_inline void +memmove_movnt_avx_bw(char *dest, const char *src, size_t len, flush_fn flush, + perf_barrier_fn perf_barrier) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + + memmove_small_avx(dest, src, cnt, flush); + } + + const char *srcbegin = src - len; + prefetch_ini_bw(src, len); + + while (len >= PERF_BARRIER_SIZE) { + prefetch_next_bw(src, srcbegin); + + dest -= 8 * 64; + src -= 8 * 64; + len -= 8 * 64; + memmove_movnt8x64b(dest, src); + + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (8 + 4) * 64); + + if (len) + perf_barrier(); + } + + if (len >= 8 * 64) { + dest -= 8 * 64; + src -= 8 * 64; + len -= 8 * 64; + memmove_movnt8x64b(dest, src); + } + + if (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + memmove_movnt2x64b(dest, src); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + memmove_movnt1x64b(dest, src); + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. */ + if (util_is_pow2(len)) { + if (len == 32) { + dest -= 32; + src -= 32; + memmove_movnt1x32b(dest, src); + } else if (len == 16) { + dest -= 16; + src -= 16; + memmove_movnt1x16b(dest, src); + } else if (len == 8) { + dest -= 8; + src -= 8; + memmove_movnt1x8b(dest, src); + } else if (len == 4) { + dest -= 4; + src -= 4; + memmove_movnt1x4b(dest, src); + } else { + goto nonnt; + } + + goto end; + } + +nonnt: + dest -= len; + src -= len; + memmove_small_avx(dest, src, len, flush); +end: + avx_zeroupper(); +} + +static force_inline void +memmove_movnt_avx(char *dest, const char *src, size_t len, flush_fn flush, + barrier_fn barrier, perf_barrier_fn perf_barrier) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_movnt_avx_fw(dest, src, len, flush, perf_barrier); + else + memmove_movnt_avx_bw(dest, src, len, flush, perf_barrier); + + barrier(); + + VALGRIND_DO_FLUSH(dest, len); +} + +/* variants without perf_barrier */ + +void +memmove_movnt_avx_noflush_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, noflush, barrier_after_ntstores, + no_barrier); +} + +void +memmove_movnt_avx_empty_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_empty_nolog, + barrier_after_ntstores, no_barrier); +} +void +memmove_movnt_avx_clflush_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clflush_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memmove_movnt_avx_clflushopt_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clflushopt_nolog, + 
no_barrier_after_ntstores, no_barrier); +} + +void +memmove_movnt_avx_clwb_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clwb_nolog, + no_barrier_after_ntstores, no_barrier); +} + +/* variants with perf_barrier */ + +void +memmove_movnt_avx_noflush_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, noflush, barrier_after_ntstores, + wc_barrier); +} + +void +memmove_movnt_avx_empty_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_empty_nolog, + barrier_after_ntstores, wc_barrier); +} +void +memmove_movnt_avx_clflush_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clflush_nolog, + barrier_after_ntstores, wc_barrier); +} + +void +memmove_movnt_avx_clflushopt_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clflushopt_nolog, + no_barrier_after_ntstores, wc_barrier); +} + +void +memmove_movnt_avx_clwb_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx(dest, src, len, flush_clwb_nolog, + no_barrier_after_ntstores, wc_barrier); +} diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx512f.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx512f.c new file mode 100644 index 000000000..fb19504e4 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_avx512f.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +#include <immintrin.h> +#include <stddef.h> +#include <stdint.h> + +#include "pmem2_arch.h" +#include "avx.h" +#include "flush.h" +#include "memcpy_memset.h" +#include "memcpy_avx512f.h" +#include "valgrind_internal.h" + +static force_inline __m512i +mm512_loadu_si512(const char *src, unsigned idx) +{ + return _mm512_loadu_si512((const __m512i *)src + idx); +} + +static force_inline void +mm512_stream_si512(char *dest, unsigned idx, __m512i src) +{ + _mm512_stream_si512((__m512i *)dest + idx, src); + barrier(); +} + +static force_inline void +memmove_movnt32x64b(char *dest, const char *src) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + __m512i zmm8 = mm512_loadu_si512(src, 8); + __m512i zmm9 = mm512_loadu_si512(src, 9); + __m512i zmm10 = mm512_loadu_si512(src, 10); + __m512i zmm11 = mm512_loadu_si512(src, 11); + __m512i zmm12 = mm512_loadu_si512(src, 12); + __m512i zmm13 = mm512_loadu_si512(src, 13); + __m512i zmm14 = mm512_loadu_si512(src, 14); + __m512i zmm15 = mm512_loadu_si512(src, 15); + __m512i zmm16 = mm512_loadu_si512(src, 16); + __m512i zmm17 = mm512_loadu_si512(src, 17); + __m512i zmm18 = mm512_loadu_si512(src, 18); + __m512i zmm19 = mm512_loadu_si512(src, 19); + __m512i zmm20 = mm512_loadu_si512(src, 20); + __m512i zmm21 = mm512_loadu_si512(src, 21); + __m512i zmm22 = mm512_loadu_si512(src, 22); + __m512i zmm23 = mm512_loadu_si512(src, 23); + __m512i zmm24 =
mm512_loadu_si512(src, 24); + __m512i zmm25 = mm512_loadu_si512(src, 25); + __m512i zmm26 = mm512_loadu_si512(src, 26); + __m512i zmm27 = mm512_loadu_si512(src, 27); + __m512i zmm28 = mm512_loadu_si512(src, 28); + __m512i zmm29 = mm512_loadu_si512(src, 29); + __m512i zmm30 = mm512_loadu_si512(src, 30); + __m512i zmm31 = mm512_loadu_si512(src, 31); + + mm512_stream_si512(dest, 0, zmm0); + mm512_stream_si512(dest, 1, zmm1); + mm512_stream_si512(dest, 2, zmm2); + mm512_stream_si512(dest, 3, zmm3); + mm512_stream_si512(dest, 4, zmm4); + mm512_stream_si512(dest, 5, zmm5); + mm512_stream_si512(dest, 6, zmm6); + mm512_stream_si512(dest, 7, zmm7); + mm512_stream_si512(dest, 8, zmm8); + mm512_stream_si512(dest, 9, zmm9); + mm512_stream_si512(dest, 10, zmm10); + mm512_stream_si512(dest, 11, zmm11); + mm512_stream_si512(dest, 12, zmm12); + mm512_stream_si512(dest, 13, zmm13); + mm512_stream_si512(dest, 14, zmm14); + mm512_stream_si512(dest, 15, zmm15); + mm512_stream_si512(dest, 16, zmm16); + mm512_stream_si512(dest, 17, zmm17); + mm512_stream_si512(dest, 18, zmm18); + mm512_stream_si512(dest, 19, zmm19); + mm512_stream_si512(dest, 20, zmm20); + mm512_stream_si512(dest, 21, zmm21); + mm512_stream_si512(dest, 22, zmm22); + mm512_stream_si512(dest, 23, zmm23); + mm512_stream_si512(dest, 24, zmm24); + mm512_stream_si512(dest, 25, zmm25); + mm512_stream_si512(dest, 26, zmm26); + mm512_stream_si512(dest, 27, zmm27); + mm512_stream_si512(dest, 28, zmm28); + mm512_stream_si512(dest, 29, zmm29); + mm512_stream_si512(dest, 30, zmm30); + mm512_stream_si512(dest, 31, zmm31); +} + +static force_inline void +memmove_movnt16x64b(char *dest, const char *src) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + __m512i zmm8 = mm512_loadu_si512(src, 8); + __m512i zmm9 = mm512_loadu_si512(src, 9); + __m512i zmm10 = mm512_loadu_si512(src, 10); + __m512i zmm11 = mm512_loadu_si512(src, 11); + __m512i zmm12 = mm512_loadu_si512(src, 12); + __m512i zmm13 = mm512_loadu_si512(src, 13); + __m512i zmm14 = mm512_loadu_si512(src, 14); + __m512i zmm15 = mm512_loadu_si512(src, 15); + + mm512_stream_si512(dest, 0, zmm0); + mm512_stream_si512(dest, 1, zmm1); + mm512_stream_si512(dest, 2, zmm2); + mm512_stream_si512(dest, 3, zmm3); + mm512_stream_si512(dest, 4, zmm4); + mm512_stream_si512(dest, 5, zmm5); + mm512_stream_si512(dest, 6, zmm6); + mm512_stream_si512(dest, 7, zmm7); + mm512_stream_si512(dest, 8, zmm8); + mm512_stream_si512(dest, 9, zmm9); + mm512_stream_si512(dest, 10, zmm10); + mm512_stream_si512(dest, 11, zmm11); + mm512_stream_si512(dest, 12, zmm12); + mm512_stream_si512(dest, 13, zmm13); + mm512_stream_si512(dest, 14, zmm14); + mm512_stream_si512(dest, 15, zmm15); +} + +static force_inline void +memmove_movnt8x64b(char *dest, const char *src) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + + mm512_stream_si512(dest, 0, zmm0); + mm512_stream_si512(dest, 1, zmm1); + mm512_stream_si512(dest, 2, zmm2); + 
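	/*
	 * Each mm512_stream_si512() wrapper above pairs the non-temporal
	 * VMOVNTDQ store with barrier(), which (assuming the usual PMDK
	 * definition of barrier() as a compiler-only memory clobber) merely
	 * keeps the compiler from reordering the stores that pmemcheck
	 * instruments; it emits no fence instruction itself.
	 */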
mm512_stream_si512(dest, 3, zmm3);
+	mm512_stream_si512(dest, 4, zmm4);
+	mm512_stream_si512(dest, 5, zmm5);
+	mm512_stream_si512(dest, 6, zmm6);
+	mm512_stream_si512(dest, 7, zmm7);
+}
+
+static force_inline void
+memmove_movnt4x64b(char *dest, const char *src)
+{
+	__m512i zmm0 = mm512_loadu_si512(src, 0);
+	__m512i zmm1 = mm512_loadu_si512(src, 1);
+	__m512i zmm2 = mm512_loadu_si512(src, 2);
+	__m512i zmm3 = mm512_loadu_si512(src, 3);
+
+	mm512_stream_si512(dest, 0, zmm0);
+	mm512_stream_si512(dest, 1, zmm1);
+	mm512_stream_si512(dest, 2, zmm2);
+	mm512_stream_si512(dest, 3, zmm3);
+}
+
+static force_inline void
+memmove_movnt2x64b(char *dest, const char *src)
+{
+	__m512i zmm0 = mm512_loadu_si512(src, 0);
+	__m512i zmm1 = mm512_loadu_si512(src, 1);
+
+	mm512_stream_si512(dest, 0, zmm0);
+	mm512_stream_si512(dest, 1, zmm1);
+}
+
+static force_inline void
+memmove_movnt1x64b(char *dest, const char *src)
+{
+	__m512i zmm0 = mm512_loadu_si512(src, 0);
+
+	mm512_stream_si512(dest, 0, zmm0);
+}
+
+static force_inline void
+memmove_movnt1x32b(char *dest, const char *src)
+{
+	__m256i ymm0 = _mm256_loadu_si256((__m256i *)src);
+
+	_mm256_stream_si256((__m256i *)dest, ymm0);
+}
+
+static force_inline void
+memmove_movnt1x16b(char *dest, const char *src)
+{
+	__m128i xmm0 = _mm_loadu_si128((__m128i *)src);
+
+	_mm_stream_si128((__m128i *)dest, xmm0);
+}
+
+static force_inline void
+memmove_movnt1x8b(char *dest, const char *src)
+{
+	_mm_stream_si64((long long *)dest, *(long long *)src);
+}
+
+static force_inline void
+memmove_movnt1x4b(char *dest, const char *src)
+{
+	_mm_stream_si32((int *)dest, *(int *)src);
+}
+
+static force_inline void
+memmove_movnt_avx512f_fw(char *dest, const char *src, size_t len,
+		flush_fn flush)
+{
+	size_t cnt = (uint64_t)dest & 63;
+	if (cnt > 0) {
+		cnt = 64 - cnt;
+
+		if (cnt > len)
+			cnt = len;
+
+		memmove_small_avx512f(dest, src, cnt, flush);
+
+		dest += cnt;
+		src += cnt;
+		len -= cnt;
+	}
+
+	while (len >= 32 * 64) {
+		memmove_movnt32x64b(dest, src);
+		dest += 32 * 64;
+		src += 32 * 64;
+		len -= 32 * 64;
+	}
+
+	if (len >= 16 * 64) {
+		memmove_movnt16x64b(dest, src);
+		dest += 16 * 64;
+		src += 16 * 64;
+		len -= 16 * 64;
+	}
+
+	if (len >= 8 * 64) {
+		memmove_movnt8x64b(dest, src);
+		dest += 8 * 64;
+		src += 8 * 64;
+		len -= 8 * 64;
+	}
+
+	if (len >= 4 * 64) {
+		memmove_movnt4x64b(dest, src);
+		dest += 4 * 64;
+		src += 4 * 64;
+		len -= 4 * 64;
+	}
+
+	if (len >= 2 * 64) {
+		memmove_movnt2x64b(dest, src);
+		dest += 2 * 64;
+		src += 2 * 64;
+		len -= 2 * 64;
+	}
+
+	if (len >= 1 * 64) {
+		memmove_movnt1x64b(dest, src);
+
+		dest += 1 * 64;
+		src += 1 * 64;
+		len -= 1 * 64;
+	}
+
+	if (len == 0)
+		goto end;
+
+	/* There's no point in using more than 1 nt store for 1 cache line.
*/ + if (util_is_pow2(len)) { + if (len == 32) + memmove_movnt1x32b(dest, src); + else if (len == 16) + memmove_movnt1x16b(dest, src); + else if (len == 8) + memmove_movnt1x8b(dest, src); + else if (len == 4) + memmove_movnt1x4b(dest, src); + else + goto nonnt; + + goto end; + } + +nonnt: + memmove_small_avx512f(dest, src, len, flush); +end: + avx_zeroupper(); +} + +static force_inline void +memmove_movnt_avx512f_bw(char *dest, const char *src, size_t len, + flush_fn flush) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + + memmove_small_avx512f(dest, src, cnt, flush); + } + + while (len >= 32 * 64) { + dest -= 32 * 64; + src -= 32 * 64; + len -= 32 * 64; + memmove_movnt32x64b(dest, src); + } + + if (len >= 16 * 64) { + dest -= 16 * 64; + src -= 16 * 64; + len -= 16 * 64; + memmove_movnt16x64b(dest, src); + } + + if (len >= 8 * 64) { + dest -= 8 * 64; + src -= 8 * 64; + len -= 8 * 64; + memmove_movnt8x64b(dest, src); + } + + if (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + memmove_movnt2x64b(dest, src); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + + memmove_movnt1x64b(dest, src); + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. */ + if (util_is_pow2(len)) { + if (len == 32) { + dest -= 32; + src -= 32; + memmove_movnt1x32b(dest, src); + } else if (len == 16) { + dest -= 16; + src -= 16; + memmove_movnt1x16b(dest, src); + } else if (len == 8) { + dest -= 8; + src -= 8; + memmove_movnt1x8b(dest, src); + } else if (len == 4) { + dest -= 4; + src -= 4; + memmove_movnt1x4b(dest, src); + } else { + goto nonnt; + } + + goto end; + } + +nonnt: + dest -= len; + src -= len; + + memmove_small_avx512f(dest, src, len, flush); +end: + avx_zeroupper(); +} + +static force_inline void +memmove_movnt_avx512f(char *dest, const char *src, size_t len, flush_fn flush, + barrier_fn barrier) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_movnt_avx512f_fw(dest, src, len, flush); + else + memmove_movnt_avx512f_bw(dest, src, len, flush); + + barrier(); + + VALGRIND_DO_FLUSH(dest, len); +} + +void +memmove_movnt_avx512f_noflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx512f(dest, src, len, noflush, barrier_after_ntstores); +} + +void +memmove_movnt_avx512f_empty(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx512f(dest, src, len, flush_empty_nolog, + barrier_after_ntstores); +} + +void +memmove_movnt_avx512f_clflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx512f(dest, src, len, flush_clflush_nolog, + barrier_after_ntstores); +} + +void +memmove_movnt_avx512f_clflushopt(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx512f(dest, src, len, flush_clflushopt_nolog, + no_barrier_after_ntstores); +} + +void +memmove_movnt_avx512f_clwb(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_avx512f(dest, src, len, flush_clwb_nolog, + no_barrier_after_ntstores); +} diff --git 
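The util_is_pow2() tail above pays for a single small non-temporal store only when the remainder is exactly one of the supported sizes; a standalone sketch of that test (is_pow2 stands in for PMDK's util_is_pow2):

#include <stddef.h>

static int
is_pow2(size_t v)
{
	return v != 0 && (v & (v - 1)) == 0;
}

/*
 * Remainders of 32, 16, 8 or 4 bytes map onto exactly one MOVNT
 * instruction; any other tail (e.g. 24 bytes) falls through to
 * memmove_small_avx512f() with a regular flush instead.
 */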
a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_sse2.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_sse2.c
new file mode 100644
index 000000000..b633be9da
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_nt_sse2.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memcpy_sse2.h"
+#include "valgrind_internal.h"
+
+static force_inline __m128i
+mm_loadu_si128(const char *src, unsigned idx)
+{
+	return _mm_loadu_si128((const __m128i *)src + idx);
+}
+
+static force_inline void
+mm_stream_si128(char *dest, unsigned idx, __m128i src)
+{
+	_mm_stream_si128((__m128i *)dest + idx, src);
+	barrier();
+}
+
+static force_inline void
+memmove_movnt4x64b(char *dest, const char *src)
+{
+	__m128i xmm0 = mm_loadu_si128(src, 0);
+	__m128i xmm1 = mm_loadu_si128(src, 1);
+	__m128i xmm2 = mm_loadu_si128(src, 2);
+	__m128i xmm3 = mm_loadu_si128(src, 3);
+	__m128i xmm4 = mm_loadu_si128(src, 4);
+	__m128i xmm5 = mm_loadu_si128(src, 5);
+	__m128i xmm6 = mm_loadu_si128(src, 6);
+	__m128i xmm7 = mm_loadu_si128(src, 7);
+	__m128i xmm8 = mm_loadu_si128(src, 8);
+	__m128i xmm9 = mm_loadu_si128(src, 9);
+	__m128i xmm10 = mm_loadu_si128(src, 10);
+	__m128i xmm11 = mm_loadu_si128(src, 11);
+	__m128i xmm12 = mm_loadu_si128(src, 12);
+	__m128i xmm13 = mm_loadu_si128(src, 13);
+	__m128i xmm14 = mm_loadu_si128(src, 14);
+	__m128i xmm15 = mm_loadu_si128(src, 15);
+
+	mm_stream_si128(dest, 0, xmm0);
+	mm_stream_si128(dest, 1, xmm1);
+	mm_stream_si128(dest, 2, xmm2);
+	mm_stream_si128(dest, 3, xmm3);
+	mm_stream_si128(dest, 4, xmm4);
+	mm_stream_si128(dest, 5, xmm5);
+	mm_stream_si128(dest, 6, xmm6);
+	mm_stream_si128(dest, 7, xmm7);
+	mm_stream_si128(dest, 8, xmm8);
+	mm_stream_si128(dest, 9, xmm9);
+	mm_stream_si128(dest, 10, xmm10);
+	mm_stream_si128(dest, 11, xmm11);
+	mm_stream_si128(dest, 12, xmm12);
+	mm_stream_si128(dest, 13, xmm13);
+	mm_stream_si128(dest, 14, xmm14);
+	mm_stream_si128(dest, 15, xmm15);
+}
+
+static force_inline void
+memmove_movnt2x64b(char *dest, const char *src)
+{
+	__m128i xmm0 = mm_loadu_si128(src, 0);
+	__m128i xmm1 = mm_loadu_si128(src, 1);
+	__m128i xmm2 = mm_loadu_si128(src, 2);
+	__m128i xmm3 = mm_loadu_si128(src, 3);
+	__m128i xmm4 = mm_loadu_si128(src, 4);
+	__m128i xmm5 = mm_loadu_si128(src, 5);
+	__m128i xmm6 = mm_loadu_si128(src, 6);
+	__m128i xmm7 = mm_loadu_si128(src, 7);
+
+	mm_stream_si128(dest, 0, xmm0);
+	mm_stream_si128(dest, 1, xmm1);
+	mm_stream_si128(dest, 2, xmm2);
+	mm_stream_si128(dest, 3, xmm3);
+	mm_stream_si128(dest, 4, xmm4);
+	mm_stream_si128(dest, 5, xmm5);
+	mm_stream_si128(dest, 6, xmm6);
+	mm_stream_si128(dest, 7, xmm7);
+}
+
+static force_inline void
+memmove_movnt1x64b(char *dest, const char *src)
+{
+	__m128i xmm0 = mm_loadu_si128(src, 0);
+	__m128i xmm1 = mm_loadu_si128(src, 1);
+	__m128i xmm2 = mm_loadu_si128(src, 2);
+	__m128i xmm3 = mm_loadu_si128(src, 3);
+
+	mm_stream_si128(dest, 0, xmm0);
+	mm_stream_si128(dest, 1, xmm1);
+	mm_stream_si128(dest, 2, xmm2);
+	mm_stream_si128(dest, 3, xmm3);
+}
+
+static force_inline void
+memmove_movnt1x32b(char *dest, const char *src)
+{
+	__m128i xmm0 = mm_loadu_si128(src, 0);
+	__m128i xmm1 = mm_loadu_si128(src, 1);
+
+	mm_stream_si128(dest, 0, xmm0);
+	mm_stream_si128(dest, 1, xmm1);
+}
+
+static force_inline void
+memmove_movnt1x16b(char *dest, const char *src)
+{
+	__m128i xmm0 = mm_loadu_si128(src, 0);
+
+	mm_stream_si128(dest, 0, 
xmm0); +} + +static force_inline void +memmove_movnt1x8b(char *dest, const char *src) +{ + _mm_stream_si64((long long *)dest, *(long long *)src); +} + +static force_inline void +memmove_movnt1x4b(char *dest, const char *src) +{ + _mm_stream_si32((int *)dest, *(int *)src); +} + +static force_inline void +memmove_movnt_sse_fw(char *dest, const char *src, size_t len, flush_fn flush, + perf_barrier_fn perf_barrier) +{ + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memmove_small_sse2(dest, src, cnt, flush); + + dest += cnt; + src += cnt; + len -= cnt; + } + + const char *srcend = src + len; + prefetch_ini_fw(src, len); + + while (len >= PERF_BARRIER_SIZE) { + prefetch_next_fw(src, srcend); + + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (4 + 4 + 4) * 64); + + if (len) + perf_barrier(); + } + + while (len >= 4 * 64) { + memmove_movnt4x64b(dest, src); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memmove_movnt2x64b(dest, src); + dest += 2 * 64; + src += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memmove_movnt1x64b(dest, src); + + dest += 1 * 64; + src += 1 * 64; + len -= 1 * 64; + } + + if (len == 0) + return; + + /* There's no point in using more than 1 nt store for 1 cache line. */ + if (util_is_pow2(len)) { + if (len == 32) + memmove_movnt1x32b(dest, src); + else if (len == 16) + memmove_movnt1x16b(dest, src); + else if (len == 8) + memmove_movnt1x8b(dest, src); + else if (len == 4) + memmove_movnt1x4b(dest, src); + else + goto nonnt; + + return; + } + +nonnt: + memmove_small_sse2(dest, src, len, flush); +} + +static force_inline void +memmove_movnt_sse_bw(char *dest, const char *src, size_t len, flush_fn flush, + perf_barrier_fn perf_barrier) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + + memmove_small_sse2(dest, src, cnt, flush); + } + + const char *srcbegin = src - len; + prefetch_ini_bw(src, len); + + while (len >= PERF_BARRIER_SIZE) { + prefetch_next_bw(src, srcbegin); + + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (4 + 4 + 4) * 64); + + if (len) + perf_barrier(); + } + + while (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_movnt4x64b(dest, src); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + memmove_movnt2x64b(dest, src); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + memmove_movnt1x64b(dest, src); + } + + if (len == 0) + return; + + /* There's no point in using more than 1 nt store for 1 cache line. 
*/ + if (util_is_pow2(len)) { + if (len == 32) { + dest -= 32; + src -= 32; + memmove_movnt1x32b(dest, src); + } else if (len == 16) { + dest -= 16; + src -= 16; + memmove_movnt1x16b(dest, src); + } else if (len == 8) { + dest -= 8; + src -= 8; + memmove_movnt1x8b(dest, src); + } else if (len == 4) { + dest -= 4; + src -= 4; + memmove_movnt1x4b(dest, src); + } else { + goto nonnt; + } + + return; + } + +nonnt: + dest -= len; + src -= len; + memmove_small_sse2(dest, src, len, flush); +} + +static force_inline void +memmove_movnt_sse2(char *dest, const char *src, size_t len, flush_fn flush, + barrier_fn barrier, perf_barrier_fn perf_barrier) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_movnt_sse_fw(dest, src, len, flush, perf_barrier); + else + memmove_movnt_sse_bw(dest, src, len, flush, perf_barrier); + + barrier(); + + VALGRIND_DO_FLUSH(dest, len); +} + +/* variants without perf_barrier */ + +void +memmove_movnt_sse2_noflush_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, noflush, barrier_after_ntstores, + no_barrier); +} + +void +memmove_movnt_sse2_empty_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_empty_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memmove_movnt_sse2_clflush_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clflush_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memmove_movnt_sse2_clflushopt_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clflushopt_nolog, + no_barrier_after_ntstores, no_barrier); +} + +void +memmove_movnt_sse2_clwb_nobarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clwb_nolog, + no_barrier_after_ntstores, no_barrier); +} + +/* variants with perf_barrier */ + +void +memmove_movnt_sse2_noflush_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, noflush, barrier_after_ntstores, + wc_barrier); +} + +void +memmove_movnt_sse2_empty_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_empty_nolog, + barrier_after_ntstores, wc_barrier); +} + +void +memmove_movnt_sse2_clflush_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clflush_nolog, + barrier_after_ntstores, wc_barrier); +} + +void +memmove_movnt_sse2_clflushopt_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clflushopt_nolog, + no_barrier_after_ntstores, wc_barrier); +} + +void +memmove_movnt_sse2_clwb_wcbarrier(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_movnt_sse2(dest, src, len, flush_clwb_nolog, + no_barrier_after_ntstores, wc_barrier); +} diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_sse2.h b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_sse2.h new file mode 100644 index 000000000..1e18bf8f4 --- 
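/*
 * Pairing rule visible in the wrapper functions above: the noflush,
 * empty and clflush variants use barrier_after_ntstores (an sfence is
 * still required to order the non-temporal stores), while the
 * clflushopt and clwb variants use no_barrier_after_ntstores because
 * the matching drain step already issues the sfence.
 */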
/dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_sse2.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+#ifndef PMEM2_MEMCPY_SSE2_H
+#define PMEM2_MEMCPY_SSE2_H
+
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "out.h"
+
+static force_inline void
+memmove_small_sse2_noflush(char *dest, const char *src, size_t len)
+{
+	ASSERT(len <= 64);
+
+	if (len <= 8)
+		goto le8;
+	if (len <= 32)
+		goto le32;
+
+	if (len > 48) {
+		/* 49..64 */
+		__m128i xmm0 = _mm_loadu_si128((__m128i *)src);
+		__m128i xmm1 = _mm_loadu_si128((__m128i *)(src + 16));
+		__m128i xmm2 = _mm_loadu_si128((__m128i *)(src + 32));
+		__m128i xmm3 = _mm_loadu_si128((__m128i *)(src + len - 16));
+
+		_mm_storeu_si128((__m128i *)dest, xmm0);
+		_mm_storeu_si128((__m128i *)(dest + 16), xmm1);
+		_mm_storeu_si128((__m128i *)(dest + 32), xmm2);
+		_mm_storeu_si128((__m128i *)(dest + len - 16), xmm3);
+		return;
+	}
+
+	/* 33..48 */
+	__m128i xmm0 = _mm_loadu_si128((__m128i *)src);
+	__m128i xmm1 = _mm_loadu_si128((__m128i *)(src + 16));
+	__m128i xmm2 = _mm_loadu_si128((__m128i *)(src + len - 16));
+
+	_mm_storeu_si128((__m128i *)dest, xmm0);
+	_mm_storeu_si128((__m128i *)(dest + 16), xmm1);
+	_mm_storeu_si128((__m128i *)(dest + len - 16), xmm2);
+	return;
+
+le32:
+	if (len > 16) {
+		/* 17..32 */
+		__m128i xmm0 = _mm_loadu_si128((__m128i *)src);
+		__m128i xmm1 = _mm_loadu_si128((__m128i *)(src + len - 16));
+
+		_mm_storeu_si128((__m128i *)dest, xmm0);
+		_mm_storeu_si128((__m128i *)(dest + len - 16), xmm1);
+		return;
+	}
+
+	/* 9..16 */
+	uint64_t d80 = *(ua_uint64_t *)src;
+	uint64_t d81 = *(ua_uint64_t *)(src + len - 8);
+
+	*(ua_uint64_t *)dest = d80;
+	*(ua_uint64_t *)(dest + len - 8) = d81;
+	return;
+
+le8:
+	if (len <= 2)
+		goto le2;
+
+	if (len > 4) {
+		/* 5..8 */
+		uint32_t d40 = *(ua_uint32_t *)src;
+		uint32_t d41 = *(ua_uint32_t *)(src + len - 4);
+
+		*(ua_uint32_t *)dest = d40;
+		*(ua_uint32_t *)(dest + len - 4) = d41;
+		return;
+	}
+
+	/* 3..4 */
+	uint16_t d20 = *(ua_uint16_t *)src;
+	uint16_t d21 = *(ua_uint16_t *)(src + len - 2);
+
+	*(ua_uint16_t *)dest = d20;
+	*(ua_uint16_t *)(dest + len - 2) = d21;
+	return;
+
+le2:
+	if (len == 2) {
+		*(ua_uint16_t *)dest = *(ua_uint16_t *)src;
+		return;
+	}
+
+	*(uint8_t *)dest = *(uint8_t *)src;
+}
+
+static force_inline void
+memmove_small_sse2(char *dest, const char *src, size_t len, flush_fn flush)
+{
+	/*
+	 * pmemcheck complains about "overwritten stores before they were made
+	 * persistent" for overlapping stores (last instruction in each code
+	 * path) in the optimized version.
+	 * libc's memcpy also does that, so we can't use it here.
+	 */
+	if (On_pmemcheck) {
+		memmove_nodrain_generic(dest, src, len, PMEM2_F_MEM_NOFLUSH,
+				NULL);
+	} else {
+		memmove_small_sse2_noflush(dest, src, len);
+	}
+
+	flush(dest, len);
+}
+
+#endif
diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx.c
new file mode 100644
index 000000000..c4780caba
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "avx.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memcpy_avx.h"
+
+static force_inline __m256i
+mm256_loadu_si256(const char *src, unsigned idx)
+{
+	return _mm256_loadu_si256((const __m256i *)src + idx);
+}
+
+static force_inline void
+mm256_store_si256(char *dest, unsigned idx, __m256i src)
+{
+	_mm256_store_si256((__m256i *)dest + idx, src);
+}
+
+static force_inline void
+memmove_mov8x64b(char *dest, const char *src, flush64b_fn flush64b)
+{
+	__m256i ymm0 = mm256_loadu_si256(src, 0);
+	__m256i ymm1 = mm256_loadu_si256(src, 1);
+	__m256i ymm2 = mm256_loadu_si256(src, 2);
+	__m256i ymm3 = mm256_loadu_si256(src, 3);
+	__m256i ymm4 = mm256_loadu_si256(src, 4);
+	__m256i ymm5 = mm256_loadu_si256(src, 5);
+	__m256i ymm6 = mm256_loadu_si256(src, 6);
+	__m256i ymm7 = mm256_loadu_si256(src, 7);
+	__m256i ymm8 = mm256_loadu_si256(src, 8);
+	__m256i ymm9 = mm256_loadu_si256(src, 9);
+	__m256i ymm10 = mm256_loadu_si256(src, 10);
+	__m256i ymm11 = mm256_loadu_si256(src, 11);
+	__m256i ymm12 = mm256_loadu_si256(src, 12);
+	__m256i ymm13 = mm256_loadu_si256(src, 13);
+	__m256i ymm14 = mm256_loadu_si256(src, 14);
+	__m256i ymm15 = mm256_loadu_si256(src, 15);
+
+	mm256_store_si256(dest, 0, ymm0);
+	mm256_store_si256(dest, 1, ymm1);
+	mm256_store_si256(dest, 2, ymm2);
+	mm256_store_si256(dest, 3, ymm3);
+	mm256_store_si256(dest, 4, ymm4);
+	mm256_store_si256(dest, 5, ymm5);
+	mm256_store_si256(dest, 6, ymm6);
+	mm256_store_si256(dest, 7, ymm7);
+	mm256_store_si256(dest, 8, ymm8);
+	mm256_store_si256(dest, 9, ymm9);
+	mm256_store_si256(dest, 10, ymm10);
+	mm256_store_si256(dest, 11, ymm11);
+	mm256_store_si256(dest, 12, ymm12);
+	mm256_store_si256(dest, 13, ymm13);
+	mm256_store_si256(dest, 14, ymm14);
+	mm256_store_si256(dest, 15, ymm15);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+	flush64b(dest + 4 * 64);
+	flush64b(dest + 5 * 64);
+	flush64b(dest + 6 * 64);
+	flush64b(dest + 7 * 64);
+}
+
+static force_inline void
+memmove_mov4x64b(char *dest, const char *src, flush64b_fn flush64b)
+{
+	__m256i ymm0 = mm256_loadu_si256(src, 0);
+	__m256i ymm1 = mm256_loadu_si256(src, 1);
+	__m256i ymm2 = mm256_loadu_si256(src, 2);
+	__m256i ymm3 = mm256_loadu_si256(src, 3);
+	__m256i ymm4 = mm256_loadu_si256(src, 4);
+	__m256i ymm5 = mm256_loadu_si256(src, 5);
+	__m256i ymm6 = mm256_loadu_si256(src, 6);
+	__m256i ymm7 = mm256_loadu_si256(src, 7);
+
+	mm256_store_si256(dest, 0, ymm0);
+	mm256_store_si256(dest, 1, ymm1);
+	mm256_store_si256(dest, 2, ymm2);
+	mm256_store_si256(dest, 3, ymm3);
+	mm256_store_si256(dest, 4, ymm4);
+	mm256_store_si256(dest, 5, ymm5);
+	mm256_store_si256(dest, 6, ymm6);
+	mm256_store_si256(dest, 7, ymm7);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+}
+
+static force_inline void
+memmove_mov2x64b(char *dest, const char *src, flush64b_fn flush64b)
+{
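	/*
	 * As in the wider variants above, all loads are issued before any
	 * store, so a copy whose source and destination overlap within this
	 * 128-byte block still reads the original bytes; each destination
	 * cache line is then flushed exactly once via flush64b().
	 */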
+ __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + __m256i ymm2 = mm256_loadu_si256(src, 2); + __m256i ymm3 = mm256_loadu_si256(src, 3); + + mm256_store_si256(dest, 0, ymm0); + mm256_store_si256(dest, 1, ymm1); + mm256_store_si256(dest, 2, ymm2); + mm256_store_si256(dest, 3, ymm3); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); +} + +static force_inline void +memmove_mov1x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m256i ymm0 = mm256_loadu_si256(src, 0); + __m256i ymm1 = mm256_loadu_si256(src, 1); + + mm256_store_si256(dest, 0, ymm0); + mm256_store_si256(dest, 1, ymm1); + + flush64b(dest + 0 * 64); +} + +static force_inline void +memmove_mov_avx_fw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memmove_small_avx(dest, src, cnt, flush); + + dest += cnt; + src += cnt; + len -= cnt; + } + + while (len >= 8 * 64) { + memmove_mov8x64b(dest, src, flush64b); + dest += 8 * 64; + src += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memmove_mov4x64b(dest, src, flush64b); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memmove_mov2x64b(dest, src, flush64b); + dest += 2 * 64; + src += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memmove_mov1x64b(dest, src, flush64b); + + dest += 1 * 64; + src += 1 * 64; + len -= 1 * 64; + } + + if (len) + memmove_small_avx(dest, src, len, flush); +} + +static force_inline void +memmove_mov_avx_bw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + memmove_small_avx(dest, src, cnt, flush); + } + + while (len >= 8 * 64) { + dest -= 8 * 64; + src -= 8 * 64; + len -= 8 * 64; + memmove_mov8x64b(dest, src, flush64b); + } + + if (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_mov4x64b(dest, src, flush64b); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + memmove_mov2x64b(dest, src, flush64b); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + memmove_mov1x64b(dest, src, flush64b); + } + + if (len) + memmove_small_avx(dest - len, src - len, len, flush); +} + +static force_inline void +memmove_mov_avx(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_mov_avx_fw(dest, src, len, flush, flush64b); + else + memmove_mov_avx_bw(dest, src, len, flush, flush64b); + + avx_zeroupper(); +} + +void +memmove_mov_avx_noflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx(dest, src, len, noflush, noflush64b); +} + +void +memmove_mov_avx_empty(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx(dest, src, len, flush_empty_nolog, flush64b_empty); +} + +void +memmove_mov_avx_clflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx(dest, src, len, flush_clflush_nolog, pmem_clflush); +} + +void +memmove_mov_avx_clflushopt(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx(dest, src, len, 
flush_clflushopt_nolog, + pmem_clflushopt); +} + +void +memmove_mov_avx_clwb(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx(dest, src, len, flush_clwb_nolog, pmem_clwb); +} diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx512f.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx512f.c new file mode 100644 index 000000000..b1775283e --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_avx512f.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +#include +#include +#include + +#include "pmem2_arch.h" +#include "avx.h" +#include "flush.h" +#include "memcpy_memset.h" +#include "memcpy_avx512f.h" + +static force_inline __m512i +mm512_loadu_si512(const char *src, unsigned idx) +{ + return _mm512_loadu_si512((const __m512i *)src + idx); +} + +static force_inline void +mm512_store_si512(char *dest, unsigned idx, __m512i src) +{ + _mm512_store_si512((__m512i *)dest + idx, src); +} + +static force_inline void +memmove_mov32x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + __m512i zmm8 = mm512_loadu_si512(src, 8); + __m512i zmm9 = mm512_loadu_si512(src, 9); + __m512i zmm10 = mm512_loadu_si512(src, 10); + __m512i zmm11 = mm512_loadu_si512(src, 11); + __m512i zmm12 = mm512_loadu_si512(src, 12); + __m512i zmm13 = mm512_loadu_si512(src, 13); + __m512i zmm14 = mm512_loadu_si512(src, 14); + __m512i zmm15 = mm512_loadu_si512(src, 15); + __m512i zmm16 = mm512_loadu_si512(src, 16); + __m512i zmm17 = mm512_loadu_si512(src, 17); + __m512i zmm18 = mm512_loadu_si512(src, 18); + __m512i zmm19 = mm512_loadu_si512(src, 19); + __m512i zmm20 = mm512_loadu_si512(src, 20); + __m512i zmm21 = mm512_loadu_si512(src, 21); + __m512i zmm22 = mm512_loadu_si512(src, 22); + __m512i zmm23 = mm512_loadu_si512(src, 23); + __m512i zmm24 = mm512_loadu_si512(src, 24); + __m512i zmm25 = mm512_loadu_si512(src, 25); + __m512i zmm26 = mm512_loadu_si512(src, 26); + __m512i zmm27 = mm512_loadu_si512(src, 27); + __m512i zmm28 = mm512_loadu_si512(src, 28); + __m512i zmm29 = mm512_loadu_si512(src, 29); + __m512i zmm30 = mm512_loadu_si512(src, 30); + __m512i zmm31 = mm512_loadu_si512(src, 31); + + mm512_store_si512(dest, 0, zmm0); + mm512_store_si512(dest, 1, zmm1); + mm512_store_si512(dest, 2, zmm2); + mm512_store_si512(dest, 3, zmm3); + mm512_store_si512(dest, 4, zmm4); + mm512_store_si512(dest, 5, zmm5); + mm512_store_si512(dest, 6, zmm6); + mm512_store_si512(dest, 7, zmm7); + mm512_store_si512(dest, 8, zmm8); + mm512_store_si512(dest, 9, zmm9); + mm512_store_si512(dest, 10, zmm10); + mm512_store_si512(dest, 11, zmm11); + mm512_store_si512(dest, 12, zmm12); + mm512_store_si512(dest, 13, zmm13); + mm512_store_si512(dest, 14, zmm14); + mm512_store_si512(dest, 15, zmm15); + mm512_store_si512(dest, 16, zmm16); + mm512_store_si512(dest, 17, zmm17); + mm512_store_si512(dest, 18, zmm18); + mm512_store_si512(dest, 19, zmm19); + mm512_store_si512(dest, 20, zmm20); + mm512_store_si512(dest, 21, zmm21); + mm512_store_si512(dest, 22, zmm22); + mm512_store_si512(dest, 23, zmm23); + mm512_store_si512(dest, 24, zmm24); + mm512_store_si512(dest, 25, 
zmm25); + mm512_store_si512(dest, 26, zmm26); + mm512_store_si512(dest, 27, zmm27); + mm512_store_si512(dest, 28, zmm28); + mm512_store_si512(dest, 29, zmm29); + mm512_store_si512(dest, 30, zmm30); + mm512_store_si512(dest, 31, zmm31); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); + flush64b(dest + 4 * 64); + flush64b(dest + 5 * 64); + flush64b(dest + 6 * 64); + flush64b(dest + 7 * 64); + flush64b(dest + 8 * 64); + flush64b(dest + 9 * 64); + flush64b(dest + 10 * 64); + flush64b(dest + 11 * 64); + flush64b(dest + 12 * 64); + flush64b(dest + 13 * 64); + flush64b(dest + 14 * 64); + flush64b(dest + 15 * 64); + flush64b(dest + 16 * 64); + flush64b(dest + 17 * 64); + flush64b(dest + 18 * 64); + flush64b(dest + 19 * 64); + flush64b(dest + 20 * 64); + flush64b(dest + 21 * 64); + flush64b(dest + 22 * 64); + flush64b(dest + 23 * 64); + flush64b(dest + 24 * 64); + flush64b(dest + 25 * 64); + flush64b(dest + 26 * 64); + flush64b(dest + 27 * 64); + flush64b(dest + 28 * 64); + flush64b(dest + 29 * 64); + flush64b(dest + 30 * 64); + flush64b(dest + 31 * 64); +} + +static force_inline void +memmove_mov16x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + __m512i zmm8 = mm512_loadu_si512(src, 8); + __m512i zmm9 = mm512_loadu_si512(src, 9); + __m512i zmm10 = mm512_loadu_si512(src, 10); + __m512i zmm11 = mm512_loadu_si512(src, 11); + __m512i zmm12 = mm512_loadu_si512(src, 12); + __m512i zmm13 = mm512_loadu_si512(src, 13); + __m512i zmm14 = mm512_loadu_si512(src, 14); + __m512i zmm15 = mm512_loadu_si512(src, 15); + + mm512_store_si512(dest, 0, zmm0); + mm512_store_si512(dest, 1, zmm1); + mm512_store_si512(dest, 2, zmm2); + mm512_store_si512(dest, 3, zmm3); + mm512_store_si512(dest, 4, zmm4); + mm512_store_si512(dest, 5, zmm5); + mm512_store_si512(dest, 6, zmm6); + mm512_store_si512(dest, 7, zmm7); + mm512_store_si512(dest, 8, zmm8); + mm512_store_si512(dest, 9, zmm9); + mm512_store_si512(dest, 10, zmm10); + mm512_store_si512(dest, 11, zmm11); + mm512_store_si512(dest, 12, zmm12); + mm512_store_si512(dest, 13, zmm13); + mm512_store_si512(dest, 14, zmm14); + mm512_store_si512(dest, 15, zmm15); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); + flush64b(dest + 4 * 64); + flush64b(dest + 5 * 64); + flush64b(dest + 6 * 64); + flush64b(dest + 7 * 64); + flush64b(dest + 8 * 64); + flush64b(dest + 9 * 64); + flush64b(dest + 10 * 64); + flush64b(dest + 11 * 64); + flush64b(dest + 12 * 64); + flush64b(dest + 13 * 64); + flush64b(dest + 14 * 64); + flush64b(dest + 15 * 64); +} + +static force_inline void +memmove_mov8x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + __m512i zmm4 = mm512_loadu_si512(src, 4); + __m512i zmm5 = mm512_loadu_si512(src, 5); + __m512i zmm6 = mm512_loadu_si512(src, 6); + __m512i zmm7 = mm512_loadu_si512(src, 7); + + mm512_store_si512(dest, 0, zmm0); + mm512_store_si512(dest, 1, zmm1); + mm512_store_si512(dest, 2, zmm2); + 
mm512_store_si512(dest, 3, zmm3); + mm512_store_si512(dest, 4, zmm4); + mm512_store_si512(dest, 5, zmm5); + mm512_store_si512(dest, 6, zmm6); + mm512_store_si512(dest, 7, zmm7); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); + flush64b(dest + 4 * 64); + flush64b(dest + 5 * 64); + flush64b(dest + 6 * 64); + flush64b(dest + 7 * 64); +} + +static force_inline void +memmove_mov4x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + __m512i zmm2 = mm512_loadu_si512(src, 2); + __m512i zmm3 = mm512_loadu_si512(src, 3); + + mm512_store_si512(dest, 0, zmm0); + mm512_store_si512(dest, 1, zmm1); + mm512_store_si512(dest, 2, zmm2); + mm512_store_si512(dest, 3, zmm3); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); +} + +static force_inline void +memmove_mov2x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + __m512i zmm1 = mm512_loadu_si512(src, 1); + + mm512_store_si512(dest, 0, zmm0); + mm512_store_si512(dest, 1, zmm1); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); +} + +static force_inline void +memmove_mov1x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m512i zmm0 = mm512_loadu_si512(src, 0); + + mm512_store_si512(dest, 0, zmm0); + + flush64b(dest + 0 * 64); +} + +static force_inline void +memmove_mov_avx512f_fw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memmove_small_avx512f(dest, src, cnt, flush); + + dest += cnt; + src += cnt; + len -= cnt; + } + + while (len >= 32 * 64) { + memmove_mov32x64b(dest, src, flush64b); + dest += 32 * 64; + src += 32 * 64; + len -= 32 * 64; + } + + if (len >= 16 * 64) { + memmove_mov16x64b(dest, src, flush64b); + dest += 16 * 64; + src += 16 * 64; + len -= 16 * 64; + } + + if (len >= 8 * 64) { + memmove_mov8x64b(dest, src, flush64b); + dest += 8 * 64; + src += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memmove_mov4x64b(dest, src, flush64b); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memmove_mov2x64b(dest, src, flush64b); + dest += 2 * 64; + src += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memmove_mov1x64b(dest, src, flush64b); + + dest += 1 * 64; + src += 1 * 64; + len -= 1 * 64; + } + + if (len) + memmove_small_avx512f(dest, src, len, flush); +} + +static force_inline void +memmove_mov_avx512f_bw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + + memmove_small_avx512f(dest, src, cnt, flush); + } + + while (len >= 32 * 64) { + dest -= 32 * 64; + src -= 32 * 64; + len -= 32 * 64; + memmove_mov32x64b(dest, src, flush64b); + } + + if (len >= 16 * 64) { + dest -= 16 * 64; + src -= 16 * 64; + len -= 16 * 64; + memmove_mov16x64b(dest, src, flush64b); + } + + if (len >= 8 * 64) { + dest -= 8 * 64; + src -= 8 * 64; + len -= 8 * 64; + memmove_mov8x64b(dest, src, flush64b); + } + + if (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_mov4x64b(dest, src, flush64b); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + 
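	/*
	 * Backward pass: dest, src and len are decremented before each block
	 * is copied, so chunks are emitted from the top of the range
	 * downward and the not-yet-read low part of an overlapping source
	 * remains intact.
	 */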
memmove_mov2x64b(dest, src, flush64b); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + memmove_mov1x64b(dest, src, flush64b); + } + + if (len) + memmove_small_avx512f(dest - len, src - len, len, flush); +} + +static force_inline void +memmove_mov_avx512f(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_mov_avx512f_fw(dest, src, len, flush, flush64b); + else + memmove_mov_avx512f_bw(dest, src, len, flush, flush64b); + + avx_zeroupper(); +} + +void +memmove_mov_avx512f_noflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx512f(dest, src, len, noflush, noflush64b); +} + +void +memmove_mov_avx512f_empty(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx512f(dest, src, len, flush_empty_nolog, flush64b_empty); +} + +void +memmove_mov_avx512f_clflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx512f(dest, src, len, flush_clflush_nolog, pmem_clflush); +} + +void +memmove_mov_avx512f_clflushopt(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx512f(dest, src, len, flush_clflushopt_nolog, + pmem_clflushopt); +} + +void +memmove_mov_avx512f_clwb(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_avx512f(dest, src, len, flush_clwb_nolog, pmem_clwb); +} diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c new file mode 100644 index 000000000..10c03a002 --- /dev/null +++ b/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2020, Intel Corporation */ + +#include +#include +#include + +#include "pmem2_arch.h" +#include "flush.h" +#include "memcpy_memset.h" +#include "memcpy_sse2.h" +#include "out.h" + +static force_inline __m128i +mm_loadu_si128(const char *src, unsigned idx) +{ + return _mm_loadu_si128((const __m128i *)src + idx); +} + +static force_inline void +mm_store_si128(char *dest, unsigned idx, __m128i src) +{ + _mm_store_si128((__m128i *)dest + idx, src); +} + +static force_inline void +memmove_mov4x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m128i xmm0 = mm_loadu_si128(src, 0); + __m128i xmm1 = mm_loadu_si128(src, 1); + __m128i xmm2 = mm_loadu_si128(src, 2); + __m128i xmm3 = mm_loadu_si128(src, 3); + __m128i xmm4 = mm_loadu_si128(src, 4); + __m128i xmm5 = mm_loadu_si128(src, 5); + __m128i xmm6 = mm_loadu_si128(src, 6); + __m128i xmm7 = mm_loadu_si128(src, 7); + __m128i xmm8 = mm_loadu_si128(src, 8); + __m128i xmm9 = mm_loadu_si128(src, 9); + __m128i xmm10 = mm_loadu_si128(src, 10); + __m128i xmm11 = mm_loadu_si128(src, 11); + __m128i xmm12 = mm_loadu_si128(src, 12); + __m128i xmm13 = mm_loadu_si128(src, 13); + __m128i xmm14 = mm_loadu_si128(src, 14); + __m128i xmm15 = mm_loadu_si128(src, 15); + + mm_store_si128(dest, 0, xmm0); + mm_store_si128(dest, 1, xmm1); + mm_store_si128(dest, 2, xmm2); + mm_store_si128(dest, 3, xmm3); + mm_store_si128(dest, 4, xmm4); + mm_store_si128(dest, 5, xmm5); + mm_store_si128(dest, 6, xmm6); + mm_store_si128(dest, 7, xmm7); + mm_store_si128(dest, 8, xmm8); + mm_store_si128(dest, 9, xmm9); + mm_store_si128(dest, 10, xmm10); + mm_store_si128(dest, 11, 
xmm11); + mm_store_si128(dest, 12, xmm12); + mm_store_si128(dest, 13, xmm13); + mm_store_si128(dest, 14, xmm14); + mm_store_si128(dest, 15, xmm15); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); +} + +static force_inline void +memmove_mov2x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m128i xmm0 = mm_loadu_si128(src, 0); + __m128i xmm1 = mm_loadu_si128(src, 1); + __m128i xmm2 = mm_loadu_si128(src, 2); + __m128i xmm3 = mm_loadu_si128(src, 3); + __m128i xmm4 = mm_loadu_si128(src, 4); + __m128i xmm5 = mm_loadu_si128(src, 5); + __m128i xmm6 = mm_loadu_si128(src, 6); + __m128i xmm7 = mm_loadu_si128(src, 7); + + mm_store_si128(dest, 0, xmm0); + mm_store_si128(dest, 1, xmm1); + mm_store_si128(dest, 2, xmm2); + mm_store_si128(dest, 3, xmm3); + mm_store_si128(dest, 4, xmm4); + mm_store_si128(dest, 5, xmm5); + mm_store_si128(dest, 6, xmm6); + mm_store_si128(dest, 7, xmm7); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); +} + +static force_inline void +memmove_mov1x64b(char *dest, const char *src, flush64b_fn flush64b) +{ + __m128i xmm0 = mm_loadu_si128(src, 0); + __m128i xmm1 = mm_loadu_si128(src, 1); + __m128i xmm2 = mm_loadu_si128(src, 2); + __m128i xmm3 = mm_loadu_si128(src, 3); + + mm_store_si128(dest, 0, xmm0); + mm_store_si128(dest, 1, xmm1); + mm_store_si128(dest, 2, xmm2); + mm_store_si128(dest, 3, xmm3); + + flush64b(dest + 0 * 64); +} + +static force_inline void +memmove_mov_sse_fw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memmove_small_sse2(dest, src, cnt, flush); + + dest += cnt; + src += cnt; + len -= cnt; + } + + while (len >= 4 * 64) { + memmove_mov4x64b(dest, src, flush64b); + dest += 4 * 64; + src += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memmove_mov2x64b(dest, src, flush64b); + dest += 2 * 64; + src += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memmove_mov1x64b(dest, src, flush64b); + + dest += 1 * 64; + src += 1 * 64; + len -= 1 * 64; + } + + if (len) + memmove_small_sse2(dest, src, len, flush); +} + +static force_inline void +memmove_mov_sse_bw(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + dest += len; + src += len; + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + if (cnt > len) + cnt = len; + + dest -= cnt; + src -= cnt; + len -= cnt; + memmove_small_sse2(dest, src, cnt, flush); + } + + while (len >= 4 * 64) { + dest -= 4 * 64; + src -= 4 * 64; + len -= 4 * 64; + memmove_mov4x64b(dest, src, flush64b); + } + + if (len >= 2 * 64) { + dest -= 2 * 64; + src -= 2 * 64; + len -= 2 * 64; + memmove_mov2x64b(dest, src, flush64b); + } + + if (len >= 1 * 64) { + dest -= 1 * 64; + src -= 1 * 64; + len -= 1 * 64; + memmove_mov1x64b(dest, src, flush64b); + } + + if (len) + memmove_small_sse2(dest - len, src - len, len, flush); +} + +static force_inline void +memmove_mov_sse2(char *dest, const char *src, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + if ((uintptr_t)dest - (uintptr_t)src >= len) + memmove_mov_sse_fw(dest, src, len, flush, flush64b); + else + memmove_mov_sse_bw(dest, src, len, flush, flush64b); +} + +void +memmove_mov_sse2_noflush(char *dest, const char *src, size_t len) +{ + LOG(15, "dest %p src %p len %zu", dest, src, len); + + memmove_mov_sse2(dest, src, len, noflush, noflush64b); +} + +void +memmove_mov_sse2_empty(char *dest, const char *src, size_t len) +{ 
+	LOG(15, "dest %p src %p len %zu", dest, src, len);
+
+	memmove_mov_sse2(dest, src, len, flush_empty_nolog, flush64b_empty);
+}
+
+void
+memmove_mov_sse2_clflush(char *dest, const char *src, size_t len)
+{
+	LOG(15, "dest %p src %p len %zu", dest, src, len);
+
+	memmove_mov_sse2(dest, src, len, flush_clflush_nolog, pmem_clflush);
+}
+
+void
+memmove_mov_sse2_clflushopt(char *dest, const char *src, size_t len)
+{
+	LOG(15, "dest %p src %p len %zu", dest, src, len);
+
+	memmove_mov_sse2(dest, src, len, flush_clflushopt_nolog,
+			pmem_clflushopt);
+}
+
+void
+memmove_mov_sse2_clwb(char *dest, const char *src, size_t len)
+{
+	LOG(15, "dest %p src %p len %zu", dest, src, len);
+
+	memmove_mov_sse2(dest, src, len, flush_clwb_nolog, pmem_clwb);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memcpy_memset.h b/src/pmdk/src/libpmem2/x86_64/memcpy_memset.h
new file mode 100644
index 000000000..70235dd39
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memcpy_memset.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+#ifndef MEMCPY_MEMSET_H
+#define MEMCPY_MEMSET_H
+
+#include <stddef.h>
+#include <xmmintrin.h>
+#include "pmem2_arch.h"
+
+typedef void barrier_fn(void);
+typedef void flush64b_fn(const void *);
+
+static inline void
+barrier_after_ntstores(void)
+{
+	/*
+	 * In this configuration pmem_drain does not contain sfence, so we have
+	 * to serialize non-temporal store instructions.
+	 */
+	_mm_sfence();
+}
+
+static inline void
+no_barrier_after_ntstores(void)
+{
+	/*
+	 * In this configuration pmem_drain contains sfence, so we don't have
+	 * to serialize non-temporal store instructions.
+	 */
+}
+
+static inline void
+noflush(const void *addr, size_t len)
+{
+	/* NOP, not even pmemcheck annotation */
+}
+
+static inline void
+noflush64b(const void *addr)
+{
+	/* NOP, not even pmemcheck annotation */
+}
+
+typedef void perf_barrier_fn(void);
+
+static force_inline void
+wc_barrier(void)
+{
+	/*
+	 * Currently, for the SSE2 and AVX code paths, use of non-temporal
+	 * stores on all generations of CPUs must be limited to the number of
+	 * write-combining buffers (12), because otherwise a suboptimal
+	 * eviction policy might impact performance when writing more data
+	 * than the WC buffers can simultaneously hold (12 buffers x 64 B =
+	 * 768 B; see PERF_BARRIER_SIZE below).
+	 *
+	 * The AVX512 code path is not affected, probably because we are
+	 * overwriting whole cache lines.
+	 */
+	_mm_sfence();
+}
+
+static force_inline void
+no_barrier(void)
+{
+}
+
+#ifndef AVX512F_AVAILABLE
+/*
+ * XXX not supported by the MSVC version we currently use.
+ * Enable the pmem2_mem_ext Windows tests once the MSVC version we use
+ * supports AVX512F.
+ */ +#ifdef _MSC_VER +#define AVX512F_AVAILABLE 0 +#else +#define AVX512F_AVAILABLE 1 +#endif +#endif + +#ifndef AVX_AVAILABLE +#define AVX_AVAILABLE 1 +#endif + +#ifndef SSE2_AVAILABLE +#define SSE2_AVAILABLE 1 +#endif + +#if SSE2_AVAILABLE +void memmove_mov_sse2_clflush(char *dest, const char *src, size_t len); +void memmove_mov_sse2_clflushopt(char *dest, const char *src, size_t len); +void memmove_mov_sse2_clwb(char *dest, const char *src, size_t len); +void memmove_mov_sse2_empty(char *dest, const char *src, size_t len); +void memmove_mov_sse2_noflush(char *dest, const char *src, size_t len); + +void memmove_movnt_sse2_clflush_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_clflushopt_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_clwb_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_empty_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_noflush_nobarrier(char *dest, const char *src, + size_t len); + +void memmove_movnt_sse2_clflush_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_clflushopt_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_clwb_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_empty_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_sse2_noflush_wcbarrier(char *dest, const char *src, + size_t len); + +void memset_mov_sse2_clflush(char *dest, int c, size_t len); +void memset_mov_sse2_clflushopt(char *dest, int c, size_t len); +void memset_mov_sse2_clwb(char *dest, int c, size_t len); +void memset_mov_sse2_empty(char *dest, int c, size_t len); +void memset_mov_sse2_noflush(char *dest, int c, size_t len); + +void memset_movnt_sse2_clflush_nobarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_clflushopt_nobarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_clwb_nobarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_empty_nobarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_noflush_nobarrier(char *dest, int c, size_t len); + +void memset_movnt_sse2_clflush_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_clflushopt_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_clwb_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_empty_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_sse2_noflush_wcbarrier(char *dest, int c, size_t len); +#endif + +#if AVX_AVAILABLE +void memmove_mov_avx_clflush(char *dest, const char *src, size_t len); +void memmove_mov_avx_clflushopt(char *dest, const char *src, size_t len); +void memmove_mov_avx_clwb(char *dest, const char *src, size_t len); +void memmove_mov_avx_empty(char *dest, const char *src, size_t len); +void memmove_mov_avx_noflush(char *dest, const char *src, size_t len); + +void memmove_movnt_avx_clflush_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_clflushopt_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_clwb_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_empty_nobarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_noflush_nobarrier(char *dest, const char *src, + size_t len); + +void memmove_movnt_avx_clflush_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_clflushopt_wcbarrier(char *dest, const char *src, + size_t len); +void 
memmove_movnt_avx_clwb_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_empty_wcbarrier(char *dest, const char *src, + size_t len); +void memmove_movnt_avx_noflush_wcbarrier(char *dest, const char *src, + size_t len); + +void memset_mov_avx_clflush(char *dest, int c, size_t len); +void memset_mov_avx_clflushopt(char *dest, int c, size_t len); +void memset_mov_avx_clwb(char *dest, int c, size_t len); +void memset_mov_avx_empty(char *dest, int c, size_t len); +void memset_mov_avx_noflush(char *dest, int c, size_t len); + +void memset_movnt_avx_clflush_nobarrier(char *dest, int c, size_t len); +void memset_movnt_avx_clflushopt_nobarrier(char *dest, int c, size_t len); +void memset_movnt_avx_clwb_nobarrier(char *dest, int c, size_t len); +void memset_movnt_avx_empty_nobarrier(char *dest, int c, size_t len); +void memset_movnt_avx_noflush_nobarrier(char *dest, int c, size_t len); + +void memset_movnt_avx_clflush_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_avx_clflushopt_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_avx_clwb_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_avx_empty_wcbarrier(char *dest, int c, size_t len); +void memset_movnt_avx_noflush_wcbarrier(char *dest, int c, size_t len); +#endif + +#if AVX512F_AVAILABLE +void memmove_mov_avx512f_clflush(char *dest, const char *src, size_t len); +void memmove_mov_avx512f_clflushopt(char *dest, const char *src, size_t len); +void memmove_mov_avx512f_clwb(char *dest, const char *src, size_t len); +void memmove_mov_avx512f_empty(char *dest, const char *src, size_t len); +void memmove_mov_avx512f_noflush(char *dest, const char *src, size_t len); +void memmove_movnt_avx512f_clflush(char *dest, const char *src, size_t len); +void memmove_movnt_avx512f_clflushopt(char *dest, const char *src, size_t len); +void memmove_movnt_avx512f_clwb(char *dest, const char *src, size_t len); +void memmove_movnt_avx512f_empty(char *dest, const char *src, size_t len); +void memmove_movnt_avx512f_noflush(char *dest, const char *src, size_t len); +void memset_mov_avx512f_clflush(char *dest, int c, size_t len); +void memset_mov_avx512f_clflushopt(char *dest, int c, size_t len); +void memset_mov_avx512f_clwb(char *dest, int c, size_t len); +void memset_mov_avx512f_empty(char *dest, int c, size_t len); +void memset_mov_avx512f_noflush(char *dest, int c, size_t len); +void memset_movnt_avx512f_clflush(char *dest, int c, size_t len); +void memset_movnt_avx512f_clflushopt(char *dest, int c, size_t len); +void memset_movnt_avx512f_clwb(char *dest, int c, size_t len); +void memset_movnt_avx512f_empty(char *dest, int c, size_t len); +void memset_movnt_avx512f_noflush(char *dest, int c, size_t len); +#endif + +extern size_t Movnt_threshold; + +/* + * SSE2/AVX1 only: + * + * How much data WC buffers can hold at the same time, after which sfence + * is needed to flush them. + * + * For some reason sfence affects performance of reading from DRAM, so we have + * to prefetch the source data earlier. + */ +#define PERF_BARRIER_SIZE (12 * CACHELINE_SIZE /* 768 */) + +/* + * How much to prefetch initially. + * Cannot be bigger than the size of L1 (32kB) - PERF_BARRIER_SIZE. 
+ */
+#define INI_PREFETCH_SIZE (64 * CACHELINE_SIZE /* 4096 */)
+
+static force_inline void
+prefetch(const char *addr)
+{
+	_mm_prefetch(addr, _MM_HINT_T0);
+}
+
+static force_inline void
+prefetch_ini_fw(const char *src, size_t len)
+{
+	size_t pref = MIN(len, INI_PREFETCH_SIZE);
+	for (size_t i = 0; i < pref; i += CACHELINE_SIZE)
+		prefetch(src + i);
+}
+
+static force_inline void
+prefetch_ini_bw(const char *src, size_t len)
+{
+	size_t pref = MIN(len, INI_PREFETCH_SIZE);
+	for (size_t i = 0; i < pref; i += CACHELINE_SIZE)
+		prefetch(src - i);
+}
+
+static force_inline void
+prefetch_next_fw(const char *src, const char *srcend)
+{
+	const char *begin = src + INI_PREFETCH_SIZE;
+	const char *end = begin + PERF_BARRIER_SIZE;
+	if (end > srcend)
+		end = srcend;
+
+	for (const char *addr = begin; addr < end; addr += CACHELINE_SIZE)
+		prefetch(addr);
+}
+
+static force_inline void
+prefetch_next_bw(const char *src, const char *srcbegin)
+{
+	const char *begin = src - INI_PREFETCH_SIZE;
+	const char *end = begin - PERF_BARRIER_SIZE;
+	if (end < srcbegin)
+		end = srcbegin;
+
+	for (const char *addr = begin; addr >= end; addr -= CACHELINE_SIZE)
+		prefetch(addr);
+}
+
+#endif
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_avx.h b/src/pmdk/src/libpmem2/x86_64/memset/memset_avx.h
new file mode 100644
index 000000000..be8447670
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_avx.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+#ifndef PMEM2_MEMSET_AVX_H
+#define PMEM2_MEMSET_AVX_H
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "avx.h"
+#include "out.h"
+
+static force_inline void
+memset_small_avx_noflush(char *dest, __m256i ymm, size_t len)
+{
+	ASSERT(len <= 64);
+
+	if (len <= 8)
+		goto le8;
+	if (len <= 32)
+		goto le32;
+
+	/* 33..64 */
+	_mm256_storeu_si256((__m256i *)dest, ymm);
+	_mm256_storeu_si256((__m256i *)(dest + len - 32), ymm);
+	return;
+
+le32:
+	if (len > 16) {
+		/* 17..32 */
+		__m128i xmm = m256_get16b(ymm);
+
+		_mm_storeu_si128((__m128i *)dest, xmm);
+		_mm_storeu_si128((__m128i *)(dest + len - 16), xmm);
+		return;
+	}
+
+	/* 9..16 */
+	uint64_t d8 = m256_get8b(ymm);
+
+	*(ua_uint64_t *)dest = d8;
+	*(ua_uint64_t *)(dest + len - 8) = d8;
+	return;
+
+le8:
+	if (len <= 2)
+		goto le2;
+
+	if (len > 4) {
+		/* 5..8 */
+		uint32_t d = m256_get4b(ymm);
+
+		*(ua_uint32_t *)dest = d;
+		*(ua_uint32_t *)(dest + len - 4) = d;
+		return;
+	}
+
+	/* 3..4 */
+	uint16_t d2 = m256_get2b(ymm);
+
+	*(ua_uint16_t *)dest = d2;
+	*(ua_uint16_t *)(dest + len - 2) = d2;
+	return;
+
+le2:
+	if (len == 2) {
+		uint16_t d2 = m256_get2b(ymm);
+
+		*(ua_uint16_t *)dest = d2;
+		return;
+	}
+
+	*(uint8_t *)dest = (uint8_t)m256_get2b(ymm);
+}
+
+static force_inline void
+memset_small_avx(char *dest, __m256i ymm, size_t len, flush_fn flush)
+{
+	/*
+	 * pmemcheck complains about "overwritten stores before they were made
+	 * persistent" for overlapping stores (last instruction in each code
+	 * path) in the optimized version.
+	 * libc's memset also does that, so we can't use it here.
+	 */
+	if (On_pmemcheck) {
+		memset_nodrain_generic(dest, (uint8_t)m256_get2b(ymm),
+				len, PMEM2_F_MEM_NOFLUSH, NULL);
+	} else {
+		memset_small_avx_noflush(dest, ymm, len);
+	}
+
+	flush(dest, len);
+}
+
+#endif
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_avx512f.h b/src/pmdk/src/libpmem2/x86_64/memset/memset_avx512f.h
new file mode 100644
index 000000000..a0f6cf97b
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_avx512f.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+#ifndef PMEM2_MEMSET_AVX512F_H
+#define PMEM2_MEMSET_AVX512F_H
+
+#include <immintrin.h>
+
+#include "memset_avx.h"
+
+static force_inline void
+memset_small_avx512f(char *dest, __m256i ymm, size_t len, flush_fn flush)
+{
+	/* We can't do better than AVX here. */
+	memset_small_avx(dest, ymm, len, flush);
+}
+
+#endif
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx.c
new file mode 100644
index 000000000..4a4d5f6a2
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "avx.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_avx.h"
+#include "out.h"
+#include "valgrind_internal.h"
+
+static force_inline void
+mm256_stream_si256(char *dest, unsigned idx, __m256i src)
+{
+	_mm256_stream_si256((__m256i *)dest + idx, src);
+	barrier();
+}
+
+static force_inline void
+memset_movnt8x64b(char *dest, __m256i ymm)
+{
+	mm256_stream_si256(dest, 0, ymm);
+	mm256_stream_si256(dest, 1, ymm);
+	mm256_stream_si256(dest, 2, ymm);
+	mm256_stream_si256(dest, 3, ymm);
+	mm256_stream_si256(dest, 4, ymm);
+	mm256_stream_si256(dest, 5, ymm);
+	mm256_stream_si256(dest, 6, ymm);
+	mm256_stream_si256(dest, 7, ymm);
+	mm256_stream_si256(dest, 8, ymm);
+	mm256_stream_si256(dest, 9, ymm);
+	mm256_stream_si256(dest, 10, ymm);
+	mm256_stream_si256(dest, 11, ymm);
+	mm256_stream_si256(dest, 12, ymm);
+	mm256_stream_si256(dest, 13, ymm);
+	mm256_stream_si256(dest, 14, ymm);
+	mm256_stream_si256(dest, 15, ymm);
+}
+
+static force_inline void
+memset_movnt4x64b(char *dest, __m256i ymm)
+{
+	mm256_stream_si256(dest, 0, ymm);
+	mm256_stream_si256(dest, 1, ymm);
+	mm256_stream_si256(dest, 2, ymm);
+	mm256_stream_si256(dest, 3, ymm);
+	mm256_stream_si256(dest, 4, ymm);
+	mm256_stream_si256(dest, 5, ymm);
+	mm256_stream_si256(dest, 6, ymm);
+	mm256_stream_si256(dest, 7, ymm);
+}
+
+static force_inline void
+memset_movnt2x64b(char *dest, __m256i ymm)
+{
+	mm256_stream_si256(dest, 0, ymm);
+	mm256_stream_si256(dest, 1, ymm);
+	mm256_stream_si256(dest, 2, ymm);
+	mm256_stream_si256(dest, 3, ymm);
+}
+
+static force_inline void
+memset_movnt1x64b(char *dest, __m256i ymm)
+{
+	mm256_stream_si256(dest, 0, ymm);
+	mm256_stream_si256(dest, 1, ymm);
+}
+
+static force_inline void
+memset_movnt1x32b(char *dest, __m256i ymm)
+{
+	mm256_stream_si256(dest, 0, ymm);
+}
+
+static force_inline void
+memset_movnt1x16b(char *dest, __m256i ymm)
+{
+	__m128i xmm0 = m256_get16b(ymm);
+
+	_mm_stream_si128((__m128i *)dest, xmm0);
+}
+
+static force_inline void
+memset_movnt1x8b(char *dest, __m256i ymm)
+{
+	uint64_t x = m256_get8b(ymm);
+
+	_mm_stream_si64((long long *)dest, (long long)x);
+}
+
+static force_inline void
+memset_movnt1x4b(char *dest, __m256i ymm)
+{
+	uint32_t x = m256_get4b(ymm);
+
+	_mm_stream_si32((int *)dest, (int)x);
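+	/* note: 4 B is the smallest chunk stored non-temporally here;
+	 * smaller or odd-sized tails take the regular store + flush
+	 * path in memset_movnt_avx() below */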
+} + +static force_inline void +memset_movnt_avx(char *dest, int c, size_t len, flush_fn flush, + barrier_fn barrier, perf_barrier_fn perf_barrier) +{ + char *orig_dest = dest; + size_t orig_len = len; + + __m256i ymm = _mm256_set1_epi8((char)c); + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memset_small_avx(dest, ymm, cnt, flush); + + dest += cnt; + len -= cnt; + } + + while (len >= PERF_BARRIER_SIZE) { + memset_movnt8x64b(dest, ymm); + dest += 8 * 64; + len -= 8 * 64; + + memset_movnt4x64b(dest, ymm); + dest += 4 * 64; + len -= 4 * 64; + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (8 + 4) * 64); + + if (len) + perf_barrier(); + } + + if (len >= 8 * 64) { + memset_movnt8x64b(dest, ymm); + dest += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memset_movnt4x64b(dest, ymm); + dest += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memset_movnt2x64b(dest, ymm); + dest += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memset_movnt1x64b(dest, ymm); + + dest += 1 * 64; + len -= 1 * 64; + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. */ + if (util_is_pow2(len)) { + if (len == 32) + memset_movnt1x32b(dest, ymm); + else if (len == 16) + memset_movnt1x16b(dest, ymm); + else if (len == 8) + memset_movnt1x8b(dest, ymm); + else if (len == 4) + memset_movnt1x4b(dest, ymm); + else + goto nonnt; + + goto end; + } + +nonnt: + memset_small_avx(dest, ymm, len, flush); +end: + avx_zeroupper(); + + barrier(); + + VALGRIND_DO_FLUSH(orig_dest, orig_len); +} + +/* variants without perf_barrier */ + +void +memset_movnt_avx_noflush_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, noflush, barrier_after_ntstores, + no_barrier); +} + +void +memset_movnt_avx_empty_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_empty_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_avx_clflush_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_clflush_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_avx_clflushopt_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_clflushopt_nolog, + no_barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_avx_clwb_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_clwb_nolog, + no_barrier_after_ntstores, no_barrier); +} + +/* variants with perf_barrier */ + +void +memset_movnt_avx_noflush_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, noflush, barrier_after_ntstores, + wc_barrier); +} + +void +memset_movnt_avx_empty_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_empty_nolog, + barrier_after_ntstores, wc_barrier); +} + +void +memset_movnt_avx_clflush_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_avx(dest, c, len, flush_clflush_nolog, + barrier_after_ntstores, wc_barrier); +} + +void +memset_movnt_avx_clflushopt_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c 
%d len %zu", dest, c, len);
+
+	memset_movnt_avx(dest, c, len, flush_clflushopt_nolog,
+			no_barrier_after_ntstores, wc_barrier);
+}
+
+void
+memset_movnt_avx_clwb_wcbarrier(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx(dest, c, len, flush_clwb_nolog,
+			no_barrier_after_ntstores, wc_barrier);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx512f.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx512f.c
new file mode 100644
index 000000000..b29402a93
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_avx512f.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "avx.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_avx512f.h"
+#include "out.h"
+#include "util.h"
+#include "valgrind_internal.h"
+
+static force_inline void
+mm512_stream_si512(char *dest, unsigned idx, __m512i src)
+{
+	_mm512_stream_si512((__m512i *)dest + idx, src);
+	barrier();
+}
+
+static force_inline void
+memset_movnt32x64b(char *dest, __m512i zmm)
+{
+	mm512_stream_si512(dest, 0, zmm);
+	mm512_stream_si512(dest, 1, zmm);
+	mm512_stream_si512(dest, 2, zmm);
+	mm512_stream_si512(dest, 3, zmm);
+	mm512_stream_si512(dest, 4, zmm);
+	mm512_stream_si512(dest, 5, zmm);
+	mm512_stream_si512(dest, 6, zmm);
+	mm512_stream_si512(dest, 7, zmm);
+	mm512_stream_si512(dest, 8, zmm);
+	mm512_stream_si512(dest, 9, zmm);
+	mm512_stream_si512(dest, 10, zmm);
+	mm512_stream_si512(dest, 11, zmm);
+	mm512_stream_si512(dest, 12, zmm);
+	mm512_stream_si512(dest, 13, zmm);
+	mm512_stream_si512(dest, 14, zmm);
+	mm512_stream_si512(dest, 15, zmm);
+	mm512_stream_si512(dest, 16, zmm);
+	mm512_stream_si512(dest, 17, zmm);
+	mm512_stream_si512(dest, 18, zmm);
+	mm512_stream_si512(dest, 19, zmm);
+	mm512_stream_si512(dest, 20, zmm);
+	mm512_stream_si512(dest, 21, zmm);
+	mm512_stream_si512(dest, 22, zmm);
+	mm512_stream_si512(dest, 23, zmm);
+	mm512_stream_si512(dest, 24, zmm);
+	mm512_stream_si512(dest, 25, zmm);
+	mm512_stream_si512(dest, 26, zmm);
+	mm512_stream_si512(dest, 27, zmm);
+	mm512_stream_si512(dest, 28, zmm);
+	mm512_stream_si512(dest, 29, zmm);
+	mm512_stream_si512(dest, 30, zmm);
+	mm512_stream_si512(dest, 31, zmm);
+}
+
+static force_inline void
+memset_movnt16x64b(char *dest, __m512i zmm)
+{
+	mm512_stream_si512(dest, 0, zmm);
+	mm512_stream_si512(dest, 1, zmm);
+	mm512_stream_si512(dest, 2, zmm);
+	mm512_stream_si512(dest, 3, zmm);
+	mm512_stream_si512(dest, 4, zmm);
+	mm512_stream_si512(dest, 5, zmm);
+	mm512_stream_si512(dest, 6, zmm);
+	mm512_stream_si512(dest, 7, zmm);
+	mm512_stream_si512(dest, 8, zmm);
+	mm512_stream_si512(dest, 9, zmm);
+	mm512_stream_si512(dest, 10, zmm);
+	mm512_stream_si512(dest, 11, zmm);
+	mm512_stream_si512(dest, 12, zmm);
+	mm512_stream_si512(dest, 13, zmm);
+	mm512_stream_si512(dest, 14, zmm);
+	mm512_stream_si512(dest, 15, zmm);
+}
+
+static force_inline void
+memset_movnt8x64b(char *dest, __m512i zmm)
+{
+	mm512_stream_si512(dest, 0, zmm);
+	mm512_stream_si512(dest, 1, zmm);
+	mm512_stream_si512(dest, 2, zmm);
+	mm512_stream_si512(dest, 3, zmm);
+	mm512_stream_si512(dest, 4, zmm);
+	mm512_stream_si512(dest, 5, zmm);
+	mm512_stream_si512(dest, 6, zmm);
+	mm512_stream_si512(dest, 7, zmm);
+}
+
+static force_inline void
+memset_movnt4x64b(char *dest, __m512i zmm)
+{
+	mm512_stream_si512(dest, 0, zmm);
+	mm512_stream_si512(dest, 1, zmm);
+	mm512_stream_si512(dest, 2, zmm);
+ mm512_stream_si512(dest, 3, zmm); +} + +static force_inline void +memset_movnt2x64b(char *dest, __m512i zmm) +{ + mm512_stream_si512(dest, 0, zmm); + mm512_stream_si512(dest, 1, zmm); +} + +static force_inline void +memset_movnt1x64b(char *dest, __m512i zmm) +{ + mm512_stream_si512(dest, 0, zmm); +} + +static force_inline void +memset_movnt1x32b(char *dest, __m256i ymm) +{ + _mm256_stream_si256((__m256i *)dest, ymm); +} + +static force_inline void +memset_movnt1x16b(char *dest, __m256i ymm) +{ + __m128i xmm = _mm256_extracti128_si256(ymm, 0); + + _mm_stream_si128((__m128i *)dest, xmm); +} + +static force_inline void +memset_movnt1x8b(char *dest, __m256i ymm) +{ + uint64_t x = m256_get8b(ymm); + + _mm_stream_si64((long long *)dest, (long long)x); +} + +static force_inline void +memset_movnt1x4b(char *dest, __m256i ymm) +{ + uint32_t x = m256_get4b(ymm); + + _mm_stream_si32((int *)dest, (int)x); +} + +static force_inline void +memset_movnt_avx512f(char *dest, int c, size_t len, flush_fn flush, + barrier_fn barrier) +{ + char *orig_dest = dest; + size_t orig_len = len; + + __m512i zmm = _mm512_set1_epi8((char)c); + /* + * Can't use _mm512_extracti64x4_epi64, because some versions of gcc + * crash. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82887 + */ + __m256i ymm = _mm256_set1_epi8((char)c); + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memset_small_avx512f(dest, ymm, cnt, flush); + + dest += cnt; + len -= cnt; + } + + while (len >= 32 * 64) { + memset_movnt32x64b(dest, zmm); + dest += 32 * 64; + len -= 32 * 64; + } + + if (len >= 16 * 64) { + memset_movnt16x64b(dest, zmm); + dest += 16 * 64; + len -= 16 * 64; + } + + if (len >= 8 * 64) { + memset_movnt8x64b(dest, zmm); + dest += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memset_movnt4x64b(dest, zmm); + dest += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memset_movnt2x64b(dest, zmm); + dest += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memset_movnt1x64b(dest, zmm); + + dest += 1 * 64; + len -= 1 * 64; + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. 
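+	 * (a 48 B tail, for example, would need two nt stores into the
+	 * same cache line, so it falls through to regular stores + flush)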
 */
+	if (util_is_pow2(len)) {
+		if (len == 32)
+			memset_movnt1x32b(dest, ymm);
+		else if (len == 16)
+			memset_movnt1x16b(dest, ymm);
+		else if (len == 8)
+			memset_movnt1x8b(dest, ymm);
+		else if (len == 4)
+			memset_movnt1x4b(dest, ymm);
+		else
+			goto nonnt;
+
+		goto end;
+	}
+
+nonnt:
+	memset_small_avx512f(dest, ymm, len, flush);
+end:
+	avx_zeroupper();
+
+	barrier();
+
+	VALGRIND_DO_FLUSH(orig_dest, orig_len);
+}
+
+void
+memset_movnt_avx512f_noflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx512f(dest, c, len, noflush, barrier_after_ntstores);
+}
+
+void
+memset_movnt_avx512f_empty(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx512f(dest, c, len, flush_empty_nolog,
+			barrier_after_ntstores);
+}
+
+void
+memset_movnt_avx512f_clflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx512f(dest, c, len, flush_clflush_nolog,
+			barrier_after_ntstores);
+}
+
+void
+memset_movnt_avx512f_clflushopt(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx512f(dest, c, len, flush_clflushopt_nolog,
+			no_barrier_after_ntstores);
+}
+
+void
+memset_movnt_avx512f_clwb(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_avx512f(dest, c, len, flush_clwb_nolog,
+			no_barrier_after_ntstores);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_sse2.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_sse2.c
new file mode 100644
index 000000000..5590a65f8
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_nt_sse2.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_sse2.h"
+#include "out.h"
+#include "valgrind_internal.h"
+
+static force_inline void
+mm_stream_si128(char *dest, unsigned idx, __m128i src)
+{
+	_mm_stream_si128((__m128i *)dest + idx, src);
+	barrier();
+}
+
+static force_inline void
+memset_movnt4x64b(char *dest, __m128i xmm)
+{
+	mm_stream_si128(dest, 0, xmm);
+	mm_stream_si128(dest, 1, xmm);
+	mm_stream_si128(dest, 2, xmm);
+	mm_stream_si128(dest, 3, xmm);
+	mm_stream_si128(dest, 4, xmm);
+	mm_stream_si128(dest, 5, xmm);
+	mm_stream_si128(dest, 6, xmm);
+	mm_stream_si128(dest, 7, xmm);
+	mm_stream_si128(dest, 8, xmm);
+	mm_stream_si128(dest, 9, xmm);
+	mm_stream_si128(dest, 10, xmm);
+	mm_stream_si128(dest, 11, xmm);
+	mm_stream_si128(dest, 12, xmm);
+	mm_stream_si128(dest, 13, xmm);
+	mm_stream_si128(dest, 14, xmm);
+	mm_stream_si128(dest, 15, xmm);
+}
+
+static force_inline void
+memset_movnt2x64b(char *dest, __m128i xmm)
+{
+	mm_stream_si128(dest, 0, xmm);
+	mm_stream_si128(dest, 1, xmm);
+	mm_stream_si128(dest, 2, xmm);
+	mm_stream_si128(dest, 3, xmm);
+	mm_stream_si128(dest, 4, xmm);
+	mm_stream_si128(dest, 5, xmm);
+	mm_stream_si128(dest, 6, xmm);
+	mm_stream_si128(dest, 7, xmm);
+}
+
+static force_inline void
+memset_movnt1x64b(char *dest, __m128i xmm)
+{
+	mm_stream_si128(dest, 0, xmm);
+	mm_stream_si128(dest, 1, xmm);
+	mm_stream_si128(dest, 2, xmm);
+	mm_stream_si128(dest, 3, xmm);
+}
+
+static force_inline void
+memset_movnt1x32b(char *dest, __m128i xmm)
+{
+	mm_stream_si128(dest, 0, xmm);
+	mm_stream_si128(dest, 1, xmm);
+}
+
+static force_inline void
+memset_movnt1x16b(char *dest, __m128i xmm)
+{
+	
_mm_stream_si128((__m128i *)dest, xmm); +} + +static force_inline void +memset_movnt1x8b(char *dest, __m128i xmm) +{ + uint64_t x = (uint64_t)_mm_cvtsi128_si64(xmm); + + _mm_stream_si64((long long *)dest, (long long)x); +} + +static force_inline void +memset_movnt1x4b(char *dest, __m128i xmm) +{ + uint32_t x = (uint32_t)_mm_cvtsi128_si32(xmm); + + _mm_stream_si32((int *)dest, (int)x); +} + +static force_inline void +memset_movnt_sse2(char *dest, int c, size_t len, flush_fn flush, + barrier_fn barrier, perf_barrier_fn perf_barrier) +{ + char *orig_dest = dest; + size_t orig_len = len; + + __m128i xmm = _mm_set1_epi8((char)c); + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memset_small_sse2(dest, xmm, cnt, flush); + + dest += cnt; + len -= cnt; + } + + while (len >= PERF_BARRIER_SIZE) { + memset_movnt4x64b(dest, xmm); + dest += 4 * 64; + len -= 4 * 64; + + memset_movnt4x64b(dest, xmm); + dest += 4 * 64; + len -= 4 * 64; + + memset_movnt4x64b(dest, xmm); + dest += 4 * 64; + len -= 4 * 64; + + COMPILE_ERROR_ON(PERF_BARRIER_SIZE != (4 + 4 + 4) * 64); + + if (len) + perf_barrier(); + } + + while (len >= 4 * 64) { + memset_movnt4x64b(dest, xmm); + dest += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memset_movnt2x64b(dest, xmm); + dest += 2 * 64; + len -= 2 * 64; + } + + if (len >= 1 * 64) { + memset_movnt1x64b(dest, xmm); + + dest += 1 * 64; + len -= 1 * 64; + } + + if (len == 0) + goto end; + + /* There's no point in using more than 1 nt store for 1 cache line. */ + if (util_is_pow2(len)) { + if (len == 32) + memset_movnt1x32b(dest, xmm); + else if (len == 16) + memset_movnt1x16b(dest, xmm); + else if (len == 8) + memset_movnt1x8b(dest, xmm); + else if (len == 4) + memset_movnt1x4b(dest, xmm); + else + goto nonnt; + + goto end; + } + +nonnt: + memset_small_sse2(dest, xmm, len, flush); +end: + barrier(); + + VALGRIND_DO_FLUSH(orig_dest, orig_len); +} + +/* variants without perf_barrier */ + +void +memset_movnt_sse2_noflush_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, noflush, barrier_after_ntstores, + no_barrier); +} + +void +memset_movnt_sse2_empty_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, flush_empty_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_sse2_clflush_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, flush_clflush_nolog, + barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_sse2_clflushopt_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, flush_clflushopt_nolog, + no_barrier_after_ntstores, no_barrier); +} + +void +memset_movnt_sse2_clwb_nobarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, flush_clwb_nolog, + no_barrier_after_ntstores, no_barrier); +} + +/* variants with perf_barrier */ + +void +memset_movnt_sse2_noflush_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, noflush, barrier_after_ntstores, + wc_barrier); +} + +void +memset_movnt_sse2_empty_wcbarrier(char *dest, int c, size_t len) +{ + LOG(15, "dest %p c %d len %zu", dest, c, len); + + memset_movnt_sse2(dest, c, len, flush_empty_nolog, + 
barrier_after_ntstores, wc_barrier);
+}
+
+void
+memset_movnt_sse2_clflush_wcbarrier(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_sse2(dest, c, len, flush_clflush_nolog,
+			barrier_after_ntstores, wc_barrier);
+}
+
+void
+memset_movnt_sse2_clflushopt_wcbarrier(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_sse2(dest, c, len, flush_clflushopt_nolog,
+			no_barrier_after_ntstores, wc_barrier);
+}
+
+void
+memset_movnt_sse2_clwb_wcbarrier(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_movnt_sse2(dest, c, len, flush_clwb_nolog,
+			no_barrier_after_ntstores, wc_barrier);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_sse2.h b/src/pmdk/src/libpmem2/x86_64/memset/memset_sse2.h
new file mode 100644
index 000000000..6d9b22e43
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_sse2.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2017-2020, Intel Corporation */
+
+#ifndef PMEM2_MEMSET_SSE2_H
+#define PMEM2_MEMSET_SSE2_H
+
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "out.h"
+
+static force_inline void
+memset_small_sse2_noflush(char *dest, __m128i xmm, size_t len)
+{
+	ASSERT(len <= 64);
+
+	if (len <= 8)
+		goto le8;
+	if (len <= 32)
+		goto le32;
+
+	if (len > 48) {
+		/* 49..64 */
+		_mm_storeu_si128((__m128i *)(dest + 0), xmm);
+		_mm_storeu_si128((__m128i *)(dest + 16), xmm);
+		_mm_storeu_si128((__m128i *)(dest + 32), xmm);
+		_mm_storeu_si128((__m128i *)(dest + len - 16), xmm);
+		return;
+	}
+
+	/* 33..48 */
+	_mm_storeu_si128((__m128i *)(dest + 0), xmm);
+	_mm_storeu_si128((__m128i *)(dest + 16), xmm);
+	_mm_storeu_si128((__m128i *)(dest + len - 16), xmm);
+	return;
+
+le32:
+	if (len > 16) {
+		/* 17..32 */
+		_mm_storeu_si128((__m128i *)(dest + 0), xmm);
+		_mm_storeu_si128((__m128i *)(dest + len - 16), xmm);
+		return;
+	}
+
+	/* 9..16 */
+	uint64_t d8 = (uint64_t)_mm_cvtsi128_si64(xmm);
+
+	*(ua_uint64_t *)dest = d8;
+	*(ua_uint64_t *)(dest + len - 8) = d8;
+	return;
+
+le8:
+	if (len <= 2)
+		goto le2;
+
+	if (len > 4) {
+		/* 5..8 */
+		uint32_t d4 = (uint32_t)_mm_cvtsi128_si32(xmm);
+
+		*(ua_uint32_t *)dest = d4;
+		*(ua_uint32_t *)(dest + len - 4) = d4;
+		return;
+	}
+
+	/* 3..4 */
+	uint16_t d2 = (uint16_t)(uint32_t)_mm_cvtsi128_si32(xmm);
+
+	*(ua_uint16_t *)dest = d2;
+	*(ua_uint16_t *)(dest + len - 2) = d2;
+	return;
+
+le2:
+	if (len == 2) {
+		uint16_t d2 = (uint16_t)(uint32_t)_mm_cvtsi128_si32(xmm);
+
+		*(ua_uint16_t *)dest = d2;
+		return;
+	}
+
+	*(uint8_t *)dest = (uint8_t)_mm_cvtsi128_si32(xmm);
+}
+
+static force_inline void
+memset_small_sse2(char *dest, __m128i xmm, size_t len, flush_fn flush)
+{
+	/*
+	 * pmemcheck complains about "overwritten stores before they were made
+	 * persistent" for overlapping stores (last instruction in each code
+	 * path) in the optimized version.
+	 * libc's memset also does that, so we can't use it here.
+	 */
+	if (On_pmemcheck) {
+		memset_nodrain_generic(dest, (uint8_t)_mm_cvtsi128_si32(xmm),
+				len, PMEM2_F_MEM_NOFLUSH, NULL);
+	} else {
+		memset_small_sse2_noflush(dest, xmm, len);
+	}
+
+	flush(dest, len);
+}
+
+#endif
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx.c
new file mode 100644
index 000000000..4c989d867
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "avx.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_avx.h"
+
+static force_inline void
+mm256_store_si256(char *dest, unsigned idx, __m256i src)
+{
+	_mm256_store_si256((__m256i *)dest + idx, src);
+}
+
+static force_inline void
+memset_mov8x64b(char *dest, __m256i ymm, flush64b_fn flush64b)
+{
+	mm256_store_si256(dest, 0, ymm);
+	mm256_store_si256(dest, 1, ymm);
+	mm256_store_si256(dest, 2, ymm);
+	mm256_store_si256(dest, 3, ymm);
+	mm256_store_si256(dest, 4, ymm);
+	mm256_store_si256(dest, 5, ymm);
+	mm256_store_si256(dest, 6, ymm);
+	mm256_store_si256(dest, 7, ymm);
+	mm256_store_si256(dest, 8, ymm);
+	mm256_store_si256(dest, 9, ymm);
+	mm256_store_si256(dest, 10, ymm);
+	mm256_store_si256(dest, 11, ymm);
+	mm256_store_si256(dest, 12, ymm);
+	mm256_store_si256(dest, 13, ymm);
+	mm256_store_si256(dest, 14, ymm);
+	mm256_store_si256(dest, 15, ymm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+	flush64b(dest + 4 * 64);
+	flush64b(dest + 5 * 64);
+	flush64b(dest + 6 * 64);
+	flush64b(dest + 7 * 64);
+}
+
+static force_inline void
+memset_mov4x64b(char *dest, __m256i ymm, flush64b_fn flush64b)
+{
+	mm256_store_si256(dest, 0, ymm);
+	mm256_store_si256(dest, 1, ymm);
+	mm256_store_si256(dest, 2, ymm);
+	mm256_store_si256(dest, 3, ymm);
+	mm256_store_si256(dest, 4, ymm);
+	mm256_store_si256(dest, 5, ymm);
+	mm256_store_si256(dest, 6, ymm);
+	mm256_store_si256(dest, 7, ymm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+}
+
+static force_inline void
+memset_mov2x64b(char *dest, __m256i ymm, flush64b_fn flush64b)
+{
+	mm256_store_si256(dest, 0, ymm);
+	mm256_store_si256(dest, 1, ymm);
+	mm256_store_si256(dest, 2, ymm);
+	mm256_store_si256(dest, 3, ymm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+}
+
+static force_inline void
+memset_mov1x64b(char *dest, __m256i ymm, flush64b_fn flush64b)
+{
+	mm256_store_si256(dest, 0, ymm);
+	mm256_store_si256(dest, 1, ymm);
+
+	flush64b(dest + 0 * 64);
+}
+
+static force_inline void
+memset_mov_avx(char *dest, int c, size_t len,
+		flush_fn flush, flush64b_fn flush64b)
+{
+	__m256i ymm = _mm256_set1_epi8((char)c);
+
+	size_t cnt = (uint64_t)dest & 63;
+	if (cnt > 0) {
+		cnt = 64 - cnt;
+
+		if (cnt > len)
+			cnt = len;
+
+		memset_small_avx(dest, ymm, cnt, flush);
+
+		dest += cnt;
+		len -= cnt;
+	}
+
+	while (len >= 8 * 64) {
+		memset_mov8x64b(dest, ymm, flush64b);
+		dest += 8 * 64;
+		len -= 8 * 64;
+	}
+
+	if (len >= 4 * 64) {
+		memset_mov4x64b(dest, ymm, flush64b);
+		dest += 4 * 64;
+		len -= 4 * 64;
+	}
+
+	if (len >= 2 * 64) {
+		memset_mov2x64b(dest, ymm, flush64b);
+		dest += 2 * 64;
+		len -= 2 * 64;
+	}
+
+	if (len >= 1 * 64) {
+		memset_mov1x64b(dest, ymm, flush64b);
+
+		dest += 1 * 64;
+		len -= 1 * 64;
+	}
+
+	if (len)
+		memset_small_avx(dest, ymm, len, flush);
+
+	avx_zeroupper();
+}
+
+void
+memset_mov_avx_noflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx(dest, c, len, noflush, noflush64b);
+}
+
+void
+memset_mov_avx_empty(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx(dest, c, len, flush_empty_nolog, flush64b_empty);
+}
+
+void
+memset_mov_avx_clflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx(dest, c, len, flush_clflush_nolog, pmem_clflush);
+}
+
+void
+memset_mov_avx_clflushopt(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx(dest, c, len, flush_clflushopt_nolog,
+			pmem_clflushopt);
+}
+
+void
+memset_mov_avx_clwb(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx(dest, c, len, flush_clwb_nolog, pmem_clwb);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx512f.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx512f.c
new file mode 100644
index 000000000..f2127f2b0
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_avx512f.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <immintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "avx.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_avx512f.h"
+
+static force_inline void
+mm512_store_si512(char *dest, unsigned idx, __m512i src)
+{
+	_mm512_store_si512((__m512i *)dest + idx, src);
+}
+
+static force_inline void
+memset_mov32x64b(char *dest, __m512i zmm, flush64b_fn flush64b)
+{
+	mm512_store_si512(dest, 0, zmm);
+	mm512_store_si512(dest, 1, zmm);
+	mm512_store_si512(dest, 2, zmm);
+	mm512_store_si512(dest, 3, zmm);
+	mm512_store_si512(dest, 4, zmm);
+	mm512_store_si512(dest, 5, zmm);
+	mm512_store_si512(dest, 6, zmm);
+	mm512_store_si512(dest, 7, zmm);
+	mm512_store_si512(dest, 8, zmm);
+	mm512_store_si512(dest, 9, zmm);
+	mm512_store_si512(dest, 10, zmm);
+	mm512_store_si512(dest, 11, zmm);
+	mm512_store_si512(dest, 12, zmm);
+	mm512_store_si512(dest, 13, zmm);
+	mm512_store_si512(dest, 14, zmm);
+	mm512_store_si512(dest, 15, zmm);
+	mm512_store_si512(dest, 16, zmm);
+	mm512_store_si512(dest, 17, zmm);
+	mm512_store_si512(dest, 18, zmm);
+	mm512_store_si512(dest, 19, zmm);
+	mm512_store_si512(dest, 20, zmm);
+	mm512_store_si512(dest, 21, zmm);
+	mm512_store_si512(dest, 22, zmm);
+	mm512_store_si512(dest, 23, zmm);
+	mm512_store_si512(dest, 24, zmm);
+	mm512_store_si512(dest, 25, zmm);
+	mm512_store_si512(dest, 26, zmm);
+	mm512_store_si512(dest, 27, zmm);
+	mm512_store_si512(dest, 28, zmm);
+	mm512_store_si512(dest, 29, zmm);
+	mm512_store_si512(dest, 30, zmm);
+	mm512_store_si512(dest, 31, zmm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+	flush64b(dest + 4 * 64);
+	flush64b(dest + 5 * 64);
+	flush64b(dest + 6 * 64);
+	flush64b(dest + 7 * 64);
+	flush64b(dest + 8 * 64);
+	flush64b(dest + 9 * 64);
+	flush64b(dest + 10 * 64);
+	flush64b(dest + 11 * 64);
+	flush64b(dest + 12 * 64);
+	flush64b(dest + 13 * 64);
+	flush64b(dest + 14 * 64);
+	flush64b(dest + 15 * 64);
+	flush64b(dest + 16 * 64);
+	flush64b(dest + 17 * 64);
+	flush64b(dest + 18 * 64);
+	flush64b(dest + 19 * 64);
+	flush64b(dest + 20 * 64);
+	flush64b(dest + 21 * 64);
+	flush64b(dest + 22 * 64);
+	flush64b(dest + 23 * 64);
+	flush64b(dest + 24 * 64);
+	flush64b(dest + 25 * 64);
+	
flush64b(dest + 26 * 64); + flush64b(dest + 27 * 64); + flush64b(dest + 28 * 64); + flush64b(dest + 29 * 64); + flush64b(dest + 30 * 64); + flush64b(dest + 31 * 64); +} + +static force_inline void +memset_mov16x64b(char *dest, __m512i zmm, flush64b_fn flush64b) +{ + mm512_store_si512(dest, 0, zmm); + mm512_store_si512(dest, 1, zmm); + mm512_store_si512(dest, 2, zmm); + mm512_store_si512(dest, 3, zmm); + mm512_store_si512(dest, 4, zmm); + mm512_store_si512(dest, 5, zmm); + mm512_store_si512(dest, 6, zmm); + mm512_store_si512(dest, 7, zmm); + mm512_store_si512(dest, 8, zmm); + mm512_store_si512(dest, 9, zmm); + mm512_store_si512(dest, 10, zmm); + mm512_store_si512(dest, 11, zmm); + mm512_store_si512(dest, 12, zmm); + mm512_store_si512(dest, 13, zmm); + mm512_store_si512(dest, 14, zmm); + mm512_store_si512(dest, 15, zmm); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); + flush64b(dest + 4 * 64); + flush64b(dest + 5 * 64); + flush64b(dest + 6 * 64); + flush64b(dest + 7 * 64); + flush64b(dest + 8 * 64); + flush64b(dest + 9 * 64); + flush64b(dest + 10 * 64); + flush64b(dest + 11 * 64); + flush64b(dest + 12 * 64); + flush64b(dest + 13 * 64); + flush64b(dest + 14 * 64); + flush64b(dest + 15 * 64); +} + +static force_inline void +memset_mov8x64b(char *dest, __m512i zmm, flush64b_fn flush64b) +{ + mm512_store_si512(dest, 0, zmm); + mm512_store_si512(dest, 1, zmm); + mm512_store_si512(dest, 2, zmm); + mm512_store_si512(dest, 3, zmm); + mm512_store_si512(dest, 4, zmm); + mm512_store_si512(dest, 5, zmm); + mm512_store_si512(dest, 6, zmm); + mm512_store_si512(dest, 7, zmm); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); + flush64b(dest + 4 * 64); + flush64b(dest + 5 * 64); + flush64b(dest + 6 * 64); + flush64b(dest + 7 * 64); +} + +static force_inline void +memset_mov4x64b(char *dest, __m512i zmm, flush64b_fn flush64b) +{ + mm512_store_si512(dest, 0, zmm); + mm512_store_si512(dest, 1, zmm); + mm512_store_si512(dest, 2, zmm); + mm512_store_si512(dest, 3, zmm); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); + flush64b(dest + 2 * 64); + flush64b(dest + 3 * 64); +} + +static force_inline void +memset_mov2x64b(char *dest, __m512i zmm, flush64b_fn flush64b) +{ + mm512_store_si512(dest, 0, zmm); + mm512_store_si512(dest, 1, zmm); + + flush64b(dest + 0 * 64); + flush64b(dest + 1 * 64); +} + +static force_inline void +memset_mov1x64b(char *dest, __m512i zmm, flush64b_fn flush64b) +{ + mm512_store_si512(dest, 0, zmm); + + flush64b(dest + 0 * 64); +} + +static force_inline void +memset_mov_avx512f(char *dest, int c, size_t len, + flush_fn flush, flush64b_fn flush64b) +{ + __m512i zmm = _mm512_set1_epi8((char)c); + /* See comment in memset_movnt_avx512f */ + __m256i ymm = _mm256_set1_epi8((char)c); + + size_t cnt = (uint64_t)dest & 63; + if (cnt > 0) { + cnt = 64 - cnt; + + if (cnt > len) + cnt = len; + + memset_small_avx512f(dest, ymm, cnt, flush); + + dest += cnt; + len -= cnt; + } + + while (len >= 32 * 64) { + memset_mov32x64b(dest, zmm, flush64b); + dest += 32 * 64; + len -= 32 * 64; + } + + if (len >= 16 * 64) { + memset_mov16x64b(dest, zmm, flush64b); + dest += 16 * 64; + len -= 16 * 64; + } + + if (len >= 8 * 64) { + memset_mov8x64b(dest, zmm, flush64b); + dest += 8 * 64; + len -= 8 * 64; + } + + if (len >= 4 * 64) { + memset_mov4x64b(dest, zmm, flush64b); + dest += 4 * 64; + len -= 4 * 64; + } + + if (len >= 2 * 64) { + memset_mov2x64b(dest, zmm, flush64b); + dest += 2 * 64; 
+		len -= 2 * 64;
+	}
+
+	if (len >= 1 * 64) {
+		memset_mov1x64b(dest, zmm, flush64b);
+
+		dest += 1 * 64;
+		len -= 1 * 64;
+	}
+
+	if (len)
+		memset_small_avx512f(dest, ymm, len, flush);
+
+	avx_zeroupper();
+}
+
+void
+memset_mov_avx512f_noflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx512f(dest, c, len, noflush, noflush64b);
+}
+
+void
+memset_mov_avx512f_empty(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx512f(dest, c, len, flush_empty_nolog, flush64b_empty);
+}
+
+void
+memset_mov_avx512f_clflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx512f(dest, c, len, flush_clflush_nolog, pmem_clflush);
+}
+
+void
+memset_mov_avx512f_clflushopt(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx512f(dest, c, len, flush_clflushopt_nolog,
+			pmem_clflushopt);
+}
+
+void
+memset_mov_avx512f_clwb(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_avx512f(dest, c, len, flush_clwb_nolog, pmem_clwb);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/memset/memset_t_sse2.c b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_sse2.c
new file mode 100644
index 000000000..49a4a5c35
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/memset/memset_t_sse2.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2017-2020, Intel Corporation */
+
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pmem2_arch.h"
+#include "flush.h"
+#include "memcpy_memset.h"
+#include "memset_sse2.h"
+
+static force_inline void
+mm_store_si128(char *dest, unsigned idx, __m128i src)
+{
+	_mm_store_si128((__m128i *)dest + idx, src);
+}
+
+static force_inline void
+memset_mov4x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
+{
+	mm_store_si128(dest, 0, xmm);
+	mm_store_si128(dest, 1, xmm);
+	mm_store_si128(dest, 2, xmm);
+	mm_store_si128(dest, 3, xmm);
+	mm_store_si128(dest, 4, xmm);
+	mm_store_si128(dest, 5, xmm);
+	mm_store_si128(dest, 6, xmm);
+	mm_store_si128(dest, 7, xmm);
+	mm_store_si128(dest, 8, xmm);
+	mm_store_si128(dest, 9, xmm);
+	mm_store_si128(dest, 10, xmm);
+	mm_store_si128(dest, 11, xmm);
+	mm_store_si128(dest, 12, xmm);
+	mm_store_si128(dest, 13, xmm);
+	mm_store_si128(dest, 14, xmm);
+	mm_store_si128(dest, 15, xmm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+	flush64b(dest + 2 * 64);
+	flush64b(dest + 3 * 64);
+}
+
+static force_inline void
+memset_mov2x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
+{
+	mm_store_si128(dest, 0, xmm);
+	mm_store_si128(dest, 1, xmm);
+	mm_store_si128(dest, 2, xmm);
+	mm_store_si128(dest, 3, xmm);
+	mm_store_si128(dest, 4, xmm);
+	mm_store_si128(dest, 5, xmm);
+	mm_store_si128(dest, 6, xmm);
+	mm_store_si128(dest, 7, xmm);
+
+	flush64b(dest + 0 * 64);
+	flush64b(dest + 1 * 64);
+}
+
+static force_inline void
+memset_mov1x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
+{
+	mm_store_si128(dest, 0, xmm);
+	mm_store_si128(dest, 1, xmm);
+	mm_store_si128(dest, 2, xmm);
+	mm_store_si128(dest, 3, xmm);
+
+	flush64b(dest + 0 * 64);
+}
+
+static force_inline void
+memset_mov_sse2(char *dest, int c, size_t len,
+		flush_fn flush, flush64b_fn flush64b)
+{
+	__m128i xmm = _mm_set1_epi8((char)c);
+
+	size_t cnt = (uint64_t)dest & 63;
+	if (cnt > 0) {
+		cnt = 64 - cnt;
+
+		if (cnt > len)
+			cnt = len;
+
+		memset_small_sse2(dest, xmm, cnt, flush);
+
+		dest += cnt;
+		len -= cnt;
+	}
+
+	while (len >= 4 * 64) {
+		memset_mov4x64b(dest, xmm, flush64b);
+		dest += 4 * 64;
+		len -= 4 * 64;
+	}
+
+	if (len >= 2 * 64) {
+		memset_mov2x64b(dest, xmm, flush64b);
+		dest += 2 * 64;
+		len -= 2 * 64;
+	}
+
+	if (len >= 1 * 64) {
+		memset_mov1x64b(dest, xmm, flush64b);
+
+		dest += 1 * 64;
+		len -= 1 * 64;
+	}
+
+	if (len)
+		memset_small_sse2(dest, xmm, len, flush);
+}
+
+void
+memset_mov_sse2_noflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_sse2(dest, c, len, noflush, noflush64b);
+}
+
+void
+memset_mov_sse2_empty(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_sse2(dest, c, len, flush_empty_nolog, flush64b_empty);
+}
+
+void
+memset_mov_sse2_clflush(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_sse2(dest, c, len, flush_clflush_nolog, pmem_clflush);
+}
+
+void
+memset_mov_sse2_clflushopt(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_sse2(dest, c, len, flush_clflushopt_nolog,
+			pmem_clflushopt);
+}
+
+void
+memset_mov_sse2_clwb(char *dest, int c, size_t len)
+{
+	LOG(15, "dest %p c %d len %zu", dest, c, len);
+
+	memset_mov_sse2(dest, c, len, flush_clwb_nolog, pmem_clwb);
+}
diff --git a/src/pmdk/src/libpmem2/x86_64/sources.inc b/src/pmdk/src/libpmem2/x86_64/sources.inc
new file mode 100644
index 000000000..3735c9052
--- /dev/null
+++ b/src/pmdk/src/libpmem2/x86_64/sources.inc
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2020, Intel Corporation
+
+#
+# src/libpmem2/x86_64/sources.inc -- list of files for libpmem2/x86_64
+#
+
+LIBPMEM2_ARCH_SOURCE = init.c\
+	cpu.c\
+	memcpy_nt_avx.c\
+	memcpy_nt_sse2.c\
+	memset_nt_avx.c\
+	memset_nt_sse2.c\
+	memcpy_t_avx.c\
+	memcpy_t_sse2.c\
+	memset_t_avx.c\
+	memset_t_sse2.c
+
+AVX512F_PROG="\#include <immintrin.h>\n\#include <stdint.h>\nint main(){ uint64_t v[8]; __m512i zmm0 = _mm512_loadu_si512((__m512i *)&v); return 0;}"
+AVX512F_AVAILABLE := $(shell printf $(AVX512F_PROG) |\
+	$(CC) $(CFLAGS) -x c -mavx512f -o /dev/null - 2>/dev/null && echo y || echo n)
+
+ifeq ($(AVX512F_AVAILABLE), y)
+LIBPMEM2_ARCH_SOURCE += \
+	memcpy_nt_avx512f.c\
+	memset_nt_avx512f.c\
+	memcpy_t_avx512f.c\
+	memset_t_avx512f.c
+endif
+
diff --git a/src/pmdk/src/libpmemblk/Makefile b/src/pmdk/src/libpmemblk/Makefile
new file mode 100644
index 000000000..8f5d99ecd
--- /dev/null
+++ b/src/pmdk/src/libpmemblk/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2014-2020, Intel Corporation
+
+#
+# src/libpmemblk/Makefile -- Makefile for libpmemblk
+#
+
+LIBRARY_NAME = pmemblk
+LIBRARY_SO_VERSION = 1
+LIBRARY_VERSION = 0.0
+
+include ../core/pmemcore.inc
+include ../common/pmemcommon.inc
+SOURCE +=\
+	blk.c\
+	btt.c\
+	libpmemblk.c
+
+include ../Makefile.inc
+
+CFLAGS += $(LIBNDCTL_CFLAGS)
+LIBS += -pthread -lpmem $(LIBNDCTL_LIBS)
diff --git a/src/pmdk/src/libpmemblk/blk.c b/src/pmdk/src/libpmemblk/blk.c
new file mode 100644
index 000000000..58f9c9fff
--- /dev/null
+++ b/src/pmdk/src/libpmemblk/blk.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * blk.c -- block memory pool entry points for libpmem
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <stdint.h>
+#include <endian.h>
+#include <stdbool.h>
+
+#include "libpmem.h"
+#include "libpmemblk.h"
+
+#include "mmap.h"
+#include "set.h"
+#include "out.h"
+#include "btt.h"
+#include "blk.h"
+#include "util.h"
+#include "sys_util.h"
+#include "util_pmem.h" +#include "valgrind_internal.h" + +static const struct pool_attr Blk_create_attr = { + BLK_HDR_SIG, + BLK_FORMAT_MAJOR, + BLK_FORMAT_FEAT_DEFAULT, + {0}, {0}, {0}, {0}, {0} +}; + +static const struct pool_attr Blk_open_attr = { + BLK_HDR_SIG, + BLK_FORMAT_MAJOR, + BLK_FORMAT_FEAT_CHECK, + {0}, {0}, {0}, {0}, {0} +}; + +/* + * lane_enter -- (internal) acquire a unique lane number + */ +static void +lane_enter(PMEMblkpool *pbp, unsigned *lane) +{ + unsigned mylane; + + mylane = util_fetch_and_add32(&pbp->next_lane, 1) % pbp->nlane; + + /* lane selected, grab the per-lane lock */ + util_mutex_lock(&pbp->locks[mylane]); + + *lane = mylane; +} + +/* + * lane_exit -- (internal) drop lane lock + */ +static void +lane_exit(PMEMblkpool *pbp, unsigned mylane) +{ + util_mutex_unlock(&pbp->locks[mylane]); +} + +/* + * nsread -- (internal) read data from the namespace encapsulating the BTT + * + * This routine is provided to btt_init() to allow the btt module to + * do I/O on the memory pool containing the BTT layout. + */ +static int +nsread(void *ns, unsigned lane, void *buf, size_t count, uint64_t off) +{ + struct pmemblk *pbp = (struct pmemblk *)ns; + + LOG(13, "pbp %p lane %u count %zu off %" PRIu64, pbp, lane, count, off); + + if (off + count > pbp->datasize) { + ERR("offset + count (%zu) past end of data area (%zu)", + (size_t)off + count, pbp->datasize); + errno = EINVAL; + return -1; + } + + memcpy(buf, (char *)pbp->data + off, count); + + return 0; +} + +/* + * nswrite -- (internal) write data to the namespace encapsulating the BTT + * + * This routine is provided to btt_init() to allow the btt module to + * do I/O on the memory pool containing the BTT layout. + */ +static int +nswrite(void *ns, unsigned lane, const void *buf, size_t count, + uint64_t off) +{ + struct pmemblk *pbp = (struct pmemblk *)ns; + + LOG(13, "pbp %p lane %u count %zu off %" PRIu64, pbp, lane, count, off); + + if (off + count > pbp->datasize) { + ERR("offset + count (%zu) past end of data area (%zu)", + (size_t)off + count, pbp->datasize); + errno = EINVAL; + return -1; + } + + void *dest = (char *)pbp->data + off; + +#ifdef DEBUG + /* grab debug write lock */ + util_mutex_lock(&pbp->write_lock); +#endif + + /* unprotect the memory (debug version only) */ + RANGE_RW(dest, count, pbp->is_dev_dax); + + if (pbp->is_pmem) + pmem_memcpy_nodrain(dest, buf, count); + else + memcpy(dest, buf, count); + + /* protect the memory again (debug version only) */ + RANGE_RO(dest, count, pbp->is_dev_dax); + +#ifdef DEBUG + /* release debug write lock */ + util_mutex_unlock(&pbp->write_lock); +#endif + + if (pbp->is_pmem) + pmem_drain(); + else + pmem_msync(dest, count); + + return 0; +} + +/* + * nsmap -- (internal) allow direct access to a range of a namespace + * + * The caller requests a range to be "mapped" but the return value + * may indicate a smaller amount (in which case the caller is expected + * to call back later for another mapping). + * + * This routine is provided to btt_init() to allow the btt module to + * do I/O on the memory pool containing the BTT layout. 
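+ * In this implementation the pool is memory-mapped in its entirety,
+ * so the callback can always satisfy the full requested length.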
+ */ +static ssize_t +nsmap(void *ns, unsigned lane, void **addrp, size_t len, uint64_t off) +{ + struct pmemblk *pbp = (struct pmemblk *)ns; + + LOG(12, "pbp %p lane %u len %zu off %" PRIu64, pbp, lane, len, off); + + ASSERT(((ssize_t)len) >= 0); + + if (off + len >= pbp->datasize) { + ERR("offset + len (%zu) past end of data area (%zu)", + (size_t)off + len, pbp->datasize - 1); + errno = EINVAL; + return -1; + } + + /* + * Since the entire file is memory-mapped, this callback + * can always provide the entire length requested. + */ + *addrp = (char *)pbp->data + off; + + LOG(12, "returning addr %p", *addrp); + + return (ssize_t)len; +} + +/* + * nssync -- (internal) flush changes made to a namespace range + * + * This is used in conjunction with the addresses handed out by + * nsmap() above. There's no need to sync things written via + * nswrite() since those changes are flushed each time nswrite() + * is called. + * + * This routine is provided to btt_init() to allow the btt module to + * do I/O on the memory pool containing the BTT layout. + */ +static void +nssync(void *ns, unsigned lane, void *addr, size_t len) +{ + struct pmemblk *pbp = (struct pmemblk *)ns; + + LOG(12, "pbp %p lane %u addr %p len %zu", pbp, lane, addr, len); + + if (pbp->is_pmem) + pmem_persist(addr, len); + else + pmem_msync(addr, len); +} + +/* + * nszero -- (internal) zero data in the namespace encapsulating the BTT + * + * This routine is provided to btt_init() to allow the btt module to + * zero the memory pool containing the BTT layout. + */ +static int +nszero(void *ns, unsigned lane, size_t count, uint64_t off) +{ + struct pmemblk *pbp = (struct pmemblk *)ns; + + LOG(13, "pbp %p lane %u count %zu off %" PRIu64, pbp, lane, count, off); + + if (off + count > pbp->datasize) { + ERR("offset + count (%zu) past end of data area (%zu)", + (size_t)off + count, pbp->datasize); + errno = EINVAL; + return -1; + } + + void *dest = (char *)pbp->data + off; + + /* unprotect the memory (debug version only) */ + RANGE_RW(dest, count, pbp->is_dev_dax); + + pmem_memset_persist(dest, 0, count); + + /* protect the memory again (debug version only) */ + RANGE_RO(dest, count, pbp->is_dev_dax); + + return 0; +} + +/* callbacks for btt_init() */ +static struct ns_callback ns_cb = { + .nsread = nsread, + .nswrite = nswrite, + .nszero = nszero, + .nsmap = nsmap, + .nssync = nssync, + .ns_is_zeroed = 0 +}; + +/* + * blk_descr_create -- (internal) create block memory pool descriptor + */ +static void +blk_descr_create(PMEMblkpool *pbp, uint32_t bsize, int zeroed) +{ + LOG(3, "pbp %p bsize %u zeroed %d", pbp, bsize, zeroed); + + /* create the required metadata */ + pbp->bsize = htole32(bsize); + util_persist(pbp->is_pmem, &pbp->bsize, sizeof(bsize)); + + pbp->is_zeroed = zeroed; + util_persist(pbp->is_pmem, &pbp->is_zeroed, sizeof(pbp->is_zeroed)); +} + +/* + * blk_descr_check -- (internal) validate block memory pool descriptor + */ +static int +blk_descr_check(PMEMblkpool *pbp, size_t *bsize) +{ + LOG(3, "pbp %p bsize %zu", pbp, *bsize); + + size_t hdr_bsize = le32toh(pbp->bsize); + if (*bsize && *bsize != hdr_bsize) { + ERR("wrong bsize (%zu), pool created with bsize %zu", + *bsize, hdr_bsize); + errno = EINVAL; + return -1; + } + *bsize = hdr_bsize; + LOG(3, "using block size from header: %zu", *bsize); + + return 0; +} + +/* + * blk_runtime_init -- (internal) initialize block memory pool runtime data + */ +static int +blk_runtime_init(PMEMblkpool *pbp, size_t bsize, int rdonly) +{ + LOG(3, "pbp %p bsize %zu rdonly %d", + pbp, 
bsize, rdonly); + + /* remove volatile part of header */ + VALGRIND_REMOVE_PMEM_MAPPING(&pbp->addr, + sizeof(struct pmemblk) - + sizeof(struct pool_hdr) - + sizeof(pbp->bsize) - + sizeof(pbp->is_zeroed)); + + /* + * Use some of the memory pool area for run-time info. This + * run-time state is never loaded from the file, it is always + * created here, so no need to worry about byte-order. + */ + pbp->rdonly = rdonly; + pbp->data = (char *)pbp->addr + + roundup(sizeof(*pbp), BLK_FORMAT_DATA_ALIGN); + ASSERT(((char *)pbp->addr + pbp->size) >= (char *)pbp->data); + pbp->datasize = (size_t) + (((char *)pbp->addr + pbp->size) - (char *)pbp->data); + + LOG(4, "data area %p data size %zu bsize %zu", + pbp->data, pbp->datasize, bsize); + + long ncpus = sysconf(_SC_NPROCESSORS_ONLN); + if (ncpus < 1) + ncpus = 1; + + ns_cb.ns_is_zeroed = pbp->is_zeroed; + + /* things free by "goto err" if not NULL */ + struct btt *bttp = NULL; + os_mutex_t *locks = NULL; + + bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.poolset_uuid, + (unsigned)ncpus * 2, pbp, &ns_cb); + + if (bttp == NULL) + goto err; /* btt_init set errno, called LOG */ + + pbp->bttp = bttp; + + pbp->nlane = btt_nlane(pbp->bttp); + pbp->next_lane = 0; + if ((locks = Malloc(pbp->nlane * sizeof(*locks))) == NULL) { + ERR("!Malloc for lane locks"); + goto err; + } + + for (unsigned i = 0; i < pbp->nlane; i++) + util_mutex_init(&locks[i]); + + pbp->locks = locks; + +#ifdef DEBUG + /* initialize debug lock */ + util_mutex_init(&pbp->write_lock); +#endif + + /* + * If possible, turn off all permissions on the pool header page. + * + * The prototype PMFS doesn't allow this when large pages are in + * use. It is not considered an error if this fails. + */ + RANGE_NONE(pbp->addr, sizeof(struct pool_hdr), pbp->is_dev_dax); + + /* the data area should be kept read-only for debug version */ + RANGE_RO(pbp->data, pbp->datasize, pbp->is_dev_dax); + + return 0; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + if (bttp) + btt_fini(bttp); + errno = oerrno; + return -1; +} + +/* + * pmemblk_createU -- create a block memory pool + */ +#ifndef _WIN32 +static inline +#endif +PMEMblkpool * +pmemblk_createU(const char *path, size_t bsize, size_t poolsize, mode_t mode) +{ + LOG(3, "path %s bsize %zu poolsize %zu mode %o", + path, bsize, poolsize, mode); + + /* check if bsize is valid */ + if (bsize == 0) { + ERR("Invalid block size %zu", bsize); + errno = EINVAL; + return NULL; + } + + if (bsize > UINT32_MAX) { + ERR("Invalid block size %zu", bsize); + errno = EINVAL; + return NULL; + } + + struct pool_set *set; + struct pool_attr adj_pool_attr = Blk_create_attr; + + /* force set SDS feature */ + if (SDS_at_create) + adj_pool_attr.features.incompat |= POOL_FEAT_SDS; + else + adj_pool_attr.features.incompat &= ~POOL_FEAT_SDS; + + if (util_pool_create(&set, path, poolsize, PMEMBLK_MIN_POOL, + PMEMBLK_MIN_PART, &adj_pool_attr, NULL, + REPLICAS_DISABLED) != 0) { + LOG(2, "cannot create pool or pool set"); + return NULL; + } + + ASSERT(set->nreplicas > 0); + + struct pool_replica *rep = set->replica[0]; + PMEMblkpool *pbp = rep->part[0].addr; + + VALGRIND_REMOVE_PMEM_MAPPING(&pbp->addr, + sizeof(struct pmemblk) - + ((uintptr_t)&pbp->addr - (uintptr_t)&pbp->hdr)); + + pbp->addr = pbp; + pbp->size = rep->repsize; + pbp->set = set; + pbp->is_pmem = rep->is_pmem; + pbp->is_dev_dax = rep->part[0].is_dev_dax; + + /* is_dev_dax implies is_pmem */ + ASSERT(!pbp->is_dev_dax || pbp->is_pmem); + + /* create pool descriptor */ + blk_descr_create(pbp, 
(uint32_t)bsize, set->zeroed); + + /* initialize runtime parts */ + if (blk_runtime_init(pbp, bsize, 0) != 0) { + ERR("pool initialization failed"); + goto err; + } + + if (util_poolset_chmod(set, mode)) + goto err; + + util_poolset_fdclose(set); + + LOG(3, "pbp %p", pbp); + return pbp; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + util_poolset_close(set, DELETE_CREATED_PARTS); + errno = oerrno; + return NULL; +} + +#ifndef _WIN32 +/* + * pmemblk_create -- create a block memory pool + */ +PMEMblkpool * +pmemblk_create(const char *path, size_t bsize, size_t poolsize, mode_t mode) +{ + return pmemblk_createU(path, bsize, poolsize, mode); +} +#else +/* + * pmemblk_createW -- create a block memory pool + */ +PMEMblkpool * +pmemblk_createW(const wchar_t *path, size_t bsize, size_t poolsize, + mode_t mode) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + PMEMblkpool *ret = pmemblk_createU(upath, bsize, poolsize, mode); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * blk_open_common -- (internal) open a block memory pool + * + * This routine does all the work, but takes a cow flag so internal + * calls can map a read-only pool if required. + * + * Passing in bsize == 0 means a valid pool header must exist (which + * will supply the block size). + */ +static PMEMblkpool * +blk_open_common(const char *path, size_t bsize, unsigned flags) +{ + LOG(3, "path %s bsize %zu flags 0x%x", path, bsize, flags); + + struct pool_set *set; + + if (util_pool_open(&set, path, PMEMBLK_MIN_PART, &Blk_open_attr, + NULL, NULL, flags) != 0) { + LOG(2, "cannot open pool or pool set"); + return NULL; + } + + ASSERT(set->nreplicas > 0); + + struct pool_replica *rep = set->replica[0]; + PMEMblkpool *pbp = rep->part[0].addr; + + VALGRIND_REMOVE_PMEM_MAPPING(&pbp->addr, + sizeof(struct pmemblk) - + ((uintptr_t)&pbp->addr - (uintptr_t)&pbp->hdr)); + + pbp->addr = pbp; + pbp->size = rep->repsize; + pbp->set = set; + pbp->is_pmem = rep->is_pmem; + pbp->is_dev_dax = rep->part[0].is_dev_dax; + + /* is_dev_dax implies is_pmem */ + ASSERT(!pbp->is_dev_dax || pbp->is_pmem); + + if (set->nreplicas > 1) { + errno = ENOTSUP; + ERR("!replicas not supported"); + goto err; + } + + /* validate pool descriptor */ + if (blk_descr_check(pbp, &bsize) != 0) { + LOG(2, "descriptor check failed"); + goto err; + } + + /* initialize runtime parts */ + if (blk_runtime_init(pbp, bsize, set->rdonly) != 0) { + ERR("pool initialization failed"); + goto err; + } + + util_poolset_fdclose(set); + + LOG(3, "pbp %p", pbp); + return pbp; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; + return NULL; +} + +/* + * pmemblk_openU -- open a block memory pool + */ +#ifndef _WIN32 +static inline +#endif +PMEMblkpool * +pmemblk_openU(const char *path, size_t bsize) +{ + LOG(3, "path %s bsize %zu", path, bsize); + + return blk_open_common(path, bsize, COW_at_open ? 
POOL_OPEN_COW : 0); +} + +#ifndef _WIN32 +/* + * pmemblk_open -- open a block memory pool + */ +PMEMblkpool * +pmemblk_open(const char *path, size_t bsize) +{ + return pmemblk_openU(path, bsize); +} +#else +/* + * pmemblk_openW -- open a block memory pool + */ +PMEMblkpool * +pmemblk_openW(const wchar_t *path, size_t bsize) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + PMEMblkpool *ret = pmemblk_openU(upath, bsize); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmemblk_close -- close a block memory pool + */ +void +pmemblk_close(PMEMblkpool *pbp) +{ + LOG(3, "pbp %p", pbp); + + btt_fini(pbp->bttp); + if (pbp->locks) { + for (unsigned i = 0; i < pbp->nlane; i++) + util_mutex_destroy(&pbp->locks[i]); + Free((void *)pbp->locks); + } + +#ifdef DEBUG + /* destroy debug lock */ + util_mutex_destroy(&pbp->write_lock); +#endif + + util_poolset_close(pbp->set, DO_NOT_DELETE_PARTS); +} + +/* + * pmemblk_bsize -- return size of block for specified pool + */ +size_t +pmemblk_bsize(PMEMblkpool *pbp) +{ + LOG(3, "pbp %p", pbp); + + return le32toh(pbp->bsize); +} + +/* + * pmemblk_nblock -- return number of usable blocks in a block memory pool + */ +size_t +pmemblk_nblock(PMEMblkpool *pbp) +{ + LOG(3, "pbp %p", pbp); + + return btt_nlba(pbp->bttp); +} + +/* + * pmemblk_read -- read a block in a block memory pool + */ +int +pmemblk_read(PMEMblkpool *pbp, void *buf, long long blockno) +{ + LOG(3, "pbp %p buf %p blockno %lld", pbp, buf, blockno); + + if (blockno < 0) { + ERR("negative block number"); + errno = EINVAL; + return -1; + } + + unsigned lane; + + lane_enter(pbp, &lane); + + int err = btt_read(pbp->bttp, lane, (uint64_t)blockno, buf); + + lane_exit(pbp, lane); + + return err; +} + +/* + * pmemblk_write -- write a block (atomically) in a block memory pool + */ +int +pmemblk_write(PMEMblkpool *pbp, const void *buf, long long blockno) +{ + LOG(3, "pbp %p buf %p blockno %lld", pbp, buf, blockno); + + if (pbp->rdonly) { + ERR("EROFS (pool is read-only)"); + errno = EROFS; + return -1; + } + + if (blockno < 0) { + ERR("negative block number"); + errno = EINVAL; + return -1; + } + + unsigned lane; + + lane_enter(pbp, &lane); + + int err = btt_write(pbp->bttp, lane, (uint64_t)blockno, buf); + + lane_exit(pbp, lane); + + return err; +} + +/* + * pmemblk_set_zero -- zero a block in a block memory pool + */ +int +pmemblk_set_zero(PMEMblkpool *pbp, long long blockno) +{ + LOG(3, "pbp %p blockno %lld", pbp, blockno); + + if (pbp->rdonly) { + ERR("EROFS (pool is read-only)"); + errno = EROFS; + return -1; + } + + if (blockno < 0) { + ERR("negative block number"); + errno = EINVAL; + return -1; + } + + unsigned lane; + + lane_enter(pbp, &lane); + + int err = btt_set_zero(pbp->bttp, lane, (uint64_t)blockno); + + lane_exit(pbp, lane); + + return err; +} + +/* + * pmemblk_set_error -- set the error state on a block in a block memory pool + */ +int +pmemblk_set_error(PMEMblkpool *pbp, long long blockno) +{ + LOG(3, "pbp %p blockno %lld", pbp, blockno); + + if (pbp->rdonly) { + ERR("EROFS (pool is read-only)"); + errno = EROFS; + return -1; + } + + if (blockno < 0) { + ERR("negative block number"); + errno = EINVAL; + return -1; + } + + unsigned lane; + + lane_enter(pbp, &lane); + + int err = btt_set_error(pbp->bttp, lane, (uint64_t)blockno); + + lane_exit(pbp, lane); + + return err; +} + +/* + * pmemblk_checkU -- block memory pool consistency check + */ +#ifndef _WIN32 +static inline +#endif +int +pmemblk_checkU(const char *path, size_t bsize) +{ + LOG(3, "path \"%s\" 
bsize %zu", path, bsize); + + /* map the pool read-only */ + PMEMblkpool *pbp = blk_open_common(path, bsize, POOL_OPEN_COW); + if (pbp == NULL) + return -1; /* errno set by blk_open_common() */ + + int retval = btt_check(pbp->bttp); + int oerrno = errno; + pmemblk_close(pbp); + errno = oerrno; + + return retval; +} + +#ifndef _WIN32 +/* + * pmemblk_check -- block memory pool consistency check + */ +int +pmemblk_check(const char *path, size_t bsize) +{ + return pmemblk_checkU(path, bsize); +} +#else +/* + * pmemblk_checkW -- block memory pool consistency check + */ +int +pmemblk_checkW(const wchar_t *path, size_t bsize) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return -1; + + int ret = pmemblk_checkU(upath, bsize); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmemblk_ctl_getU -- programmatically executes a read ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemblk_ctl_getU(PMEMblkpool *pbp, const char *name, void *arg) +{ + LOG(3, "pbp %p name %s arg %p", pbp, name, arg); + return ctl_query(pbp == NULL ? NULL : pbp->ctl, pbp, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_READ, arg); +} + +/* + * pmemblk_ctl_setU -- programmatically executes a write ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemblk_ctl_setU(PMEMblkpool *pbp, const char *name, void *arg) +{ + LOG(3, "pbp %p name %s arg %p", pbp, name, arg); + return ctl_query(pbp == NULL ? NULL : pbp->ctl, pbp, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_WRITE, arg); +} + +/* + * pmemblk_ctl_execU -- programmatically executes a runnable ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemblk_ctl_execU(PMEMblkpool *pbp, const char *name, void *arg) +{ + LOG(3, "pbp %p name %s arg %p", pbp, name, arg); + return ctl_query(pbp == NULL ? 
NULL : pbp->ctl, pbp,
+		CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_RUNNABLE, arg);
+}
+
+#ifndef _WIN32
+/*
+ * pmemblk_ctl_get -- programmatically executes a read ctl query
+ */
+int
+pmemblk_ctl_get(PMEMblkpool *pbp, const char *name, void *arg)
+{
+	return pmemblk_ctl_getU(pbp, name, arg);
+}
+
+/*
+ * pmemblk_ctl_set -- programmatically executes a write ctl query
+ */
+int
+pmemblk_ctl_set(PMEMblkpool *pbp, const char *name, void *arg)
+{
+	return pmemblk_ctl_setU(pbp, name, arg);
+}
+
+/*
+ * pmemblk_ctl_exec -- programmatically executes a runnable ctl query
+ */
+int
+pmemblk_ctl_exec(PMEMblkpool *pbp, const char *name, void *arg)
+{
+	return pmemblk_ctl_execU(pbp, name, arg);
+}
+#else
+/*
+ * pmemblk_ctl_getW -- programmatically executes a read ctl query
+ */
+int
+pmemblk_ctl_getW(PMEMblkpool *pbp, const wchar_t *name, void *arg)
+{
+	char *uname = util_toUTF8(name);
+	if (uname == NULL)
+		return -1;
+
+	int ret = pmemblk_ctl_getU(pbp, uname, arg);
+	util_free_UTF8(uname);
+
+	return ret;
+}
+
+/*
+ * pmemblk_ctl_setW -- programmatically executes a write ctl query
+ */
+int
+pmemblk_ctl_setW(PMEMblkpool *pbp, const wchar_t *name, void *arg)
+{
+	char *uname = util_toUTF8(name);
+	if (uname == NULL)
+		return -1;
+
+	int ret = pmemblk_ctl_setU(pbp, uname, arg);
+	util_free_UTF8(uname);
+
+	return ret;
+}
+
+/*
+ * pmemblk_ctl_execW -- programmatically executes a runnable ctl query
+ */
+int
+pmemblk_ctl_execW(PMEMblkpool *pbp, const wchar_t *name, void *arg)
+{
+	char *uname = util_toUTF8(name);
+	if (uname == NULL)
+		return -1;
+
+	int ret = pmemblk_ctl_execU(pbp, uname, arg);
+	util_free_UTF8(uname);
+
+	return ret;
+}
+#endif
+
+#if FAULT_INJECTION
+void
+pmemblk_inject_fault_at(enum pmem_allocation_type type, int nth,
+	const char *at)
+{
+	core_inject_fault_at(type, nth, at);
+}
+
+int
+pmemblk_fault_injection_enabled(void)
+{
+	return core_fault_injection_enabled();
+}
+#endif
diff --git a/src/pmdk/src/libpmemblk/blk.h b/src/pmdk/src/libpmemblk/blk.h
new file mode 100644
index 000000000..095331b8b
--- /dev/null
+++ b/src/pmdk/src/libpmemblk/blk.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2014-2020, Intel Corporation */
+
+/*
+ * blk.h -- internal definitions for libpmem blk module
+ */
+
+#ifndef BLK_H
+#define BLK_H 1
+
+#include <stddef.h>
+
+#include "ctl.h"
+#include "os_thread.h"
+#include "pool_hdr.h"
+#include "page_size.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "alloc.h"
+#include "fault_injection.h"
+
+#define PMEMBLK_LOG_PREFIX "libpmemblk"
+#define PMEMBLK_LOG_LEVEL_VAR "PMEMBLK_LOG_LEVEL"
+#define PMEMBLK_LOG_FILE_VAR "PMEMBLK_LOG_FILE"
+
+/* attributes of the blk memory pool format for the pool header */
+#define BLK_HDR_SIG "PMEMBLK"	/* must be 8 bytes including '\0' */
+#define BLK_FORMAT_MAJOR 1
+
+#define BLK_FORMAT_FEAT_DEFAULT \
+	{POOL_FEAT_COMPAT_DEFAULT, POOL_FEAT_INCOMPAT_DEFAULT, 0x0000}
+
+#define BLK_FORMAT_FEAT_CHECK \
+	{POOL_FEAT_COMPAT_VALID, POOL_FEAT_INCOMPAT_VALID, 0x0000}
+
+static const features_t blk_format_feat_default = BLK_FORMAT_FEAT_DEFAULT;
+
+struct pmemblk {
+	struct pool_hdr hdr;	/* memory pool header */
+
+	/* root info for on-media format... */
+	uint32_t bsize;		/* block size */
+
+	/* flag indicating if the pool was zero-initialized */
+	int is_zeroed;
+
+	/* some run-time state, allocated out of memory pool...
*/ + void *addr; /* mapped region */ + size_t size; /* size of mapped region */ + int is_pmem; /* true if pool is PMEM */ + int rdonly; /* true if pool is opened read-only */ + void *data; /* post-header data area */ + size_t datasize; /* size of data area */ + size_t nlba; /* number of LBAs in pool */ + struct btt *bttp; /* btt handle */ + unsigned nlane; /* number of lanes */ + unsigned next_lane; /* used to rotate through lanes */ + os_mutex_t *locks; /* one per lane */ + int is_dev_dax; /* true if mapped on device dax */ + struct ctl *ctl; /* top level node of the ctl tree structure */ + + struct pool_set *set; /* pool set info */ + +#ifdef DEBUG + /* held during read/write mprotected sections */ + os_mutex_t write_lock; +#endif +}; + +/* data area starts at this alignment after the struct pmemblk above */ +#define BLK_FORMAT_DATA_ALIGN ((uintptr_t)PMEM_PAGESIZE) + +#if FAULT_INJECTION +void +pmemblk_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +pmemblk_fault_injection_enabled(void); +#else +static inline void +pmemblk_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +pmemblk_fault_injection_enabled(void) +{ + return 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemblk/btt.c b/src/pmdk/src/libpmemblk/btt.c new file mode 100644 index 000000000..f59389f21 --- /dev/null +++ b/src/pmdk/src/libpmemblk/btt.c @@ -0,0 +1,2051 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * btt.c -- block translation table providing atomic block updates + * + * This is a user-space implementation of the BTT mechanism providing + * single block powerfail write atomicity, as described by: + * The NVDIMM Namespace Specification + * + * To use this module, the caller must provide five routines for + * accessing the namespace containing the data (in this context, + * "namespace" refers to the storage containing the BTT layout, such + * as a file). All namespace I/O is done by these callbacks: + * + * nsread Read count bytes from namespace at offset off + * nswrite Write count bytes to namespace at offset off + * nszero Zero count bytes in namespace at offset off + * nsmap Return direct access to a range of a namespace + * nssync Flush changes made to an nsmap'd range + * + * Data written by the nswrite callback is flushed out to the media + * (made durable) when the call returns. Data written directly via + * the nsmap callback must be flushed explicitly using nssync. + * + * The caller passes these callbacks, along with information such as + * namespace size and UUID to btt_init() and gets back an opaque handle + * which is then used with the rest of the entry points. + * + * Here is a brief list of the entry points to this module: + * + * btt_nlane Returns number of concurrent threads allowed + * + * btt_nlba Returns the usable size, as a count of LBAs + * + * btt_read Reads a single block at a given LBA + * + * btt_write Writes a single block (atomically) at a given LBA + * + * btt_set_zero Sets a block to read back as zeros + * + * btt_set_error Sets a block to return error on read + * + * btt_check Checks the BTT metadata for consistency + * + * btt_fini Frees run-time state, done using namespace + * + * If the caller is multi-threaded, it must only allow btt_nlane() threads + * to enter this module at a time, each assigned a unique "lane" number + * between 0 and btt_nlane() - 1. 
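+ *
+ * A minimal usage sketch (illustrative only, error handling omitted;
+ * "ns" and "ns_cb" stand for the caller's namespace handle and
+ * callback table):
+ *
+ *	struct btt *bttp = btt_init(rawsize, lbasize, parent_uuid,
+ *			maxlane, ns, &ns_cb);
+ *	unsigned nlane = btt_nlane(bttp);
+ *	btt_write(bttp, lane, lba, buf);	(thread owning "lane")
+ *	btt_read(bttp, lane, lba, buf);
+ *	btt_fini(bttp);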
+ *
+ * There are a number of static routines defined in this module.  Here's
+ * a brief overview of the most important routines:
+ *
+ *	read_layout	Checks for valid BTT layout and builds run-time state.
+ *			A number of helper functions are used by read_layout
+ *			to handle various parts of the metadata:
+ *				read_info
+ *				read_arenas
+ *				read_arena
+ *				read_flogs
+ *				read_flog_pair
+ *
+ *	write_layout	Generates a new BTT layout when one doesn't exist.
+ *			Once a new layout is written, write_layout uses
+ *			the same helper functions above to construct the
+ *			run-time state.
+ *
+ *	invalid_lba	Range check done by each entry point that takes
+ *			an LBA.
+ *
+ *	lba_to_arena_lba
+ *			Find the arena and LBA in that arena for a given
+ *			external LBA.  This is the heart of the arena
+ *			range matching logic.
+ *
+ *	flog_update	Update the BTT free list/log combined data structure
+ *			(known as the "flog").  This is the heart of the
+ *			logic that makes writes powerfail atomic.
+ *
+ *	map_lock	These routines provide atomic access to the BTT map
+ *	map_unlock	data structure in an arena.
+ *	map_abort
+ *
+ *	map_entry_setf	Common code for btt_set_zero() and btt_set_error().
+ *
+ *	zero_block	Generate a block of all zeros (instead of actually
+ *			doing a read), when the metadata indicates the
+ *			block should read as zeros.
+ *
+ *	build_rtt	These routines construct the run-time tracking
+ *	build_map_locks	data structures used during I/O.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <endian.h>
+
+#include "out.h"
+#include "uuid.h"
+#include "btt.h"
+#include "btt_layout.h"
+#include "sys_util.h"
+#include "util.h"
+#include "alloc.h"
+
+/*
+ * The opaque btt handle containing state tracked by this module
+ * for the btt namespace.  This is created by btt_init(), handed to
+ * all the other btt_* entry points, and deleted by btt_fini().
+ */
+struct btt {
+	unsigned nlane;	/* number of concurrent threads allowed per btt */
+
+	/*
+	 * The laidout flag indicates whether the namespace contains valid BTT
+	 * metadata.  It is initialized by read_layout() and if no valid layout
+	 * is found, all reads return zeros and the first write will write the
+	 * BTT layout.  The layout_write_mutex protects the laidout flag so
+	 * only one write thread ends up writing the initial metadata by
+	 * calling write_layout().
+	 */
+	os_mutex_t layout_write_mutex;
+	int laidout;
+
+	/*
+	 * UUID of the BTT
+	 */
+	uint8_t uuid[BTTINFO_UUID_LEN];
+
+	/*
+	 * UUID of the containing namespace, used to validate BTT metadata.
+	 */
+	uint8_t parent_uuid[BTTINFO_UUID_LEN];
+
+	/*
+	 * Parameters controlling/describing the BTT layout.
+	 */
+	uint64_t rawsize;		/* size of containing namespace */
+	uint32_t lbasize;		/* external LBA size */
+	uint32_t nfree;			/* available flog entries */
+	uint64_t nlba;			/* total number of external LBAs */
+	unsigned narena;		/* number of arenas */
+
+	/* run-time state kept for each arena */
+	struct arena {
+		uint32_t flags;		/* arena flags (btt_info) */
+		uint32_t external_nlba;	/* LBAs that live in this arena */
+		uint32_t internal_lbasize;
+		uint32_t internal_nlba;
+
+		/*
+		 * The following offsets are relative to the beginning of
+		 * the encapsulating namespace.  This is different from
+		 * how these offsets are stored on-media, where they are
+		 * relative to the start of the arena.  The offsets are
+		 * converted by read_layout() to make them more convenient
+		 * for run-time use.
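+		 * (For example, an arena starting at namespace offset S
+		 * whose on-media mapoff is M is stored here with
+		 * startoff == S and mapoff == S + M.)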
+		 */
+		uint64_t startoff;	/* offset to start of arena */
+		uint64_t dataoff;	/* offset to arena data area */
+		uint64_t mapoff;	/* offset to arena map */
+		uint64_t flogoff;	/* offset to arena flog */
+		uint64_t nextoff;	/* offset to next arena */
+
+		/*
+		 * Run-time flog state.  Indexed by lane.
+		 *
+		 * The write path uses the flog to find the free block
+		 * it writes to before atomically making it the new
+		 * active block for an external LBA.
+		 *
+		 * The read path doesn't use the flog at all.
+		 */
+		struct flog_runtime {
+			struct btt_flog flog;	/* current info */
+			uint64_t entries[2];	/* offsets for flog pair */
+			int next;		/* next write (0 or 1) */
+		} *flogs;
+
+		/*
+		 * Read tracking table.  Indexed by lane.
+		 *
+		 * Before using a free block found in the flog, the write path
+		 * scans the rtt to see if there are any outstanding reads on
+		 * that block (reads that started before the block was freed by
+		 * a concurrent write).  Unused slots in the rtt are indicated
+		 * by setting the error bit, BTT_MAP_ENTRY_ERROR, so that the
+		 * entry won't match any post-map LBA when checked.
+		 */
+		uint32_t volatile *rtt;
+
+		/*
+		 * Map locking.  Indexed by pre-map LBA cache line modulo
+		 * nfree.
+		 */
+		os_mutex_t *map_locks;
+
+		/*
+		 * Arena info block locking.
+		 */
+		os_mutex_t info_lock;
+	} *arenas;
+
+	/*
+	 * Callbacks for doing I/O to namespace.  These are provided by
+	 * the code calling the BTT module, which passes them in to
+	 * btt_init().  All namespace I/O is done using these.
+	 *
+	 * The opaque namespace handle "ns" was provided by the code calling
+	 * the BTT module and is passed to each callback to identify the
+	 * namespace being accessed.
+	 */
+	void *ns;
+	const struct ns_callback *ns_cbp;
+};
+
+/*
+ * Signature for arena info blocks.  Total size is 16 bytes, including
+ * the '\0' added to the string by the declaration (the last two bytes
+ * of the string are '\0').
+ */
+static const char Sig[] = BTTINFO_SIG;
+
+/*
+ * Zeroed out flog entry, used when initializing the flog.
+ */
+static const struct btt_flog Zflog;
+
+/*
+ * Lookup table and macro for looking up sequence numbers.  These are
+ * the 2-bit numbers that cycle between 01, 10, and 11.
+ *
+ * To advance a sequence number to the next number, use something like:
+ *	seq = NSEQ(seq);
+ */
+static const unsigned Nseq[] = { 0, 2, 3, 1 };
+#define NSEQ(seq) (Nseq[(seq) & 3])
+
+/*
+ * get_map_lock_num -- (internal) Calculate offset into map_locks[]
+ *
+ * map_locks[] contains nfree locks which are used to protect the map
+ * from concurrent access to the same cache line.  The index into
+ * map_locks[] is calculated by looking at the byte offset into the map
+ * (premap_lba * BTT_MAP_ENTRY_SIZE), figuring out how many cache lines
+ * into the map that is (dividing by BTT_MAP_LOCK_ALIGN), and
+ * then selecting one of nfree locks (the modulo at the end).
+ *
+ * The extra cast is to keep gcc from generating a false positive
+ * 64-32 bit conversion error when -fsanitize is set.
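+ *
+ * A worked example, assuming the usual values BTT_MAP_ENTRY_SIZE == 4,
+ * BTT_MAP_LOCK_ALIGN == 64 and nfree == 256 (illustrative; see
+ * btt_layout.h for the real values): premap LBA 17 sits at map byte
+ * offset 68, which is cache line 1, so it is protected by
+ * map_locks[1 % 256] == map_locks[1].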
+ */ +static inline uint32_t +get_map_lock_num(uint32_t premap_lba, uint32_t nfree) +{ + return (uint32_t)(premap_lba * BTT_MAP_ENTRY_SIZE / BTT_MAP_LOCK_ALIGN) + % nfree; +} + +/* + * invalid_lba -- (internal) set errno and return true if lba is invalid + * + * This function is used at the top of the entry points where an external + * LBA is provided, like this: + * + * if (invalid_lba(bttp, lba)) + * return -1; + */ +static int +invalid_lba(struct btt *bttp, uint64_t lba) +{ + LOG(3, "bttp %p lba %" PRIu64, bttp, lba); + + if (lba >= bttp->nlba) { + ERR("lba out of range (nlba %" PRIu64 ")", bttp->nlba); + errno = EINVAL; + return 1; + } + + return 0; +} + +/* + * read_info -- (internal) convert btt_info to host byte order & validate + * + * Returns true if info block is valid, and all the integer fields are + * converted to host byte order. If the info block is not valid, this + * routine returns false and the info block passed in is left in an + * unknown state. + */ +static int +read_info(struct btt *bttp, struct btt_info *infop) +{ + LOG(3, "infop %p", infop); + + if (memcmp(infop->sig, Sig, BTTINFO_SIG_LEN)) { + LOG(3, "signature invalid"); + return 0; + } + + if (memcmp(infop->parent_uuid, bttp->parent_uuid, BTTINFO_UUID_LEN)) { + LOG(3, "parent UUID mismatch"); + return 0; + } + + /* to be valid, the fields must checksum correctly */ + if (!util_checksum(infop, sizeof(*infop), &infop->checksum, 0, 0)) { + LOG(3, "invalid checksum"); + return 0; + } + + /* to be valid, info block must have a major version of at least 1 */ + if ((infop->major = le16toh(infop->major)) == 0) { + LOG(3, "invalid major version (0)"); + return 0; + } + + infop->flags = le32toh(infop->flags); + infop->minor = le16toh(infop->minor); + infop->external_lbasize = le32toh(infop->external_lbasize); + infop->external_nlba = le32toh(infop->external_nlba); + infop->internal_lbasize = le32toh(infop->internal_lbasize); + infop->internal_nlba = le32toh(infop->internal_nlba); + infop->nfree = le32toh(infop->nfree); + infop->infosize = le32toh(infop->infosize); + infop->nextoff = le64toh(infop->nextoff); + infop->dataoff = le64toh(infop->dataoff); + infop->mapoff = le64toh(infop->mapoff); + infop->flogoff = le64toh(infop->flogoff); + infop->infooff = le64toh(infop->infooff); + + return 1; +} + +/* + * map_entry_is_zero -- (internal) checks if map_entry is in zero state + */ +static inline int +map_entry_is_zero(uint32_t map_entry) +{ + return (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) == BTT_MAP_ENTRY_ZERO; +} + +/* + * map_entry_is_error -- (internal) checks if map_entry is in error state + */ +static inline int +map_entry_is_error(uint32_t map_entry) +{ + return (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) == BTT_MAP_ENTRY_ERROR; +} + +/* + * map_entry_is_initial -- checks if map_entry is in initial state + */ +int +map_entry_is_initial(uint32_t map_entry) +{ + return (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) == 0; +} + +/* + * map_entry_is_zero_or_initial -- (internal) checks if map_entry is in initial + * or zero state + */ +static inline int +map_entry_is_zero_or_initial(uint32_t map_entry) +{ + uint32_t entry_flags = map_entry & ~BTT_MAP_ENTRY_LBA_MASK; + return entry_flags == 0 || entry_flags == BTT_MAP_ENTRY_ZERO; +} + +/* + * btt_flog_get_valid -- return valid and current flog entry + */ +struct btt_flog * +btt_flog_get_valid(struct btt_flog *flog_pair, int *next) +{ + /* + * Interesting cases: + * - no valid seq numbers: layout consistency error + * - one valid seq number: that's the current entry + * - two valid seq 
numbers: higher number is current entry + * - identical seq numbers: layout consistency error + */ + if (flog_pair[0].seq == flog_pair[1].seq) { + return NULL; + } else if (flog_pair[0].seq == 0) { + /* singleton valid flog at flog_pair[1] */ + *next = 0; + return &flog_pair[1]; + } else if (flog_pair[1].seq == 0) { + /* singleton valid flog at flog_pair[0] */ + *next = 1; + return &flog_pair[0]; + } else if (NSEQ(flog_pair[0].seq) == flog_pair[1].seq) { + /* flog_pair[1] has the later sequence number */ + *next = 0; + return &flog_pair[1]; + } else { + /* flog_pair[0] has the later sequence number */ + *next = 1; + return &flog_pair[0]; + } +} + +/* + * read_flog_pair -- (internal) load up a single flog pair + * + * Zero is returned on success, otherwise -1/errno. + */ +static int +read_flog_pair(struct btt *bttp, unsigned lane, struct arena *arenap, + uint64_t flog_off, struct flog_runtime *flog_runtimep, uint32_t flognum) +{ + LOG(5, "bttp %p lane %u arenap %p flog_off %" PRIu64 " runtimep %p " + "flognum %u", bttp, lane, arenap, flog_off, flog_runtimep, + flognum); + + flog_runtimep->entries[0] = flog_off; + flog_runtimep->entries[1] = flog_off + sizeof(struct btt_flog); + + if (lane >= bttp->nfree) { + ERR("invalid lane %u among nfree %d", lane, bttp->nfree); + errno = EINVAL; + return -1; + } + + if (flog_off == 0) { + ERR("invalid flog offset %" PRIu64, flog_off); + errno = EINVAL; + return -1; + } + + struct btt_flog flog_pair[2]; + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, flog_pair, + sizeof(flog_pair), flog_off) < 0) + return -1; + + btt_flog_convert2h(&flog_pair[0]); + if (invalid_lba(bttp, flog_pair[0].lba)) + return -1; + + btt_flog_convert2h(&flog_pair[1]); + if (invalid_lba(bttp, flog_pair[1].lba)) + return -1; + + LOG(6, "flog_pair[0] flog_off %" PRIu64 " old_map %u new_map %u seq %u", + flog_off, flog_pair[0].old_map, + flog_pair[0].new_map, flog_pair[0].seq); + LOG(6, "flog_pair[1] old_map %u new_map %u seq %u", + flog_pair[1].old_map, flog_pair[1].new_map, + flog_pair[1].seq); + + struct btt_flog *currentp = btt_flog_get_valid(flog_pair, + &flog_runtimep->next); + + if (currentp == NULL) { + ERR("flog layout error: bad seq numbers %d %d", + flog_pair[0].seq, flog_pair[1].seq); + arenap->flags |= BTTINFO_FLAG_ERROR; + return 0; + } + + LOG(6, "run-time flog next is %d", flog_runtimep->next); + + /* copy current flog into run-time flog state */ + flog_runtimep->flog = *currentp; + + LOG(9, "read flog[%u]: lba %u old %u%s%s%s new %u%s%s%s", flognum, + currentp->lba, + currentp->old_map & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(currentp->old_map)) ? " ERROR" : "", + (map_entry_is_zero(currentp->old_map)) ? " ZERO" : "", + (map_entry_is_initial(currentp->old_map)) ? " INIT" : "", + currentp->new_map & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(currentp->new_map)) ? " ERROR" : "", + (map_entry_is_zero(currentp->new_map)) ? " ZERO" : "", + (map_entry_is_initial(currentp->new_map)) ? " INIT" : ""); + + /* + * Decide if the current flog info represents a completed + * operation or an incomplete operation. If completed, the + * old_map field will contain the free block to be used for + * the next write. But if the operation didn't complete (indicated + * by the map entry not being updated), then the operation is + * completed now by updating the map entry. + * + * A special case, used by flog entries when first created, is + * when old_map == new_map. This counts as a complete entry + * and doesn't require reading the map to see if recovery is + * required. 
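+	 *
+	 * For example, if the flog records old_map 5 and new_map 9 but
+	 * the map entry still reads 5, the interrupted write is finished
+	 * here by storing 9 into the map entry.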
+ */ + if (currentp->old_map == currentp->new_map) { + LOG(9, "flog[%u] entry complete (initial state)", flognum); + return 0; + } + + /* convert pre-map LBA into an offset into the map */ + uint64_t map_entry_off = arenap->mapoff + + BTT_MAP_ENTRY_SIZE * currentp->lba; + + /* read current map entry */ + uint32_t entry; + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &entry, + sizeof(entry), map_entry_off) < 0) + return -1; + + entry = le32toh(entry); + + /* map entry in initial state */ + if (map_entry_is_initial(entry)) + entry = currentp->lba | BTT_MAP_ENTRY_NORMAL; + + if (currentp->new_map != entry && currentp->old_map == entry) { + /* last update didn't complete */ + LOG(9, "recover flog[%u]: map[%u]: %u", + flognum, currentp->lba, currentp->new_map); + + /* + * Recovery step is to complete the transaction by + * updating the map entry. + */ + entry = htole32(currentp->new_map); + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &entry, + sizeof(uint32_t), map_entry_off) < 0) + return -1; + } + + return 0; +} + +/* + * flog_update -- (internal) write out an updated flog entry + * + * The flog entries are not checksummed. Instead, increasing sequence + * numbers are used to atomically switch the active flog entry between + * the first and second struct btt_flog in each slot. In order for this + * to work, the sequence number must be updated only after all the other + * fields in the flog are updated. So the writes to the flog are broken + * into two writes, one for the first three fields (lba, old_map, new_map) + * and, only after those fields are known to be written durably, the + * second write for the seq field is done. + * + * Returns 0 on success, otherwise -1/errno. + */ +static int +flog_update(struct btt *bttp, unsigned lane, struct arena *arenap, + uint32_t lba, uint32_t old_map, uint32_t new_map) +{ + LOG(3, "bttp %p lane %u arenap %p lba %u old_map %u new_map %u", + bttp, lane, arenap, lba, old_map, new_map); + + /* construct new flog entry in little-endian byte order */ + struct btt_flog new_flog; + new_flog.lba = lba; + new_flog.old_map = old_map; + new_flog.new_map = new_map; + new_flog.seq = NSEQ(arenap->flogs[lane].flog.seq); + btt_flog_convert2le(&new_flog); + + uint64_t new_flog_off = + arenap->flogs[lane].entries[arenap->flogs[lane].next]; + + /* write out first two fields first */ + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &new_flog, + sizeof(uint32_t) * 2, new_flog_off) < 0) + return -1; + new_flog_off += sizeof(uint32_t) * 2; + + /* write out new_map and seq field to make it active */ + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &new_flog.new_map, + sizeof(uint32_t) * 2, new_flog_off) < 0) + return -1; + + /* flog entry written successfully, update run-time state */ + arenap->flogs[lane].next = 1 - arenap->flogs[lane].next; + arenap->flogs[lane].flog.lba = lba; + arenap->flogs[lane].flog.old_map = old_map; + arenap->flogs[lane].flog.new_map = new_map; + arenap->flogs[lane].flog.seq = NSEQ(arenap->flogs[lane].flog.seq); + + LOG(9, "update flog[%u]: lba %u old %u%s%s%s new %u%s%s%s", lane, lba, + old_map & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(old_map)) ? " ERROR" : "", + (map_entry_is_zero(old_map)) ? " ZERO" : "", + (map_entry_is_initial(old_map)) ? " INIT" : "", + new_map & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(new_map)) ? " ERROR" : "", + (map_entry_is_zero(new_map)) ? " ZERO" : "", + (map_entry_is_initial(new_map)) ? 
" INIT" : ""); + + return 0; +} + +/* + * arena_setf -- (internal) updates the given flag for the arena info block + */ +static int +arena_setf(struct btt *bttp, struct arena *arenap, unsigned lane, uint32_t setf) +{ + LOG(3, "bttp %p arenap %p lane %u setf 0x%x", bttp, arenap, lane, setf); + + /* update runtime state */ + util_fetch_and_or32(&arenap->flags, setf); + + if (!bttp->laidout) { + /* no layout yet to update */ + return 0; + } + + /* + * Read, modify and write out the info block + * at both the beginning and end of the arena. + */ + uint64_t arena_off = arenap->startoff; + + struct btt_info info; + + /* protect from simultaneous writes to the layout */ + util_mutex_lock(&arenap->info_lock); + + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &info, + sizeof(info), arena_off) < 0) { + goto err; + } + + uint64_t infooff = le64toh(info.infooff); + + /* update flags */ + info.flags |= htole32(setf); + + /* update checksum */ + util_checksum(&info, sizeof(info), &info.checksum, 1, 0); + + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info, + sizeof(info), arena_off) < 0) { + goto err; + } + + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info, + sizeof(info), arena_off + infooff) < 0) { + goto err; + } + + util_mutex_unlock(&arenap->info_lock); + return 0; + +err: + util_mutex_unlock(&arenap->info_lock); + return -1; +} + +/* + * set_arena_error -- (internal) set the error flag for the given arena + */ +static int +set_arena_error(struct btt *bttp, struct arena *arenap, unsigned lane) +{ + LOG(3, "bttp %p arena %p lane %u", bttp, arenap, lane); + + return arena_setf(bttp, arenap, lane, BTTINFO_FLAG_ERROR); +} + +/* + * read_flogs -- (internal) load up all the flog entries for an arena + * + * Zero is returned on success, otherwise -1/errno. + */ +static int +read_flogs(struct btt *bttp, unsigned lane, struct arena *arenap) +{ + if ((arenap->flogs = Zalloc(bttp->nfree * + sizeof(struct flog_runtime))) == NULL) { + ERR("!Malloc for %u flog entries", bttp->nfree); + return -1; + } + + /* + * Load up the flog state. read_flog_pair() will determine if + * any recovery steps are required take them on the in-memory + * data structures it creates. Sets error flag when it + * determines an invalid state. + */ + uint64_t flog_off = arenap->flogoff; + struct flog_runtime *flog_runtimep = arenap->flogs; + for (uint32_t i = 0; i < bttp->nfree; i++) { + if (read_flog_pair(bttp, lane, arenap, flog_off, + flog_runtimep, i) < 0) { + set_arena_error(bttp, arenap, lane); + return -1; + } + + /* prepare for next time around the loop */ + flog_off += roundup(2 * sizeof(struct btt_flog), + BTT_FLOG_PAIR_ALIGN); + flog_runtimep++; + } + + return 0; +} + +/* + * build_rtt -- (internal) construct a read tracking table for an arena + * + * Zero is returned on success, otherwise -1/errno. + * + * The rtt is big enough to hold an entry for each free block (nfree) + * since nlane can't be bigger than nfree. nlane may end up smaller, + * in which case some of the high rtt entries will be unused. + */ +static int +build_rtt(struct btt *bttp, struct arena *arenap) +{ + if ((arenap->rtt = Malloc(bttp->nfree * sizeof(uint32_t))) + == NULL) { + ERR("!Malloc for %d rtt entries", bttp->nfree); + return -1; + } + for (uint32_t lane = 0; lane < bttp->nfree; lane++) + arenap->rtt[lane] = BTT_MAP_ENTRY_ERROR; + util_synchronize(); + + return 0; +} + +/* + * build_map_locks -- (internal) construct map locks + * + * Zero is returned on success, otherwise -1/errno. 
+ */
+static int
+build_map_locks(struct btt *bttp, struct arena *arenap)
+{
+	if ((arenap->map_locks =
+			Malloc(bttp->nfree * sizeof(*arenap->map_locks)))
+			== NULL) {
+		ERR("!Malloc for %u map_lock entries", bttp->nfree);
+		return -1;
+	}
+	for (uint32_t lane = 0; lane < bttp->nfree; lane++)
+		util_mutex_init(&arenap->map_locks[lane]);
+
+	return 0;
+}
+
+/*
+ * read_arena -- (internal) load up an arena and build run-time state
+ *
+ * Zero is returned on success, otherwise -1/errno.
+ */
+static int
+read_arena(struct btt *bttp, unsigned lane, uint64_t arena_off,
+	struct arena *arenap)
+{
+	LOG(3, "bttp %p lane %u arena_off %" PRIu64 " arenap %p",
+		bttp, lane, arena_off, arenap);
+
+	struct btt_info info;
+	if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &info, sizeof(info),
+			arena_off) < 0)
+		return -1;
+
+	arenap->flags = le32toh(info.flags);
+	arenap->external_nlba = le32toh(info.external_nlba);
+	arenap->internal_lbasize = le32toh(info.internal_lbasize);
+	arenap->internal_nlba = le32toh(info.internal_nlba);
+
+	arenap->startoff = arena_off;
+	arenap->dataoff = arena_off + le64toh(info.dataoff);
+	arenap->mapoff = arena_off + le64toh(info.mapoff);
+	arenap->flogoff = arena_off + le64toh(info.flogoff);
+	arenap->nextoff = arena_off + le64toh(info.nextoff);
+
+	if (read_flogs(bttp, lane, arenap) < 0)
+		return -1;
+
+	if (build_rtt(bttp, arenap) < 0)
+		return -1;
+
+	if (build_map_locks(bttp, arenap) < 0)
+		return -1;
+
+	/* initialize the per arena info block lock */
+	util_mutex_init(&arenap->info_lock);
+
+	return 0;
+}
+
+/*
+ * btt_info_convert2h -- convert btt_info to host byte order
+ */
+void
+btt_info_convert2h(struct btt_info *infop)
+{
+	infop->flags = le32toh(infop->flags);
+	infop->major = le16toh(infop->major);
+	infop->minor = le16toh(infop->minor);
+	infop->external_lbasize = le32toh(infop->external_lbasize);
+	infop->external_nlba = le32toh(infop->external_nlba);
+	infop->internal_lbasize = le32toh(infop->internal_lbasize);
+	infop->internal_nlba = le32toh(infop->internal_nlba);
+	infop->nfree = le32toh(infop->nfree);
+	infop->infosize = le32toh(infop->infosize);
+	infop->nextoff = le64toh(infop->nextoff);
+	infop->dataoff = le64toh(infop->dataoff);
+	infop->mapoff = le64toh(infop->mapoff);
+	infop->flogoff = le64toh(infop->flogoff);
+	infop->infooff = le64toh(infop->infooff);
+}
+
+/*
+ * btt_info_convert2le -- convert btt_info to little-endian byte order
+ */
+void
+btt_info_convert2le(struct btt_info *infop)
+{
+	infop->flags = htole32(infop->flags);
+	infop->major = htole16(infop->major);
+	infop->minor = htole16(infop->minor);
+	infop->external_lbasize = htole32(infop->external_lbasize);
+	infop->external_nlba = htole32(infop->external_nlba);
+	infop->internal_lbasize = htole32(infop->internal_lbasize);
+	infop->internal_nlba = htole32(infop->internal_nlba);
+	infop->nfree = htole32(infop->nfree);
+	infop->infosize = htole32(infop->infosize);
+	infop->nextoff = htole64(infop->nextoff);
+	infop->dataoff = htole64(infop->dataoff);
+	infop->mapoff = htole64(infop->mapoff);
+	infop->flogoff = htole64(infop->flogoff);
+	infop->infooff = htole64(infop->infooff);
+}
+
+/*
+ * btt_flog_convert2h -- convert btt_flog to host byte order
+ */
+void
+btt_flog_convert2h(struct btt_flog *flogp)
+{
+	flogp->lba = le32toh(flogp->lba);
+	flogp->old_map = le32toh(flogp->old_map);
+	flogp->new_map = le32toh(flogp->new_map);
+	flogp->seq = le32toh(flogp->seq);
+}
+
+/*
+ * btt_flog_convert2le -- convert btt_flog to little-endian byte order
+ */
+void
+btt_flog_convert2le(struct btt_flog
*flogp)
+{
+	flogp->lba = htole32(flogp->lba);
+	flogp->old_map = htole32(flogp->old_map);
+	flogp->new_map = htole32(flogp->new_map);
+	flogp->seq = htole32(flogp->seq);
+}
+
+/*
+ * read_arenas -- (internal) load up all arenas and build run-time state
+ *
+ * On entry, layout must be known to be valid, and the number of arenas
+ * must be known.  Zero is returned on success, otherwise -1/errno.
+ */
+static int
+read_arenas(struct btt *bttp, unsigned lane, unsigned narena)
+{
+	LOG(3, "bttp %p lane %u narena %u", bttp, lane, narena);
+
+	if ((bttp->arenas = Zalloc(narena * sizeof(*bttp->arenas))) == NULL) {
+		ERR("!Malloc for %u arenas", narena);
+		goto err;
+	}
+
+	uint64_t arena_off = 0;
+	struct arena *arenap = bttp->arenas;
+	for (unsigned i = 0; i < narena; i++) {
+
+		if (read_arena(bttp, lane, arena_off, arenap) < 0)
+			goto err;
+
+		/* prepare for next time around the loop */
+		arena_off = arenap->nextoff;
+		arenap++;
+	}
+
+	bttp->laidout = 1;
+
+	return 0;
+
+err:
+	LOG(4, "error clean up");
+	int oerrno = errno;
+	if (bttp->arenas) {
+		for (unsigned i = 0; i < bttp->narena; i++) {
+			if (bttp->arenas[i].flogs)
+				Free(bttp->arenas[i].flogs);
+			if (bttp->arenas[i].rtt)
+				Free((void *)bttp->arenas[i].rtt);
+			if (bttp->arenas[i].map_locks)
+				Free((void *)bttp->arenas[i].map_locks);
+		}
+		Free(bttp->arenas);
+		bttp->arenas = NULL;
+	}
+	errno = oerrno;
+	return -1;
+}
+
+/*
+ * internal_lbasize -- (internal) calculate internal LBA size
+ */
+static inline uint32_t
+internal_lbasize(uint32_t external_lbasize)
+{
+	uint32_t internal_lbasize = external_lbasize;
+	if (internal_lbasize < BTT_MIN_LBA_SIZE)
+		internal_lbasize = BTT_MIN_LBA_SIZE;
+	internal_lbasize =
+		roundup(internal_lbasize, BTT_INTERNAL_LBA_ALIGNMENT);
+	/* check for overflow */
+	if (internal_lbasize < BTT_INTERNAL_LBA_ALIGNMENT) {
+		errno = EINVAL;
+		ERR("!Invalid lba size after alignment: %u", internal_lbasize);
+		return 0;
+	}
+
+	return internal_lbasize;
+}
+
+/*
+ * btt_flog_size -- calculate flog data size
+ */
+uint64_t
+btt_flog_size(uint32_t nfree)
+{
+	uint64_t flog_size = nfree * roundup(2 * sizeof(struct btt_flog),
+		BTT_FLOG_PAIR_ALIGN);
+	return roundup(flog_size, BTT_ALIGNMENT);
+}
+
+/*
+ * btt_map_size -- calculate map data size
+ */
+uint64_t
+btt_map_size(uint32_t external_nlba)
+{
+	return roundup(external_nlba * BTT_MAP_ENTRY_SIZE, BTT_ALIGNMENT);
+}
+
+/*
+ * btt_arena_datasize -- the whole arena size minus the BTT Info header,
+ * backup header and flog; i.e., the size of the data blocks and map
+ */
+uint64_t
+btt_arena_datasize(uint64_t arena_size, uint32_t nfree)
+{
+	return arena_size - 2 * sizeof(struct btt_info) - btt_flog_size(nfree);
+}
+
+/*
+ * btt_info_set_params -- (internal) calculate and set BTT Info
+ * external_lbasize, internal_lbasize, nfree, infosize, external_nlba and
+ * internal_nlba
+ */
+static int
+btt_info_set_params(struct btt_info *info, uint32_t external_lbasize,
+	uint32_t internal_lbasize, uint32_t nfree, uint64_t arena_size)
+{
+	info->external_lbasize = external_lbasize;
+	info->internal_lbasize = internal_lbasize;
+	info->nfree = nfree;
+	info->infosize = sizeof(*info);
+
+	uint64_t arena_data_size = btt_arena_datasize(arena_size, nfree);
+
+	/* allow for map alignment padding */
+	uint64_t internal_nlba = (arena_data_size - BTT_ALIGNMENT) /
+		(info->internal_lbasize + BTT_MAP_ENTRY_SIZE);
+
+	/* ensure the number of blocks is at least 2*nfree */
+	if (internal_nlba < 2 * nfree) {
+		errno = EINVAL;
+		ERR("!number of internal blocks: %" PRIu64
+			" expected at least %u",
+			internal_nlba, 2 *
nfree);
+		return -1;
+	}
+
+	ASSERT(internal_nlba <= UINT32_MAX);
+	uint32_t internal_nlba_u32 = (uint32_t)internal_nlba;
+
+	info->internal_nlba = internal_nlba_u32;
+	/* external LBA does not include free blocks */
+	info->external_nlba = internal_nlba_u32 - info->nfree;
+
+	ASSERT((arena_data_size - btt_map_size(info->external_nlba)) /
+		internal_lbasize >= internal_nlba);
+
+	return 0;
+}
+
+/*
+ * btt_info_set_offs -- (internal) calculate and set the BTT Info dataoff,
+ * nextoff, infooff, flogoff and mapoff.  These are all relative to the
+ * beginning of the arena.
+ */
+static void
+btt_info_set_offs(struct btt_info *info, uint64_t arena_size,
+	uint64_t space_left)
+{
+	info->dataoff = info->infosize;
+
+	/* set offset to next valid arena */
+	if (space_left >= BTT_MIN_SIZE)
+		info->nextoff = arena_size;
+	else
+		info->nextoff = 0;
+
+	info->infooff = arena_size - sizeof(struct btt_info);
+	info->flogoff = info->infooff - btt_flog_size(info->nfree);
+	info->mapoff = info->flogoff - btt_map_size(info->external_nlba);
+
+	ASSERTeq(btt_arena_datasize(arena_size, info->nfree) -
+		btt_map_size(info->external_nlba), info->mapoff -
+		info->dataoff);
+}
+
+/*
+ * btt_info_set -- set BTT Info params and offsets
+ */
+int
+btt_info_set(struct btt_info *info, uint32_t external_lbasize,
+	uint32_t nfree, uint64_t arena_size, uint64_t space_left)
+{
+	/* calculate internal LBA size */
+	uint32_t internal_lba_size = internal_lbasize(external_lbasize);
+	if (internal_lba_size == 0)
+		return -1;
+
+	/* set params and offsets */
+	if (btt_info_set_params(info, external_lbasize,
+			internal_lba_size, nfree, arena_size))
+		return -1;
+
+	btt_info_set_offs(info, arena_size, space_left);
+
+	return 0;
+}
+
+/*
+ * write_layout -- (internal) write out the initial btt metadata layout
+ *
+ * Called with write == 1 only once in the lifetime of a btt namespace, when
+ * the first write happens.  The caller of this routine is responsible for
+ * locking out multiple threads.  This routine doesn't read anything -- by the
+ * time it is called, it is known there's no layout in the namespace and a new
+ * layout should be written.
+ *
+ * Calling with write == 0 tells this routine to do the calculations for
+ * bttp->narena and bttp->nlba, but not to write out any metadata.
+ *
+ * If successful, sets bttp->laidout to 1 and returns 0.  Otherwise -1
+ * is returned and errno is set, and bttp->laidout remains 0 so that
+ * later attempts to write will try again to create the layout.
+ */
+static int
+write_layout(struct btt *bttp, unsigned lane, int write)
+{
+	LOG(3, "bttp %p lane %u write %d", bttp, lane, write);
+
+	ASSERT(bttp->rawsize >= BTT_MIN_SIZE);
+	ASSERT(bttp->nfree);
+
+	/*
+	 * If a new layout is being written, generate the BTT's UUID.
+	 */
+	if (write) {
+		int ret = util_uuid_generate(bttp->uuid);
+		if (ret < 0) {
+			LOG(2, "util_uuid_generate failed");
+			return -1;
+		}
+	}
+
+	/*
+	 * The number of arenas is the number of full arenas of size
+	 * BTT_MAX_ARENA that fit into rawsize, plus one more arena
+	 * if the remainder is at least BTT_MIN_SIZE in size.
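+	 *
+	 * For example (sizes illustrative): a rawsize of
+	 * 2.5 * BTT_MAX_ARENA yields two full arenas, plus a third
+	 * covering the remaining half, since that remainder is at
+	 * least BTT_MIN_SIZE; a remainder smaller than BTT_MIN_SIZE
+	 * would simply be left unused.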
+ */ + bttp->narena = (unsigned)(bttp->rawsize / BTT_MAX_ARENA); + if (bttp->rawsize % BTT_MAX_ARENA >= BTT_MIN_SIZE) + bttp->narena++; + LOG(4, "narena %u", bttp->narena); + + uint32_t internal_lba_size = internal_lbasize(bttp->lbasize); + if (internal_lba_size == 0) + return -1; + LOG(4, "adjusted internal_lbasize %u", internal_lba_size); + + uint64_t total_nlba = 0; + uint64_t rawsize = bttp->rawsize; + unsigned arena_num = 0; + uint64_t arena_off = 0; + + /* + * for each arena... + */ + while (rawsize >= BTT_MIN_SIZE) { + LOG(4, "layout arena %u", arena_num); + + uint64_t arena_rawsize = rawsize; + if (arena_rawsize > BTT_MAX_ARENA) { + arena_rawsize = BTT_MAX_ARENA; + } + rawsize -= arena_rawsize; + arena_num++; + + struct btt_info info; + memset(&info, '\0', sizeof(info)); + if (btt_info_set_params(&info, bttp->lbasize, + internal_lba_size, bttp->nfree, arena_rawsize)) + return -1; + + LOG(4, "internal_nlba %u external_nlba %u", + info.internal_nlba, info.external_nlba); + + total_nlba += info.external_nlba; + + /* + * The rest of the loop body calculates metadata structures + * and lays it out for this arena. So only continue if + * the write flag is set. + */ + if (!write) + continue; + + btt_info_set_offs(&info, arena_rawsize, rawsize); + + LOG(4, "nextoff 0x%016" PRIx64, info.nextoff); + LOG(4, "dataoff 0x%016" PRIx64, info.dataoff); + LOG(4, "mapoff 0x%016" PRIx64, info.mapoff); + LOG(4, "flogoff 0x%016" PRIx64, info.flogoff); + LOG(4, "infooff 0x%016" PRIx64, info.infooff); + + /* zero map if ns is not zero-initialized */ + if (!bttp->ns_cbp->ns_is_zeroed) { + uint64_t mapsize = btt_map_size(info.external_nlba); + if ((*bttp->ns_cbp->nszero)(bttp->ns, lane, mapsize, + info.mapoff) < 0) + return -1; + } + + /* write out the initial flog */ + uint64_t flog_entry_off = arena_off + info.flogoff; + uint32_t next_free_lba = info.external_nlba; + for (uint32_t i = 0; i < bttp->nfree; i++) { + struct btt_flog flog; + flog.lba = htole32(i); + flog.old_map = flog.new_map = + htole32(next_free_lba | BTT_MAP_ENTRY_ZERO); + flog.seq = htole32(1); + + /* + * Write both btt_flog structs in the pair, writing + * the second one as all zeros. + */ + LOG(6, "flog[%u] entry off %" PRIu64 + " initial %u + zero = %u", + i, flog_entry_off, + next_free_lba, + next_free_lba | BTT_MAP_ENTRY_ZERO); + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &flog, + sizeof(flog), flog_entry_off) < 0) + return -1; + flog_entry_off += sizeof(flog); + + LOG(6, "flog[%u] entry off %" PRIu64 " zeros", + i, flog_entry_off); + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &Zflog, + sizeof(Zflog), flog_entry_off) < 0) + return -1; + flog_entry_off += sizeof(flog); + flog_entry_off = roundup(flog_entry_off, + BTT_FLOG_PAIR_ALIGN); + + next_free_lba++; + } + + /* + * Construct the BTT info block and write it out + * at both the beginning and end of the arena. 
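+		 * The copy at the end of the arena (at infooff) serves
+		 * as a backup that recovery can fall back on if the
+		 * primary copy at the start of the arena is damaged.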
+		 */
+		memcpy(info.sig, Sig, BTTINFO_SIG_LEN);
+		memcpy(info.uuid, bttp->uuid, BTTINFO_UUID_LEN);
+		memcpy(info.parent_uuid, bttp->parent_uuid, BTTINFO_UUID_LEN);
+		info.major = BTTINFO_MAJOR_VERSION;
+		info.minor = BTTINFO_MINOR_VERSION;
+		btt_info_convert2le(&info);
+
+		util_checksum(&info, sizeof(info), &info.checksum, 1, 0);
+
+		if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info,
+				sizeof(info), arena_off) < 0)
+			return -1;
+		if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info,
+				sizeof(info), arena_off + info.infooff) < 0)
+			return -1;
+
+		arena_off += info.nextoff;
+	}
+
+	ASSERTeq(bttp->narena, arena_num);
+
+	bttp->nlba = total_nlba;
+
+	if (write) {
+		/*
+		 * The layout is written now, so load up the arenas.
+		 */
+		return read_arenas(bttp, lane, bttp->narena);
+	}
+
+	return 0;
+}
+
+/*
+ * read_layout -- (internal) load up layout info from btt namespace
+ *
+ * Called once when the btt namespace is opened for use.
+ * Sets bttp->laidout to 0 if no valid layout is found, 1 otherwise.
+ *
+ * Any recovery actions required (as indicated by the flog state) are
+ * performed by this routine.
+ *
+ * Any quick checks for layout consistency are performed by this routine
+ * (quick enough to be done each time a BTT area is opened for use, not
+ * like the slow consistency checks done by btt_check()).
+ *
+ * Returns 0 if no errors are encountered accessing the namespace (in this
+ * context, detecting there's no layout is not an error if the nsread function
+ * didn't have any problems doing the reads).  Otherwise, -1 is returned
+ * and errno is set.
+ */
+static int
+read_layout(struct btt *bttp, unsigned lane)
+{
+	LOG(3, "bttp %p", bttp);
+
+	ASSERT(bttp->rawsize >= BTT_MIN_SIZE);
+
+	unsigned narena = 0;
+	uint32_t smallest_nfree = UINT32_MAX;
+	uint64_t rawsize = bttp->rawsize;
+	uint64_t total_nlba = 0;
+	uint64_t arena_off = 0;
+
+	bttp->nfree = BTT_DEFAULT_NFREE;
+
+	/*
+	 * For each arena, see if there's a valid info block
+	 */
+	while (rawsize >= BTT_MIN_SIZE) {
+		narena++;
+
+		struct btt_info info;
+		if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &info,
+				sizeof(info), arena_off) < 0)
+			return -1;
+
+		if (!read_info(bttp, &info)) {
+			/*
+			 * Failed to find complete BTT metadata.  Just
+			 * calculate the narena and nlba values that will
+			 * result when write_layout() gets called.  This
+			 * allows checks against nlba to work correctly
+			 * even before the layout is written.
+			 */
+			return write_layout(bttp, lane, 0);
+		}
+		if (info.external_lbasize != bttp->lbasize) {
+			/* can't read it assuming the wrong block size */
+			ERR("inconsistent lbasize");
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (info.nfree == 0) {
+			ERR("invalid nfree");
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (info.external_nlba == 0) {
+			ERR("invalid external_nlba");
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (info.nextoff && (info.nextoff != BTT_MAX_ARENA)) {
+			ERR("invalid arena size");
+			errno = EINVAL;
+			return -1;
+		}
+
+		if (info.nfree < smallest_nfree)
+			smallest_nfree = info.nfree;
+
+		total_nlba += info.external_nlba;
+		arena_off += info.nextoff;
+		if (info.nextoff == 0)
+			break;
+		if (info.nextoff > rawsize) {
+			ERR("invalid next arena offset");
+			errno = EINVAL;
+			return -1;
+		}
+		rawsize -= info.nextoff;
+	}
+
+	ASSERT(narena);
+
+	bttp->narena = narena;
+	bttp->nlba = total_nlba;
+
+	/*
+	 * All arenas were valid.  nfree should be the smallest value found
+	 * among different arenas.
+	 */
+	if (smallest_nfree < bttp->nfree)
+		bttp->nfree = smallest_nfree;
+
+	/*
+	 * Load up arenas.
+	 */
+	return read_arenas(bttp, lane, narena);
+}
+
+/*
+ * zero_block -- (internal) satisfy a read with a block of zeros
+ *
+ * Returns 0 on success, otherwise -1/errno.
+ */
+static int
+zero_block(struct btt *bttp, void *buf)
+{
+	LOG(3, "bttp %p", bttp);
+
+	memset(buf, '\0', bttp->lbasize);
+	return 0;
+}
+
+/*
+ * lba_to_arena_lba -- (internal) calculate the arena & pre-map LBA
+ *
+ * This routine takes the external LBA and matches it to the
+ * appropriate arena, adjusting the lba for use within that arena.
+ *
+ * If successful, zero is returned, *arenapp is a pointer to the appropriate
+ * arena struct in the run-time state, and *premap_lbap is the LBA adjusted
+ * to an arena-internal LBA (also known as the pre-map LBA).  Otherwise
+ * -1/errno.
+ */
+static int
+lba_to_arena_lba(struct btt *bttp, uint64_t lba,
+	struct arena **arenapp, uint32_t *premap_lbap)
+{
+	LOG(3, "bttp %p lba %" PRIu64, bttp, lba);
+
+	ASSERT(bttp->laidout);
+
+	unsigned arena;
+	for (arena = 0; arena < bttp->narena; arena++)
+		if (lba < bttp->arenas[arena].external_nlba)
+			break;
+		else
+			lba -= bttp->arenas[arena].external_nlba;
+
+	ASSERT(arena < bttp->narena);
+
+	*arenapp = &bttp->arenas[arena];
+	ASSERT(lba <= UINT32_MAX);
+	*premap_lbap = (uint32_t)lba;
+
+	LOG(3, "arenap %p pre-map LBA %u", *arenapp, *premap_lbap);
+	return 0;
+}
+
+/*
+ * btt_init -- prepare a btt namespace for use, returning an opaque handle
+ *
+ * Returns handle on success, otherwise NULL/errno.
+ *
+ * When handed a pristine namespace, it will be formatted implicitly when
+ * touched for the first time.
+ *
+ * If arenas have different nfree values, the lowest one found is used,
+ * limiting the overall "bandwidth".
+ */
+struct btt *
+btt_init(uint64_t rawsize, uint32_t lbasize, uint8_t parent_uuid[],
+	unsigned maxlane, void *ns, const struct ns_callback *ns_cbp)
+{
+	LOG(3, "rawsize %" PRIu64 " lbasize %u", rawsize, lbasize);
+
+	if (rawsize < BTT_MIN_SIZE) {
+		ERR("rawsize smaller than BTT_MIN_SIZE %u", BTT_MIN_SIZE);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	struct btt *bttp = Zalloc(sizeof(*bttp));
+
+	if (bttp == NULL) {
+		ERR("!Malloc %zu bytes", sizeof(*bttp));
+		return NULL;
+	}
+
+	util_mutex_init(&bttp->layout_write_mutex);
+	memcpy(bttp->parent_uuid, parent_uuid, BTTINFO_UUID_LEN);
+	bttp->rawsize = rawsize;
+	bttp->lbasize = lbasize;
+	bttp->ns = ns;
+	bttp->ns_cbp = ns_cbp;
+
+	/*
+	 * Load up layout, if it exists.
+	 *
+	 * Whether read_layout() finds a valid layout or not, it finishes
+	 * updating these layout-related fields:
+	 *	bttp->nfree
+	 *	bttp->nlba
+	 *	bttp->narena
+	 * since these fields are used even before a valid layout is written.
+	 */
+	if (read_layout(bttp, 0) < 0) {
+		btt_fini(bttp);	/* free up any allocations */
+		return NULL;
+	}
+
+	bttp->nlane = bttp->nfree;
+
+	/* maxlane, if provided, is an upper bound on nlane */
+	if (maxlane && bttp->nlane > maxlane)
+		bttp->nlane = maxlane;
+
+	LOG(3, "success, bttp %p nlane %u", bttp, bttp->nlane);
+	return bttp;
+}
+
+/*
+ * btt_nlane -- return the number of "lanes" for this btt namespace
+ *
+ * The number of lanes is the number of threads allowed in this module
+ * concurrently for a given btt.  Each thread executing this code must
+ * have a unique "lane" number assigned to it between 0 and btt_nlane() - 1.
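+ *
+ * How lanes are parceled out is up to the caller.  libpmemblk, for
+ * instance, rotates through the lanes and guards each one with a
+ * lock (see lane_enter()/lane_exit() in blk.c); any scheme works as
+ * long as no two threads use the same lane at the same time.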
+ */ +unsigned +btt_nlane(struct btt *bttp) +{ + LOG(3, "bttp %p", bttp); + + return bttp->nlane; +} + +/* + * btt_nlba -- return the number of usable blocks in a btt namespace + * + * Valid LBAs to pass to btt_read() and btt_write() are 0 through + * btt_nlba() - 1. + */ +size_t +btt_nlba(struct btt *bttp) +{ + LOG(3, "bttp %p", bttp); + + return bttp->nlba; +} + +/* + * btt_read -- read a block from a btt namespace + * + * Returns 0 on success, otherwise -1/errno. + */ +int +btt_read(struct btt *bttp, unsigned lane, uint64_t lba, void *buf) +{ + LOG(3, "bttp %p lane %u lba %" PRIu64, bttp, lane, lba); + + if (invalid_lba(bttp, lba)) + return -1; + + /* if there's no layout written yet, all reads come back as zeros */ + if (!bttp->laidout) + return zero_block(bttp, buf); + + /* find which arena LBA lives in, and the offset to the map entry */ + struct arena *arenap; + uint32_t premap_lba; + uint64_t map_entry_off; + if (lba_to_arena_lba(bttp, lba, &arenap, &premap_lba) < 0) + return -1; + + /* convert pre-map LBA into an offset into the map */ + map_entry_off = arenap->mapoff + BTT_MAP_ENTRY_SIZE * premap_lba; + + /* + * Read the current map entry to get the post-map LBA for the data + * block read. + */ + uint32_t entry; + + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &entry, + sizeof(entry), map_entry_off) < 0) + return -1; + + entry = le32toh(entry); + + /* + * Retries come back to the top of this loop (for a rare case where + * the map is changed by another thread doing writes to the same LBA). + */ + while (1) { + if (map_entry_is_error(entry)) { + ERR("EIO due to map entry error flag"); + errno = EIO; + return -1; + } + + if (map_entry_is_zero_or_initial(entry)) + return zero_block(bttp, buf); + + /* + * Record the post-map LBA in the read tracking table during + * the read. The write will check entries in the read tracking + * table before allocating a block for a write, waiting for + * outstanding reads on that block to complete. + * + * Since we already checked for error, zero, and initial + * states above, the entry must have both error and zero + * bits set at this point (BTT_MAP_ENTRY_NORMAL). We store + * the entry that way, with those bits set, in the rtt and + * btt_write() will check for it the same way, with the bits + * both set. + */ + arenap->rtt[lane] = entry; + util_synchronize(); + + /* + * In case this thread was preempted between reading entry and + * storing it in the rtt, check to see if the map changed. If + * it changed, the block about to be read is at least free now + * (in the flog, but that's okay since the data will still be + * undisturbed) and potentially allocated and being used for + * another write (data disturbed, so not okay to continue). + */ + uint32_t latest_entry; + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, &latest_entry, + sizeof(latest_entry), map_entry_off) < 0) { + arenap->rtt[lane] = BTT_MAP_ENTRY_ERROR; + return -1; + } + + latest_entry = le32toh(latest_entry); + + if (entry == latest_entry) + break; /* map stayed the same */ + else + entry = latest_entry; /* try again */ + } + + /* + * It is safe to read the block now, since the rtt protects the + * block from getting re-allocated to something else by a write. 
+ */ + uint64_t data_block_off = + arenap->dataoff + (uint64_t)(entry & BTT_MAP_ENTRY_LBA_MASK) * + arenap->internal_lbasize; + int readret = (*bttp->ns_cbp->nsread)(bttp->ns, lane, buf, + bttp->lbasize, data_block_off); + + /* done with read, so clear out rtt entry */ + arenap->rtt[lane] = BTT_MAP_ENTRY_ERROR; + + return readret; +} + +/* + * map_lock -- (internal) grab the map_lock and read a map entry + */ +static int +map_lock(struct btt *bttp, unsigned lane, struct arena *arenap, + uint32_t *entryp, uint32_t premap_lba) +{ + LOG(3, "bttp %p lane %u arenap %p premap_lba %u", + bttp, lane, arenap, premap_lba); + + uint64_t map_entry_off = + arenap->mapoff + BTT_MAP_ENTRY_SIZE * premap_lba; + uint32_t map_lock_num = get_map_lock_num(premap_lba, bttp->nfree); + + util_mutex_lock(&arenap->map_locks[map_lock_num]); + + /* read the old map entry */ + if ((*bttp->ns_cbp->nsread)(bttp->ns, lane, entryp, + sizeof(uint32_t), map_entry_off) < 0) { + util_mutex_unlock(&arenap->map_locks[map_lock_num]); + return -1; + } + + /* if map entry is in its initial state return premap_lba */ + if (map_entry_is_initial(*entryp)) + *entryp = htole32(premap_lba | BTT_MAP_ENTRY_NORMAL); + + LOG(9, "locked map[%d]: %u%s%s", premap_lba, + *entryp & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(*entryp)) ? " ERROR" : "", + (map_entry_is_zero(*entryp)) ? " ZERO" : ""); + + return 0; +} + +/* + * map_abort -- (internal) drop the map_lock without updating the entry + */ +static void +map_abort(struct btt *bttp, unsigned lane, struct arena *arenap, + uint32_t premap_lba) +{ + LOG(3, "bttp %p lane %u arenap %p premap_lba %u", + bttp, lane, arenap, premap_lba); + + util_mutex_unlock(&arenap->map_locks[get_map_lock_num(premap_lba, + bttp->nfree)]); +} + +/* + * map_unlock -- (internal) update the map and drop the map_lock + */ +static int +map_unlock(struct btt *bttp, unsigned lane, struct arena *arenap, + uint32_t entry, uint32_t premap_lba) +{ + LOG(3, "bttp %p lane %u arenap %p entry %u premap_lba %u", + bttp, lane, arenap, entry, premap_lba); + + uint64_t map_entry_off = + arenap->mapoff + BTT_MAP_ENTRY_SIZE * premap_lba; + + /* write the new map entry */ + int err = (*bttp->ns_cbp->nswrite)(bttp->ns, lane, &entry, + sizeof(uint32_t), map_entry_off); + + util_mutex_unlock(&arenap->map_locks[get_map_lock_num(premap_lba, + bttp->nfree)]); + + LOG(9, "unlocked map[%d]: %u%s%s", premap_lba, + entry & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(entry)) ? " ERROR" : "", + (map_entry_is_zero(entry)) ? " ZERO" : ""); + + return err; +} + +/* + * btt_write -- write a block to a btt namespace + * + * Returns 0 on success, otherwise -1/errno. 
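+ *
+ * In outline (matching the code below): the data is written to the
+ * free block owned by flog[lane], after first draining any readers
+ * of that block via the rtt; the flog entry is then updated durably,
+ * and finally the map entry is atomically pointed at the new block.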
+ */ +int +btt_write(struct btt *bttp, unsigned lane, uint64_t lba, const void *buf) +{ + LOG(3, "bttp %p lane %u lba %" PRIu64, bttp, lane, lba); + + if (invalid_lba(bttp, lba)) + return -1; + + /* first write through here will initialize the metadata layout */ + if (!bttp->laidout) { + int err = 0; + + util_mutex_lock(&bttp->layout_write_mutex); + + if (!bttp->laidout) + err = write_layout(bttp, lane, 1); + + util_mutex_unlock(&bttp->layout_write_mutex); + + if (err < 0) + return err; + } + + /* find which arena LBA lives in, and the offset to the map entry */ + struct arena *arenap; + uint32_t premap_lba; + if (lba_to_arena_lba(bttp, lba, &arenap, &premap_lba) < 0) + return -1; + + /* if the arena is in an error state, writing is not allowed */ + if (arenap->flags & BTTINFO_FLAG_ERROR_MASK) { + ERR("EIO due to btt_info error flags 0x%x", + arenap->flags & BTTINFO_FLAG_ERROR_MASK); + errno = EIO; + return -1; + } + + /* + * This routine was passed a unique "lane" which is an index + * into the flog. That means the free block held by flog[lane] + * is assigned to this thread and to no other threads (no additional + * locking required). So start by performing the write to the + * free block. It is only safe to write to a free block if it + * doesn't appear in the read tracking table, so scan that first + * and if found, wait for the thread reading from it to finish. + */ + uint32_t free_entry = (arenap->flogs[lane].flog.old_map & + BTT_MAP_ENTRY_LBA_MASK) | BTT_MAP_ENTRY_NORMAL; + + LOG(3, "free_entry %u (before mask %u)", free_entry, + arenap->flogs[lane].flog.old_map); + + /* wait for other threads to finish any reads on free block */ + for (unsigned i = 0; i < bttp->nlane; i++) + while (arenap->rtt[i] == free_entry) + ; + + /* it is now safe to perform write to the free block */ + uint64_t data_block_off = arenap->dataoff + + (uint64_t)(free_entry & BTT_MAP_ENTRY_LBA_MASK) * + arenap->internal_lbasize; + if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, buf, + bttp->lbasize, data_block_off) < 0) + return -1; + + /* + * Make the new block active atomically by updating the on-media flog + * and then updating the map. + */ + uint32_t old_entry; + if (map_lock(bttp, lane, arenap, &old_entry, premap_lba) < 0) + return -1; + + old_entry = le32toh(old_entry); + + /* update the flog */ + if (flog_update(bttp, lane, arenap, premap_lba, + old_entry, free_entry) < 0) { + map_abort(bttp, lane, arenap, premap_lba); + return -1; + } + + if (map_unlock(bttp, lane, arenap, htole32(free_entry), + premap_lba) < 0) { + /* + * A critical write error occurred, set the arena's + * info block error bit. + */ + set_arena_error(bttp, arenap, lane); + errno = EIO; + return -1; + } + + return 0; +} + +/* + * map_entry_setf -- (internal) set a given flag on a map entry + * + * Returns 0 on success, otherwise -1/errno. + */ +static int +map_entry_setf(struct btt *bttp, unsigned lane, uint64_t lba, uint32_t setf) +{ + LOG(3, "bttp %p lane %u lba %" PRIu64 " setf 0x%x", + bttp, lane, lba, setf); + + if (invalid_lba(bttp, lba)) + return -1; + + if (!bttp->laidout) { + /* + * No layout is written yet. If the flag being set + * is the zero flag, it is superfluous since all blocks + * read as zero at this point. + */ + if (setf == BTT_MAP_ENTRY_ZERO) + return 0; + + /* + * Treat this like the first write and write out + * the metadata layout at this point. 
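+ * (This is the same layout_write_mutex/double-check sequence used + * by btt_write() above.)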
+ */ + int err = 0; + util_mutex_lock(&bttp->layout_write_mutex); + + if (!bttp->laidout) + err = write_layout(bttp, lane, 1); + + util_mutex_unlock(&bttp->layout_write_mutex); + + if (err < 0) + return err; + } + + /* find which arena LBA lives in, and the offset to the map entry */ + struct arena *arenap; + uint32_t premap_lba; + if (lba_to_arena_lba(bttp, lba, &arenap, &premap_lba) < 0) + return -1; + + /* if the arena is in an error state, writing is not allowed */ + if (arenap->flags & BTTINFO_FLAG_ERROR_MASK) { + ERR("EIO due to btt_info error flags 0x%x", + arenap->flags & BTTINFO_FLAG_ERROR_MASK); + errno = EIO; + return -1; + } + + /* + * Set the flags in the map entry. To do this, read the + * current map entry, set the flags, and write out the update. + */ + uint32_t old_entry; + uint32_t new_entry; + + if (map_lock(bttp, lane, arenap, &old_entry, premap_lba) < 0) + return -1; + + old_entry = le32toh(old_entry); + + if (setf == BTT_MAP_ENTRY_ZERO && + map_entry_is_zero_or_initial(old_entry)) { + map_abort(bttp, lane, arenap, premap_lba); + return 0; /* block already zero, nothing to do */ + } + + /* create the new map entry */ + new_entry = (old_entry & BTT_MAP_ENTRY_LBA_MASK) | setf; + + if (map_unlock(bttp, lane, arenap, htole32(new_entry), premap_lba) < 0) + return -1; + + return 0; +} + +/* + * btt_set_zero -- mark a block as zeroed in a btt namespace + * + * Returns 0 on success, otherwise -1/errno. + */ +int +btt_set_zero(struct btt *bttp, unsigned lane, uint64_t lba) +{ + LOG(3, "bttp %p lane %u lba %" PRIu64, bttp, lane, lba); + + return map_entry_setf(bttp, lane, lba, BTT_MAP_ENTRY_ZERO); +} + +/* + * btt_set_error -- mark a block as in an error state in a btt namespace + * + * Returns 0 on success, otherwise -1/errno. + */ +int +btt_set_error(struct btt *bttp, unsigned lane, uint64_t lba) +{ + LOG(3, "bttp %p lane %u lba %" PRIu64, bttp, lane, lba); + + return map_entry_setf(bttp, lane, lba, BTT_MAP_ENTRY_ERROR); +} + +/* + * check_arena -- (internal) perform a consistency check on an arena + */ +static int +check_arena(struct btt *bttp, struct arena *arenap) +{ + LOG(3, "bttp %p arenap %p", bttp, arenap); + + int consistent = 1; + + uint64_t map_entry_off = arenap->mapoff; + uint32_t bitmapsize = howmany(arenap->internal_nlba, 8); + uint8_t *bitmap = Zalloc(bitmapsize); + if (bitmap == NULL) { + ERR("!Zalloc for bitmap"); + return -1; + } + + /* + * Go through every post-map LBA mentioned in the map and make sure + * there are no duplicates. bitmap is used to track which LBAs have + * been seen so far. + */ + uint32_t *mapp = NULL; + ssize_t mlen; + int next_index = 0; + size_t remaining = 0; + for (uint32_t i = 0; i < arenap->external_nlba; i++) { + uint32_t entry; + + if (remaining == 0) { + /* request a mapping of remaining map area */ + size_t req_len = + (arenap->external_nlba - i) * sizeof(uint32_t); + mlen = (*bttp->ns_cbp->nsmap)(bttp->ns, 0, + (void **)&mapp, req_len, map_entry_off); + + if (mlen < 0) { + Free(bitmap); + return -1; + } + + remaining = (size_t)mlen; + next_index = 0; + } + entry = le32toh(mapp[next_index]); + + /* for debug, dump non-zero map entries at log level 11 */ + if (map_entry_is_zero_or_initial(entry) == 0) + LOG(11, "map[%d]: %u%s", i, + entry & BTT_MAP_ENTRY_LBA_MASK, + (map_entry_is_error(entry)) ?
" ERROR" : ""); + + /* this is an uninitialized map entry, set the default value */ + if (map_entry_is_initial(entry)) + entry = i; + else + entry &= BTT_MAP_ENTRY_LBA_MASK; + + /* check if entry is valid */ + if (entry >= arenap->internal_nlba) { + ERR("map[%d] entry out of bounds: %u", i, entry); + errno = EINVAL; + return -1; + } + + if (util_isset(bitmap, entry)) { + ERR("map[%d] duplicate entry: %u", i, entry); + consistent = 0; + } else + util_setbit(bitmap, entry); + + map_entry_off += sizeof(uint32_t); + next_index++; + ASSERT(remaining >= sizeof(uint32_t)); + remaining -= sizeof(uint32_t); + } + + /* + * Go through the free blocks in the flog, adding them to bitmap + * and checking for duplications. It is sufficient to read the + * run-time flog here, avoiding more calls to nsread. + */ + for (uint32_t i = 0; i < bttp->nfree; i++) { + uint32_t entry = arenap->flogs[i].flog.old_map; + entry &= BTT_MAP_ENTRY_LBA_MASK; + + if (util_isset(bitmap, entry)) { + ERR("flog[%u] duplicate entry: %u", i, entry); + consistent = 0; + } else + util_setbit(bitmap, entry); + } + + /* + * Make sure every possible post-map LBA was accounted for + * in the two loops above. + */ + for (uint32_t i = 0; i < arenap->internal_nlba; i++) + if (util_isclr(bitmap, i)) { + ERR("unreferenced lba: %d", i); + consistent = 0; + } + + Free(bitmap); + + return consistent; +} + +/* + * btt_check -- perform a consistency check on a btt namespace + * + * This routine contains a fairly high-impact set of consistency checks. + * It may use a good amount of dynamic memory and CPU time performing + * the checks. Any lightweight, quick consistency checks are included + * in read_layout() so they happen every time the BTT area is opened + * for use. + * + * Returns true if consistent, zero if inconsistent, -1/error if checking + * cannot happen due to other errors. + * + * No lane number required here because only one thread is allowed -- all + * other threads must be locked out of all btt routines for this btt + * namespace while this is running. + */ +int +btt_check(struct btt *bttp) +{ + LOG(3, "bttp %p", bttp); + + int consistent = 1; + + if (!bttp->laidout) { + /* consistent by definition */ + LOG(3, "no layout yet"); + return consistent; + } + + /* XXX report issues found during read_layout (from flags) */ + + /* for each arena... */ + struct arena *arenap = bttp->arenas; + for (unsigned i = 0; i < bttp->narena; i++, arenap++) { + /* + * Perform the consistency checks for the arena. 
+ */ + int retval = check_arena(bttp, arenap); + if (retval < 0) + return retval; + else if (retval == 0) + consistent = 0; + } + + /* XXX stub */ + return consistent; +} + +/* + * btt_fini -- delete opaque btt info, done using btt namespace + */ +void +btt_fini(struct btt *bttp) +{ + LOG(3, "bttp %p", bttp); + + if (bttp->arenas) { + for (unsigned i = 0; i < bttp->narena; i++) { + if (bttp->arenas[i].flogs) + Free(bttp->arenas[i].flogs); + if (bttp->arenas[i].rtt) + Free((void *)bttp->arenas[i].rtt); + if (bttp->arenas[i].map_locks) + Free((void *)bttp->arenas[i].map_locks); + } + Free(bttp->arenas); + } + Free(bttp); +} diff --git a/src/pmdk/src/libpmemblk/btt.h b/src/pmdk/src/libpmemblk/btt.h new file mode 100644 index 000000000..94f699427 --- /dev/null +++ b/src/pmdk/src/libpmemblk/btt.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * btt.h -- btt module definitions + */ + +#ifndef BTT_H +#define BTT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* callback functions passed to btt_init() */ +struct ns_callback { + int (*nsread)(void *ns, unsigned lane, + void *buf, size_t count, uint64_t off); + int (*nswrite)(void *ns, unsigned lane, + const void *buf, size_t count, uint64_t off); + int (*nszero)(void *ns, unsigned lane, size_t count, uint64_t off); + ssize_t (*nsmap)(void *ns, unsigned lane, void **addrp, + size_t len, uint64_t off); + void (*nssync)(void *ns, unsigned lane, void *addr, size_t len); + + int ns_is_zeroed; +}; + +struct btt_info; + +struct btt *btt_init(uint64_t rawsize, uint32_t lbasize, uint8_t parent_uuid[], + unsigned maxlane, void *ns, const struct ns_callback *ns_cbp); +unsigned btt_nlane(struct btt *bttp); +size_t btt_nlba(struct btt *bttp); +int btt_read(struct btt *bttp, unsigned lane, uint64_t lba, void *buf); +int btt_write(struct btt *bttp, unsigned lane, uint64_t lba, const void *buf); +int btt_set_zero(struct btt *bttp, unsigned lane, uint64_t lba); +int btt_set_error(struct btt *bttp, unsigned lane, uint64_t lba); +int btt_check(struct btt *bttp); +void btt_fini(struct btt *bttp); + +uint64_t btt_flog_size(uint32_t nfree); +uint64_t btt_map_size(uint32_t external_nlba); +uint64_t btt_arena_datasize(uint64_t arena_size, uint32_t nfree); +int btt_info_set(struct btt_info *info, uint32_t external_lbasize, + uint32_t nfree, uint64_t arena_size, uint64_t space_left); + +struct btt_flog *btt_flog_get_valid(struct btt_flog *flog_pair, int *next); +int map_entry_is_initial(uint32_t map_entry); +void btt_info_convert2h(struct btt_info *infop); +void btt_info_convert2le(struct btt_info *infop); +void btt_flog_convert2h(struct btt_flog *flogp); +void btt_flog_convert2le(struct btt_flog *flogp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemblk/btt_layout.h b/src/pmdk/src/libpmemblk/btt_layout.h new file mode 100644 index 000000000..8fa33f985 --- /dev/null +++ b/src/pmdk/src/libpmemblk/btt_layout.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * btt_layout.h -- block translation table on-media layout definitions + */ + +/* + * Layout of BTT info block. All integers are stored little-endian.
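+ * + * The btt_info_convert2h()/btt_info_convert2le() helpers declared in + * btt.h perform the byte-order conversion for this structure.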
+ */ + +#ifndef BTT_LAYOUT_H +#define BTT_LAYOUT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#define BTT_ALIGNMENT ((uintptr_t)4096) /* alignment of all BTT structures */ +#define BTTINFO_SIG_LEN 16 +#define BTTINFO_UUID_LEN 16 +#define BTTINFO_UNUSED_LEN 3968 +#define BTTINFO_SIG "BTT_ARENA_INFO\0" + +struct btt_info { + char sig[BTTINFO_SIG_LEN]; /* must be "BTT_ARENA_INFO\0\0" */ + uint8_t uuid[BTTINFO_UUID_LEN]; /* BTT UUID */ + uint8_t parent_uuid[BTTINFO_UUID_LEN]; /* UUID of container */ + uint32_t flags; /* see flag bits below */ + uint16_t major; /* major version */ + uint16_t minor; /* minor version */ + uint32_t external_lbasize; /* advertised LBA size (bytes) */ + uint32_t external_nlba; /* advertised LBAs in this arena */ + uint32_t internal_lbasize; /* size of data area blocks (bytes) */ + uint32_t internal_nlba; /* number of blocks in data area */ + uint32_t nfree; /* number of free blocks */ + uint32_t infosize; /* size of this info block */ + + /* + * The following offsets are relative to the beginning of + * the btt_info block. + */ + uint64_t nextoff; /* offset to next arena (or zero) */ + uint64_t dataoff; /* offset to arena data area */ + uint64_t mapoff; /* offset to area map */ + uint64_t flogoff; /* offset to area flog */ + uint64_t infooff; /* offset to backup info block */ + + char unused[BTTINFO_UNUSED_LEN]; /* must be zero */ + + uint64_t checksum; /* Fletcher64 of all fields */ +}; + +/* + * Definitions for flags mask for btt_info structure above. + */ +#define BTTINFO_FLAG_ERROR 0x00000001 /* error state (read-only) */ +#define BTTINFO_FLAG_ERROR_MASK 0x00000001 /* all error bits */ + +/* + * Current on-media format versions. + */ +#define BTTINFO_MAJOR_VERSION 1 +#define BTTINFO_MINOR_VERSION 1 + +/* + * Layout of a BTT "flog" entry. All integers are stored little-endian. + * + * The "nfree" field in the BTT info block determines how many of these + * flog entries there are, and each entry consists of two of the following + * structs (entry updates alternate between the two structs), padded up + * to a cache line boundary to isolate adjacent updates. + */ + +#define BTT_FLOG_PAIR_ALIGN ((uintptr_t)64) + +struct btt_flog { + uint32_t lba; /* last pre-map LBA using this entry */ + uint32_t old_map; /* old post-map LBA (the freed block) */ + uint32_t new_map; /* new post-map LBA */ + uint32_t seq; /* sequence number (01, 10, 11) */ +}; + +/* + * Layout of a BTT "map" entry. 4-byte internal LBA offset, little-endian. + */ +#define BTT_MAP_ENTRY_SIZE 4 +#define BTT_MAP_ENTRY_ERROR 0x40000000U +#define BTT_MAP_ENTRY_ZERO 0x80000000U +#define BTT_MAP_ENTRY_NORMAL 0xC0000000U +#define BTT_MAP_ENTRY_LBA_MASK 0x3fffffffU +#define BTT_MAP_LOCK_ALIGN ((uintptr_t)64) + +/* + * BTT layout properties... 
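+ * + * A single arena covers at most BTT_MAX_ARENA bytes; a larger + * namespace is carved into multiple arenas, chained together via the + * nextoff field of each info block.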
+ */ +#define BTT_MIN_SIZE ((1u << 20) * 16) +#define BTT_MAX_ARENA (1ull << 39) /* 512GB per arena */ +#define BTT_MIN_LBA_SIZE (size_t)512 +#define BTT_INTERNAL_LBA_ALIGNMENT 256U +#define BTT_DEFAULT_NFREE 256 + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemblk/libpmemblk.c b/src/pmdk/src/libpmemblk/libpmemblk.c new file mode 100644 index 000000000..21675eff7 --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2018, Intel Corporation */ + +/* + * libpmemblk.c -- pmem entry points for libpmemblk + */ + +#include <stdio.h> +#include <stdint.h> + +#include "libpmemblk.h" +#include "ctl_global.h" + +#include "pmemcommon.h" +#include "blk.h" + +/* + * The variable from which the config is directly loaded. The string + * cannot contain any comments or extraneous white characters. + */ +#define BLK_CONFIG_ENV_VARIABLE "PMEMBLK_CONF" + +/* + * The variable that points to a config file from which the config is loaded. + */ +#define BLK_CONFIG_FILE_ENV_VARIABLE "PMEMBLK_CONF_FILE" + +/* + * blk_ctl_init_and_load -- (static) initializes CTL and loads configuration + * from env variable and file + */ +static int +blk_ctl_init_and_load(PMEMblkpool *pbp) +{ + LOG(3, "pbp %p", pbp); + + if (pbp != NULL && (pbp->ctl = ctl_new()) == NULL) { + LOG(2, "!ctl_new"); + return -1; + } + + char *env_config = os_getenv(BLK_CONFIG_ENV_VARIABLE); + if (env_config != NULL) { + if (ctl_load_config_from_string(pbp ? pbp->ctl : NULL, + pbp, env_config) != 0) { + LOG(2, "unable to parse config stored in %s " + "environment variable", + BLK_CONFIG_ENV_VARIABLE); + goto err; + } + } + + char *env_config_file = os_getenv(BLK_CONFIG_FILE_ENV_VARIABLE); + if (env_config_file != NULL && env_config_file[0] != '\0') { + if (ctl_load_config_from_file(pbp ? pbp->ctl : NULL, + pbp, env_config_file) != 0) { + LOG(2, "unable to parse config stored in %s " + "file (from %s environment variable)", + env_config_file, + BLK_CONFIG_FILE_ENV_VARIABLE); + goto err; + } + } + + return 0; +err: + if (pbp) + ctl_delete(pbp->ctl); + return -1; +} + +/* + * libpmemblk_init -- (internal) load-time initialization for blk + * + * Called automatically by the run-time loader. + */ +ATTR_CONSTRUCTOR +void +libpmemblk_init(void) +{ + ctl_global_register(); + + if (blk_ctl_init_and_load(NULL)) + FATAL("error: %s", pmemblk_errormsg()); + + common_init(PMEMBLK_LOG_PREFIX, PMEMBLK_LOG_LEVEL_VAR, + PMEMBLK_LOG_FILE_VAR, PMEMBLK_MAJOR_VERSION, + PMEMBLK_MINOR_VERSION); + LOG(3, NULL); +} + +/* + * libpmemblk_fini -- libpmemblk cleanup routine + * + * Called automatically when the process terminates.
+ */ +ATTR_DESTRUCTOR +void +libpmemblk_fini(void) +{ + LOG(3, NULL); + common_fini(); +} + +/* + * pmemblk_check_versionU -- see if lib meets application version requirements + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemblk_check_versionU(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != PMEMBLK_MAJOR_VERSION) { + ERR("libpmemblk major version mismatch (need %u, found %u)", + major_required, PMEMBLK_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > PMEMBLK_MINOR_VERSION) { + ERR("libpmemblk minor version mismatch (need %u, found %u)", + minor_required, PMEMBLK_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +#ifndef _WIN32 +/* + * pmemblk_check_version -- see if lib meets application version requirements + */ +const char * +pmemblk_check_version(unsigned major_required, unsigned minor_required) +{ + return pmemblk_check_versionU(major_required, minor_required); +} +#else +/* + * pmemblk_check_versionW -- see if lib meets application version requirements + */ +const wchar_t * +pmemblk_check_versionW(unsigned major_required, unsigned minor_required) +{ + if (pmemblk_check_versionU(major_required, minor_required) != NULL) + return out_get_errormsgW(); + else + return NULL; +} +#endif + +/* + * pmemblk_set_funcs -- allow overriding libpmemblk's call to malloc, etc. + */ +void +pmemblk_set_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)) +{ + LOG(3, NULL); + + util_set_alloc_funcs(malloc_func, free_func, realloc_func, strdup_func); +} + +/* + * pmemblk_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemblk_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmemblk_errormsg -- return last error message + */ +const char * +pmemblk_errormsg(void) +{ + return pmemblk_errormsgU(); +} +#else +/* + * pmemblk_errormsgW -- return last error message as wchar_t + */ +const wchar_t * +pmemblk_errormsgW(void) +{ + return out_get_errormsgW(); +} +#endif diff --git a/src/pmdk/src/libpmemblk/libpmemblk.def b/src/pmdk/src/libpmemblk/libpmemblk.def new file mode 100644 index 000000000..fa7f91f14 --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.def @@ -0,0 +1,36 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2015-2018, Intel Corporation +;;;; End Copyright Notice + +LIBRARY libpmemblk + +VERSION 1.0 + +EXPORTS + pmemblk_check_versionU + pmemblk_check_versionW + pmemblk_set_funcs + pmemblk_errormsgU + pmemblk_errormsgW + pmemblk_createU + pmemblk_createW + pmemblk_openU + pmemblk_openW + pmemblk_close + pmemblk_checkU + pmemblk_checkW + pmemblk_ctl_execU; + pmemblk_ctl_execW; + pmemblk_ctl_getU; + pmemblk_ctl_getW; + pmemblk_ctl_setU; + pmemblk_ctl_setW; + pmemblk_bsize + pmemblk_nblock + pmemblk_read + pmemblk_write + pmemblk_set_zero + pmemblk_set_error + + DllMain diff --git a/src/pmdk/src/libpmemblk/libpmemblk.link.in b/src/pmdk/src/libpmemblk/libpmemblk.link.in new file mode 100644 index 000000000..b61e83ced --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.link.in @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2019, Intel Corporation +# +# +# src/libpmemblk.link -- linker link file for libpmemblk +# +LIBPMEMBLK_1.0 { + global: + pmemblk_check_version; + 
pmemblk_set_funcs; + pmemblk_errormsg; + pmemblk_create; + pmemblk_open; + pmemblk_close; + pmemblk_check; + pmemblk_ctl_exec; + pmemblk_ctl_get; + pmemblk_ctl_set; + pmemblk_nblock; + pmemblk_read; + pmemblk_write; + pmemblk_set_zero; + pmemblk_set_error; + pmemblk_bsize; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/libpmemblk/libpmemblk.rc b/src/pmdk/src/libpmemblk/libpmemblk.rc new file mode 100644 index 000000000..b95b6252e --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016, Intel Corporation */ + +/* + * libpmemblk.rc -- libpmemblk resource file + */ + +#include +#define FILE_NAME "libpmemblk.dll" +#define DESCRIPTION "libpmemblk - persistent memory resident array of blocks" +#define TYPE VFT_DLL +#include \ No newline at end of file diff --git a/src/pmdk/src/libpmemblk/libpmemblk.vcxproj b/src/pmdk/src/libpmemblk/libpmemblk.vcxproj new file mode 100644 index 000000000..680052a3f --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.vcxproj @@ -0,0 +1,133 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {9e9e3d25-2139-4a5d-9200-18148ddead45} + + + {901f04db-e1a5-4a41-8b81-9d31c19acd59} + + + + + + + {f7c6c6b6-4142-4c82-8699-4a9d8183181b} + DynamicLibrary + libpmemblk + libpmemblk + en-US + 14.0 + 10.0.17134.0 + 10.0.10240.0 + + + + DynamicLibrary + true + v140 + + + DynamicLibrary + false + false + v140 + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmemblk/libpmemblk.vcxproj.filters b/src/pmdk/src/libpmemblk/libpmemblk.vcxproj.filters new file mode 100644 index 000000000..198595944 --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk.vcxproj.filters @@ -0,0 +1,217 @@ + + + + + {5f4b56cf-a674-4f35-abfa-d867d9d91f68} + + + {dee0ff57-9af8-485a-888b-0087d6e11cf8} + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmemblk/libpmemblk_main.c b/src/pmdk/src/libpmemblk/libpmemblk_main.c new file mode 100644 index 000000000..28c547d2e --- /dev/null +++ b/src/pmdk/src/libpmemblk/libpmemblk_main.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: 
BSD-3-Clause +/* Copyright 2015-2017, Intel Corporation */ + +/* + * libpmemblk_main.c -- entry point for libpmemblk.dll + * + * XXX - This is a placeholder. All the library initialization/cleanup + * that is done in library ctors/dtors, as well as TLS initialization + * should be moved here. + */ + +void libpmemblk_init(void); +void libpmemblk_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmemblk_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + libpmemblk_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmemlog/Makefile b/src/pmdk/src/libpmemlog/Makefile new file mode 100644 index 000000000..25fa84d1f --- /dev/null +++ b/src/pmdk/src/libpmemlog/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# src/libpmemlog/Makefile -- Makefile for libpmemlog +# + +LIBRARY_NAME = pmemlog +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 + +include ../core/pmemcore.inc +include ../common/pmemcommon.inc + +SOURCE +=\ + libpmemlog.c\ + log.c + +include ../Makefile.inc + +CFLAGS += $(LIBNDCTL_CFLAGS) + +LIBS += -pthread -lpmem $(LIBNDCTL_LIBS) diff --git a/src/pmdk/src/libpmemlog/libpmemlog.c b/src/pmdk/src/libpmemlog/libpmemlog.c new file mode 100644 index 000000000..c24e0c7f9 --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2018, Intel Corporation */ + +/* + * libpmemlog.c -- pmem entry points for libpmemlog + */ + +#include <stdio.h> +#include <stdint.h> + +#include "libpmemlog.h" +#include "ctl_global.h" + +#include "pmemcommon.h" +#include "log.h" + +/* + * The variable from which the config is directly loaded. The string + * cannot contain any comments or extraneous white characters. + */ +#define LOG_CONFIG_ENV_VARIABLE "PMEMLOG_CONF" + +/* + * The variable that points to a config file from which the config is loaded. + */ +#define LOG_CONFIG_FILE_ENV_VARIABLE "PMEMLOG_CONF_FILE" + +/* + * log_ctl_init_and_load -- (static) initializes CTL and loads configuration + * from env variable and file + */ +static int +log_ctl_init_and_load(PMEMlogpool *plp) +{ + LOG(3, "plp %p", plp); + + if (plp != NULL && (plp->ctl = ctl_new()) == NULL) { + LOG(2, "!ctl_new"); + return -1; + } + + char *env_config = os_getenv(LOG_CONFIG_ENV_VARIABLE); + if (env_config != NULL) { + if (ctl_load_config_from_string(plp ? plp->ctl : NULL, + plp, env_config) != 0) { + LOG(2, "unable to parse config stored in %s " + "environment variable", + LOG_CONFIG_ENV_VARIABLE); + goto err; + } + } + + char *env_config_file = os_getenv(LOG_CONFIG_FILE_ENV_VARIABLE); + if (env_config_file != NULL && env_config_file[0] != '\0') { + if (ctl_load_config_from_file(plp ? plp->ctl : NULL, + plp, env_config_file) != 0) { + LOG(2, "unable to parse config stored in %s " + "file (from %s environment variable)", + env_config_file, + LOG_CONFIG_FILE_ENV_VARIABLE); + goto err; + } + } + + return 0; +err: + if (plp) + ctl_delete(plp->ctl); + return -1; +} + +/* + * libpmemlog_init -- (internal) load-time initialization for log + * + * Called automatically by the run-time loader.
+ */ +ATTR_CONSTRUCTOR +void +libpmemlog_init(void) +{ + ctl_global_register(); + + if (log_ctl_init_and_load(NULL)) + FATAL("error: %s", pmemlog_errormsg()); + + common_init(PMEMLOG_LOG_PREFIX, PMEMLOG_LOG_LEVEL_VAR, + PMEMLOG_LOG_FILE_VAR, PMEMLOG_MAJOR_VERSION, + PMEMLOG_MINOR_VERSION); + LOG(3, NULL); +} + +/* + * libpmemlog_fini -- libpmemlog cleanup routine + * + * Called automatically when the process terminates. + */ +ATTR_DESTRUCTOR +void +libpmemlog_fini(void) +{ + LOG(3, NULL); + common_fini(); +} + +/* + * pmemlog_check_versionU -- see if lib meets application version requirements + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemlog_check_versionU(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != PMEMLOG_MAJOR_VERSION) { + ERR("libpmemlog major version mismatch (need %u, found %u)", + major_required, PMEMLOG_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > PMEMLOG_MINOR_VERSION) { + ERR("libpmemlog minor version mismatch (need %u, found %u)", + minor_required, PMEMLOG_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +#ifndef _WIN32 +/* + * pmemlog_check_version -- see if lib meets application version requirements + */ +const char * +pmemlog_check_version(unsigned major_required, unsigned minor_required) +{ + return pmemlog_check_versionU(major_required, minor_required); +} +#else +/* + * pmemlog_check_versionW -- see if lib meets application version requirements + */ +const wchar_t * +pmemlog_check_versionW(unsigned major_required, unsigned minor_required) +{ + if (pmemlog_check_versionU(major_required, minor_required) != NULL) + return out_get_errormsgW(); + else + return NULL; +} +#endif + +/* + * pmemlog_set_funcs -- allow overriding libpmemlog's call to malloc, etc. 
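+ * + * A minimal sketch of a hypothetical caller (the replacement functions + * must have malloc/free/realloc/strdup-compatible semantics and should + * be installed before any other libpmemlog call): + * + *	pmemlog_set_funcs(my_malloc, my_free, my_realloc, my_strdup);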
+ */ +void +pmemlog_set_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)) +{ + LOG(3, NULL); + + util_set_alloc_funcs(malloc_func, free_func, realloc_func, strdup_func); +} + +/* + * pmemlog_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemlog_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmemlog_errormsg -- return last error message + */ +const char * +pmemlog_errormsg(void) +{ + return pmemlog_errormsgU(); +} +#else +/* + * pmemlog_errormsgW -- return last error message as wchar_t + */ +const wchar_t * +pmemlog_errormsgW(void) +{ + return out_get_errormsgW(); +} + +#endif diff --git a/src/pmdk/src/libpmemlog/libpmemlog.def b/src/pmdk/src/libpmemlog/libpmemlog.def new file mode 100644 index 000000000..34c887fcb --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.def @@ -0,0 +1,36 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2016-2018, Intel Corporation +;;;; End Copyright Notice + +LIBRARY libpmemlog + +VERSION 1.0 + +EXPORTS + pmemlog_check_versionU + pmemlog_check_versionW + pmemlog_ctl_execU; + pmemlog_ctl_execW; + pmemlog_ctl_getU; + pmemlog_ctl_getW; + pmemlog_ctl_setU; + pmemlog_ctl_setW; + pmemlog_set_funcs + pmemlog_errormsgU + pmemlog_errormsgW + pmemlog_createU + pmemlog_createW + pmemlog_openU + pmemlog_openW + pmemlog_close + pmemlog_checkU + pmemlog_checkW + pmemlog_nbyte + pmemlog_append + pmemlog_appendv + pmemlog_rewind + pmemlog_tell + pmemlog_walk + + DllMain diff --git a/src/pmdk/src/libpmemlog/libpmemlog.link.in b/src/pmdk/src/libpmemlog/libpmemlog.link.in new file mode 100644 index 000000000..c003eaa54 --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.link.in @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2019, Intel Corporation +# +# +# src/libpmemlog.link -- linker link file for libpmemlog +# +LIBPMEMLOG_1.0 { + global: + pmemlog_check_version; + pmemlog_ctl_exec; + pmemlog_ctl_get; + pmemlog_ctl_set; + pmemlog_set_funcs; + pmemlog_errormsg; + pmemlog_create; + pmemlog_open; + pmemlog_close; + pmemlog_check; + pmemlog_nbyte; + pmemlog_append; + pmemlog_appendv; + pmemlog_tell; + pmemlog_rewind; + pmemlog_walk; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/libpmemlog/libpmemlog.rc b/src/pmdk/src/libpmemlog/libpmemlog.rc new file mode 100644 index 000000000..89d515399 --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016, Intel Corporation */ + +/* + * libpmemlog.rc -- libpmemlog resource file + */ + +#include +#define FILE_NAME "libpmemlog.dll" +#define DESCRIPTION "libpmemlog - persistent memory resident log file" +#define TYPE VFT_DLL +#include \ No newline at end of file diff --git a/src/pmdk/src/libpmemlog/libpmemlog.vcxproj b/src/pmdk/src/libpmemlog/libpmemlog.vcxproj new file mode 100644 index 000000000..eebb69d0f --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.vcxproj @@ -0,0 +1,130 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {9e9e3d25-2139-4a5d-9200-18148ddead45} + + + {901f04db-e1a5-4a41-8b81-9d31c19acd59} + + + + + + + {0B1818EB-BDC8-4865-964F-DB8BF05CFD86} + DynamicLibrary + libpmemlog + libpmemlog + en-US + 14.0 + 10.0.17134.0 + 
10.0.10240.0 + + + + DynamicLibrary + true + v140 + + + DynamicLibrary + false + false + v140 + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmemlog/libpmemlog.vcxproj.filters b/src/pmdk/src/libpmemlog/libpmemlog.vcxproj.filters new file mode 100644 index 000000000..ac20a4a26 --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog.vcxproj.filters @@ -0,0 +1,208 @@ + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {49cfa2b4-cfcb-4c02-928a-c04d1cceffb8} + + + {ac09c2fe-a24b-4a86-8763-d4e06d996ef3} + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmemlog/libpmemlog_main.c b/src/pmdk/src/libpmemlog/libpmemlog_main.c new file mode 100644 index 000000000..d688a36b8 --- /dev/null +++ b/src/pmdk/src/libpmemlog/libpmemlog_main.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2017, Intel Corporation */ + +/* + * libpmemlog_main.c -- entry point for libpmemlog.dll + * + * XXX - This is a placeholder. All the library initialization/cleanup + * that is done in library ctors/dtors, as well as TLS initialization + * should be moved here. 
+ */ + +void libpmemlog_init(void); +void libpmemlog_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmemlog_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + libpmemlog_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmemlog/log.c b/src/pmdk/src/libpmemlog/log.c new file mode 100644 index 000000000..0f0145bfb --- /dev/null +++ b/src/pmdk/src/libpmemlog/log.c @@ -0,0 +1,895 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * log.c -- log memory pool entry points for libpmem + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libpmem.h" +#include "libpmemlog.h" +#include "ctl_global.h" + +#include "os.h" +#include "set.h" +#include "out.h" +#include "log.h" +#include "mmap.h" +#include "sys_util.h" +#include "util_pmem.h" +#include "valgrind_internal.h" + +static const struct pool_attr Log_create_attr = { + LOG_HDR_SIG, + LOG_FORMAT_MAJOR, + LOG_FORMAT_FEAT_DEFAULT, + {0}, {0}, {0}, {0}, {0} +}; + +static const struct pool_attr Log_open_attr = { + LOG_HDR_SIG, + LOG_FORMAT_MAJOR, + LOG_FORMAT_FEAT_CHECK, + {0}, {0}, {0}, {0}, {0} +}; + +/* + * log_descr_create -- (internal) create log memory pool descriptor + */ +static void +log_descr_create(PMEMlogpool *plp, size_t poolsize) +{ + LOG(3, "plp %p poolsize %zu", plp, poolsize); + + ASSERTeq(poolsize % Pagesize, 0); + + /* create required metadata */ + plp->start_offset = htole64(roundup(sizeof(*plp), + LOG_FORMAT_DATA_ALIGN)); + plp->end_offset = htole64(poolsize); + plp->write_offset = plp->start_offset; + + /* store non-volatile part of pool's descriptor */ + util_persist(plp->is_pmem, &plp->start_offset, 3 * sizeof(uint64_t)); +} + +/* + * log_descr_check -- (internal) validate log memory pool descriptor + */ +static int +log_descr_check(PMEMlogpool *plp, size_t poolsize) +{ + LOG(3, "plp %p poolsize %zu", plp, poolsize); + + struct pmemlog hdr = *plp; + log_convert2h(&hdr); + + if ((hdr.start_offset != + roundup(sizeof(*plp), LOG_FORMAT_DATA_ALIGN)) || + (hdr.end_offset != poolsize) || + (hdr.start_offset > hdr.end_offset)) { + ERR("wrong start/end offsets " + "(start: %" PRIu64 " end: %" PRIu64 "), " + "pool size %zu", + hdr.start_offset, hdr.end_offset, poolsize); + errno = EINVAL; + return -1; + } + + if ((hdr.write_offset > hdr.end_offset) || (hdr.write_offset < + hdr.start_offset)) { + ERR("wrong write offset (start: %" PRIu64 " end: %" PRIu64 + " write: %" PRIu64 ")", + hdr.start_offset, hdr.end_offset, hdr.write_offset); + errno = EINVAL; + return -1; + } + + LOG(3, "start: %" PRIu64 ", end: %" PRIu64 ", write: %" PRIu64 "", + hdr.start_offset, hdr.end_offset, hdr.write_offset); + + return 0; +} + +/* + * log_runtime_init -- (internal) initialize log memory pool runtime data + */ +static int +log_runtime_init(PMEMlogpool *plp, int rdonly) +{ + LOG(3, "plp %p rdonly %d", plp, rdonly); + + /* remove volatile part of header */ + VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr, + sizeof(struct pmemlog) - + sizeof(struct pool_hdr) - + 3 * sizeof(uint64_t)); + + /* + * Use some of the memory pool area for run-time info. This + * run-time state is never loaded from the file, it is always + * created here, so no need to worry about byte-order. 
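+ * (The run-time fields are the members of struct pmemlog that follow + * write_offset -- see log.h.)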
+ */ + plp->rdonly = rdonly; + + if ((plp->rwlockp = Malloc(sizeof(*plp->rwlockp))) == NULL) { + ERR("!Malloc for a RW lock"); + return -1; + } + + util_rwlock_init(plp->rwlockp); + + /* + * If possible, turn off all permissions on the pool header page. + * + * The prototype PMFS doesn't allow this when large pages are in + * use. It is not considered an error if this fails. + */ + RANGE_NONE(plp->addr, sizeof(struct pool_hdr), plp->is_dev_dax); + + /* the rest should be kept read-only (debug version only) */ + RANGE_RO((char *)plp->addr + sizeof(struct pool_hdr), + plp->size - sizeof(struct pool_hdr), plp->is_dev_dax); + + return 0; +} + +/* + * pmemlog_createU -- create a log memory pool + */ +#ifndef _WIN32 +static inline +#endif +PMEMlogpool * +pmemlog_createU(const char *path, size_t poolsize, mode_t mode) +{ + LOG(3, "path %s poolsize %zu mode %d", path, poolsize, mode); + + struct pool_set *set; + struct pool_attr adj_pool_attr = Log_create_attr; + + /* force set SDS feature */ + if (SDS_at_create) + adj_pool_attr.features.incompat |= POOL_FEAT_SDS; + else + adj_pool_attr.features.incompat &= ~POOL_FEAT_SDS; + + if (util_pool_create(&set, path, poolsize, PMEMLOG_MIN_POOL, + PMEMLOG_MIN_PART, &adj_pool_attr, NULL, + REPLICAS_DISABLED) != 0) { + LOG(2, "cannot create pool or pool set"); + return NULL; + } + + ASSERT(set->nreplicas > 0); + + struct pool_replica *rep = set->replica[0]; + PMEMlogpool *plp = rep->part[0].addr; + + VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr, + sizeof(struct pmemlog) - + ((uintptr_t)&plp->addr - (uintptr_t)&plp->hdr)); + + plp->addr = plp; + plp->size = rep->repsize; + plp->set = set; + plp->is_pmem = rep->is_pmem; + plp->is_dev_dax = rep->part[0].is_dev_dax; + + /* is_dev_dax implies is_pmem */ + ASSERT(!plp->is_dev_dax || plp->is_pmem); + + /* create pool descriptor */ + log_descr_create(plp, rep->repsize); + + /* initialize runtime parts */ + if (log_runtime_init(plp, 0) != 0) { + ERR("pool initialization failed"); + goto err; + } + + if (util_poolset_chmod(set, mode)) + goto err; + + util_poolset_fdclose(set); + + LOG(3, "plp %p", plp); + return plp; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + util_poolset_close(set, DELETE_CREATED_PARTS); + errno = oerrno; + return NULL; +} + +#ifndef _WIN32 +/* + * pmemlog_create -- create a log memory pool + */ +PMEMlogpool * +pmemlog_create(const char *path, size_t poolsize, mode_t mode) +{ + return pmemlog_createU(path, poolsize, mode); +} +#else +/* + * pmemlog_createW -- create a log memory pool + */ +PMEMlogpool * +pmemlog_createW(const wchar_t *path, size_t poolsize, mode_t mode) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + PMEMlogpool *ret = pmemlog_createU(upath, poolsize, mode); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * log_open_common -- (internal) open a log memory pool + * + * This routine does all the work, but takes a cow flag so internal + * calls can map a read-only pool if required. 
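+ * pmemlog_openU() passes POOL_OPEN_COW only when COW_at_open is set, + * while pmemlog_checkU() always opens the pool copy-on-write so the + * check cannot modify it.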
+ */ +static PMEMlogpool * +log_open_common(const char *path, unsigned flags) +{ + LOG(3, "path %s flags 0x%x", path, flags); + + struct pool_set *set; + + if (util_pool_open(&set, path, PMEMLOG_MIN_PART, &Log_open_attr, + NULL, NULL, flags) != 0) { + LOG(2, "cannot open pool or pool set"); + return NULL; + } + + ASSERT(set->nreplicas > 0); + + struct pool_replica *rep = set->replica[0]; + PMEMlogpool *plp = rep->part[0].addr; + + VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr, + sizeof(struct pmemlog) - + ((uintptr_t)&plp->addr - (uintptr_t)&plp->hdr)); + + plp->addr = plp; + plp->size = rep->repsize; + plp->set = set; + plp->is_pmem = rep->is_pmem; + plp->is_dev_dax = rep->part[0].is_dev_dax; + + /* is_dev_dax implies is_pmem */ + ASSERT(!plp->is_dev_dax || plp->is_pmem); + + if (set->nreplicas > 1) { + errno = ENOTSUP; + ERR("!replicas not supported"); + goto err; + } + + /* validate pool descriptor */ + if (log_descr_check(plp, rep->repsize) != 0) { + LOG(2, "descriptor check failed"); + goto err; + } + + /* initialize runtime parts */ + if (log_runtime_init(plp, set->rdonly) != 0) { + ERR("pool initialization failed"); + goto err; + } + + util_poolset_fdclose(set); + + LOG(3, "plp %p", plp); + return plp; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; + return NULL; +} + +/* + * pmemlog_openU -- open an existing log memory pool + */ +#ifndef _WIN32 +static inline +#endif +PMEMlogpool * +pmemlog_openU(const char *path) +{ + LOG(3, "path %s", path); + + return log_open_common(path, COW_at_open ? POOL_OPEN_COW : 0); +} + +#ifndef _WIN32 +/* + * pmemlog_open -- open an existing log memory pool + */ +PMEMlogpool * +pmemlog_open(const char *path) +{ + return pmemlog_openU(path); +} +#else +/* + * pmemlog_openW -- open an existing log memory pool + */ +PMEMlogpool * +pmemlog_openW(const wchar_t *path) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + PMEMlogpool *ret = pmemlog_openU(upath); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmemlog_close -- close a log memory pool + */ +void +pmemlog_close(PMEMlogpool *plp) +{ + LOG(3, "plp %p", plp); + + util_rwlock_destroy(plp->rwlockp); + Free((void *)plp->rwlockp); + + util_poolset_close(plp->set, DO_NOT_DELETE_PARTS); +} + +/* + * pmemlog_nbyte -- return usable size of a log memory pool + */ +size_t +pmemlog_nbyte(PMEMlogpool *plp) +{ + LOG(3, "plp %p", plp); + + util_rwlock_rdlock(plp->rwlockp); + + size_t size = le64toh(plp->end_offset) - le64toh(plp->start_offset); + LOG(4, "plp %p nbyte %zu", plp, size); + + util_rwlock_unlock(plp->rwlockp); + + return size; +} + +/* + * log_persist -- (internal) persist data, then metadata + * + * On entry, the write lock should be held. 
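+ * + * The data range is persisted before write_offset is updated and + * persisted, so a crash between the two steps leaves the log with its + * previously committed contents.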
+ */ +static void +log_persist(PMEMlogpool *plp, uint64_t new_write_offset) +{ + uint64_t old_write_offset = le64toh(plp->write_offset); + size_t length = new_write_offset - old_write_offset; + + /* unprotect the log space range (debug version only) */ + RANGE_RW((char *)plp->addr + old_write_offset, length, plp->is_dev_dax); + + /* persist the data */ + if (plp->is_pmem) + pmem_drain(); /* data already flushed */ + else + pmem_msync((char *)plp->addr + old_write_offset, length); + + /* protect the log space range (debug version only) */ + RANGE_RO((char *)plp->addr + old_write_offset, length, plp->is_dev_dax); + + /* unprotect the pool descriptor (debug version only) */ + RANGE_RW((char *)plp->addr + sizeof(struct pool_hdr), + LOG_FORMAT_DATA_ALIGN, plp->is_dev_dax); + + /* write the metadata */ + plp->write_offset = htole64(new_write_offset); + + /* persist the metadata */ + if (plp->is_pmem) + pmem_persist(&plp->write_offset, sizeof(plp->write_offset)); + else + pmem_msync(&plp->write_offset, sizeof(plp->write_offset)); + + /* set the write-protection again (debug version only) */ + RANGE_RO((char *)plp->addr + sizeof(struct pool_hdr), + LOG_FORMAT_DATA_ALIGN, plp->is_dev_dax); +} + +/* + * pmemlog_append -- add data to a log memory pool + */ +int +pmemlog_append(PMEMlogpool *plp, const void *buf, size_t count) +{ + int ret = 0; + + LOG(3, "plp %p buf %p count %zu", plp, buf, count); + + if (plp->rdonly) { + ERR("can't append to read-only log"); + errno = EROFS; + return -1; + } + + util_rwlock_wrlock(plp->rwlockp); + + /* get the current values */ + uint64_t end_offset = le64toh(plp->end_offset); + uint64_t write_offset = le64toh(plp->write_offset); + + if (write_offset >= end_offset) { + /* no space left */ + errno = ENOSPC; + ERR("!pmemlog_append"); + ret = -1; + goto end; + } + + /* make sure we don't write past the available space */ + if (count > (end_offset - write_offset)) { + errno = ENOSPC; + ERR("!pmemlog_append"); + ret = -1; + goto end; + } + + char *data = plp->addr; + + /* + * unprotect the log space range, where the new data will be stored + * (debug version only) + */ + RANGE_RW(&data[write_offset], count, plp->is_dev_dax); + + if (plp->is_pmem) + pmem_memcpy_nodrain(&data[write_offset], buf, count); + else + memcpy(&data[write_offset], buf, count); + + /* protect the log space range (debug version only) */ + RANGE_RO(&data[write_offset], count, plp->is_dev_dax); + + write_offset += count; + + /* persist the data and the metadata */ + log_persist(plp, write_offset); + +end: + util_rwlock_unlock(plp->rwlockp); + + return ret; +} + +/* + * pmemlog_appendv -- add gathered data to a log memory pool + */ +int +pmemlog_appendv(PMEMlogpool *plp, const struct iovec *iov, int iovcnt) +{ + LOG(3, "plp %p iovec %p iovcnt %d", plp, iov, iovcnt); + + int ret = 0; + int i; + + if (iovcnt < 0) { + errno = EINVAL; + ERR("iovcnt is less than zero: %d", iovcnt); + return -1; + } + + if (plp->rdonly) { + ERR("can't append to read-only log"); + errno = EROFS; + return -1; + } + + util_rwlock_wrlock(plp->rwlockp); + + /* get the current values */ + uint64_t end_offset = le64toh(plp->end_offset); + uint64_t write_offset = le64toh(plp->write_offset); + + if (write_offset >= end_offset) { + /* no space left */ + errno = ENOSPC; + ERR("!pmemlog_appendv"); + ret = -1; + goto end; + } + + char *data = plp->addr; + uint64_t count = 0; + char *buf; + + /* calculate required space */ + for (i = 0; i < iovcnt; ++i) + count += iov[i].iov_len; + + /* check if there is enough free space */ + if (count > 
(end_offset - write_offset)) { + errno = ENOSPC; + ret = -1; + goto end; + } + + /* append the data */ + for (i = 0; i < iovcnt; ++i) { + buf = iov[i].iov_base; + count = iov[i].iov_len; + + /* + * unprotect the log space range, where the new data will be + * stored (debug version only) + */ + RANGE_RW(&data[write_offset], count, plp->is_dev_dax); + + if (plp->is_pmem) + pmem_memcpy_nodrain(&data[write_offset], buf, count); + else + memcpy(&data[write_offset], buf, count); + + /* + * protect the log space range (debug version only) + */ + RANGE_RO(&data[write_offset], count, plp->is_dev_dax); + + write_offset += count; + } + + /* persist the data and the metadata */ + log_persist(plp, write_offset); + +end: + util_rwlock_unlock(plp->rwlockp); + + return ret; +} + +/* + * pmemlog_tell -- return current write point in a log memory pool + */ +long long +pmemlog_tell(PMEMlogpool *plp) +{ + LOG(3, "plp %p", plp); + + util_rwlock_rdlock(plp->rwlockp); + + ASSERT(le64toh(plp->write_offset) >= le64toh(plp->start_offset)); + long long wp = (long long)(le64toh(plp->write_offset) - + le64toh(plp->start_offset)); + + LOG(4, "write offset %lld", wp); + + util_rwlock_unlock(plp->rwlockp); + + return wp; +} + +/* + * pmemlog_rewind -- discard all data, resetting a log memory pool to empty + */ +void +pmemlog_rewind(PMEMlogpool *plp) +{ + LOG(3, "plp %p", plp); + + if (plp->rdonly) { + ERR("can't rewind read-only log"); + errno = EROFS; + return; + } + + util_rwlock_wrlock(plp->rwlockp); + + /* unprotect the pool descriptor (debug version only) */ + RANGE_RW((char *)plp->addr + sizeof(struct pool_hdr), + LOG_FORMAT_DATA_ALIGN, plp->is_dev_dax); + + plp->write_offset = plp->start_offset; + if (plp->is_pmem) + pmem_persist(&plp->write_offset, sizeof(uint64_t)); + else + pmem_msync(&plp->write_offset, sizeof(uint64_t)); + + /* set the write-protection again (debug version only) */ + RANGE_RO((char *)plp->addr + sizeof(struct pool_hdr), + LOG_FORMAT_DATA_ALIGN, plp->is_dev_dax); + + util_rwlock_unlock(plp->rwlockp); +} + +/* + * pmemlog_walk -- walk through all data in a log memory pool + * + * chunksize of 0 means process_chunk gets called once for all data + * as a single chunk. + */ +void +pmemlog_walk(PMEMlogpool *plp, size_t chunksize, + int (*process_chunk)(const void *buf, size_t len, void *arg), void *arg) +{ + LOG(3, "plp %p chunksize %zu", plp, chunksize); + + /* + * We are assuming that the walker doesn't change the data it's reading + * in place. We prevent everyone from changing the data behind our back + * until we are done with processing it. + */ + util_rwlock_rdlock(plp->rwlockp); + + char *data = plp->addr; + uint64_t write_offset = le64toh(plp->write_offset); + uint64_t data_offset = le64toh(plp->start_offset); + size_t len; + + if (chunksize == 0) { + /* most common case: process everything at once */ + len = write_offset - data_offset; + LOG(3, "length %zu", len); + (*process_chunk)(&data[data_offset], len, arg); + } else { + /* + * Walk through the complete record, chunk by chunk. + * The callback returns 0 to terminate the walk. + */ + while (data_offset < write_offset) { + len = MIN(chunksize, write_offset - data_offset); + if (!(*process_chunk)(&data[data_offset], len, arg)) + break; + data_offset += chunksize; + } + } + + util_rwlock_unlock(plp->rwlockp); +} + +/* + * pmemlog_checkU -- log memory pool consistency check + * + * Returns true if consistent, zero if inconsistent, -1/error if checking + * cannot happen due to other errors. 
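+ * + * The pool is opened copy-on-write (POOL_OPEN_COW), so running the + * check never modifies the pool itself.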
+ */ +#ifndef _WIN32 +static inline +#endif +int +pmemlog_checkU(const char *path) +{ + LOG(3, "path \"%s\"", path); + + PMEMlogpool *plp = log_open_common(path, POOL_OPEN_COW); + if (plp == NULL) + return -1; /* errno set by log_open_common() */ + + int consistent = 1; + + /* validate pool descriptor */ + uint64_t hdr_start = le64toh(plp->start_offset); + uint64_t hdr_end = le64toh(plp->end_offset); + uint64_t hdr_write = le64toh(plp->write_offset); + + if (hdr_start != roundup(sizeof(*plp), LOG_FORMAT_DATA_ALIGN)) { + ERR("wrong value of start_offset"); + consistent = 0; + } + + if (hdr_end != plp->size) { + ERR("wrong value of end_offset"); + consistent = 0; + } + + if (hdr_start > hdr_end) { + ERR("start_offset greater than end_offset"); + consistent = 0; + } + + if (hdr_start > hdr_write) { + ERR("start_offset greater than write_offset"); + consistent = 0; + } + + if (hdr_write > hdr_end) { + ERR("write_offset greater than end_offset"); + consistent = 0; + } + + pmemlog_close(plp); + + if (consistent) + LOG(4, "pool consistency check OK"); + + return consistent; +} + +#ifndef _WIN32 +/* + * pmemlog_check -- log memory pool consistency check + * + * Returns true if consistent, zero if inconsistent, -1/error if checking + * cannot happen due to other errors. + */ +int +pmemlog_check(const char *path) +{ + return pmemlog_checkU(path); +} +#else +/* + * pmemlog_checkW -- log memory pool consistency check + */ +int +pmemlog_checkW(const wchar_t *path) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return -1; + + int ret = pmemlog_checkU(upath); + + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmemlog_ctl_getU -- programmatically executes a read ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemlog_ctl_getU(PMEMlogpool *plp, const char *name, void *arg) +{ + LOG(3, "plp %p name %s arg %p", plp, name, arg); + return ctl_query(plp == NULL ? NULL : plp->ctl, plp, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_READ, arg); +} + +/* + * pmemlog_ctl_setU -- programmatically executes a write ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemlog_ctl_setU(PMEMlogpool *plp, const char *name, void *arg) +{ + LOG(3, "plp %p name %s arg %p", plp, name, arg); + return ctl_query(plp == NULL ? NULL : plp->ctl, plp, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_WRITE, arg); +} + +/* + * pmemlog_ctl_execU -- programmatically executes a runnable ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemlog_ctl_execU(PMEMlogpool *plp, const char *name, void *arg) +{ + LOG(3, "plp %p name %s arg %p", plp, name, arg); + return ctl_query(plp == NULL ?
NULL : plp->ctl, plp, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_RUNNABLE, arg); +} + +#ifndef _WIN32 +/* + * pmemlog_ctl_get -- programmatically executes a read ctl query + */ +int +pmemlog_ctl_get(PMEMlogpool *plp, const char *name, void *arg) +{ + return pmemlog_ctl_getU(plp, name, arg); +} + +/* + * pmemlog_ctl_set -- programmatically executes a write ctl query + */ +int +pmemlog_ctl_set(PMEMlogpool *plp, const char *name, void *arg) +{ + return pmemlog_ctl_setU(plp, name, arg); +} + +/* + * pmemlog_ctl_exec -- programmatically executes a runnable ctl query + */ +int +pmemlog_ctl_exec(PMEMlogpool *plp, const char *name, void *arg) +{ + return pmemlog_ctl_execU(plp, name, arg); +} +#else +/* + * pmemlog_ctl_getW -- programmatically executes a read ctl query + */ +int +pmemlog_ctl_getW(PMEMlogpool *plp, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemlog_ctl_getU(plp, uname, arg); + util_free_UTF8(uname); + + return ret; +} + +/* + * pmemlog_ctl_setW -- programmatically executes a write ctl query + */ +int +pmemlog_ctl_setW(PMEMlogpool *plp, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemlog_ctl_setU(plp, uname, arg); + util_free_UTF8(uname); + + return ret; +} + +/* + * pmemlog_ctl_execW -- programmatically executes a runnable ctl query + */ +int +pmemlog_ctl_execW(PMEMlogpool *plp, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemlog_ctl_execU(plp, uname, arg); + util_free_UTF8(uname); + + return ret; +} +#endif + +#if FAULT_INJECTION +void +pmemlog_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + core_inject_fault_at(type, nth, at); +} + +int +pmemlog_fault_injection_enabled(void) +{ + return core_fault_injection_enabled(); +} +#endif diff --git a/src/pmdk/src/libpmemlog/log.h b/src/pmdk/src/libpmemlog/log.h new file mode 100644 index 000000000..7b86ca01d --- /dev/null +++ b/src/pmdk/src/libpmemlog/log.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * log.h -- internal definitions for libpmem log module + */ + +#ifndef LOG_H +#define LOG_H 1 + +#include <stdint.h> +#include <stddef.h> +#include <endian.h> + +#include "ctl.h" +#include "util.h" +#include "os_thread.h" +#include "pool_hdr.h" +#include "page_size.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "alloc.h" +#include "fault_injection.h" + +#define PMEMLOG_LOG_PREFIX "libpmemlog" +#define PMEMLOG_LOG_LEVEL_VAR "PMEMLOG_LOG_LEVEL" +#define PMEMLOG_LOG_FILE_VAR "PMEMLOG_LOG_FILE" + +/* attributes of the log memory pool format for the pool header */ +#define LOG_HDR_SIG "PMEMLOG" /* must be 8 bytes including '\0' */ +#define LOG_FORMAT_MAJOR 1 + +#define LOG_FORMAT_FEAT_DEFAULT \ + {POOL_FEAT_COMPAT_DEFAULT, POOL_FEAT_INCOMPAT_DEFAULT, 0x0000} + +#define LOG_FORMAT_FEAT_CHECK \ + {POOL_FEAT_COMPAT_VALID, POOL_FEAT_INCOMPAT_VALID, 0x0000} + +static const features_t log_format_feat_default = LOG_FORMAT_FEAT_DEFAULT; + +struct pmemlog { + struct pool_hdr hdr; /* memory pool header */ + + /* root info for on-media format... */ + uint64_t start_offset; /* start offset of the usable log space */ + uint64_t end_offset; /* maximum offset of the usable log space */ + uint64_t write_offset; /* current write point for the log */ + + /* some run-time state, allocated out of memory pool...
*/ + void *addr; /* mapped region */ + size_t size; /* size of mapped region */ + int is_pmem; /* true if pool is PMEM */ + int rdonly; /* true if pool is opened read-only */ + os_rwlock_t *rwlockp; /* pointer to RW lock */ + int is_dev_dax; /* true if mapped on device dax */ + struct ctl *ctl; /* top level node of the ctl tree structure */ + + struct pool_set *set; /* pool set info */ +}; + +/* data area starts at this alignment after the struct pmemlog above */ +#define LOG_FORMAT_DATA_ALIGN ((uintptr_t)PMEM_PAGESIZE) + +/* + * log_convert2h -- convert pmemlog structure to host byte order + */ +static inline void +log_convert2h(struct pmemlog *plp) +{ + plp->start_offset = le64toh(plp->start_offset); + plp->end_offset = le64toh(plp->end_offset); + plp->write_offset = le64toh(plp->write_offset); +} + +/* + * log_convert2le -- convert pmemlog structure to LE byte order + */ +static inline void +log_convert2le(struct pmemlog *plp) +{ + plp->start_offset = htole64(plp->start_offset); + plp->end_offset = htole64(plp->end_offset); + plp->write_offset = htole64(plp->write_offset); +} + +#if FAULT_INJECTION +void +pmemlog_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +pmemlog_fault_injection_enabled(void); +#else +static inline void +pmemlog_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +pmemlog_fault_injection_enabled(void) +{ + return 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/Makefile b/src/pmdk/src/libpmemobj/Makefile new file mode 100644 index 000000000..6ad3d73e4 --- /dev/null +++ b/src/pmdk/src/libpmemobj/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# src/libpmemobj/Makefile -- Makefile for libpmemobj +# + +LIBRARY_NAME = pmemobj +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 + +include ../core/pmemcore.inc +include ../common/pmemcommon.inc + +SOURCE +=\ + alloc_class.c\ + bucket.c\ + container_ravl.c\ + container_seglists.c\ + critnib.c\ + ctl_debug.c\ + heap.c\ + lane.c\ + libpmemobj.c\ + list.c\ + memblock.c\ + memops.c\ + obj.c\ + palloc.c\ + pmalloc.c\ + recycler.c\ + sync.c\ + tx.c\ + stats.c\ + ulog.c + +include ../Makefile.inc + +CFLAGS += -DUSE_LIBDL -D_PMEMOBJ_INTRNL $(LIBNDCTL_CFLAGS) + +LIBS += -pthread -lpmem $(LIBDL) $(LIBNDCTL_LIBS) diff --git a/src/pmdk/src/libpmemobj/alloc_class.c b/src/pmdk/src/libpmemobj/alloc_class.c new file mode 100644 index 000000000..981815fd2 --- /dev/null +++ b/src/pmdk/src/libpmemobj/alloc_class.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * alloc_class.c -- implementation of allocation classes + */ + +#include <float.h> +#include <string.h> + +#include "alloc_class.h" +#include "heap_layout.h" +#include "util.h" +#include "out.h" +#include "bucket.h" +#include "critnib.h" + +#define RUN_CLASS_KEY_PACK(map_idx_s, flags_s, size_idx_s)\ +((uint64_t)(map_idx_s) << 32 |\ +(uint64_t)(flags_s) << 16 |\ +(uint64_t)(size_idx_s)) + +/* + * Value used to mark a reserved spot in the bucket array. + */ +#define ACLASS_RESERVED ((void *)0xFFFFFFFFULL) + +/* + * The last size that is handled by runs. + */ +#define MAX_RUN_SIZE (CHUNKSIZE * 10) + +/* + * Maximum number of bytes the allocation class generation algorithm can decide + * to waste in a single run chunk. + */ +#define MAX_RUN_WASTED_BYTES 1024 + +/* + * Allocation categories are used for allocation classes generation. 
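
As a side note, the RUN_CLASS_KEY_PACK macro above packs three small fields into a single 64-bit critnib key. A minimal standalone sketch of that layout, where key_pack is a hypothetical helper that mirrors the macro rather than part of the library:

#include <stdint.h>
#include <stdio.h>

/* same layout as RUN_CLASS_KEY_PACK: map_idx in bits 32..63,
 * flags in bits 16..31, size_idx in bits 0..15 */
static uint64_t
key_pack(uint32_t map_idx, uint16_t flags, uint16_t size_idx)
{
	return (uint64_t)map_idx << 32 |
		(uint64_t)flags << 16 |
		(uint64_t)size_idx;
}

int
main(void)
{
	uint64_t k = key_pack(5, 0x4, 1);

	/* unpack by shifting the same field boundaries back out */
	printf("map_idx %u flags 0x%x size_idx %u\n",
		(uint32_t)(k >> 32),
		(uint16_t)(k >> 16),
		(uint16_t)k);	/* prints: map_idx 5 flags 0x4 size_idx 1 */
	return 0;
}
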
Each one + * defines the biggest handled size (in bytes) and step percentage of the + * generation process. The step percentage defines maximum allowed external + * fragmentation for the category. + */ +#define MAX_ALLOC_CATEGORIES 9 + +/* + * The first size (in bytes) which is actually used in the allocation + * class generation algorithm. All smaller sizes use the first predefined bucket + * with the smallest run unit size. + */ +#define FIRST_GENERATED_CLASS_SIZE 128 + +/* + * The granularity of the allocation class generation algorithm. + */ +#define ALLOC_BLOCK_SIZE_GEN 64 + +/* + * The first predefined allocation class size + */ +#define MIN_UNIT_SIZE 128 + +static const struct { + size_t size; + float step; +} categories[MAX_ALLOC_CATEGORIES] = { + /* dummy category - the first allocation class is predefined */ + {FIRST_GENERATED_CLASS_SIZE, 0.05f}, + {1024, 0.05f}, + {2048, 0.05f}, + {4096, 0.05f}, + {8192, 0.05f}, + {16384, 0.05f}, + {32768, 0.05f}, + {131072, 0.05f}, + {393216, 0.05f}, +}; + +#define RUN_UNIT_MAX_ALLOC 8U + +/* + * Every allocation has to be a multiple of at least 8 because we need to + * ensure proper alignment of every pmem structure. + */ +#define ALLOC_BLOCK_SIZE 16 + +/* + * Converts size (in bytes) to number of allocation blocks. + */ +#define SIZE_TO_CLASS_MAP_INDEX(_s, _g) (1 + (((_s) - 1) / (_g))) + +/* + * Target number of allocations per run instance. + */ +#define RUN_MIN_NALLOCS 200 + +/* + * Hard limit of chunks per single run. + */ +#define RUN_SIZE_IDX_CAP (16) + +#define ALLOC_CLASS_DEFAULT_FLAGS CHUNK_FLAG_FLEX_BITMAP + +struct alloc_class_collection { + size_t granularity; + + struct alloc_class *aclasses[MAX_ALLOCATION_CLASSES]; + + /* + * The last size (in bytes) that is handled by runs, everything bigger + * uses the default class. + */ + size_t last_run_max_size; + + /* maps allocation classes to allocation sizes, excluding the header! */ + uint8_t *class_map_by_alloc_size; + + /* maps allocation classes to run unit sizes */ + struct critnib *class_map_by_unit_size; + + int fail_on_missing_class; + int autogenerate_on_missing_class; +}; + +/* + * alloc_class_find_first_free_slot -- searches for the + * first available allocation class slot + * + * This function must be thread-safe because allocation classes can be created + * at runtime. + */ +int +alloc_class_find_first_free_slot(struct alloc_class_collection *ac, + uint8_t *slot) +{ + LOG(10, NULL); + + for (int n = 0; n < MAX_ALLOCATION_CLASSES; ++n) { + if (util_bool_compare_and_swap64(&ac->aclasses[n], + NULL, ACLASS_RESERVED)) { + *slot = (uint8_t)n; + return 0; + } + } + + return -1; +} + +/* + * alloc_class_reserve -- reserves the specified class id + */ +int +alloc_class_reserve(struct alloc_class_collection *ac, uint8_t id) +{ + LOG(10, NULL); + + return util_bool_compare_and_swap64(&ac->aclasses[id], + NULL, ACLASS_RESERVED) ? 
0 : -1; +} + +/* + * alloc_class_reservation_clear -- removes the reservation on class id + */ +static void +alloc_class_reservation_clear(struct alloc_class_collection *ac, int id) +{ + LOG(10, NULL); + + int ret = util_bool_compare_and_swap64(&ac->aclasses[id], + ACLASS_RESERVED, NULL); + ASSERT(ret); +} + +/* + * alloc_class_new -- creates a new allocation class + */ +struct alloc_class * +alloc_class_new(int id, struct alloc_class_collection *ac, + enum alloc_class_type type, enum header_type htype, + size_t unit_size, size_t alignment, + uint32_t size_idx) +{ + LOG(10, NULL); + + struct alloc_class *c = Malloc(sizeof(*c)); + if (c == NULL) + goto error_class_alloc; + + c->unit_size = unit_size; + c->header_type = htype; + c->type = type; + c->flags = (uint16_t) + (header_type_to_flag[c->header_type] | + (alignment ? CHUNK_FLAG_ALIGNED : 0)) | + ALLOC_CLASS_DEFAULT_FLAGS; + + switch (type) { + case CLASS_HUGE: + id = DEFAULT_ALLOC_CLASS_ID; + break; + case CLASS_RUN: + c->rdsc.alignment = alignment; + memblock_run_bitmap(&size_idx, c->flags, unit_size, + alignment, NULL, &c->rdsc.bitmap); + c->rdsc.nallocs = c->rdsc.bitmap.nbits; + c->rdsc.size_idx = size_idx; + + /* these two fields are duplicated from class */ + c->rdsc.unit_size = c->unit_size; + c->rdsc.flags = c->flags; + + uint8_t slot = (uint8_t)id; + if (id < 0 && alloc_class_find_first_free_slot(ac, + &slot) != 0) + goto error_class_alloc; + id = slot; + + size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(c->unit_size, + ac->granularity); + ASSERT(map_idx <= UINT32_MAX); + uint32_t map_idx_s = (uint32_t)map_idx; + uint16_t size_idx_s = (uint16_t)size_idx; + uint16_t flags_s = (uint16_t)c->flags; + uint64_t k = RUN_CLASS_KEY_PACK(map_idx_s, + flags_s, size_idx_s); + if (critnib_insert(ac->class_map_by_unit_size, + k, c) != 0) { + ERR("unable to register allocation class"); + goto error_map_insert; + } + + break; + default: + ASSERT(0); + } + + c->id = (uint8_t)id; + ac->aclasses[c->id] = c; + return c; + +error_map_insert: + Free(c); +error_class_alloc: + if (id >= 0) + alloc_class_reservation_clear(ac, id); + return NULL; +} + +/* + * alloc_class_delete -- deletes an allocation class + */ +void +alloc_class_delete(struct alloc_class_collection *ac, + struct alloc_class *c) +{ + LOG(10, NULL); + + ac->aclasses[c->id] = NULL; + Free(c); +} + +/* + * alloc_class_find_or_create -- (internal) searches for the + * biggest allocation class whose unit size evenly divides n. + * If no such class exists, creates one. + */ +static struct alloc_class * +alloc_class_find_or_create(struct alloc_class_collection *ac, size_t n) +{ + LOG(10, NULL); + + COMPILE_ERROR_ON(MAX_ALLOCATION_CLASSES > UINT8_MAX); + uint64_t required_size_bytes = n * RUN_MIN_NALLOCS; + uint32_t required_size_idx = 1; + if (required_size_bytes > RUN_DEFAULT_SIZE) { + required_size_bytes -= RUN_DEFAULT_SIZE; + required_size_idx += + CALC_SIZE_IDX(CHUNKSIZE, required_size_bytes); + if (required_size_idx > RUN_SIZE_IDX_CAP) + required_size_idx = RUN_SIZE_IDX_CAP; + } + + for (int i = MAX_ALLOCATION_CLASSES - 1; i >= 0; --i) { + struct alloc_class *c = ac->aclasses[i]; + + if (c == NULL || c->type == CLASS_HUGE || + c->rdsc.size_idx < required_size_idx) + continue; + + if (n % c->unit_size == 0 && + n / c->unit_size <= RUN_UNIT_MAX_ALLOC) + return c; + } + + /* + * In order to minimize the wasted space at the end of the run the + * run data size must be divisible by the allocation class unit size + * with the smallest possible remainder, preferably 0. 
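
The do/while loop that follows implements this search. A simplified, self-contained sketch of the same idea, with the usable run size fixed at a hypothetical 256 KiB instead of being derived from RUN_CONTENT_SIZE_BYTES and the run bitmap:

#include <stdint.h>
#include <stdio.h>

#define BLOCK_GEN 64		/* stands in for ALLOC_BLOCK_SIZE_GEN */
#define MAX_WASTE 1024		/* stands in for MAX_RUN_WASTED_BYTES */
#define RUN_BYTES (256 * 1024)	/* hypothetical usable run size */

/* grow the candidate unit size until the run divides into units
 * with no more than MAX_WASTE bytes left over at the end */
static size_t
pick_unit_size(size_t n)
{
	while ((RUN_BYTES % n) > MAX_WASTE)
		n += BLOCK_GEN;
	return n;
}

int
main(void)
{
	/* 5128: 51 units per run, only 616 bytes wasted */
	printf("%zu\n", pick_unit_size(5000));
	return 0;
}
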
+ */ + struct run_bitmap b; + size_t runsize_bytes = 0; + do { + if (runsize_bytes != 0) /* don't increase on first iteration */ + n += ALLOC_BLOCK_SIZE_GEN; + + uint32_t size_idx = required_size_idx; + memblock_run_bitmap(&size_idx, ALLOC_CLASS_DEFAULT_FLAGS, n, 0, + NULL, &b); + + runsize_bytes = RUN_CONTENT_SIZE_BYTES(size_idx) - b.size; + } while ((runsize_bytes % n) > MAX_RUN_WASTED_BYTES); + + /* + * Now that the desired unit size is found the existing classes need + * to be searched for possible duplicates. If a class that can handle + * the calculated size already exists, simply return that. + */ + for (int i = 1; i < MAX_ALLOCATION_CLASSES; ++i) { + struct alloc_class *c = ac->aclasses[i]; + if (c == NULL || c->type == CLASS_HUGE) + continue; + if (n / c->unit_size <= RUN_UNIT_MAX_ALLOC && + n % c->unit_size == 0) + return c; + if (c->unit_size == n) + return c; + } + + return alloc_class_new(-1, ac, CLASS_RUN, HEADER_COMPACT, n, 0, + required_size_idx); +} + +/* + * alloc_class_find_min_frag -- searches for an existing allocation + * class that will provide the smallest internal fragmentation for the given + * size. + */ +static struct alloc_class * +alloc_class_find_min_frag(struct alloc_class_collection *ac, size_t n) +{ + LOG(10, NULL); + + struct alloc_class *best_c = NULL; + size_t lowest_waste = SIZE_MAX; + + ASSERTne(n, 0); + + /* + * Start from the largest buckets in order to minimize unit size of + * allocated memory blocks. + */ + for (int i = MAX_ALLOCATION_CLASSES - 1; i >= 0; --i) { + struct alloc_class *c = ac->aclasses[i]; + + /* can't use alloc classes /w no headers by default */ + if (c == NULL || c->header_type == HEADER_NONE) + continue; + + size_t real_size = n + header_type_to_size[c->header_type]; + + size_t units = CALC_SIZE_IDX(c->unit_size, real_size); + + /* can't exceed the maximum allowed run unit max */ + if (c->type == CLASS_RUN && units > RUN_UNIT_MAX_ALLOC) + continue; + + if (c->unit_size * units == real_size) + return c; + + size_t waste = (c->unit_size * units) - real_size; + + /* + * If we assume that the allocation class is only ever going to + * be used with exactly one size, the effective internal + * fragmentation would be increased by the leftover + * memory at the end of the run. 
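
To make the waste calculation concrete, a small sketch with hypothetical numbers (a 200-byte request, a 16-byte compact header, 128-byte units); class_waste below simply mirrors the real_size/units arithmetic above and is not a library function:

#include <stddef.h>
#include <stdio.h>

/* waste for one allocation: header size and unit size are plain
 * parameters here, illustrative values are passed from main() */
static size_t
class_waste(size_t req, size_t hdr, size_t unit)
{
	size_t real_size = req + hdr;
	size_t units = (real_size + unit - 1) / unit; /* CALC_SIZE_IDX */
	return unit * units - real_size;
}

int
main(void)
{
	/* 200 + 16 = 216 bytes -> 2 units of 128 = 256 -> 40 wasted */
	printf("%zu\n", class_waste(200, 16, 128));
	return 0;
}
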
+ */ + if (c->type == CLASS_RUN) { + size_t wasted_units = c->rdsc.nallocs % units; + size_t wasted_bytes = wasted_units * c->unit_size; + size_t waste_avg_per_unit = wasted_bytes / + c->rdsc.nallocs; + + waste += waste_avg_per_unit; + } + + if (best_c == NULL || lowest_waste > waste) { + best_c = c; + lowest_waste = waste; + } + } + + ASSERTne(best_c, NULL); + return best_c; +} + +/* + * alloc_class_collection_new -- creates a new collection of allocation classes + */ +struct alloc_class_collection * +alloc_class_collection_new() +{ + LOG(10, NULL); + + struct alloc_class_collection *ac = Zalloc(sizeof(*ac)); + if (ac == NULL) + return NULL; + + ac->granularity = ALLOC_BLOCK_SIZE; + ac->last_run_max_size = MAX_RUN_SIZE; + ac->fail_on_missing_class = 0; + ac->autogenerate_on_missing_class = 1; + + size_t maps_size = (MAX_RUN_SIZE / ac->granularity) + 1; + + if ((ac->class_map_by_alloc_size = Malloc(maps_size)) == NULL) + goto error; + if ((ac->class_map_by_unit_size = critnib_new()) == NULL) + goto error; + + memset(ac->class_map_by_alloc_size, 0xFF, maps_size); + + if (alloc_class_new(-1, ac, CLASS_HUGE, HEADER_COMPACT, + CHUNKSIZE, 0, 1) == NULL) + goto error; + + struct alloc_class *predefined_class = + alloc_class_new(-1, ac, CLASS_RUN, HEADER_COMPACT, + MIN_UNIT_SIZE, 0, 1); + if (predefined_class == NULL) + goto error; + + for (size_t i = 0; i < FIRST_GENERATED_CLASS_SIZE / ac->granularity; + ++i) { + ac->class_map_by_alloc_size[i] = predefined_class->id; + } + + /* + * Based on the defined categories, a set of allocation classes is + * created. The unit size of those classes depends on the category + * initial size and step. + */ + size_t granularity_mask = ALLOC_BLOCK_SIZE_GEN - 1; + for (int c = 1; c < MAX_ALLOC_CATEGORIES; ++c) { + size_t n = categories[c - 1].size + ALLOC_BLOCK_SIZE_GEN; + do { + if (alloc_class_find_or_create(ac, n) == NULL) + goto error; + + float stepf = (float)n * categories[c].step; + size_t stepi = (size_t)stepf; + stepi = (stepf - (float)stepi < FLT_EPSILON) ? + stepi : stepi + 1; + + n += (stepi + (granularity_mask)) & ~granularity_mask; + } while (n <= categories[c].size); + } + + /* + * Find the largest alloc class and use its unit size as the run + * allocation threshold. + */ + uint8_t largest_aclass_slot; + for (largest_aclass_slot = MAX_ALLOCATION_CLASSES - 1; + largest_aclass_slot > 0 && + ac->aclasses[largest_aclass_slot] == NULL; + --largest_aclass_slot) { + /* intentional NOP */ + } + + struct alloc_class *c = ac->aclasses[largest_aclass_slot]; + + /* + * The actual run might contain fewer unit blocks than the theoretical + * unit max variable. This may be the case for very large unit sizes. + */ + size_t real_unit_max = c->rdsc.nallocs < RUN_UNIT_MAX_ALLOC ? + c->rdsc.nallocs : RUN_UNIT_MAX_ALLOC; + + size_t theoretical_run_max_size = c->unit_size * real_unit_max; + + ac->last_run_max_size = MAX_RUN_SIZE > theoretical_run_max_size ? + theoretical_run_max_size : MAX_RUN_SIZE; + +#ifdef DEBUG + /* + * Verify that each bucket's unit size points back to the bucket by the + * bucket map. This must be true for the default allocation classes, + * otherwise duplicate buckets will be created. 
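
Returning to the category loop above: for a 5% step it generates candidate unit sizes that grow roughly geometrically, each rounded up to the 64-byte generation granularity. A standalone sketch under those assumptions, using integer arithmetic in place of the float/FLT_EPSILON rounding:

#include <stddef.h>
#include <stdio.h>

int
main(void)
{
	size_t mask = 64 - 1;	/* ALLOC_BLOCK_SIZE_GEN - 1 */
	size_t n = 1024 + 64;	/* previous category size + one block */

	/* candidate sizes for a hypothetical {2048, 5%} category:
	 * prints 1088, 1152, 1216, 1280, 1344, 1472, ..., 1984 */
	while (n <= 2048) {
		printf("%zu\n", n);
		size_t step = (n * 5 + 99) / 100;	/* ceil(5% of n) */
		n += (step + mask) & ~mask;	/* round up to 64 bytes */
	}
	return 0;
}
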
+ */ + for (size_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) { + struct alloc_class *c = ac->aclasses[i]; + + if (c != NULL && c->type == CLASS_RUN) { + ASSERTeq(i, c->id); + ASSERTeq(alloc_class_by_run(ac, c->unit_size, + c->flags, c->rdsc.size_idx), c); + } + } +#endif + + return ac; + +error: + alloc_class_collection_delete(ac); + + return NULL; +} + +/* + * alloc_class_collection_delete -- deletes the allocation class collection and + * all of the classes within it + */ +void +alloc_class_collection_delete(struct alloc_class_collection *ac) +{ + LOG(10, NULL); + + for (size_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) { + struct alloc_class *c = ac->aclasses[i]; + if (c != NULL) { + alloc_class_delete(ac, c); + } + } + + if (ac->class_map_by_unit_size) + critnib_delete(ac->class_map_by_unit_size); + Free(ac->class_map_by_alloc_size); + Free(ac); +} + +/* + * alloc_class_assign_by_size -- (internal) chooses the allocation class that + * best approximates the provided size + */ +static struct alloc_class * +alloc_class_assign_by_size(struct alloc_class_collection *ac, + size_t size) +{ + LOG(10, NULL); + + size_t class_map_index = SIZE_TO_CLASS_MAP_INDEX(size, + ac->granularity); + + struct alloc_class *c = alloc_class_find_min_frag(ac, + class_map_index * ac->granularity); + ASSERTne(c, NULL); + + /* + * We don't lock this array because locking this section here and then + * bailing out if someone else was faster would still be slower than + * just calculating the class and failing to assign the variable. + * We are using a compare and swap so that helgrind/drd don't complain. + */ + util_bool_compare_and_swap64( + &ac->class_map_by_alloc_size[class_map_index], + MAX_ALLOCATION_CLASSES, c->id); + + return c; +} + +/* + * alloc_class_by_alloc_size -- returns allocation class that is assigned + * to handle an allocation of the provided size + */ +struct alloc_class * +alloc_class_by_alloc_size(struct alloc_class_collection *ac, size_t size) +{ + if (size < ac->last_run_max_size) { + uint8_t class_id = ac->class_map_by_alloc_size[ + SIZE_TO_CLASS_MAP_INDEX(size, ac->granularity)]; + + if (class_id == MAX_ALLOCATION_CLASSES) { + if (ac->fail_on_missing_class) + return NULL; + else if (ac->autogenerate_on_missing_class) + return alloc_class_assign_by_size(ac, size); + else + return ac->aclasses[DEFAULT_ALLOC_CLASS_ID]; + } + + return ac->aclasses[class_id]; + } else { + return ac->aclasses[DEFAULT_ALLOC_CLASS_ID]; + } +} + +/* + * alloc_class_by_run -- returns the allocation class that has the given + * unit size + */ +struct alloc_class * +alloc_class_by_run(struct alloc_class_collection *ac, + size_t unit_size, uint16_t flags, uint32_t size_idx) +{ + size_t map_idx = SIZE_TO_CLASS_MAP_INDEX(unit_size, ac->granularity); + ASSERT(map_idx <= UINT32_MAX); + uint32_t map_idx_s = (uint32_t)map_idx; + ASSERT(size_idx <= UINT16_MAX); + uint16_t size_idx_s = (uint16_t)size_idx; + uint16_t flags_s = (uint16_t)flags; + + return critnib_get(ac->class_map_by_unit_size, + RUN_CLASS_KEY_PACK(map_idx_s, flags_s, size_idx_s)); +} + +/* + * alloc_class_by_id -- returns the allocation class with the given id + */ +struct alloc_class * +alloc_class_by_id(struct alloc_class_collection *ac, uint8_t id) +{ + return ac->aclasses[id]; +} + +/* + * alloc_class_calc_size_idx -- calculates how many units the size requires + */ +ssize_t +alloc_class_calc_size_idx(struct alloc_class *c, size_t size) +{ + uint32_t size_idx = CALC_SIZE_IDX(c->unit_size, + size + header_type_to_size[c->header_type]); + + if (c->type == CLASS_RUN) 
{ + if (c->header_type == HEADER_NONE && size_idx != 1) + return -1; + else if (size_idx > RUN_UNIT_MAX) + return -1; + else if (size_idx > c->rdsc.nallocs) + return -1; + } + + return size_idx; +} diff --git a/src/pmdk/src/libpmemobj/alloc_class.h b/src/pmdk/src/libpmemobj/alloc_class.h new file mode 100644 index 000000000..442fa06f3 --- /dev/null +++ b/src/pmdk/src/libpmemobj/alloc_class.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * alloc_class.h -- internal definitions for allocation classes + */ + +#ifndef LIBPMEMOBJ_ALLOC_CLASS_H +#define LIBPMEMOBJ_ALLOC_CLASS_H 1 + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> +#include "heap_layout.h" +#include "memblock.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_ALLOCATION_CLASSES (UINT8_MAX) +#define DEFAULT_ALLOC_CLASS_ID (0) +#define RUN_UNIT_MAX RUN_BITS_PER_VALUE + +struct alloc_class_collection; + +enum alloc_class_type { + CLASS_UNKNOWN, + CLASS_HUGE, + CLASS_RUN, + + MAX_ALLOC_CLASS_TYPES +}; + +struct alloc_class { + uint8_t id; + uint16_t flags; + + size_t unit_size; + + enum header_type header_type; + enum alloc_class_type type; + + /* run-specific data */ + struct run_descriptor rdsc; +}; + +struct alloc_class_collection *alloc_class_collection_new(void); +void alloc_class_collection_delete(struct alloc_class_collection *ac); + +struct alloc_class *alloc_class_by_run( + struct alloc_class_collection *ac, + size_t unit_size, uint16_t flags, uint32_t size_idx); +struct alloc_class *alloc_class_by_alloc_size( + struct alloc_class_collection *ac, size_t size); +struct alloc_class *alloc_class_by_id( + struct alloc_class_collection *ac, uint8_t id); + +int alloc_class_reserve(struct alloc_class_collection *ac, uint8_t id); +int alloc_class_find_first_free_slot(struct alloc_class_collection *ac, + uint8_t *slot); + +ssize_t +alloc_class_calc_size_idx(struct alloc_class *c, size_t size); + +struct alloc_class * +alloc_class_new(int id, struct alloc_class_collection *ac, + enum alloc_class_type type, enum header_type htype, + size_t unit_size, size_t alignment, + uint32_t size_idx); + +void alloc_class_delete(struct alloc_class_collection *ac, + struct alloc_class *c); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/bucket.c b/src/pmdk/src/libpmemobj/bucket.c new file mode 100644 index 000000000..4b32dc099 --- /dev/null +++ b/src/pmdk/src/libpmemobj/bucket.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * bucket.c -- bucket implementation + * + * Buckets manage volatile state of the heap. They are the abstraction layer + * between the heap-managed chunks/runs and memory allocations. + * + * Each bucket instance can have a different underlying container that is + * responsible for selecting blocks - which means that whether the allocator + * serves memory blocks in best/first/next-fit manner is decided during bucket + * creation. 
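
The bucket/container split described above is an ops-table (vtable) pattern in C: the bucket calls through function pointers and never depends on the container's concrete layout. A toy sketch of the pattern; the struct names here are illustrative, not the real block_container_ops defined later in container.h:

#include <stdio.h>

struct container;	/* forward declaration is enough for the ops */

struct container_ops {
	int (*insert)(struct container *c, int val);
	int (*is_empty)(struct container *c);
};

struct container {
	const struct container_ops *c_ops;
	int nstored;	/* toy state in place of a real tree/list */
};

static int
toy_insert(struct container *c, int val)
{
	(void) val;
	c->nstored++;	/* a real container would store the block */
	return 0;
}

static int
toy_is_empty(struct container *c)
{
	return c->nstored == 0;
}

static const struct container_ops toy_ops = {
	.insert = toy_insert,
	.is_empty = toy_is_empty,
};

int
main(void)
{
	struct container c = { .c_ops = &toy_ops, .nstored = 0 };

	c.c_ops->insert(&c, 42);	/* dispatch through the ops table */
	printf("empty: %d\n", c.c_ops->is_empty(&c));	/* empty: 0 */
	return 0;
}
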
+ */ + +#include "alloc_class.h" +#include "bucket.h" +#include "heap.h" +#include "out.h" +#include "sys_util.h" +#include "valgrind_internal.h" + +/* + * bucket_new -- creates a new bucket instance + */ +struct bucket * +bucket_new(struct block_container *c, struct alloc_class *aclass) +{ + if (c == NULL) + return NULL; + + struct bucket *b = Malloc(sizeof(*b)); + if (b == NULL) + return NULL; + + b->container = c; + b->c_ops = c->c_ops; + + util_mutex_init(&b->lock); + + b->is_active = 0; + b->active_memory_block = NULL; + if (aclass && aclass->type == CLASS_RUN) { + b->active_memory_block = + Zalloc(sizeof(struct memory_block_reserved)); + + if (b->active_memory_block == NULL) + goto error_active_alloc; + } + b->aclass = aclass; + + return b; + +error_active_alloc: + + util_mutex_destroy(&b->lock); + Free(b); + return NULL; +} + +/* + * bucket_insert_block -- inserts a block into the bucket + */ +int +bucket_insert_block(struct bucket *b, const struct memory_block *m) +{ +#if VG_MEMCHECK_ENABLED || VG_HELGRIND_ENABLED || VG_DRD_ENABLED + if (On_memcheck || On_drd_or_hg) { + size_t size = m->m_ops->get_real_size(m); + void *data = m->m_ops->get_real_data(m); + VALGRIND_DO_MAKE_MEM_NOACCESS(data, size); + VALGRIND_ANNOTATE_NEW_MEMORY(data, size); + } +#endif + return b->c_ops->insert(b->container, m); +} + +/* + * bucket_delete -- cleanups and deallocates bucket instance + */ +void +bucket_delete(struct bucket *b) +{ + if (b->active_memory_block) + Free(b->active_memory_block); + + util_mutex_destroy(&b->lock); + b->c_ops->destroy(b->container); + Free(b); +} + +/* + * bucket_current_resvp -- returns the pointer to the current reservation count + */ +int * +bucket_current_resvp(struct bucket *b) +{ + return b->active_memory_block ? &b->active_memory_block->nresv : NULL; +} diff --git a/src/pmdk/src/libpmemobj/bucket.h b/src/pmdk/src/libpmemobj/bucket.h new file mode 100644 index 000000000..167b6f403 --- /dev/null +++ b/src/pmdk/src/libpmemobj/bucket.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * bucket.h -- internal definitions for bucket + */ + +#ifndef LIBPMEMOBJ_BUCKET_H +#define LIBPMEMOBJ_BUCKET_H 1 + +#include +#include + +#include "container.h" +#include "memblock.h" +#include "os_thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define CALC_SIZE_IDX(_unit_size, _size)\ +((_size) == 0 ? 
0 : (uint32_t)((((_size) - 1) / (_unit_size)) + 1)) + +struct bucket { + os_mutex_t lock; + + struct alloc_class *aclass; + + struct block_container *container; + const struct block_container_ops *c_ops; + + struct memory_block_reserved *active_memory_block; + int is_active; +}; + +struct bucket *bucket_new(struct block_container *c, + struct alloc_class *aclass); + +int *bucket_current_resvp(struct bucket *b); + +int bucket_insert_block(struct bucket *b, const struct memory_block *m); + +void bucket_delete(struct bucket *b); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/container.h b/src/pmdk/src/libpmemobj/container.h new file mode 100644 index 000000000..1b8f752d5 --- /dev/null +++ b/src/pmdk/src/libpmemobj/container.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * container.h -- internal definitions for block containers + */ + +#ifndef LIBPMEMOBJ_CONTAINER_H +#define LIBPMEMOBJ_CONTAINER_H 1 + +#include "memblock.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct block_container { + const struct block_container_ops *c_ops; + struct palloc_heap *heap; +}; + +struct block_container_ops { + /* inserts a new memory block into the container */ + int (*insert)(struct block_container *c, const struct memory_block *m); + + /* removes exact match memory block */ + int (*get_rm_exact)(struct block_container *c, + const struct memory_block *m); + + /* removes and returns the best-fit memory block for size */ + int (*get_rm_bestfit)(struct block_container *c, + struct memory_block *m); + + /* checks whether the container is empty */ + int (*is_empty)(struct block_container *c); + + /* removes all elements from the container */ + void (*rm_all)(struct block_container *c); + + /* deletes the container */ + void (*destroy)(struct block_container *c); +}; + +#ifdef __cplusplus +} +#endif + +#endif /* LIBPMEMOBJ_CONTAINER_H */ diff --git a/src/pmdk/src/libpmemobj/container_ravl.c b/src/pmdk/src/libpmemobj/container_ravl.c new file mode 100644 index 000000000..4c11d7300 --- /dev/null +++ b/src/pmdk/src/libpmemobj/container_ravl.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * container_ravl.c -- implementation of ravl-based block container + */ + +#include "container_ravl.h" +#include "ravl.h" +#include "out.h" +#include "sys_util.h" + +struct block_container_ravl { + struct block_container super; + struct ravl *tree; +}; + +/* + * container_compare_memblocks -- (internal) compares two memory blocks + */ +static int +container_compare_memblocks(const void *lhs, const void *rhs) +{ + const struct memory_block *l = lhs; + const struct memory_block *r = rhs; + + int64_t diff = (int64_t)l->size_idx - (int64_t)r->size_idx; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->zone_id - (int64_t)r->zone_id; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->chunk_id - (int64_t)r->chunk_id; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->block_off - (int64_t)r->block_off; + if (diff != 0) + return diff > 0 ? 
1 : -1; + + return 0; +} + +/* + * container_ravl_insert_block -- (internal) inserts a new memory block + * into the container + */ +static int +container_ravl_insert_block(struct block_container *bc, + const struct memory_block *m) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + struct memory_block *e = m->m_ops->get_user_data(m); + VALGRIND_DO_MAKE_MEM_DEFINED(e, sizeof(*e)); + VALGRIND_ADD_TO_TX(e, sizeof(*e)); + *e = *m; + VALGRIND_SET_CLEAN(e, sizeof(*e)); + VALGRIND_REMOVE_FROM_TX(e, sizeof(*e)); + + return ravl_insert(c->tree, e); +} + +/* + * container_ravl_get_rm_block_bestfit -- (internal) removes and returns the + * best-fit memory block for size + */ +static int +container_ravl_get_rm_block_bestfit(struct block_container *bc, + struct memory_block *m) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + struct ravl_node *n = ravl_find(c->tree, m, + RAVL_PREDICATE_GREATER_EQUAL); + if (n == NULL) + return ENOMEM; + + struct memory_block *e = ravl_data(n); + *m = *e; + ravl_remove(c->tree, n); + + return 0; +} + +/* + * container_ravl_get_rm_block_exact -- + * (internal) removes exact match memory block + */ +static int +container_ravl_get_rm_block_exact(struct block_container *bc, + const struct memory_block *m) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + struct ravl_node *n = ravl_find(c->tree, m, RAVL_PREDICATE_EQUAL); + if (n == NULL) + return ENOMEM; + + ravl_remove(c->tree, n); + + return 0; +} + +/* + * container_ravl_is_empty -- (internal) checks whether the container is empty + */ +static int +container_ravl_is_empty(struct block_container *bc) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + return ravl_empty(c->tree); +} + +/* + * container_ravl_rm_all -- (internal) removes all elements from the tree + */ +static void +container_ravl_rm_all(struct block_container *bc) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + ravl_clear(c->tree); +} + +/* + * container_ravl_destroy -- (internal) destroys the container + */ +static void +container_ravl_destroy(struct block_container *bc) +{ + struct block_container_ravl *c = + (struct block_container_ravl *)bc; + + ravl_delete(c->tree); + + Free(bc); +} + +/* + * Tree-based block container used to provide best-fit functionality to the + * bucket. The time complexity for this particular container is O(k) where k is + * the length of the key. + * + * The get methods also guarantee that the block with lowest possible address + * that best matches the requirements is provided. 
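
The best-fit guarantee above rests on RAVL_PREDICATE_GREATER_EQUAL: the lookup returns the smallest free block that still satisfies the request. A sketch of that selection rule over a plain sorted array standing in for the tree (bestfit is a hypothetical helper, not the container code):

#include <stddef.h>
#include <stdio.h>

/* free blocks ordered by size_idx, as the ravl tree orders them */
static const unsigned free_blocks[] = {2, 4, 8};

/* first element >= request, i.e. RAVL_PREDICATE_GREATER_EQUAL */
static int
bestfit(unsigned want)
{
	for (size_t i = 0; i < sizeof(free_blocks) / sizeof(*free_blocks);
			i++)
		if (free_blocks[i] >= want)
			return (int)free_blocks[i];
	return -1;	/* ENOMEM in the real container */
}

int
main(void)
{
	printf("%d\n", bestfit(3));	/* 4: smallest block that fits */
	printf("%d\n", bestfit(9));	/* -1: nothing large enough */
	return 0;
}
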
+ */ +static const struct block_container_ops container_ravl_ops = { + .insert = container_ravl_insert_block, + .get_rm_exact = container_ravl_get_rm_block_exact, + .get_rm_bestfit = container_ravl_get_rm_block_bestfit, + .is_empty = container_ravl_is_empty, + .rm_all = container_ravl_rm_all, + .destroy = container_ravl_destroy, +}; + +/* + * container_new_ravl -- allocates and initializes a ravl container + */ +struct block_container * +container_new_ravl(struct palloc_heap *heap) +{ + struct block_container_ravl *bc = Malloc(sizeof(*bc)); + if (bc == NULL) + goto error_container_malloc; + + bc->super.heap = heap; + bc->super.c_ops = &container_ravl_ops; + bc->tree = ravl_new(container_compare_memblocks); + if (bc->tree == NULL) + goto error_ravl_new; + + return (struct block_container *)&bc->super; + +error_ravl_new: + Free(bc); + +error_container_malloc: + return NULL; +} diff --git a/src/pmdk/src/libpmemobj/container_ravl.h b/src/pmdk/src/libpmemobj/container_ravl.h new file mode 100644 index 000000000..62bc0daf1 --- /dev/null +++ b/src/pmdk/src/libpmemobj/container_ravl.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * container_ravl.h -- internal definitions for ravl-based block container + */ + +#ifndef LIBPMEMOBJ_CONTAINER_RAVL_H +#define LIBPMEMOBJ_CONTAINER_RAVL_H 1 + +#include "container.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct block_container *container_new_ravl(struct palloc_heap *heap); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBPMEMOBJ_CONTAINER_RAVL_H */ diff --git a/src/pmdk/src/libpmemobj/container_seglists.c b/src/pmdk/src/libpmemobj/container_seglists.c new file mode 100644 index 000000000..286cf60ae --- /dev/null +++ b/src/pmdk/src/libpmemobj/container_seglists.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2019, Intel Corporation */ + +/* + * container_seglists.c -- implementation of segregated lists block container + * + * This container is constructed from N (up to 64) intrusive lists and a + * single 8 byte bitmap that stores the information whether a given list is + * empty or not. 
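
A sketch of how such a bitmap turns best-fit selection into a single find-first-set, using GCC/Clang's __builtin_ctzll in place of util_lssb_index64; pick_list is a hypothetical stand-in for the logic that appears in container_seglists_get_rm_block_bestfit below:

#include <stdint.h>
#include <stdio.h>

/* pick the smallest nonempty list that can serve size_idx units */
static int
pick_list(uint64_t nonempty_lists, unsigned size_idx)
{
	uint64_t size_mask = (1ULL << (size_idx - 1)) - 1;
	uint64_t v = nonempty_lists & ~size_mask; /* lists >= size_idx */

	if (v == 0)
		return -1;	/* ENOMEM in the real container */

	return __builtin_ctzll(v);	/* index of lowest set bit */
}

int
main(void)
{
	/* lists for unit counts 2 and 5 are nonempty */
	uint64_t nonempty = (1ULL << 1) | (1ULL << 4);

	printf("%d\n", pick_list(nonempty, 3));	/* 4: serves 5 units */
	printf("%d\n", pick_list(nonempty, 6));	/* -1: none fits */
	return 0;
}
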
+ */ + +#include "container_seglists.h" +#include "out.h" +#include "sys_util.h" +#include "util.h" +#include "valgrind_internal.h" +#include "vecq.h" + +#define SEGLIST_BLOCK_LISTS 64U + +struct block_container_seglists { + struct block_container super; + struct memory_block m; + VECQ(, uint32_t) blocks[SEGLIST_BLOCK_LISTS]; + uint64_t nonempty_lists; +}; + +/* + * container_seglists_insert_block -- (internal) inserts a new memory block + * into the container + */ +static int +container_seglists_insert_block(struct block_container *bc, + const struct memory_block *m) +{ + ASSERT(m->chunk_id < MAX_CHUNK); + ASSERT(m->zone_id < UINT16_MAX); + ASSERTne(m->size_idx, 0); + + struct block_container_seglists *c = + (struct block_container_seglists *)bc; + + if (c->nonempty_lists == 0) + c->m = *m; + + ASSERT(m->size_idx <= SEGLIST_BLOCK_LISTS); + ASSERT(m->chunk_id == c->m.chunk_id); + ASSERT(m->zone_id == c->m.zone_id); + + if (VECQ_ENQUEUE(&c->blocks[m->size_idx - 1], m->block_off) != 0) + return -1; + + /* marks the list as nonempty */ + c->nonempty_lists |= 1ULL << (m->size_idx - 1); + + return 0; +} + +/* + * container_seglists_get_rm_block_bestfit -- (internal) removes and returns the + * best-fit memory block for size + */ +static int +container_seglists_get_rm_block_bestfit(struct block_container *bc, + struct memory_block *m) +{ + struct block_container_seglists *c = + (struct block_container_seglists *)bc; + + ASSERT(m->size_idx <= SEGLIST_BLOCK_LISTS); + uint32_t i = 0; + + /* applicable lists */ + uint64_t size_mask = (1ULL << (m->size_idx - 1)) - 1; + uint64_t v = c->nonempty_lists & ~size_mask; + if (v == 0) + return ENOMEM; + + /* finds the list that serves the smallest applicable size */ + i = util_lssb_index64(v); + + uint32_t block_offset = VECQ_DEQUEUE(&c->blocks[i]); + + if (VECQ_SIZE(&c->blocks[i]) == 0) /* marks the list as empty */ + c->nonempty_lists &= ~(1ULL << (i)); + + *m = c->m; + m->block_off = block_offset; + m->size_idx = i + 1; + + return 0; +} + +/* + * container_seglists_is_empty -- (internal) checks whether the container is + * empty + */ +static int +container_seglists_is_empty(struct block_container *bc) +{ + struct block_container_seglists *c = + (struct block_container_seglists *)bc; + + return c->nonempty_lists == 0; +} + +/* + * container_seglists_rm_all -- (internal) removes all elements from the tree + */ +static void +container_seglists_rm_all(struct block_container *bc) +{ + struct block_container_seglists *c = + (struct block_container_seglists *)bc; + + for (unsigned i = 0; i < SEGLIST_BLOCK_LISTS; ++i) + VECQ_CLEAR(&c->blocks[i]); + + c->nonempty_lists = 0; +} + +/* + * container_seglists_delete -- (internal) deletes the container + */ +static void +container_seglists_destroy(struct block_container *bc) +{ + struct block_container_seglists *c = + (struct block_container_seglists *)bc; + + for (unsigned i = 0; i < SEGLIST_BLOCK_LISTS; ++i) + VECQ_DELETE(&c->blocks[i]); + + Free(c); +} + +/* + * This container does not support retrieval of exact memory blocks, but other + * than provides best-fit in O(1) time for unit sizes that do not exceed 64. 
+ */ +static const struct block_container_ops container_seglists_ops = { + .insert = container_seglists_insert_block, + .get_rm_exact = NULL, + .get_rm_bestfit = container_seglists_get_rm_block_bestfit, + .is_empty = container_seglists_is_empty, + .rm_all = container_seglists_rm_all, + .destroy = container_seglists_destroy, +}; + +/* + * container_new_seglists -- allocates and initializes a seglists container + */ +struct block_container * +container_new_seglists(struct palloc_heap *heap) +{ + struct block_container_seglists *bc = Malloc(sizeof(*bc)); + if (bc == NULL) + goto error_container_malloc; + + bc->super.heap = heap; + bc->super.c_ops = &container_seglists_ops; + + for (unsigned i = 0; i < SEGLIST_BLOCK_LISTS; ++i) + VECQ_INIT(&bc->blocks[i]); + bc->nonempty_lists = 0; + + return (struct block_container *)&bc->super; + +error_container_malloc: + return NULL; +} diff --git a/src/pmdk/src/libpmemobj/container_seglists.h b/src/pmdk/src/libpmemobj/container_seglists.h new file mode 100644 index 000000000..c21bb864c --- /dev/null +++ b/src/pmdk/src/libpmemobj/container_seglists.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * container_seglists.h -- internal definitions for + * segregated lists block container + */ + +#ifndef LIBPMEMOBJ_CONTAINER_SEGLISTS_H +#define LIBPMEMOBJ_CONTAINER_SEGLISTS_H 1 + +#include "container.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct block_container *container_new_seglists(struct palloc_heap *heap); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBPMEMOBJ_CONTAINER_SEGLISTS_H */ diff --git a/src/pmdk/src/libpmemobj/critnib.c b/src/pmdk/src/libpmemobj/critnib.c new file mode 100644 index 000000000..6b59b13f4 --- /dev/null +++ b/src/pmdk/src/libpmemobj/critnib.c @@ -0,0 +1,651 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * critnib.c -- implementation of critnib tree + * + * It offers identity lookup (like a hashmap) and <= lookup (like a search + * tree). Unlike some hashing algorithms (cuckoo hash, perfect hashing) the + * complexity isn't constant, but for data sizes we expect it's several + * times as fast as cuckoo, and has no "stop the world" cases that would + * cause latency (ie, better worst case behaviour). + */ + +/* + * STRUCTURE DESCRIPTION + * + * Critnib is a hybrid between a radix tree and DJ Bernstein's critbit: + * it skips nodes for uninteresting radix nodes (ie, ones that would have + * exactly one child), this requires adding to every node a field that + * describes the slice (4-bit in our case) that this radix level is for. + * + * This implementation also stores each node's path (ie, bits that are + * common to every key in that subtree) -- this doesn't help with lookups + * at all (unused in == match, could be reconstructed at no cost in <= + * after first dive) but simplifies inserts and removes. If we ever want + * that piece of memory it's easy to trim it down. + */ + +/* + * CONCURRENCY ISSUES + * + * Reads are completely lock-free sync-free, but only almost wait-free: + * if for some reason a read thread gets pathologically stalled, it will + * notice the data being stale and restart the work. In usual cases, + * the structure having been modified does _not_ cause a restart. + * + * Writes could be easily made lock-free as well (with only a cmpxchg + * sync), but this leads to problems with removes. 
A possible solution + * would be doing removes by overwriting by NULL w/o freeing -- yet this + * would lead to the structure growing without bounds. Complex per-node + * locks would increase concurrency but they slow down individual writes + * enough that in practice a simple global write lock works faster. + * + * Removes are the only operation that can break reads. The structure + * can do local RCU well -- the problem being knowing when it's safe to + * free. Any synchronization with reads would kill their speed, thus + * instead we have a remove count. The grace period is DELETED_LIFE, + * after which any read will notice staleness and restart its work. + */ +#include <errno.h> +#include <stdbool.h> + +#include "alloc.h" +#include "critnib.h" +#include "out.h" +#include "sys_util.h" +#include "valgrind_internal.h" + +/* + * A node that has been deleted is left untouched for this many delete + * cycles. Reads have guaranteed correctness if they took no longer than + * DELETED_LIFE concurrent deletes, otherwise they notice something is + * wrong and restart. The memory of deleted nodes is never freed to + * malloc nor their pointers lead anywhere wrong, thus a stale read will + * (temporarily) get a wrong answer but won't crash. + * + * There's no need to count writes as they never interfere with reads. + * + * Allowing stale reads (of arbitrarily old writes or of deletes less than + * DELETED_LIFE old) might sound counterintuitive, but it doesn't affect + * semantics in any way: the thread could have been stalled just after + * returning from our code. Thus, the guarantee is: the result of get() or + * find_le() is a value that was current at any point between the call + * start and end. + */ +#define DELETED_LIFE 16 + +#define SLICE 4 +#define NIB ((1ULL << SLICE) - 1) +#define SLNODES (1 << SLICE) + +typedef unsigned char sh_t; + +struct critnib_node { + /* + * path is the part of a tree that's already traversed (be it through + * explicit nodes or collapsed links) -- ie, any subtree below has all + * those bits set to this value. + * + * nib is a 4-bit slice that's an index into the node's children. + * + * shift is the length (in bits) of the part of the key below this node. 
+ * + * nib + * |XXXXXXXXXX|?|*****| + * path ^ + * +-----+ + * shift + */ + struct critnib_node *child[SLNODES]; + uint64_t path; + sh_t shift; +}; + +struct critnib_leaf { + uint64_t key; + void *value; +}; + +struct critnib { + struct critnib_node *root; + + /* pool of freed nodes: singly linked list, next at child[0] */ + struct critnib_node *deleted_node; + struct critnib_leaf *deleted_leaf; + + /* nodes removed but not yet eligible for reuse */ + struct critnib_node *pending_del_nodes[DELETED_LIFE]; + struct critnib_leaf *pending_del_leaves[DELETED_LIFE]; + + uint64_t remove_count; + + os_mutex_t mutex; /* writes/removes */ +}; + +/* + * atomic load + */ +static void +load(void *src, void *dst) +{ + util_atomic_load_explicit64((uint64_t *)src, (uint64_t *)dst, + memory_order_acquire); +} + +/* + * atomic store + */ +static void +store(void *dst, void *src) +{ + util_atomic_store_explicit64((uint64_t *)dst, (uint64_t)src, + memory_order_release); +} + +/* + * internal: is_leaf -- check tagged pointer for leafness + */ +static inline bool +is_leaf(struct critnib_node *n) +{ + return (uint64_t)n & 1; +} + +/* + * internal: to_leaf -- untag a leaf pointer + */ +static inline struct critnib_leaf * +to_leaf(struct critnib_node *n) +{ + return (void *)((uint64_t)n & ~1ULL); +} + +/* + * internal: path_mask -- return bit mask of a path above a subtree [shift] + * bits tall + */ +static inline uint64_t +path_mask(sh_t shift) +{ + return ~NIB << shift; +} + +/* + * internal: slice_index -- return index of child at the given nib + */ +static inline unsigned +slice_index(uint64_t key, sh_t shift) +{ + return (unsigned)((key >> shift) & NIB); +} + +/* + * critnib_new -- allocates a new critnib structure + */ +struct critnib * +critnib_new(void) +{ + struct critnib *c = Zalloc(sizeof(struct critnib)); + if (!c) + return NULL; + + util_mutex_init(&c->mutex); + + VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); + VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, + sizeof(c->remove_count)); + + return c; +} + +/* + * internal: delete_node -- recursively free (to malloc) a subtree + */ +static void +delete_node(struct critnib_node *__restrict n) +{ + if (!is_leaf(n)) { + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) + delete_node(n->child[i]); + } + + Free(n); + } else { + Free(to_leaf(n)); + } +} + +/* + * critnib_delete -- destroy and free a critnib struct + */ +void +critnib_delete(struct critnib *c) +{ + if (c->root) + delete_node(c->root); + + util_mutex_destroy(&c->mutex); + + for (struct critnib_node *m = c->deleted_node; m; ) { + struct critnib_node *mm = m->child[0]; + Free(m); + m = mm; + } + + for (struct critnib_leaf *k = c->deleted_leaf; k; ) { + struct critnib_leaf *kk = k->value; + Free(k); + k = kk; + } + + for (int i = 0; i < DELETED_LIFE; i++) { + Free(c->pending_del_nodes[i]); + Free(c->pending_del_leaves[i]); + } + + Free(c); +} + +/* + * internal: free_node -- free (to internal pool, not malloc) a node. + * + * We cannot free them to malloc as a stalled reader thread may still walk + * through such nodes; it will notice the result being bogus but only after + * completing the walk, thus we need to ensure any freed nodes still point + * to within the critnib structure. 
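
The path/nib/shift arithmetic is easiest to see with concrete values. A sketch of the two helpers with a sample key, assuming the 4-bit SLICE used in this file:

#include <stdint.h>
#include <stdio.h>

#define SLICE 4
#define NIB ((1ULL << SLICE) - 1)

/* bits of the key strictly above a subtree [shift] bits tall */
static uint64_t
path_mask(unsigned shift)
{
	return ~NIB << shift;
}

/* which of the 16 children the key falls into at this level */
static unsigned
slice_index(uint64_t key, unsigned shift)
{
	return (unsigned)((key >> shift) & NIB);
}

int
main(void)
{
	uint64_t key = 0xABCD;

	/* walking down: nibbles from the most significant end */
	printf("%x\n", slice_index(key, 12));	/* a */
	printf("%x\n", slice_index(key, 8));	/* b */

	/* the path shared by everything below a shift-8 node */
	printf("%lx\n", (unsigned long)(key & path_mask(8)));	/* a000 */
	return 0;
}
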
+ */ +static void +free_node(struct critnib *__restrict c, struct critnib_node *__restrict n) +{ + if (!n) + return; + + ASSERT(!is_leaf(n)); + n->child[0] = c->deleted_node; + c->deleted_node = n; +} + +/* + * internal: alloc_node -- allocate a node from our pool or from malloc + */ +static struct critnib_node * +alloc_node(struct critnib *__restrict c) +{ + if (!c->deleted_node) { + struct critnib_node *n = Malloc(sizeof(struct critnib_node)); + if (n == NULL) + ERR("!Malloc"); + + return n; + } + + struct critnib_node *n = c->deleted_node; + + c->deleted_node = n->child[0]; + VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + + return n; +} + +/* + * internal: free_leaf -- free (to internal pool, not malloc) a leaf. + * + * See free_node(). + */ +static void +free_leaf(struct critnib *__restrict c, struct critnib_leaf *__restrict k) +{ + if (!k) + return; + + k->value = c->deleted_leaf; + c->deleted_leaf = k; +} + +/* + * internal: alloc_leaf -- allocate a leaf from our pool or from malloc + */ +static struct critnib_leaf * +alloc_leaf(struct critnib *__restrict c) +{ + if (!c->deleted_leaf) { + struct critnib_leaf *k = Malloc(sizeof(struct critnib_leaf)); + if (k == NULL) + ERR("!Malloc"); + + return k; + } + + struct critnib_leaf *k = c->deleted_leaf; + + c->deleted_leaf = k->value; + VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + + return k; +} + +/* + * critnib_insert -- write a key:value pair to the critnib structure + * + * Returns: + * • 0 on success + * • EEXIST if such a key already exists + * • ENOMEM if we're out of memory + * + * Takes a global write lock but doesn't stall any readers. + */ +int +critnib_insert(struct critnib *c, uint64_t key, void *value) +{ + util_mutex_lock(&c->mutex); + + struct critnib_leaf *k = alloc_leaf(c); + if (!k) { + util_mutex_unlock(&c->mutex); + + return ENOMEM; + } + + VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + + k->key = key; + k->value = value; + + struct critnib_node *kn = (void *)((uint64_t)k | 1); + + struct critnib_node *n = c->root; + if (!n) { + c->root = kn; + + util_mutex_unlock(&c->mutex); + + return 0; + } + + struct critnib_node **parent = &c->root; + struct critnib_node *prev = c->root; + + while (n && !is_leaf(n) && (key & path_mask(n->shift)) == n->path) { + prev = n; + parent = &n->child[slice_index(key, n->shift)]; + n = *parent; + } + + if (!n) { + n = prev; + store(&n->child[slice_index(key, n->shift)], kn); + + util_mutex_unlock(&c->mutex); + + return 0; + } + + uint64_t path = is_leaf(n) ? to_leaf(n)->key : n->path; + /* Find where the path differs from our key. */ + uint64_t at = path ^ key; + if (!at) { + ASSERT(is_leaf(n)); + free_leaf(c, to_leaf(kn)); + /* fail instead of replacing */ + + util_mutex_unlock(&c->mutex); + + return EEXIST; + } + + /* and convert that to an index. 
*/ + sh_t sh = util_mssb_index64(at) & (sh_t)~(SLICE - 1); + + struct critnib_node *m = alloc_node(c); + if (!m) { + free_leaf(c, to_leaf(kn)); + + util_mutex_unlock(&c->mutex); + + return ENOMEM; + } + VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + + for (int i = 0; i < SLNODES; i++) + m->child[i] = NULL; + + m->child[slice_index(key, sh)] = kn; + m->child[slice_index(path, sh)] = n; + m->shift = sh; + m->path = key & path_mask(sh); + store(parent, m); + + util_mutex_unlock(&c->mutex); + + return 0; +} + +/* + * critnib_remove -- delete a key from the critnib structure, return its value + */ +void * +critnib_remove(struct critnib *c, uint64_t key) +{ + struct critnib_leaf *k; + void *value = NULL; + + util_mutex_lock(&c->mutex); + + struct critnib_node *n = c->root; + if (!n) + goto not_found; + + uint64_t del = util_fetch_and_add64(&c->remove_count, 1) % DELETED_LIFE; + free_node(c, c->pending_del_nodes[del]); + free_leaf(c, c->pending_del_leaves[del]); + c->pending_del_nodes[del] = NULL; + c->pending_del_leaves[del] = NULL; + + if (is_leaf(n)) { + k = to_leaf(n); + if (k->key == key) { + store(&c->root, NULL); + goto del_leaf; + } + + goto not_found; + } + /* + * n and k are a parent:child pair (after the first iteration); k is the + * leaf that holds the key we're deleting. + */ + struct critnib_node **k_parent = &c->root; + struct critnib_node **n_parent = &c->root; + struct critnib_node *kn = n; + + while (!is_leaf(kn)) { + n_parent = k_parent; + n = kn; + k_parent = &kn->child[slice_index(key, kn->shift)]; + kn = *k_parent; + + if (!kn) + goto not_found; + } + + k = to_leaf(kn); + if (k->key != key) + goto not_found; + + store(&n->child[slice_index(key, n->shift)], NULL); + + /* Remove the node if there's only one remaining child. */ + int ochild = -1; + for (int i = 0; i < SLNODES; i++) { + if (n->child[i]) { + if (ochild != -1) + goto del_leaf; + + ochild = i; + } + } + + ASSERTne(ochild, -1); + + store(n_parent, n->child[ochild]); + c->pending_del_nodes[del] = n; + +del_leaf: + value = k->value; + c->pending_del_leaves[del] = k; + +not_found: + util_mutex_unlock(&c->mutex); + return value; +} + +/* + * critnib_get -- query for a key ("==" match), returns value or NULL + * + * Doesn't need a lock but if many deletes happened while our thread was + * somehow stalled the query is restarted (as freed nodes remain unused only + * for a grace period). + * + * Counterintuitively, it's pointless to return the most current answer, + * we need only one that was valid at any point after the call started. + */ +void * +critnib_get(struct critnib *c, uint64_t key) +{ + uint64_t wrs1, wrs2; + void *res; + + do { + struct critnib_node *n; + + load(&c->remove_count, &wrs1); + load(&c->root, &n); + + /* + * critbit algorithm: dive into the tree, looking at nothing but + * each node's critical bit^H^H^Hnibble. This means we risk + * going wrong way if our path is missing, but that's ok... + */ + while (n && !is_leaf(n)) + load(&n->child[slice_index(key, n->shift)], &n); + + /* ... as we check it at the end. */ + struct critnib_leaf *k = to_leaf(n); + res = (n && k->key == key) ? 
k->value : NULL; + load(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} + +/* + * internal: find_successor -- return the rightmost non-null node in a subtree + */ +static void * +find_successor(struct critnib_node *__restrict n) +{ + while (1) { + int nib; + for (nib = NIB; nib >= 0; nib--) + if (n->child[nib]) + break; + + if (nib < 0) + return NULL; + + n = n->child[nib]; + if (is_leaf(n)) + return to_leaf(n)->value; + } +} + +/* + * internal: find_le -- recursively search <= in a subtree + */ +static void * +find_le(struct critnib_node *__restrict n, uint64_t key) +{ + if (!n) + return NULL; + + if (is_leaf(n)) { + struct critnib_leaf *k = to_leaf(n); + + return (k->key <= key) ? k->value : NULL; + } + + /* + * is our key outside the subtree we're in? + * + * If we're inside, all bits above the nib will be identical; note + * that shift points at the nib's lower rather than upper edge, so it + * needs to be masked away as well. + */ + if ((key ^ n->path) >> (n->shift) & ~NIB) { + /* + * subtree is too far to the left? + * -> its rightmost value is good + */ + if (n->path < key) + return find_successor(n); + + /* + * subtree is too far to the right? + * -> it has nothing of interest to us + */ + return NULL; + } + + unsigned nib = slice_index(key, n->shift); + /* recursive call: follow the path */ + { + struct critnib_node *m; + load(&n->child[nib], &m); + void *value = find_le(m, key); + if (value) + return value; + } + + /* + * nothing in that subtree? We strayed from the path at this point, + * thus need to search every subtree to our left in this node. No + * need to dive into any but the first non-null, though. + */ + for (; nib > 0; nib--) { + struct critnib_node *m; + load(&n->child[nib - 1], &m); + if (m) { + n = m; + if (is_leaf(n)) + return to_leaf(n)->value; + + return find_successor(n); + } + } + + return NULL; +} + +/* + * critnib_find_le -- query for a key ("<=" match), returns value or NULL + * + * Same guarantees as critnib_get(). + */ +void * +critnib_find_le(struct critnib *c, uint64_t key) +{ + uint64_t wrs1, wrs2; + void *res; + + do { + load(&c->remove_count, &wrs1); + struct critnib_node *n; /* avoid a subtle TOCTOU */ + load(&c->root, &n); + res = n ? 
find_le(n, key) : NULL; + load(&c->remove_count, &wrs2); + } while (wrs1 + DELETED_LIFE <= wrs2); + + return res; +} diff --git a/src/pmdk/src/libpmemobj/critnib.h b/src/pmdk/src/libpmemobj/critnib.h new file mode 100644 index 000000000..d33664366 --- /dev/null +++ b/src/pmdk/src/libpmemobj/critnib.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * critnib.h -- internal definitions for critnib tree + */ + +#ifndef LIBPMEMOBJ_CRITNIB_H +#define LIBPMEMOBJ_CRITNIB_H 1 + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct critnib; + +struct critnib *critnib_new(void); +void critnib_delete(struct critnib *c); + +int critnib_insert(struct critnib *c, uint64_t key, void *value); +void *critnib_remove(struct critnib *c, uint64_t key); +void *critnib_get(struct critnib *c, uint64_t key); +void *critnib_find_le(struct critnib *c, uint64_t key); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/ctl_debug.c b/src/pmdk/src/libpmemobj/ctl_debug.c new file mode 100644 index 000000000..fa7b20947 --- /dev/null +++ b/src/pmdk/src/libpmemobj/ctl_debug.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * ctl_debug.c -- implementation of the debug CTL namespace + */ + +#include "ctl.h" +#include "ctl_debug.h" +#include "obj.h" + +/* + * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap + */ +static int +CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + int arg_in = *(int *)arg; + + pop->heap.alloc_pattern = arg_in; + return 0; +} + +/* + * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field + */ +static int +CTL_READ_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + int *arg_out = arg; + + *arg_out = pop->heap.alloc_pattern; + return 0; +} + +static const struct ctl_argument CTL_ARG(alloc_pattern) = CTL_ARG_LONG_LONG; + +static const struct ctl_node CTL_NODE(heap)[] = { + CTL_LEAF_RW(alloc_pattern), + + CTL_NODE_END +}; + +static const struct ctl_node CTL_NODE(debug)[] = { + CTL_CHILD(heap), + + CTL_NODE_END +}; + +/* + * debug_ctl_register -- registers ctl nodes for "debug" module + */ +void +debug_ctl_register(PMEMobjpool *pop) +{ + CTL_REGISTER_MODULE(pop->ctl, debug); +} diff --git a/src/pmdk/src/libpmemobj/ctl_debug.h b/src/pmdk/src/libpmemobj/ctl_debug.h new file mode 100644 index 000000000..7cdb52ccc --- /dev/null +++ b/src/pmdk/src/libpmemobj/ctl_debug.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * ctl_debug.h -- definitions for the debug CTL namespace + */ +#ifndef LIBPMEMOBJ_CTL_DEBUG_H +#define LIBPMEMOBJ_CTL_DEBUG_H 1 + +#include "libpmemobj.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void debug_ctl_register(PMEMobjpool *pop); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBPMEMOBJ_CTL_DEBUG_H */ diff --git a/src/pmdk/src/libpmemobj/heap.c b/src/pmdk/src/libpmemobj/heap.c new file mode 100644 index 000000000..13fb4b57e --- /dev/null +++ b/src/pmdk/src/libpmemobj/heap.c @@ -0,0 +1,1893 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * heap.c -- heap implementation + */ + +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <float.h> + +#include "queue.h" +#include "heap.h" +#include "out.h" +#include 
"util.h" +#include "sys_util.h" +#include "valgrind_internal.h" +#include "recycler.h" +#include "container_ravl.h" +#include "container_seglists.h" +#include "alloc_class.h" +#include "os_thread.h" +#include "set.h" + +#define MAX_RUN_LOCKS MAX_CHUNK +#define MAX_RUN_LOCKS_VG 1024 /* avoid perf issues /w drd */ + +/* + * This is the value by which the heap might grow once we hit an OOM. + */ +#define HEAP_DEFAULT_GROW_SIZE (1 << 27) /* 128 megabytes */ +#define MAX_DEFAULT_ARENAS (1 << 10) /* 1024 arenas */ + +struct arenas { + VEC(, struct arena *) vec; + size_t nactive; + + /* + * When nesting with other locks, this one must be acquired first, + * prior to locking any buckets or memory blocks. + */ + os_mutex_t lock; + + /* stores a pointer to one of the arenas */ + os_tls_key_t thread; +}; + +/* + * Arenas store the collection of buckets for allocation classes. + * Each thread is assigned an arena on its first allocator operation + * if arena is set to auto. + */ +struct arena { + /* one bucket per allocation class */ + struct bucket *buckets[MAX_ALLOCATION_CLASSES]; + + /* + * Decides whether the arena can be + * automatically assigned to a thread. + */ + int automatic; + size_t nthreads; + struct arenas *arenas; +}; + +struct heap_rt { + struct alloc_class_collection *alloc_classes; + + /* DON'T use these two variable directly! */ + struct bucket *default_bucket; + + struct arenas arenas; + + struct recycler *recyclers[MAX_ALLOCATION_CLASSES]; + + os_mutex_t run_locks[MAX_RUN_LOCKS]; + unsigned nlocks; + + unsigned nzones; + unsigned zones_exhausted; +}; + +/* + * heap_arenas_init - (internal) initialize generic arenas info + */ +static int +heap_arenas_init(struct arenas *arenas) +{ + util_mutex_init(&arenas->lock); + VEC_INIT(&arenas->vec); + arenas->nactive = 0; + + if (VEC_RESERVE(&arenas->vec, MAX_DEFAULT_ARENAS) == -1) + return -1; + return 0; +} + +/* + * heap_arenas_fini - (internal) destroy generic arenas info + */ +static void +heap_arenas_fini(struct arenas *arenas) +{ + util_mutex_destroy(&arenas->lock); + VEC_DELETE(&arenas->vec); +} + +/* + * heap_alloc_classes -- returns the allocation classes collection + */ +struct alloc_class_collection * +heap_alloc_classes(struct palloc_heap *heap) +{ + return heap->rt ? 
heap->rt->alloc_classes : NULL; +} + +/* + * heap_arena_delete -- (internal) destroys arena instance + */ +static void +heap_arena_delete(struct arena *arena) +{ + for (int i = 0; i < MAX_ALLOCATION_CLASSES; ++i) + if (arena->buckets[i] != NULL) + bucket_delete(arena->buckets[i]); + Free(arena); +} + +/* + * heap_arena_new -- (internal) initializes arena instance + */ +static struct arena * +heap_arena_new(struct palloc_heap *heap, int automatic) +{ + struct heap_rt *rt = heap->rt; + + struct arena *arena = Zalloc(sizeof(struct arena)); + if (arena == NULL) { + ERR("!heap: arena malloc error"); + return NULL; + } + arena->nthreads = 0; + arena->automatic = automatic; + arena->arenas = &heap->rt->arenas; + + COMPILE_ERROR_ON(MAX_ALLOCATION_CLASSES > UINT8_MAX); + for (uint8_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) { + struct alloc_class *ac = + alloc_class_by_id(rt->alloc_classes, i); + if (ac != NULL) { + arena->buckets[i] = + bucket_new(container_new_seglists(heap), ac); + if (arena->buckets[i] == NULL) + goto error_bucket_create; + } else { + arena->buckets[i] = NULL; + } + } + + return arena; + +error_bucket_create: + heap_arena_delete(arena); + return NULL; +} + +/* + * heap_get_best_class -- returns the alloc class that best fits the + * requested size + */ +struct alloc_class * +heap_get_best_class(struct palloc_heap *heap, size_t size) +{ + return alloc_class_by_alloc_size(heap->rt->alloc_classes, size); +} + +/* + * heap_arena_thread_detach -- detaches arena from the current thread + * + * Must be called with arenas lock taken. + */ +static void +heap_arena_thread_detach(struct arena *a) +{ + /* + * Even though this is under a lock, nactive variable can also be read + * concurrently from the recycler (without the arenas lock). + * That's why we are using an atomic operation. + */ + if ((--a->nthreads) == 0) + util_fetch_and_sub64(&a->arenas->nactive, 1); +} + +/* + * heap_arena_thread_attach -- assign arena to the current thread + * + * Must be called with arenas lock taken. + */ +static void +heap_arena_thread_attach(struct palloc_heap *heap, struct arena *a) +{ + struct heap_rt *h = heap->rt; + + struct arena *thread_arena = os_tls_get(h->arenas.thread); + if (thread_arena) + heap_arena_thread_detach(thread_arena); + + ASSERTne(a, NULL); + + /* + * Even though this is under a lock, nactive variable can also be read + * concurrently from the recycler (without the arenas lock). + * That's why we are using an atomic operation. + */ + if ((a->nthreads++) == 0) + util_fetch_and_add64(&a->arenas->nactive, 1); + + os_tls_set(h->arenas.thread, a); +} + +/* + * heap_thread_arena_destructor -- (internal) removes arena thread assignment + */ +static void +heap_thread_arena_destructor(void *arg) +{ + struct arena *a = arg; + os_mutex_lock(&a->arenas->lock); + heap_arena_thread_detach(a); + os_mutex_unlock(&a->arenas->lock); +} + +/* + * heap_get_arena_by_id -- returns arena by id + * + * Must be called with arenas lock taken. + */ +static struct arena * +heap_get_arena_by_id(struct palloc_heap *heap, unsigned arena_id) +{ + return VEC_ARR(&heap->rt->arenas.vec)[arena_id - 1]; +} + +/* + * heap_thread_arena_assign -- (internal) assigns the least used arena + * to current thread + * + * To avoid complexities with regards to races in the search for the least + * used arena, a lock is used, but the nthreads counter of the arena is still + * bumped using atomic instruction because it can happen in parallel to a + * destructor of a thread, which also touches that variable. 
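Aside for orientation (not part of the patch): both the debug node registered in ctl_debug.c above and this arena machinery are reachable through the public CTL interface described in pmemobj_ctl_get(3). A hedged sketch, assuming an already-opened pool and that the entry names and argument types match the man page:

#include <libpmemobj.h>

/* sketch: drive the debug and arena knobs through the public CTL API */
static int
tune_pool(PMEMobjpool *pop)
{
	/* fill pattern for new allocations; int, mirroring the handlers */
	int pattern = 0xAB;
	if (pmemobj_ctl_set(pop, "debug.heap.alloc_pattern", &pattern) != 0)
		return -1;

	/* pin the calling thread to arena 1 (arena ids are 1-based) */
	unsigned total;
	if (pmemobj_ctl_get(pop, "heap.narenas.total", &total) != 0 ||
			total < 1)
		return -1;

	unsigned arena_id = 1;
	return pmemobj_ctl_set(pop, "heap.thread.arena_id", &arena_id);
}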
+
+/*
+ * heap_thread_arena_assign -- (internal) assigns the least-used arena
+ *	to the current thread
+ *
+ * To avoid complexity around races in the search for the least-used arena,
+ * a lock is used; the nthreads counter of the arena is still bumped with an
+ * atomic instruction because it can be modified in parallel by a thread
+ * destructor, which also touches that variable.
+ */
+static struct arena *
+heap_thread_arena_assign(struct palloc_heap *heap)
+{
+	util_mutex_lock(&heap->rt->arenas.lock);
+
+	struct arena *least_used = NULL;
+
+	ASSERTne(VEC_SIZE(&heap->rt->arenas.vec), 0);
+
+	struct arena *a;
+	VEC_FOREACH(a, &heap->rt->arenas.vec) {
+		if (!a->automatic)
+			continue;
+		if (least_used == NULL ||
+				a->nthreads < least_used->nthreads)
+			least_used = a;
+	}
+
+	LOG(4, "assigning %p arena to current thread", least_used);
+
+	/* at least one automatic arena must exist */
+	ASSERTne(least_used, NULL);
+	heap_arena_thread_attach(heap, least_used);
+
+	util_mutex_unlock(&heap->rt->arenas.lock);
+
+	return least_used;
+}
+
+/*
+ * heap_thread_arena -- (internal) returns the arena assigned to the current
+ *	thread
+ */
+static struct arena *
+heap_thread_arena(struct palloc_heap *heap)
+{
+	struct arena *a;
+	if ((a = os_tls_get(heap->rt->arenas.thread)) == NULL)
+		a = heap_thread_arena_assign(heap);
+
+	return a;
+}
+
+/*
+ * heap_get_thread_arena_id -- returns the arena id assigned to the current
+ *	thread
+ */
+unsigned
+heap_get_thread_arena_id(struct palloc_heap *heap)
+{
+	unsigned arena_id = 1;
+	struct arena *arenap = heap_thread_arena(heap);
+	struct arena *arenav;
+	struct heap_rt *rt = heap->rt;
+
+	util_mutex_lock(&rt->arenas.lock);
+	VEC_FOREACH(arenav, &heap->rt->arenas.vec) {
+		if (arenav == arenap) {
+			util_mutex_unlock(&rt->arenas.lock);
+			return arena_id;
+		}
+		arena_id++;
+	}
+
+	util_mutex_unlock(&rt->arenas.lock);
+	ASSERT(0);
+	return arena_id;
+}
+
+/*
+ * heap_bucket_acquire -- fetches a bucket (from the thread's arena or by an
+ *	explicit arena id) that is exclusive to the caller until
+ *	heap_bucket_release is called
+ */
+struct bucket *
+heap_bucket_acquire(struct palloc_heap *heap, uint8_t class_id,
+	uint16_t arena_id)
+{
+	struct heap_rt *rt = heap->rt;
+	struct bucket *b;
+
+	if (class_id == DEFAULT_ALLOC_CLASS_ID) {
+		b = rt->default_bucket;
+		goto out;
+	}
+
+	if (arena_id == HEAP_ARENA_PER_THREAD) {
+		struct arena *arena = heap_thread_arena(heap);
+		ASSERTne(arena->buckets, NULL);
+		b = arena->buckets[class_id];
+	} else {
+		b = (VEC_ARR(&heap->rt->arenas.vec)
+			[arena_id - 1])->buckets[class_id];
+	}
+
+out:
+	util_mutex_lock(&b->lock);
+
+	return b;
+}
+
+/*
+ * heap_bucket_release -- puts the bucket back into the heap
+ */
+void
+heap_bucket_release(struct palloc_heap *heap, struct bucket *b)
+{
+	util_mutex_unlock(&b->lock);
+}
+
+/*
+ * heap_get_run_lock -- returns the lock associated with a memory block
+ */
+os_mutex_t *
+heap_get_run_lock(struct palloc_heap *heap, uint32_t chunk_id)
+{
+	return &heap->rt->run_locks[chunk_id % heap->rt->nlocks];
+}
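For orientation (not part of the patch): the acquire/release pair above brackets every bucket operation in this file. A hedged sketch of the internal call sequence, assuming a booted heap, a registered class_id, and a block size expressed in units:

/* sketch: reserve a best-fit block from the calling thread's arena */
struct bucket *b = heap_bucket_acquire(heap, class_id,
		HEAP_ARENA_PER_THREAD);

struct memory_block m = MEMORY_BLOCK_NONE;
m.size_idx = units;

int err = heap_get_bestfit_block(heap, b, &m); /* defined later in heap.c */

heap_bucket_release(heap, b); /* always release, even on failure */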
+
+/*
+ * heap_max_zone -- (internal) calculates how many zones the heap can fit
+ */
+static unsigned
+heap_max_zone(size_t size)
+{
+	unsigned max_zone = 0;
+	size -= sizeof(struct heap_header);
+
+	while (size >= ZONE_MIN_SIZE) {
+		max_zone++;
+		size -= size <= ZONE_MAX_SIZE ? size : ZONE_MAX_SIZE;
+	}
+
+	return max_zone;
+}
+
+/*
+ * zone_calc_size_idx -- (internal) calculates zone size index
+ */
+static uint32_t
+zone_calc_size_idx(uint32_t zone_id, unsigned max_zone, size_t heap_size)
+{
+	ASSERT(max_zone > 0);
+	if (zone_id < max_zone - 1)
+		return MAX_CHUNK;
+
+	ASSERT(heap_size >= zone_id * ZONE_MAX_SIZE);
+	size_t zone_raw_size = heap_size - zone_id * ZONE_MAX_SIZE;
+
+	ASSERT(zone_raw_size >= (sizeof(struct zone_header) +
+			sizeof(struct chunk_header) * MAX_CHUNK) +
+			sizeof(struct heap_header));
+	zone_raw_size -= sizeof(struct zone_header) +
+		sizeof(struct chunk_header) * MAX_CHUNK +
+		sizeof(struct heap_header);
+
+	size_t zone_size_idx = zone_raw_size / CHUNKSIZE;
+	ASSERT(zone_size_idx <= UINT32_MAX);
+
+	return (uint32_t)zone_size_idx;
+}
+
+/*
+ * heap_zone_init -- (internal) writes the zone's first chunk and header
+ */
+static void
+heap_zone_init(struct palloc_heap *heap, uint32_t zone_id,
+	uint32_t first_chunk_id)
+{
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+	uint32_t size_idx = zone_calc_size_idx(zone_id, heap->rt->nzones,
+		*heap->sizep);
+
+	ASSERT(size_idx > first_chunk_id);
+	memblock_huge_init(heap, first_chunk_id, zone_id,
+		size_idx - first_chunk_id);
+
+	struct zone_header nhdr = {
+		.size_idx = size_idx,
+		.magic = ZONE_HEADER_MAGIC,
+	};
+	z->header = nhdr; /* write the entire header (8 bytes) at once */
+	pmemops_persist(&heap->p_ops, &z->header, sizeof(z->header));
+}
+
+/*
+ * heap_memblock_insert_block -- (internal) bucket insert wrapper for callbacks
+ */
+static int
+heap_memblock_insert_block(const struct memory_block *m, void *b)
+{
+	return bucket_insert_block(b, m);
+}
+
+/*
+ * heap_run_create -- (internal) initializes a new run on an existing free
+ *	chunk
+ */
+static int
+heap_run_create(struct palloc_heap *heap, struct bucket *b,
+	struct memory_block *m)
+{
+	*m = memblock_run_init(heap, m->chunk_id, m->zone_id, &b->aclass->rdsc);
+
+	if (m->m_ops->iterate_free(m, heap_memblock_insert_block, b) != 0) {
+		b->c_ops->rm_all(b->container);
+		return -1;
+	}
+
+	STATS_INC(heap->stats, transient, heap_run_active,
+		m->size_idx * CHUNKSIZE);
+
+	return 0;
+}
+
+/*
+ * heap_run_reuse -- (internal) reuses an existing run
+ */
+static int
+heap_run_reuse(struct palloc_heap *heap, struct bucket *b,
+	const struct memory_block *m)
+{
+	int ret = 0;
+
+	ASSERTeq(m->type, MEMORY_BLOCK_RUN);
+	os_mutex_t *lock = m->m_ops->get_lock(m);
+
+	util_mutex_lock(lock);
+
+	ret = m->m_ops->iterate_free(m, heap_memblock_insert_block, b);
+
+	util_mutex_unlock(lock);
+
+	if (ret == 0) {
+		b->active_memory_block->m = *m;
+		b->active_memory_block->bucket = b;
+		b->is_active = 1;
+		util_fetch_and_add64(&b->active_memory_block->nresv, 1);
+	} else {
+		b->c_ops->rm_all(b->container);
+	}
+
+	return ret;
+}
+
+/*
+ * heap_free_chunk_reuse -- reuses an existing free chunk
+ */
+int
+heap_free_chunk_reuse(struct palloc_heap *heap,
+	struct bucket *bucket,
+	struct memory_block *m)
+{
+	/*
+	 * Perform coalescing just in case there
+	 * are any neighboring free chunks.
+	 */
+	struct memory_block nm = heap_coalesce_huge(heap, bucket, m);
+	if (nm.size_idx != m->size_idx) {
+		m->m_ops->prep_hdr(&nm, MEMBLOCK_FREE, NULL);
+	}
+
+	*m = nm;
+
+	return bucket_insert_block(bucket, m);
+}
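Aside (not part of the patch): taken together, these helpers implement the run lifecycle. heap_run_create() turns a free huge chunk into a run and seeds the bucket with its free blocks, heap_run_reuse() re-attaches a partially used run obtained from the recycler, and heap_free_chunk_reuse() coalesces and re-inserts free huge chunks; once a run becomes completely empty, it is folded back into a free chunk by heap_run_into_free_chunk(), defined next.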
+
+/*
+ * heap_run_into_free_chunk -- (internal) creates a new free chunk in place of
+ *	a run
+ */
+static void
+heap_run_into_free_chunk(struct palloc_heap *heap,
+	struct bucket *bucket,
+	struct memory_block *m)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(heap, m);
+
+	m->block_off = 0;
+	m->size_idx = hdr->size_idx;
+
+	STATS_SUB(heap->stats, transient, heap_run_active,
+		m->size_idx * CHUNKSIZE);
+
+	/*
+	 * The only thing this could race with is heap_memblock_on_free()
+	 * because that function is called after processing the operation,
+	 * which means that a different thread might immediately call this
+	 * function if the free() made the run empty.
+	 * We could forgo this lock if it weren't for helgrind which needs it
+	 * to establish happens-before relation for the chunk metadata.
+	 */
+	os_mutex_t *lock = m->m_ops->get_lock(m);
+	util_mutex_lock(lock);
+
+	*m = memblock_huge_init(heap, m->chunk_id, m->zone_id, m->size_idx);
+
+	heap_free_chunk_reuse(heap, bucket, m);
+
+	util_mutex_unlock(lock);
+}
+
+/*
+ * heap_reclaim_run -- checks the run for available memory if unclaimed
+ *
+ * Returns 1 if the run is empty and can be reclaimed as a free chunk,
+ * 0 otherwise.
+ */
+static int
+heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m, int startup)
+{
+	struct chunk_run *run = heap_get_chunk_run(heap, m);
+	struct chunk_header *hdr = heap_get_chunk_hdr(heap, m);
+
+	struct alloc_class *c = alloc_class_by_run(
+		heap->rt->alloc_classes,
+		run->hdr.block_size, hdr->flags, m->size_idx);
+
+	struct recycler_element e = recycler_element_new(heap, m);
+	if (c == NULL) {
+		uint32_t size_idx = m->size_idx;
+		struct run_bitmap b;
+		m->m_ops->get_bitmap(m, &b);
+
+		ASSERTeq(size_idx, m->size_idx);
+
+		return e.free_space == b.nbits;
+	}
+
+	if (e.free_space == c->rdsc.nallocs)
+		return 1;
+
+	if (startup) {
+		STATS_INC(heap->stats, transient, heap_run_active,
+			m->size_idx * CHUNKSIZE);
+		STATS_INC(heap->stats, transient, heap_run_allocated,
+			(c->rdsc.nallocs - e.free_space) * run->hdr.block_size);
+	}
+
+	if (recycler_put(heap->rt->recyclers[c->id], m, e) < 0)
+		ERR("lost runtime tracking info of %u run due to OOM", c->id);
+
+	return 0;
+}
+
+/*
+ * heap_reclaim_zone_garbage -- (internal) creates volatile state of unused
+ *	runs
+ */
+static void
+heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket,
+	uint32_t zone_id)
+{
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+
+	for (uint32_t i = 0; i < z->header.size_idx; ) {
+		struct chunk_header *hdr = &z->chunk_headers[i];
+		ASSERT(hdr->size_idx != 0);
+
+		struct memory_block m = MEMORY_BLOCK_NONE;
+		m.zone_id = zone_id;
+		m.chunk_id = i;
+		m.size_idx = hdr->size_idx;
+
+		memblock_rebuild_state(heap, &m);
+		m.m_ops->reinit_chunk(&m);
+
+		switch (hdr->type) {
+			case CHUNK_TYPE_RUN:
+				if (heap_reclaim_run(heap, &m, 1) != 0)
+					heap_run_into_free_chunk(heap, bucket,
+						&m);
+				break;
+			case CHUNK_TYPE_FREE:
+				heap_free_chunk_reuse(heap, bucket, &m);
+				break;
+			case CHUNK_TYPE_USED:
+				break;
+			default:
+				ASSERT(0);
+		}
+
+		i = m.chunk_id + m.size_idx; /* hdr might have changed */
+	}
+}
+
+/*
+ * heap_populate_bucket -- (internal) creates volatile state of memory blocks
+ */
+static int
+heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket)
+{
+	struct heap_rt *h = heap->rt;
+
+	/* at this point we are sure that there's no more memory in the heap */
+	if (h->zones_exhausted == h->nzones)
+		return ENOMEM;
+
+	uint32_t zone_id = h->zones_exhausted++;
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+
+	/* ignore zone and chunk headers */
+	VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(z, sizeof(z->header) +
+		sizeof(z->chunk_headers));
+
+	if (z->header.magic != ZONE_HEADER_MAGIC)
+		heap_zone_init(heap, zone_id, 0);
+
+	heap_reclaim_zone_garbage(heap, bucket, zone_id);
+
+	/*
+	 * It doesn't matter that this function might not have found any
+	 * free blocks because there is still potential that subsequent calls
+	 * will find something in later zones.
+	 */
+	return 0;
+}
+
+/*
+ * heap_recycle_unused -- recalculates scores in the recycler and turns any
+ *	empty runs into free chunks
+ *
+ * If force is not set, this function might effectively be a noop if not
+ * enough space was freed.
+ */
+static int
+heap_recycle_unused(struct palloc_heap *heap, struct recycler *recycler,
+	struct bucket *defb, int force)
+{
+	struct empty_runs r = recycler_recalc(recycler, force);
+	if (VEC_SIZE(&r) == 0)
+		return ENOMEM;
+
+	struct bucket *nb = defb == NULL ? heap_bucket_acquire(heap,
+		DEFAULT_ALLOC_CLASS_ID, HEAP_ARENA_PER_THREAD) : NULL;
+
+	ASSERT(defb != NULL || nb != NULL);
+
+	struct memory_block *nm;
+	VEC_FOREACH_BY_PTR(nm, &r) {
+		heap_run_into_free_chunk(heap, defb ? defb : nb, nm);
+	}
+
+	if (nb != NULL)
+		heap_bucket_release(heap, nb);
+
+	VEC_DELETE(&r);
+
+	return 0;
+}
+
+/*
+ * heap_reclaim_garbage -- (internal) creates volatile state of unused runs
+ */
+static int
+heap_reclaim_garbage(struct palloc_heap *heap, struct bucket *bucket)
+{
+	int ret = ENOMEM;
+	struct recycler *r;
+	for (size_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
+		if ((r = heap->rt->recyclers[i]) == NULL)
+			continue;
+
+		if (heap_recycle_unused(heap, r, bucket, 1) == 0)
+			ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * heap_ensure_huge_bucket_filled --
+ *	(internal) refills the default bucket if needed
+ */
+static int
+heap_ensure_huge_bucket_filled(struct palloc_heap *heap, struct bucket *bucket)
+{
+	if (heap_reclaim_garbage(heap, bucket) == 0)
+		return 0;
+
+	if (heap_populate_bucket(heap, bucket) == 0)
+		return 0;
+
+	int extend;
+	if ((extend = heap_extend(heap, bucket, heap->growsize)) < 0)
+		return ENOMEM;
+
+	if (extend == 1)
+		return 0;
+
+	/*
+	 * Extending the pool does not automatically add the chunks into the
+	 * runtime state of the bucket - we need to traverse the new zone if
+	 * it was created.
+	 */
+	if (heap_populate_bucket(heap, bucket) == 0)
+		return 0;
+
+	return ENOMEM;
+}
+
+/*
+ * heap_bucket_deref_active -- detaches active blocks from the bucket
+ */
+static int
+heap_bucket_deref_active(struct palloc_heap *heap, struct bucket *b)
+{
+	/* get rid of the active block in the bucket */
+	struct memory_block_reserved **active = &b->active_memory_block;
+
+	if (b->is_active) {
+		b->c_ops->rm_all(b->container);
+		if (util_fetch_and_sub64(&(*active)->nresv, 1) == 1) {
+			VALGRIND_ANNOTATE_HAPPENS_AFTER(&(*active)->nresv);
+			heap_discard_run(heap, &(*active)->m);
+		} else {
+			VALGRIND_ANNOTATE_HAPPENS_BEFORE(&(*active)->nresv);
+			*active = NULL;
+		}
+		b->is_active = 0;
+	}
+
+	if (*active == NULL) {
+		*active = Zalloc(sizeof(struct memory_block_reserved));
+		if (*active == NULL)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_force_recycle -- detaches all memory from arenas and forces global
+ *	recycling of all memory blocks
+ */
+void
+heap_force_recycle(struct palloc_heap *heap)
+{
+	util_mutex_lock(&heap->rt->arenas.lock);
+	struct arena *arenap;
+	VEC_FOREACH(arenap, &heap->rt->arenas.vec) {
+		for (int i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
+			struct bucket *b = arenap->buckets[i];
+			if (b == NULL)
+				continue;
+			util_mutex_lock(&b->lock);
+			/*
+			 * There's no need to check if this fails, as that
+			 * will not prevent progress in this function.
+			 */
+			heap_bucket_deref_active(heap, b);
+			util_mutex_unlock(&b->lock);
+		}
+	}
+	util_mutex_unlock(&heap->rt->arenas.lock);
+	heap_reclaim_garbage(heap, NULL);
+}
+
+/*
+ * heap_reuse_from_recycler -- (internal) tries to reuse runs that are
+ *	currently in the recycler
+ */
+static int
+heap_reuse_from_recycler(struct palloc_heap *heap,
+	struct bucket *b, uint32_t units, int force)
+{
+	struct memory_block m = MEMORY_BLOCK_NONE;
+	m.size_idx = units;
+
+	struct recycler *r = heap->rt->recyclers[b->aclass->id];
+	if (!force && recycler_get(r, &m) == 0)
+		return heap_run_reuse(heap, b, &m);
+
+	heap_recycle_unused(heap, r, NULL, force);
+
+	if (recycler_get(r, &m) == 0)
+		return heap_run_reuse(heap, b, &m);
+
+	return ENOMEM;
+}
+
+/*
+ * heap_discard_run -- puts the memory block back into the global heap
+ */
+void
+heap_discard_run(struct palloc_heap *heap, struct memory_block *m)
+{
+	if (heap_reclaim_run(heap, m, 0)) {
+		struct bucket *defb =
+			heap_bucket_acquire(heap,
+				DEFAULT_ALLOC_CLASS_ID, 0);
+
+		heap_run_into_free_chunk(heap, defb, m);
+
+		heap_bucket_release(heap, defb);
+	}
+}
+
+/*
+ * heap_ensure_run_bucket_filled -- (internal) refills the bucket if needed
+ */
+static int
+heap_ensure_run_bucket_filled(struct palloc_heap *heap, struct bucket *b,
+	uint32_t units)
+{
+	ASSERTeq(b->aclass->type, CLASS_RUN);
+	int ret = 0;
+
+	if (heap_bucket_deref_active(heap, b) != 0)
+		return ENOMEM;
+
+	if (heap_reuse_from_recycler(heap, b, units, 0) == 0)
+		goto out;
+
+	/* search in the next zone before attempting to create a new run */
+	struct bucket *defb = heap_bucket_acquire(heap,
+		DEFAULT_ALLOC_CLASS_ID,
+		HEAP_ARENA_PER_THREAD);
+	heap_populate_bucket(heap, defb);
+	heap_bucket_release(heap, defb);
+
+	if (heap_reuse_from_recycler(heap, b, units, 0) == 0)
+		goto out;
+
+	struct memory_block m = MEMORY_BLOCK_NONE;
+	m.size_idx = b->aclass->rdsc.size_idx;
+
+	defb = heap_bucket_acquire(heap,
+		DEFAULT_ALLOC_CLASS_ID,
+		HEAP_ARENA_PER_THREAD);
+	/* cannot reuse an existing run, create a new one */
+	if (heap_get_bestfit_block(heap, defb, &m) == 0) {
+		ASSERTeq(m.block_off, 0);
+		if (heap_run_create(heap, b, &m) != 0) {
+			heap_bucket_release(heap, defb);
+			return ENOMEM;
+		}
+
+		b->active_memory_block->m = m;
+		b->is_active = 1;
+		b->active_memory_block->bucket = b;
+		util_fetch_and_add64(&b->active_memory_block->nresv, 1);
+
+		heap_bucket_release(heap, defb);
+
+		goto out;
+	}
+	heap_bucket_release(heap, defb);
+
+	if (heap_reuse_from_recycler(heap, b, units, 0) == 0)
+		goto out;
+
+	ret = ENOMEM;
+out:
+	return ret;
+}
+
+/*
+ * heap_memblock_on_free -- bookkeeping actions executed at every free of a
+ *	block
+ */
+void
+heap_memblock_on_free(struct palloc_heap *heap, const struct memory_block *m)
+{
+	if (m->type != MEMORY_BLOCK_RUN)
+		return;
+
+	struct chunk_header *hdr = heap_get_chunk_hdr(heap, m);
+	struct chunk_run *run = heap_get_chunk_run(heap, m);
+
+	ASSERTeq(hdr->type, CHUNK_TYPE_RUN);
+
+	struct alloc_class *c = alloc_class_by_run(
+		heap->rt->alloc_classes,
+		run->hdr.block_size, hdr->flags, hdr->size_idx);
+
+	if (c == NULL)
+		return;
+
+	recycler_inc_unaccounted(heap->rt->recyclers[c->id], m);
+}
+
+/*
+ * heap_split_block -- (internal) splits the unused part of the memory block
+ */
+static void
+heap_split_block(struct palloc_heap *heap, struct bucket *b,
+	struct memory_block *m, uint32_t units)
+{
+	ASSERT(units <= UINT16_MAX);
+	ASSERT(units > 0);
+
+	if (b->aclass->type == CLASS_RUN) {
+		ASSERT((uint64_t)m->block_off + (uint64_t)units <= UINT32_MAX);
+		struct memory_block r = {m->chunk_id, m->zone_id,
+			m->size_idx - units, (uint32_t)(m->block_off + units),
+			NULL, NULL, 0, 0, NULL};
+		memblock_rebuild_state(heap, &r);
+		if (bucket_insert_block(b, &r) != 0)
+			LOG(2,
+				"failed to allocate memory block runtime tracking info");
+	} else {
+		uint32_t new_chunk_id = m->chunk_id + units;
+		uint32_t new_size_idx = m->size_idx - units;
+
+		struct memory_block n = memblock_huge_init(heap,
+			new_chunk_id, m->zone_id, new_size_idx);
+
+		*m = memblock_huge_init(heap, m->chunk_id, m->zone_id, units);
+
+		if (bucket_insert_block(b, &n) != 0)
+			LOG(2,
+				"failed to allocate memory block runtime tracking info");
+	}
+
+	m->size_idx = units;
+}
+
+/*
+ * heap_get_bestfit_block --
+ *	extracts a memory block of equal size index
+ */
+int
+heap_get_bestfit_block(struct palloc_heap *heap, struct bucket *b,
+	struct memory_block *m)
+{
+	uint32_t units = m->size_idx;
+
+	while (b->c_ops->get_rm_bestfit(b->container, m) != 0) {
+		if (b->aclass->type == CLASS_HUGE) {
+			if (heap_ensure_huge_bucket_filled(heap, b) != 0)
+				return ENOMEM;
+		} else {
+			if (heap_ensure_run_bucket_filled(heap, b, units) != 0)
+				return ENOMEM;
+		}
+	}
+
+	ASSERT(m->size_idx >= units);
+
+	if (units != m->size_idx)
+		heap_split_block(heap, b, m, units);
+
+	m->m_ops->ensure_header_type(m, b->aclass->header_type);
+	m->header_type = b->aclass->header_type;
+
+	return 0;
+}
+
+/*
+ * heap_get_adjacent_free_block -- locates an adjacent free memory block in
+ *	the heap
+ */
+static int
+heap_get_adjacent_free_block(struct palloc_heap *heap,
+	const struct memory_block *in, struct memory_block *out, int prev)
+{
+	struct zone *z = ZID_TO_ZONE(heap->layout, in->zone_id);
+	struct chunk_header *hdr = &z->chunk_headers[in->chunk_id];
+	out->zone_id = in->zone_id;
+
+	if (prev) {
+		if (in->chunk_id == 0)
+			return ENOENT;
+
+		struct chunk_header *prev_hdr =
+			&z->chunk_headers[in->chunk_id - 1];
+		out->chunk_id = in->chunk_id - prev_hdr->size_idx;
+
+		if (z->chunk_headers[out->chunk_id].type != CHUNK_TYPE_FREE)
+			return ENOENT;
+
+		out->size_idx = z->chunk_headers[out->chunk_id].size_idx;
+	} else { /* next */
+		if (in->chunk_id + hdr->size_idx == z->header.size_idx)
+			return ENOENT;
+
+		out->chunk_id = in->chunk_id + hdr->size_idx;
+
+		if (z->chunk_headers[out->chunk_id].type != CHUNK_TYPE_FREE)
+			return ENOENT;
+
+		out->size_idx = z->chunk_headers[out->chunk_id].size_idx;
+	}
+	memblock_rebuild_state(heap, out);
+
+	return 0;
+}
+
+/*
+ * heap_coalesce -- (internal) merges adjacent memory blocks
+ */
+static struct memory_block
+heap_coalesce(struct palloc_heap *heap,
+	const struct memory_block *blocks[], int n)
+{
+	struct memory_block ret = MEMORY_BLOCK_NONE;
+
+	const struct memory_block *b = NULL;
+	ret.size_idx = 0;
+	for (int i = 0; i < n; ++i) {
+		if (blocks[i] == NULL)
+			continue;
+		b = b ? b : blocks[i];
+		ret.size_idx += blocks[i]->size_idx;
+	}
+
+	ASSERTne(b, NULL);
+
+	ret.chunk_id = b->chunk_id;
+	ret.zone_id = b->zone_id;
+	ret.block_off = b->block_off;
+	memblock_rebuild_state(heap, &ret);
+
+	return ret;
+}
+
+/*
+ * heap_coalesce_huge -- finds neighbours of a huge block, removes them from
+ *	the volatile state and returns the resulting block
+ */
+struct memory_block
+heap_coalesce_huge(struct palloc_heap *heap, struct bucket *b,
+	const struct memory_block *m)
+{
+	const struct memory_block *blocks[3] = {NULL, m, NULL};
+
+	struct memory_block prev = MEMORY_BLOCK_NONE;
+	if (heap_get_adjacent_free_block(heap, m, &prev, 1) == 0 &&
+		b->c_ops->get_rm_exact(b->container, &prev) == 0) {
+		blocks[0] = &prev;
+	}
+
+	struct memory_block next = MEMORY_BLOCK_NONE;
+	if (heap_get_adjacent_free_block(heap, m, &next, 0) == 0 &&
+		b->c_ops->get_rm_exact(b->container, &next) == 0) {
+		blocks[2] = &next;
+	}
+
+	return heap_coalesce(heap, blocks, 3);
+}
+
+/*
+ * heap_end -- returns the first address after the heap
+ */
+void *
+heap_end(struct palloc_heap *h)
+{
+	ASSERT(h->rt->nzones > 0);
+
+	struct zone *last_zone = ZID_TO_ZONE(h->layout, h->rt->nzones - 1);
+
+	return &last_zone->chunks[last_zone->header.size_idx];
+}
+
+/*
+ * heap_arena_create -- creates a new arena, pushes it to the vector
+ *	and returns the new arena id or -1 on failure
+ */
+int
+heap_arena_create(struct palloc_heap *heap)
+{
+	struct heap_rt *h = heap->rt;
+
+	struct arena *arena = heap_arena_new(heap, 0);
+	if (arena == NULL)
+		return -1;
+
+	util_mutex_lock(&h->arenas.lock);
+
+	if (VEC_PUSH_BACK(&h->arenas.vec, arena))
+		goto err_push_back;
+
+	int ret = (int)VEC_SIZE(&h->arenas.vec);
+	util_mutex_unlock(&h->arenas.lock);
+
+	return ret;
+
+err_push_back:
+	util_mutex_unlock(&h->arenas.lock);
+	heap_arena_delete(arena);
+	return -1;
+}
+
+/*
+ * heap_get_narenas_total -- returns the number of all arenas in the heap
+ */
+unsigned
+heap_get_narenas_total(struct palloc_heap *heap)
+{
+	struct heap_rt *h = heap->rt;
+
+	util_mutex_lock(&h->arenas.lock);
+	unsigned total = (unsigned)VEC_SIZE(&h->arenas.vec);
+	util_mutex_unlock(&h->arenas.lock);
+
+	return total;
+}
+
+/*
+ * heap_get_narenas_max -- returns the max number of arenas
+ */
+unsigned
+heap_get_narenas_max(struct palloc_heap *heap)
+{
+	struct heap_rt *h = heap->rt;
+
+	util_mutex_lock(&h->arenas.lock);
+	unsigned max = (unsigned)VEC_CAPACITY(&h->arenas.vec);
+	util_mutex_unlock(&h->arenas.lock);
+
+	return max;
+}
+
+/*
+ * heap_set_narenas_max -- changes the max number of arenas
+ */
+int
+heap_set_narenas_max(struct palloc_heap *heap, unsigned size)
+{
+	struct heap_rt *h = heap->rt;
+	int ret = -1;
+
+	util_mutex_lock(&h->arenas.lock);
+	unsigned capacity = (unsigned)VEC_CAPACITY(&h->arenas.vec);
+	if (size < capacity) {
+		LOG(2, "cannot decrease max number of arenas");
+		goto out;
+	} else if (size == capacity) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = VEC_RESERVE(&h->arenas.vec, size);
+
+out:
+	util_mutex_unlock(&h->arenas.lock);
+	return ret;
+}
+
+/*
+ * heap_get_narenas_auto -- returns the number of all automatic arenas
+ */
+unsigned
+heap_get_narenas_auto(struct palloc_heap *heap)
+{
+	struct heap_rt *h = heap->rt;
+	struct arena *arena;
+	unsigned narenas = 0;
+
+	util_mutex_lock(&h->arenas.lock);
+
+	VEC_FOREACH(arena, &h->arenas.vec) {
+		if (arena->automatic)
+			narenas++;
+	}
+
+	util_mutex_unlock(&h->arenas.lock);
+
+	return narenas;
+}
+
+/*
+ * heap_get_arena_buckets -- returns a pointer to buckets from the arena
+ */
+struct bucket **
+heap_get_arena_buckets(struct palloc_heap *heap, unsigned arena_id)
+{
+	util_mutex_lock(&heap->rt->arenas.lock);
+	struct arena *a = heap_get_arena_by_id(heap, arena_id);
+	util_mutex_unlock(&heap->rt->arenas.lock);
+
+	return a->buckets;
+}
+
+/*
+ * heap_get_arena_auto -- returns the arena's automatic value
+ */
+int
+heap_get_arena_auto(struct palloc_heap *heap, unsigned arena_id)
+{
+	util_mutex_lock(&heap->rt->arenas.lock);
+	struct arena *a = heap_get_arena_by_id(heap, arena_id);
+	util_mutex_unlock(&heap->rt->arenas.lock);
+
+	return a->automatic;
+}
+
+/*
+ * heap_set_arena_auto -- sets the arena's automatic value
+ */
+int
+heap_set_arena_auto(struct palloc_heap *heap, unsigned arena_id,
+	int automatic)
+{
+	unsigned nautomatic = 0;
+	struct arena *a;
+	struct heap_rt *h = heap->rt;
+	int ret = 0;
+
+	util_mutex_lock(&h->arenas.lock);
+	VEC_FOREACH(a, &h->arenas.vec)
+		if (a->automatic)
+			nautomatic++;
+
+	a = VEC_ARR(&heap->rt->arenas.vec)[arena_id - 1];
+
+	if (!automatic && nautomatic <= 1 && a->automatic) {
+		ERR("at least one automatic arena must exist");
+		ret = -1;
+		goto out;
+	}
+	a->automatic = automatic;
+
+out:
+	util_mutex_unlock(&h->arenas.lock);
+	return ret;
+}
+
+/*
+ * heap_set_arena_thread -- assigns the arena with the given id to the
+ *	current thread
+ */
+void
+heap_set_arena_thread(struct palloc_heap *heap, unsigned arena_id)
+{
+	os_mutex_lock(&heap->rt->arenas.lock);
+	heap_arena_thread_attach(heap, heap_get_arena_by_id(heap, arena_id));
+	os_mutex_unlock(&heap->rt->arenas.lock);
+}
+
+/*
+ * heap_get_procs -- (internal) returns the number of arenas to create
+ */
+static unsigned
+heap_get_procs(void)
+{
+	long cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (cpus < 1)
+		cpus = 1;
+
+	unsigned arenas = (unsigned)cpus;
+
+	LOG(4, "creating %u arenas", arenas);
+
+	return arenas;
+}
+
+/*
+ * heap_create_alloc_class_buckets -- allocates all cache bucket
+ *	instances of the specified type
+ */
+int
+heap_create_alloc_class_buckets(struct palloc_heap *heap, struct alloc_class *c)
+{
+	struct heap_rt *h = heap->rt;
+
+	if (c->type == CLASS_RUN) {
+		h->recyclers[c->id] = recycler_new(heap, c->rdsc.nallocs,
+			&heap->rt->arenas.nactive);
+		if (h->recyclers[c->id] == NULL)
+			goto error_recycler_new;
+	}
+
+	size_t i;
+	struct arena *arena;
+	VEC_FOREACH_BY_POS(i, &h->arenas.vec) {
+		arena = VEC_ARR(&h->arenas.vec)[i];
+		if (arena->buckets[c->id] == NULL)
+			arena->buckets[c->id] = bucket_new(
+				container_new_seglists(heap), c);
+		if (arena->buckets[c->id] == NULL)
+			goto error_cache_bucket_new;
+	}
+
+	return 0;
+
+error_cache_bucket_new:
+	recycler_delete(h->recyclers[c->id]);
+
+	for (; i != 0; --i)
+		bucket_delete(VEC_ARR(&h->arenas.vec)[i - 1]->buckets[c->id]);
+
+error_recycler_new:
+	return -1;
+}
+
+/*
+ * heap_buckets_init -- (internal) initializes bucket instances
+ */
+int
+heap_buckets_init(struct palloc_heap *heap)
+{
+	struct heap_rt *h = heap->rt;
+
+	for (uint8_t i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
+		struct alloc_class *c = alloc_class_by_id(h->alloc_classes, i);
+		if (c != NULL) {
+			if (heap_create_alloc_class_buckets(heap, c) != 0)
+				goto error_bucket_create;
+		}
+	}
+
+	h->default_bucket = bucket_new(container_new_ravl(heap),
+		alloc_class_by_id(h->alloc_classes, DEFAULT_ALLOC_CLASS_ID));
+
+	if (h->default_bucket == NULL)
+		goto error_bucket_create;
+
+	return 0;
+
+error_bucket_create: {
+	struct arena *arena;
+	VEC_FOREACH(arena, &h->arenas.vec)
+		heap_arena_delete(arena);
+	}
+	return -1;
+}
+
+/*
+ * heap_extend -- extends the heap by the given size
+ *
+ * Returns 1 if the current zone has been extended, 0 if a new zone had to be
+ *	created, -1 if unsuccessful.
+ *
+ * If this function has to create a new zone, it will NOT populate buckets with
+ *	the new chunks.
+ */
+int
+heap_extend(struct palloc_heap *heap, struct bucket *b, size_t size)
+{
+	void *nptr = util_pool_extend(heap->set, &size, PMEMOBJ_MIN_PART);
+	if (nptr == NULL)
+		return -1;
+
+	*heap->sizep += size;
+	pmemops_persist(&heap->p_ops, heap->sizep, sizeof(*heap->sizep));
+
+	/*
+	 * If interrupted after changing the size, the heap will just grow
+	 * automatically on the next heap_boot.
+	 */
+
+	uint32_t nzones = heap_max_zone(*heap->sizep);
+	uint32_t zone_id = nzones - 1;
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+	uint32_t chunk_id = heap->rt->nzones == nzones ? z->header.size_idx : 0;
+	heap_zone_init(heap, zone_id, chunk_id);
+
+	if (heap->rt->nzones != nzones) {
+		heap->rt->nzones = nzones;
+		return 0;
+	}
+
+	struct chunk_header *hdr = &z->chunk_headers[chunk_id];
+
+	struct memory_block m = MEMORY_BLOCK_NONE;
+	m.chunk_id = chunk_id;
+	m.zone_id = zone_id;
+	m.block_off = 0;
+	m.size_idx = hdr->size_idx;
+	memblock_rebuild_state(heap, &m);
+
+	heap_free_chunk_reuse(heap, b, &m);
+
+	return 1;
+}
+
+/*
+ * heap_zone_update_if_needed -- updates the zone metadata if the pool has
+ *	been extended
+ */
+static void
+heap_zone_update_if_needed(struct palloc_heap *heap)
+{
+	struct zone *z;
+
+	for (uint32_t i = 0; i < heap->rt->nzones; ++i) {
+		z = ZID_TO_ZONE(heap->layout, i);
+		if (z->header.magic != ZONE_HEADER_MAGIC)
+			continue;
+
+		size_t size_idx = zone_calc_size_idx(i, heap->rt->nzones,
+			*heap->sizep);
+
+		if (size_idx == z->header.size_idx)
+			continue;
+
+		heap_zone_init(heap, i, z->header.size_idx);
+	}
+}
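To make the return-value contract above concrete, here is a hedged caller-side sketch (not part of the patch; it mirrors heap_ensure_huge_bucket_filled earlier in this file):

/* sketch: consuming heap_extend()'s three-way result */
int extend = heap_extend(heap, bucket, heap->growsize);
if (extend < 0)
	return ENOMEM; /* the pool could not be extended */
if (extend == 1)
	return 0; /* current zone grew; new chunks already in the bucket */

/* extend == 0: a fresh zone exists, but its chunks are in no bucket yet */
return heap_populate_bucket(heap, bucket);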
+
+/*
+ * heap_boot -- opens the heap region of the pmemobj pool
+ *
+ * If successful, the function returns zero; otherwise, an error number is
+ * returned.
+ */
+int
+heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size,
+	uint64_t *sizep, void *base, struct pmem_ops *p_ops,
+	struct stats *stats, struct pool_set *set)
+{
+	/*
+	 * The size can be 0 if interrupted during heap_init or this is the
+	 * first time booting the heap with the persistent size field.
+	 */
+	if (*sizep == 0) {
+		*sizep = heap_size;
+		pmemops_persist(p_ops, sizep, sizeof(*sizep));
+	}
+
+	if (heap_size < *sizep) {
+		ERR("mapped region smaller than the heap size");
+		return EINVAL;
+	}
+
+	struct heap_rt *h = Malloc(sizeof(*h));
+	int err;
+	if (h == NULL) {
+		err = ENOMEM;
+		goto error_heap_malloc;
+	}
+
+	h->alloc_classes = alloc_class_collection_new();
+	if (h->alloc_classes == NULL) {
+		err = ENOMEM;
+		goto error_alloc_classes_new;
+	}
+
+	unsigned narenas_default = heap_get_procs();
+
+	if (heap_arenas_init(&h->arenas) != 0) {
+		err = errno;
+		goto error_arenas_malloc;
+	}
+
+	h->nzones = heap_max_zone(heap_size);
+
+	h->zones_exhausted = 0;
+
+	h->nlocks = On_valgrind ? MAX_RUN_LOCKS_VG : MAX_RUN_LOCKS;
+	for (unsigned i = 0; i < h->nlocks; ++i)
+		util_mutex_init(&h->run_locks[i]);
+
+	os_tls_key_create(&h->arenas.thread, heap_thread_arena_destructor);
+
+	heap->p_ops = *p_ops;
+	heap->layout = heap_start;
+	heap->rt = h;
+	heap->sizep = sizep;
+	heap->base = base;
+	heap->stats = stats;
+	heap->set = set;
+	heap->growsize = HEAP_DEFAULT_GROW_SIZE;
+	heap->alloc_pattern = PALLOC_CTL_DEBUG_NO_PATTERN;
+	VALGRIND_DO_CREATE_MEMPOOL(heap->layout, 0, 0);
+
+	for (unsigned i = 0; i < narenas_default; ++i) {
+		if (VEC_PUSH_BACK(&h->arenas.vec, heap_arena_new(heap, 1))) {
+			err = errno;
+			goto error_vec_reserve;
+		}
+	}
+
+	for (unsigned i = 0; i < MAX_ALLOCATION_CLASSES; ++i)
+		h->recyclers[i] = NULL;
+
+	heap_zone_update_if_needed(heap);
+
+	return 0;
+
+error_vec_reserve:
+	heap_arenas_fini(&h->arenas);
+error_arenas_malloc:
+	alloc_class_collection_delete(h->alloc_classes);
+error_alloc_classes_new:
+	Free(h);
+	heap->rt = NULL;
+error_heap_malloc:
+	return err;
+}
+
+/*
+ * heap_write_header -- (internal) creates a clean header
+ */
+static void
+heap_write_header(struct heap_header *hdr)
+{
+	struct heap_header newhdr = {
+		.signature = HEAP_SIGNATURE,
+		.major = HEAP_MAJOR,
+		.minor = HEAP_MINOR,
+		.unused = 0,
+		.chunksize = CHUNKSIZE,
+		.chunks_per_zone = MAX_CHUNK,
+		.reserved = {0},
+		.checksum = 0
+	};
+
+	util_checksum(&newhdr, sizeof(newhdr), &newhdr.checksum, 1, 0);
+	*hdr = newhdr;
+}
+
+/*
+ * heap_init -- initializes the heap
+ *
+ * If successful, the function returns zero; otherwise, an error number is
+ * returned.
+ */
+int
+heap_init(void *heap_start, uint64_t heap_size, uint64_t *sizep,
+	struct pmem_ops *p_ops)
+{
+	if (heap_size < HEAP_MIN_SIZE)
+		return EINVAL;
+
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(heap_start, heap_size);
+
+	struct heap_layout *layout = heap_start;
+	heap_write_header(&layout->header);
+	pmemops_persist(p_ops, &layout->header, sizeof(struct heap_header));
+
+	unsigned zones = heap_max_zone(heap_size);
+	for (unsigned i = 0; i < zones; ++i) {
+		struct zone *zone = ZID_TO_ZONE(layout, i);
+		pmemops_memset(p_ops, &zone->header, 0,
+			sizeof(struct zone_header), 0);
+		pmemops_memset(p_ops, &zone->chunk_headers, 0,
+			sizeof(struct chunk_header), 0);
+
+		/* only explicitly allocated chunks should be accessible */
+		VALGRIND_DO_MAKE_MEM_NOACCESS(&zone->chunk_headers,
+			sizeof(struct chunk_header));
+	}
+
+	*sizep = heap_size;
+	pmemops_persist(p_ops, sizep, sizeof(*sizep));
+
+	return 0;
+}
+
+/*
+ * heap_cleanup -- cleans up the volatile heap state
+ */
+void
+heap_cleanup(struct palloc_heap *heap)
+{
+	struct heap_rt *rt = heap->rt;
+
+	alloc_class_collection_delete(rt->alloc_classes);
+
+	os_tls_key_delete(rt->arenas.thread);
+	bucket_delete(rt->default_bucket);
+
+	struct arena *arena;
+	VEC_FOREACH(arena, &rt->arenas.vec)
+		heap_arena_delete(arena);
+
+	for (unsigned i = 0; i < rt->nlocks; ++i)
+		util_mutex_destroy(&rt->run_locks[i]);
+
+	heap_arenas_fini(&rt->arenas);
+
+	for (int i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
+		if (heap->rt->recyclers[i] == NULL)
+			continue;
+
+		recycler_delete(rt->recyclers[i]);
+	}
+
+	VALGRIND_DO_DESTROY_MEMPOOL(heap->layout);
+
+	Free(rt);
+	heap->rt = NULL;
+}
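For orientation (not part of the patch), a hedged sketch of how the entry points above are sequenced; the real call sites live in palloc.c and obj.c, and sizep would normally point into the persistent pool header rather than at a local:

/* sketch: heap module lifecycle, error handling abbreviated */
uint64_t size_field = 0;

if (heap_init(heap_start, heap_size, &size_field, p_ops) != 0)
	return -1; /* one-time on-media format of the heap region */

struct palloc_heap heap;
if (heap_boot(&heap, heap_start, heap_size, &size_field,
		base, p_ops, stats, set) != 0)
	return -1; /* builds the volatile runtime state */

if (heap_buckets_init(&heap) != 0)
	return -1; /* per-class bucket instances */

/* ... allocations via heap_bucket_acquire()/heap_get_bestfit_block() ... */

heap_cleanup(&heap); /* tears the volatile state back down */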
+
+/*
+ * heap_verify_header -- (internal) verifies if the heap header is consistent
+ */
+static int
+heap_verify_header(struct heap_header *hdr)
+{
+	if (util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 0, 0) != 1) {
+		ERR("heap: invalid header checksum");
+		return -1;
+	}
+
+	if (memcmp(hdr->signature, HEAP_SIGNATURE, HEAP_SIGNATURE_LEN) != 0) {
+		ERR("heap: invalid signature");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_verify_zone_header --
+ *	(internal) verifies if the zone header is consistent
+ */
+static int
+heap_verify_zone_header(struct zone_header *hdr)
+{
+	if (hdr->magic != ZONE_HEADER_MAGIC) /* not initialized */
+		return 0;
+
+	if (hdr->size_idx == 0) {
+		ERR("heap: invalid zone size");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_verify_chunk_header --
+ *	(internal) verifies if the chunk header is consistent
+ */
+static int
+heap_verify_chunk_header(struct chunk_header *hdr)
+{
+	if (hdr->type == CHUNK_TYPE_UNKNOWN) {
+		ERR("heap: invalid chunk type");
+		return -1;
+	}
+
+	if (hdr->type >= MAX_CHUNK_TYPE) {
+		ERR("heap: unknown chunk type");
+		return -1;
+	}
+
+	if (hdr->flags & ~CHUNK_FLAGS_ALL_VALID) {
+		ERR("heap: invalid chunk flags");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_verify_zone -- (internal) verifies if the zone is consistent
+ */
+static int
+heap_verify_zone(struct zone *zone)
+{
+	if (zone->header.magic == 0)
+		return 0; /* not initialized, and that is OK */
+
+	if (zone->header.magic != ZONE_HEADER_MAGIC) {
+		ERR("heap: invalid zone magic");
+		return -1;
+	}
+
+	if (heap_verify_zone_header(&zone->header))
+		return -1;
+
+	uint32_t i;
+	for (i = 0; i < zone->header.size_idx; ) {
+		if (heap_verify_chunk_header(&zone->chunk_headers[i]))
+			return -1;
+
+		i += zone->chunk_headers[i].size_idx;
+	}
+
+	if (i != zone->header.size_idx) {
+		ERR("heap: chunk sizes mismatch");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_check -- verifies if the heap is consistent and can be opened properly
+ *
+ * If successful, the function returns zero; otherwise, an error number is
+ * returned.
+ */
+int
+heap_check(void *heap_start, uint64_t heap_size)
+{
+	if (heap_size < HEAP_MIN_SIZE) {
+		ERR("heap: invalid heap size");
+		return -1;
+	}
+
+	struct heap_layout *layout = heap_start;
+
+	if (heap_verify_header(&layout->header))
+		return -1;
+
+	for (unsigned i = 0; i < heap_max_zone(heap_size); ++i) {
+		if (heap_verify_zone(ZID_TO_ZONE(layout, i)))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_check_remote -- verifies if the heap of a remote pool is consistent
+ *	and can be opened properly
+ *
+ * If successful, the function returns zero; otherwise, an error number is
+ * returned.
+ */
+int
+heap_check_remote(void *heap_start, uint64_t heap_size, struct remote_ops *ops)
+{
+	if (heap_size < HEAP_MIN_SIZE) {
+		ERR("heap: invalid heap size");
+		return -1;
+	}
+
+	struct heap_layout *layout = heap_start;
+
+	struct heap_header header;
+	if (ops->read(ops->ctx, ops->base, &header, &layout->header,
+		sizeof(struct heap_header))) {
+		ERR("heap: obj_read_remote error");
+		return -1;
+	}
+
+	if (heap_verify_header(&header))
+		return -1;
+
+	struct zone *zone_buff = (struct zone *)Malloc(sizeof(struct zone));
+	if (zone_buff == NULL) {
+		ERR("heap: zone_buff malloc error");
+		return -1;
+	}
+	for (unsigned i = 0; i < heap_max_zone(heap_size); ++i) {
+		if (ops->read(ops->ctx, ops->base, zone_buff,
+			ZID_TO_ZONE(layout, i), sizeof(struct zone))) {
+			ERR("heap: obj_read_remote error");
+			goto out;
+		}
+
+		if (heap_verify_zone(zone_buff)) {
+			goto out;
+		}
+	}
+	Free(zone_buff);
+	return 0;
+
+out:
+	Free(zone_buff);
+	return -1;
+}
+
+/*
+ * heap_zone_foreach_object -- (internal) iterates through objects in a zone
+ */
+static int
+heap_zone_foreach_object(struct palloc_heap *heap, object_callback cb,
+	void *arg, struct memory_block *m)
+{
+	struct zone *zone = ZID_TO_ZONE(heap->layout, m->zone_id);
+	if (zone->header.magic == 0)
+		return 0;
+
+	for (; m->chunk_id < zone->header.size_idx; ) {
+		struct chunk_header *hdr = heap_get_chunk_hdr(heap, m);
+		memblock_rebuild_state(heap, m);
+		m->size_idx = hdr->size_idx;
+
+		if (m->m_ops->iterate_used(m, cb, arg) != 0)
+			return 1;
+
+		m->chunk_id += m->size_idx;
+		m->block_off = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * heap_foreach_object -- (internal) iterates through objects in the heap
+ */
+void
+heap_foreach_object(struct palloc_heap *heap, object_callback cb, void *arg,
+	struct memory_block m)
+{
+	for (; m.zone_id < heap->rt->nzones; ++m.zone_id) {
+		if (heap_zone_foreach_object(heap, cb, arg, &m) != 0)
+			break;
+
+		m.chunk_id = 0;
+	}
+}
+
+#if VG_MEMCHECK_ENABLED
+
+/*
+ * heap_vg_open -- notifies Valgrind about the heap layout
+ */
+void
+heap_vg_open(struct palloc_heap *heap, object_callback cb,
+	void *arg, int objects)
+{
+	ASSERTne(cb, NULL);
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(heap->layout, *heap->sizep);
+
+	struct heap_layout *layout = heap->layout;
+
+	VALGRIND_DO_MAKE_MEM_DEFINED(&layout->header, sizeof(layout->header));
+
+	unsigned zones = heap_max_zone(*heap->sizep);
+
+	struct memory_block m = MEMORY_BLOCK_NONE;
+	for (unsigned i = 0; i < zones; ++i) {
+		struct zone *z = ZID_TO_ZONE(layout, i);
+		uint32_t chunks;
+		m.zone_id = i;
+		m.chunk_id = 0;
+
+		VALGRIND_DO_MAKE_MEM_DEFINED(&z->header, sizeof(z->header));
+
+		if (z->header.magic != ZONE_HEADER_MAGIC)
+			continue;
+
+		chunks = z->header.size_idx;
+
+		for (uint32_t c = 0; c < chunks; ) {
+			struct chunk_header *hdr = &z->chunk_headers[c];
+
+			/* define the header before rebuilding state */
+			VALGRIND_DO_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
+
+			m.chunk_id = c;
+			m.size_idx = hdr->size_idx;
+
+			memblock_rebuild_state(heap, &m);
+
+			m.m_ops->vg_init(&m, objects, cb, arg);
+			m.block_off = 0;
+
+			ASSERT(hdr->size_idx > 0);
+
+			c += hdr->size_idx;
+		}
+
+		/* mark all unused chunk headers after last as not accessible */
+		VALGRIND_DO_MAKE_MEM_NOACCESS(&z->chunk_headers[chunks],
+			(MAX_CHUNK - chunks) * sizeof(struct chunk_header));
+	}
+}
+#endif
diff --git a/src/pmdk/src/libpmemobj/heap.h b/src/pmdk/src/libpmemobj/heap.h
new file mode 100644
index 000000000..ae5d3b3af
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/heap.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * heap.h -- internal definitions for heap
+ */
+
+#ifndef LIBPMEMOBJ_HEAP_H
+#define LIBPMEMOBJ_HEAP_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bucket.h"
+#include "memblock.h"
+#include "memops.h"
+#include "palloc.h"
+#include "os_thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HEAP_OFF_TO_PTR(heap, off) ((void *)((char *)((heap)->base) + (off)))
+#define HEAP_PTR_TO_OFF(heap, ptr)\
+	((uintptr_t)(ptr) - (uintptr_t)((heap)->base))
+
+#define BIT_IS_CLR(a, i) (!((a) & (1ULL << (i))))
+#define HEAP_ARENA_PER_THREAD (0)
+
+int heap_boot(struct palloc_heap *heap, void *heap_start, uint64_t heap_size,
+	uint64_t *sizep,
+	void *base, struct pmem_ops *p_ops,
+	struct stats *stats, struct pool_set *set);
+int heap_init(void *heap_start, uint64_t heap_size, uint64_t *sizep,
+	struct pmem_ops *p_ops);
+void heap_cleanup(struct palloc_heap *heap);
+int heap_check(void *heap_start, uint64_t heap_size);
+int heap_check_remote(void *heap_start, uint64_t heap_size,
+	struct remote_ops *ops);
+int heap_buckets_init(struct palloc_heap *heap);
+int heap_create_alloc_class_buckets(struct palloc_heap *heap,
+	struct alloc_class *c);
+
+int heap_extend(struct palloc_heap *heap, struct bucket *defb, size_t size);
+
+struct alloc_class *
+heap_get_best_class(struct palloc_heap *heap, size_t size);
+
+struct bucket *
+heap_bucket_acquire(struct palloc_heap *heap, uint8_t class_id,
+	uint16_t arena_id);
+
+void
+heap_bucket_release(struct palloc_heap *heap, struct bucket *b);
+
+int heap_get_bestfit_block(struct palloc_heap *heap, struct bucket *b,
+	struct memory_block *m);
+struct memory_block
+heap_coalesce_huge(struct palloc_heap *heap, struct bucket *b,
+	const struct memory_block *m);
+os_mutex_t *heap_get_run_lock(struct palloc_heap *heap,
+	uint32_t chunk_id);
+
+void
+heap_force_recycle(struct palloc_heap *heap);
+
+void
+heap_discard_run(struct palloc_heap *heap, struct memory_block *m);
+
+void
+heap_memblock_on_free(struct palloc_heap *heap, const struct memory_block *m);
+
+int
+heap_free_chunk_reuse(struct palloc_heap *heap,
+	struct bucket *bucket, struct memory_block *m);
+
+void heap_foreach_object(struct palloc_heap *heap, object_callback cb,
+	void *arg, struct memory_block start);
+
+struct alloc_class_collection *heap_alloc_classes(struct palloc_heap *heap);
+
+void *heap_end(struct palloc_heap *heap);
+
+unsigned heap_get_narenas_total(struct palloc_heap *heap);
+
+unsigned heap_get_narenas_max(struct palloc_heap *heap);
+
+int heap_set_narenas_max(struct palloc_heap *heap, unsigned size);
+
+unsigned heap_get_narenas_auto(struct palloc_heap *heap);
+
+unsigned heap_get_thread_arena_id(struct palloc_heap *heap);
+
+int heap_arena_create(struct palloc_heap *heap);
+
+struct bucket **
+heap_get_arena_buckets(struct palloc_heap *heap, unsigned arena_id);
+
+int heap_get_arena_auto(struct palloc_heap *heap, unsigned arena_id);
+
+int heap_set_arena_auto(struct palloc_heap *heap, unsigned arena_id,
+	int automatic);
+
+void heap_set_arena_thread(struct palloc_heap *heap, unsigned arena_id);
+
+void heap_vg_open(struct palloc_heap *heap, object_callback cb,
+	void *arg, int objects);
+
+static inline struct chunk_header *
+heap_get_chunk_hdr(struct palloc_heap *heap, const struct memory_block *m)
+{
+	return GET_CHUNK_HDR(heap->layout, m->zone_id, m->chunk_id);
+}
+
+static inline struct chunk *
+heap_get_chunk(struct palloc_heap *heap, const struct memory_block *m)
+{
+	return GET_CHUNK(heap->layout, m->zone_id, m->chunk_id);
+}
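Aside (not part of the patch): a hedged sketch of how the offset macros and inline accessors compose; heap is assumed to be a booted struct palloc_heap *, m a rebuilt struct memory_block, and off a valid heap offset:

/* sketch: offset <-> pointer round-trip and chunk metadata access */
void *ptr = HEAP_OFF_TO_PTR(heap, off);
uint64_t off2 = HEAP_PTR_TO_OFF(heap, ptr); /* off2 == off */

struct chunk_header *hdr = heap_get_chunk_hdr(heap, &m);
if (hdr->type == CHUNK_TYPE_RUN) {
	/* run metadata sits at the base of the chunk; heap_get_chunk_run
	   is defined just below */
	uint64_t bs = heap_get_chunk_run(heap, &m)->hdr.block_size;
	(void)bs;
}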
+
+static inline struct chunk_run *
+heap_get_chunk_run(struct palloc_heap *heap, const struct memory_block *m)
+{
+	return GET_CHUNK_RUN(heap->layout, m->zone_id, m->chunk_id);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/libpmemobj/heap_layout.h b/src/pmdk/src/libpmemobj/heap_layout.h
new file mode 100644
index 000000000..94554f2cc
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/heap_layout.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * heap_layout.h -- internal definitions for heap layout
+ */
+
+#ifndef LIBPMEMOBJ_HEAP_LAYOUT_H
+#define LIBPMEMOBJ_HEAP_LAYOUT_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HEAP_MAJOR 1
+#define HEAP_MINOR 0
+
+#define MAX_CHUNK (UINT16_MAX - 7) /* has to be multiple of 8 */
+#define CHUNK_BASE_ALIGNMENT 1024
+#define CHUNKSIZE ((size_t)1024 * 256) /* 256 kilobytes */
+#define MAX_MEMORY_BLOCK_SIZE (MAX_CHUNK * CHUNKSIZE)
+#define HEAP_SIGNATURE_LEN 16
+#define HEAP_SIGNATURE "MEMORY_HEAP_HDR\0"
+#define ZONE_HEADER_MAGIC 0xC3F0A2D2
+#define ZONE_MIN_SIZE (sizeof(struct zone) + sizeof(struct chunk))
+#define ZONE_MAX_SIZE (sizeof(struct zone) + sizeof(struct chunk) * MAX_CHUNK)
+#define HEAP_MIN_SIZE (sizeof(struct heap_layout) + ZONE_MIN_SIZE)
+
+/* Base bitmap values, relevant for both normal and flexible bitmaps */
+#define RUN_BITS_PER_VALUE 64U
+#define RUN_BASE_METADATA_VALUES\
+	((unsigned)(sizeof(struct chunk_run_header) / sizeof(uint64_t)))
+#define RUN_BASE_METADATA_SIZE (sizeof(struct chunk_run_header))
+
+#define RUN_CONTENT_SIZE (CHUNKSIZE - RUN_BASE_METADATA_SIZE)
+
+/*
+ * Calculates the size in bytes of a single run instance, including the bitmap
+ */
+#define RUN_CONTENT_SIZE_BYTES(size_idx)\
+(RUN_CONTENT_SIZE + (((size_idx) - 1) * CHUNKSIZE))
+
+/* Default bitmap values, specific for old, non-flexible, bitmaps */
+#define RUN_DEFAULT_METADATA_VALUES 40 /* in 8 byte words, 320 bytes total */
+#define RUN_DEFAULT_BITMAP_VALUES \
+	(RUN_DEFAULT_METADATA_VALUES - RUN_BASE_METADATA_VALUES)
+#define RUN_DEFAULT_BITMAP_SIZE (sizeof(uint64_t) * RUN_DEFAULT_BITMAP_VALUES)
+#define RUN_DEFAULT_BITMAP_NBITS\
+	(RUN_BITS_PER_VALUE * RUN_DEFAULT_BITMAP_VALUES)
+#define RUN_DEFAULT_SIZE \
+	(CHUNKSIZE - RUN_BASE_METADATA_SIZE - RUN_DEFAULT_BITMAP_SIZE)
+
+/*
+ * Calculates the size in bytes of a single run instance, without the bitmap,
+ * but only for the default fixed-bitmap algorithm
+ */
+#define RUN_DEFAULT_SIZE_BYTES(size_idx)\
+(RUN_DEFAULT_SIZE + (((size_idx) - 1) * CHUNKSIZE))
+
+#define CHUNK_MASK ((CHUNKSIZE) - 1)
+#define CHUNK_ALIGN_UP(value) ((((value) + CHUNK_MASK) & ~CHUNK_MASK))
+
+enum chunk_flags {
+	CHUNK_FLAG_COMPACT_HEADER = 0x0001,
+	CHUNK_FLAG_HEADER_NONE = 0x0002,
+	CHUNK_FLAG_ALIGNED = 0x0004,
+	CHUNK_FLAG_FLEX_BITMAP = 0x0008,
+};
+
+#define CHUNK_FLAGS_ALL_VALID (\
+	CHUNK_FLAG_COMPACT_HEADER |\
+	CHUNK_FLAG_HEADER_NONE |\
+	CHUNK_FLAG_ALIGNED |\
+	CHUNK_FLAG_FLEX_BITMAP\
+)
+
+enum chunk_type {
+	CHUNK_TYPE_UNKNOWN,
+	CHUNK_TYPE_FOOTER, /* not actual chunk type */
+	CHUNK_TYPE_FREE,
+	CHUNK_TYPE_USED,
+	CHUNK_TYPE_RUN,
+	CHUNK_TYPE_RUN_DATA,
+
+	MAX_CHUNK_TYPE
+};
+
+struct chunk {
+	uint8_t data[CHUNKSIZE];
+};
+
+struct chunk_run_header {
+	uint64_t block_size;
+	uint64_t alignment; /* valid only w/ CHUNK_FLAG_ALIGNED */
+};
+
+struct chunk_run {
+	struct chunk_run_header hdr;
+	uint8_t content[RUN_CONTENT_SIZE]; /* bitmap + data */
+};
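Aside (not part of the patch), a worked example of the bitmap arithmetic above: RUN_BASE_METADATA_VALUES = sizeof(struct chunk_run_header) / 8 = 2, so RUN_DEFAULT_BITMAP_VALUES = 40 - 2 = 38 words, i.e. a 304-byte bitmap covering up to 38 * 64 = 2432 blocks, which leaves RUN_DEFAULT_SIZE = 262144 - 16 - 304 = 261824 bytes of usable data in a single-chunk run.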
+
+struct chunk_header {
+	uint16_t type;
+	uint16_t flags;
+	uint32_t size_idx;
+};
+
+struct zone_header {
+	uint32_t magic;
+	uint32_t size_idx;
+	uint8_t reserved[56];
+};
+
+struct zone {
+	struct zone_header header;
+	struct chunk_header chunk_headers[MAX_CHUNK];
+	struct chunk chunks[];
+};
+
+struct heap_header {
+	char signature[HEAP_SIGNATURE_LEN];
+	uint64_t major;
+	uint64_t minor;
+	uint64_t unused; /* might be garbage */
+	uint64_t chunksize;
+	uint64_t chunks_per_zone;
+	uint8_t reserved[960];
+	uint64_t checksum;
+};
+
+struct heap_layout {
+	struct heap_header header;
+	struct zone zone0;	/* first element of zones array */
+};
+
+#define ALLOC_HDR_SIZE_SHIFT (48ULL)
+#define ALLOC_HDR_FLAGS_MASK (((1ULL) << ALLOC_HDR_SIZE_SHIFT) - 1)
+
+struct allocation_header_legacy {
+	uint8_t unused[8];
+	uint64_t size;
+	uint8_t unused2[32];
+	uint64_t root_size;
+	uint64_t type_num;
+};
+
+#define ALLOC_HDR_COMPACT_SIZE sizeof(struct allocation_header_compact)
+
+struct allocation_header_compact {
+	uint64_t size;
+	uint64_t extra;
+};
+
+enum header_type {
+	HEADER_LEGACY,
+	HEADER_COMPACT,
+	HEADER_NONE,
+
+	MAX_HEADER_TYPES
+};
+
+static const size_t header_type_to_size[MAX_HEADER_TYPES] = {
+	sizeof(struct allocation_header_legacy),
+	sizeof(struct allocation_header_compact),
+	0
+};
+
+static const enum chunk_flags header_type_to_flag[MAX_HEADER_TYPES] = {
+	(enum chunk_flags)0,
+	CHUNK_FLAG_COMPACT_HEADER,
+	CHUNK_FLAG_HEADER_NONE
+};
+
+static inline struct zone *
+ZID_TO_ZONE(struct heap_layout *layout, size_t zone_id)
+{
+	return (struct zone *)
+		((uintptr_t)&layout->zone0 + ZONE_MAX_SIZE * zone_id);
+}
+
+static inline struct chunk_header *
+GET_CHUNK_HDR(struct heap_layout *layout, size_t zone_id, unsigned chunk_id)
+{
+	return &ZID_TO_ZONE(layout, zone_id)->chunk_headers[chunk_id];
+}
+
+static inline struct chunk *
+GET_CHUNK(struct heap_layout *layout, size_t zone_id, unsigned chunk_id)
+{
+	return &ZID_TO_ZONE(layout, zone_id)->chunks[chunk_id];
+}
+
+static inline struct chunk_run *
+GET_CHUNK_RUN(struct heap_layout *layout, size_t zone_id, unsigned chunk_id)
+{
+	return (struct chunk_run *)GET_CHUNK(layout, zone_id, chunk_id);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/libpmemobj/lane.c b/src/pmdk/src/libpmemobj/lane.c
new file mode 100644
index 000000000..350da814a
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/lane.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2015-2019, Intel Corporation */
+
+/*
+ * lane.c -- lane implementation
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <inttypes.h>
+#include <errno.h>
+#include <limits.h>
+#include <sched.h>
+
+#include "libpmemobj.h"
+#include "critnib.h"
+#include "lane.h"
+#include "out.h"
+#include "util.h"
+#include "obj.h"
+#include "os_thread.h"
+#include "valgrind_internal.h"
+#include "memops.h"
+#include "palloc.h"
+#include "tx.h"
+
+static os_tls_key_t Lane_info_key;
+
+static __thread struct critnib *Lane_info_ht;
+static __thread struct lane_info *Lane_info_records;
+static __thread struct lane_info *Lane_info_cache;
+
+/*
+ * lane_info_create -- (internal) constructor for thread shared data
+ */
+static inline void
+lane_info_create(void)
+{
+	Lane_info_ht = critnib_new();
+	if (Lane_info_ht == NULL)
+		FATAL("critnib_new");
+}
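Aside (not part of the patch): lane.c is the main consumer of the critnib index added earlier in this patch. A hedged sketch of the API per the declarations in critnib.h, with pop and info as used in the surrounding code:

/* sketch: a 64-bit-keyed map from pool uuid to per-pool lane info */
struct critnib *ht = critnib_new();
if (ht == NULL)
	FATAL("critnib_new");

if (critnib_insert(ht, pop->uuid_lo, info) != 0)
	FATAL("critnib_insert"); /* e.g. out of memory */

struct lane_info *hit = critnib_get(ht, pop->uuid_lo); /* == info */
struct lane_info *removed = critnib_remove(ht, pop->uuid_lo);

critnib_delete(ht);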
+
+/*
+ * lane_info_delete -- (internal) deletes the lane info hash table
+ */
+static inline void
+lane_info_delete(void)
+{
+	if (unlikely(Lane_info_ht == NULL))
+		return;
+
+	critnib_delete(Lane_info_ht);
+	struct lane_info *record;
+	struct lane_info *head = Lane_info_records;
+	while (head != NULL) {
+		record = head;
+		head = head->next;
+		Free(record);
+	}
+
+	Lane_info_ht = NULL;
+	Lane_info_records = NULL;
+	Lane_info_cache = NULL;
+}
+
+/*
+ * lane_info_ht_boot -- (internal) boots lane info and adds it to thread
+ *	shared data
+ */
+static inline void
+lane_info_ht_boot(void)
+{
+	lane_info_create();
+	int result = os_tls_set(Lane_info_key, Lane_info_ht);
+	if (result != 0) {
+		errno = result;
+		FATAL("!os_tls_set");
+	}
+}
+
+/*
+ * lane_info_ht_destroy -- (internal) destructor for thread shared data
+ */
+static inline void
+lane_info_ht_destroy(void *ht)
+{
+	lane_info_delete();
+}
+
+/*
+ * lane_info_boot -- initializes the lane info hash table and lane info key
+ */
+void
+lane_info_boot(void)
+{
+	int result = os_tls_key_create(&Lane_info_key, lane_info_ht_destroy);
+	if (result != 0) {
+		errno = result;
+		FATAL("!os_tls_key_create");
+	}
+}
+
+/*
+ * lane_info_destroy -- destroys the lane info hash table
+ */
+void
+lane_info_destroy(void)
+{
+	lane_info_delete();
+	(void) os_tls_key_delete(Lane_info_key);
+}
+
+/*
+ * lane_info_cleanup -- removes the lane info record for the pool being
+ *	deleted
+ */
+static inline void
+lane_info_cleanup(PMEMobjpool *pop)
+{
+	if (unlikely(Lane_info_ht == NULL))
+		return;
+
+	struct lane_info *info = critnib_remove(Lane_info_ht, pop->uuid_lo);
+	if (likely(info != NULL)) {
+		if (info->prev)
+			info->prev->next = info->next;
+
+		if (info->next)
+			info->next->prev = info->prev;
+
+		if (Lane_info_cache == info)
+			Lane_info_cache = NULL;
+
+		if (Lane_info_records == info)
+			Lane_info_records = info->next;
+
+		Free(info);
+	}
+}
+
+/*
+ * lane_get_layout -- (internal) calculates the real pointer to the lane
+ *	layout
+ */
+static struct lane_layout *
+lane_get_layout(PMEMobjpool *pop, uint64_t lane_idx)
+{
+	return (void *)((char *)pop + pop->lanes_offset +
+		sizeof(struct lane_layout) * lane_idx);
+}
+
+/*
+ * lane_ulog_constructor -- (internal) constructor of a ulog extension
+ */
+static int
+lane_ulog_constructor(void *base, void *ptr, size_t usable_size, void *arg)
+{
+	PMEMobjpool *pop = base;
+	const struct pmem_ops *p_ops = &pop->p_ops;
+
+	size_t capacity = ALIGN_DOWN(usable_size - sizeof(struct ulog),
+		CACHELINE_SIZE);
+
+	uint64_t gen_num = *(uint64_t *)arg;
+	ulog_construct(OBJ_PTR_TO_OFF(base, ptr), capacity,
+		gen_num, 1, 0, p_ops);
+
+	return 0;
+}
+
+/*
+ * lane_undo_extend -- allocates a new undo log
+ */
+static int
+lane_undo_extend(void *base, uint64_t *redo, uint64_t gen_num)
+{
+	PMEMobjpool *pop = base;
+	struct tx_parameters *params = pop->tx_params;
+	size_t s = SIZEOF_ALIGNED_ULOG(params->cache_size);
+
+	return pmalloc_construct(base, redo, s, lane_ulog_constructor, &gen_num,
+		0, OBJ_INTERNAL_OBJECT_MASK, 0);
+}
+
+/*
+ * lane_redo_extend -- allocates a new redo log
+ */
+static int
+lane_redo_extend(void *base, uint64_t *redo, uint64_t gen_num)
+{
+	size_t s = SIZEOF_ALIGNED_ULOG(LANE_REDO_EXTERNAL_SIZE);
+
+	return pmalloc_construct(base, redo, s, lane_ulog_constructor, &gen_num,
+		0, OBJ_INTERNAL_OBJECT_MASK, 0);
+}
+
+/*
+ * lane_init -- (internal) initializes a single lane's runtime variables
+ */
+static int
+lane_init(PMEMobjpool *pop, struct lane *lane, struct lane_layout *layout)
+{
+	ASSERTne(lane, NULL);
+
+	lane->layout = layout;
+
+	lane->internal = operation_new((struct ulog *)&layout->internal,
+		LANE_REDO_INTERNAL_SIZE,
+		NULL, NULL, &pop->p_ops,
+		LOG_TYPE_REDO);
+	if (lane->internal == NULL)
+		goto error_internal_new;
+
lane_redo_extend, (ulog_free_fn)pfree, &pop->p_ops, + LOG_TYPE_REDO); + if (lane->external == NULL) + goto error_external_new; + + lane->undo = operation_new((struct ulog *)&layout->undo, + LANE_UNDO_SIZE, + lane_undo_extend, (ulog_free_fn)pfree, &pop->p_ops, + LOG_TYPE_UNDO); + if (lane->undo == NULL) + goto error_undo_new; + + return 0; + +error_undo_new: + operation_delete(lane->external); +error_external_new: + operation_delete(lane->internal); +error_internal_new: + return -1; +} + +/* + * lane_destroy -- cleanups a single lane runtime variables + */ +static void +lane_destroy(PMEMobjpool *pop, struct lane *lane) +{ + operation_delete(lane->undo); + operation_delete(lane->internal); + operation_delete(lane->external); +} + +/* + * lane_boot -- initializes all lanes + */ +int +lane_boot(PMEMobjpool *pop) +{ + int err = 0; + + pop->lanes_desc.lane = Malloc(sizeof(struct lane) * pop->nlanes); + if (pop->lanes_desc.lane == NULL) { + err = ENOMEM; + ERR("!Malloc of volatile lanes"); + goto error_lanes_malloc; + } + + pop->lanes_desc.next_lane_idx = 0; + + pop->lanes_desc.lane_locks = + Zalloc(sizeof(*pop->lanes_desc.lane_locks) * pop->nlanes); + if (pop->lanes_desc.lane_locks == NULL) { + ERR("!Malloc for lane locks"); + goto error_locks_malloc; + } + + /* add lanes to pmemcheck ignored list */ + VALGRIND_ADD_TO_GLOBAL_TX_IGNORE((char *)pop + pop->lanes_offset, + (sizeof(struct lane_layout) * pop->nlanes)); + + uint64_t i; + for (i = 0; i < pop->nlanes; ++i) { + struct lane_layout *layout = lane_get_layout(pop, i); + + if ((err = lane_init(pop, &pop->lanes_desc.lane[i], layout))) { + ERR("!lane_init"); + goto error_lane_init; + } + } + + return 0; + +error_lane_init: + for (; i >= 1; --i) + lane_destroy(pop, &pop->lanes_desc.lane[i - 1]); + Free(pop->lanes_desc.lane_locks); + pop->lanes_desc.lane_locks = NULL; +error_locks_malloc: + Free(pop->lanes_desc.lane); + pop->lanes_desc.lane = NULL; +error_lanes_malloc: + return err; +} + +/* + * lane_init_data -- initializes ulogs for all the lanes + */ +void +lane_init_data(PMEMobjpool *pop) +{ + struct lane_layout *layout; + + for (uint64_t i = 0; i < pop->nlanes; ++i) { + layout = lane_get_layout(pop, i); + ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->internal), + LANE_REDO_INTERNAL_SIZE, 0, 0, 0, &pop->p_ops); + ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->external), + LANE_REDO_EXTERNAL_SIZE, 0, 0, 0, &pop->p_ops); + ulog_construct(OBJ_PTR_TO_OFF(pop, &layout->undo), + LANE_UNDO_SIZE, 0, 0, 0, &pop->p_ops); + } + layout = lane_get_layout(pop, 0); + pmemops_xpersist(&pop->p_ops, layout, + pop->nlanes * sizeof(struct lane_layout), + PMEMOBJ_F_RELAXED); +} + +/* + * lane_cleanup -- destroys all lanes + */ +void +lane_cleanup(PMEMobjpool *pop) +{ + for (uint64_t i = 0; i < pop->nlanes; ++i) + lane_destroy(pop, &pop->lanes_desc.lane[i]); + + Free(pop->lanes_desc.lane); + pop->lanes_desc.lane = NULL; + Free(pop->lanes_desc.lane_locks); + pop->lanes_desc.lane_locks = NULL; + + lane_info_cleanup(pop); +} + +/* + * lane_recover_and_section_boot -- performs initialization and recovery of all + * lanes + */ +int +lane_recover_and_section_boot(PMEMobjpool *pop) +{ + COMPILE_ERROR_ON(SIZEOF_ULOG(LANE_UNDO_SIZE) + + SIZEOF_ULOG(LANE_REDO_EXTERNAL_SIZE) + + SIZEOF_ULOG(LANE_REDO_INTERNAL_SIZE) != LANE_TOTAL_SIZE); + + int err = 0; + uint64_t i; /* lane index */ + struct lane_layout *layout; + + /* + * First we need to recover the internal/external redo logs so that the + * allocator state is consistent before we boot it. 
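+	 *
+	 * In outline, the recovery sequence implemented below is:
+	 *
+	 *	1. ulog_recover() on each lane's internal and external
+	 *	   redo log,
+	 *	2. pmalloc_boot() -- the heap boots on metadata that is
+	 *	   now consistent,
+	 *	3. operation_resume()/process()/finish() on each lane's
+	 *	   undo log.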
+ */ + for (i = 0; i < pop->nlanes; ++i) { + layout = lane_get_layout(pop, i); + + ulog_recover((struct ulog *)&layout->internal, + OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops); + ulog_recover((struct ulog *)&layout->external, + OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops); + } + + if ((err = pmalloc_boot(pop)) != 0) + return err; + + /* + * Undo logs must be processed after the heap is initialized since + * a undo recovery might require deallocation of the next ulogs. + */ + for (i = 0; i < pop->nlanes; ++i) { + struct operation_context *ctx = pop->lanes_desc.lane[i].undo; + operation_resume(ctx); + operation_process(ctx); + operation_finish(ctx, ULOG_INC_FIRST_GEN_NUM | + ULOG_FREE_AFTER_FIRST); + } + + return 0; +} + +/* + * lane_section_cleanup -- performs runtime cleanup of all lanes + */ +int +lane_section_cleanup(PMEMobjpool *pop) +{ + return pmalloc_cleanup(pop); +} + +/* + * lane_check -- performs check of all lanes + */ +int +lane_check(PMEMobjpool *pop) +{ + int err = 0; + uint64_t j; /* lane index */ + struct lane_layout *layout; + + for (j = 0; j < pop->nlanes; ++j) { + layout = lane_get_layout(pop, j); + if (ulog_check((struct ulog *)&layout->internal, + OBJ_OFF_IS_VALID_FROM_CTX, &pop->p_ops) != 0) { + LOG(2, "lane %" PRIu64 " internal redo failed: %d", + j, err); + return err; + } + } + + return 0; +} + +/* + * get_lane -- (internal) get free lane index + */ +static inline void +get_lane(uint64_t *locks, struct lane_info *info, uint64_t nlocks) +{ + info->lane_idx = info->primary; + while (1) { + do { + info->lane_idx %= nlocks; + if (likely(util_bool_compare_and_swap64( + &locks[info->lane_idx], 0, 1))) { + if (info->lane_idx == info->primary) { + info->primary_attempts = + LANE_PRIMARY_ATTEMPTS; + } else if (info->primary_attempts == 0) { + info->primary = info->lane_idx; + info->primary_attempts = + LANE_PRIMARY_ATTEMPTS; + } + return; + } + + if (info->lane_idx == info->primary && + info->primary_attempts > 0) { + info->primary_attempts--; + } + + ++info->lane_idx; + } while (info->lane_idx < nlocks); + + sched_yield(); + } +} + +/* + * get_lane_info_record -- (internal) get lane record attached to memory pool + * or first free + */ +static inline struct lane_info * +get_lane_info_record(PMEMobjpool *pop) +{ + if (likely(Lane_info_cache != NULL && + Lane_info_cache->pop_uuid_lo == pop->uuid_lo)) { + return Lane_info_cache; + } + + if (unlikely(Lane_info_ht == NULL)) { + lane_info_ht_boot(); + } + + struct lane_info *info = critnib_get(Lane_info_ht, pop->uuid_lo); + + if (unlikely(info == NULL)) { + info = Malloc(sizeof(struct lane_info)); + if (unlikely(info == NULL)) { + FATAL("Malloc"); + } + info->pop_uuid_lo = pop->uuid_lo; + info->lane_idx = UINT64_MAX; + info->nest_count = 0; + info->next = Lane_info_records; + info->prev = NULL; + info->primary = 0; + info->primary_attempts = LANE_PRIMARY_ATTEMPTS; + if (Lane_info_records) { + Lane_info_records->prev = info; + } + Lane_info_records = info; + + if (unlikely(critnib_insert( + Lane_info_ht, pop->uuid_lo, info) != 0)) { + FATAL("critnib_insert"); + } + } + + Lane_info_cache = info; + return info; +} + +/* + * lane_hold -- grabs a per-thread lane in a round-robin fashion + */ +unsigned +lane_hold(PMEMobjpool *pop, struct lane **lanep) +{ + /* + * Before runtime lane initialization all remote operations are + * executed using RLANE_DEFAULT. 
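+ *
+ * A minimal hold/release sketch (error handling omitted; the middle
+ * line stands in for arbitrary lane-backed work):
+ *
+ *	struct lane *lane;
+ *	unsigned idx = lane_hold(pop, &lane);
+ *	operation_start(lane->external);
+ *	... build and process the redo log ...
+ *	lane_release(pop);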
+ */ + if (unlikely(!pop->lanes_desc.runtime_nlanes)) { + ASSERT(pop->has_remote_replicas); + if (lanep != NULL) + FATAL("cannot obtain section before lane's init"); + return RLANE_DEFAULT; + } + + struct lane_info *lane = get_lane_info_record(pop); + while (unlikely(lane->lane_idx == UINT64_MAX)) { + /* initial wrap to next CL */ + lane->primary = lane->lane_idx = util_fetch_and_add32( + &pop->lanes_desc.next_lane_idx, LANE_JUMP); + } /* handles wraparound */ + + uint64_t *llocks = pop->lanes_desc.lane_locks; + /* grab next free lane from lanes available at runtime */ + if (!lane->nest_count++) { + get_lane(llocks, lane, pop->lanes_desc.runtime_nlanes); + } + + struct lane *l = &pop->lanes_desc.lane[lane->lane_idx]; + + /* reinitialize lane's content only if in outermost hold */ + if (lanep && lane->nest_count == 1) { + VALGRIND_ANNOTATE_NEW_MEMORY(l, sizeof(*l)); + VALGRIND_ANNOTATE_NEW_MEMORY(l->layout, sizeof(*l->layout)); + operation_init(l->external); + operation_init(l->internal); + operation_init(l->undo); + } + + if (lanep) + *lanep = l; + + return (unsigned)lane->lane_idx; +} + +/* + * lane_release -- drops the per-thread lane + */ +void +lane_release(PMEMobjpool *pop) +{ + if (unlikely(!pop->lanes_desc.runtime_nlanes)) { + ASSERT(pop->has_remote_replicas); + return; + } + + struct lane_info *lane = get_lane_info_record(pop); + + ASSERTne(lane, NULL); + ASSERTne(lane->lane_idx, UINT64_MAX); + + if (unlikely(lane->nest_count == 0)) { + FATAL("lane_release"); + } else if (--(lane->nest_count) == 0) { + if (unlikely(!util_bool_compare_and_swap64( + &pop->lanes_desc.lane_locks[lane->lane_idx], + 1, 0))) { + FATAL("util_bool_compare_and_swap64"); + } + } +} diff --git a/src/pmdk/src/libpmemobj/lane.h b/src/pmdk/src/libpmemobj/lane.h new file mode 100644 index 000000000..1dfc29d28 --- /dev/null +++ b/src/pmdk/src/libpmemobj/lane.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * lane.h -- internal definitions for lanes + */ + +#ifndef LIBPMEMOBJ_LANE_H +#define LIBPMEMOBJ_LANE_H 1 + +#include +#include "ulog.h" +#include "libpmemobj.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Distance between lanes used by threads required to prevent threads from + * false sharing part of lanes array. Used if properly spread lanes are + * available. Otherwise less spread out lanes would be used. + */ +#define LANE_JUMP (64 / sizeof(uint64_t)) + +/* + * Number of times the algorithm will try to reacquire the primary lane for the + * thread. If this threshold is exceeded, a new primary lane is selected for the + * thread. + */ +#define LANE_PRIMARY_ATTEMPTS 128 + +#define RLANE_DEFAULT 0 + +#define LANE_TOTAL_SIZE 3072 /* 3 * 1024 (sum of 3 old lane sections) */ +/* + * We have 3 kilobytes to distribute. + * The smallest capacity is needed for the internal redo log for which we can + * accurately calculate the maximum number of occupied space: 48 bytes, + * 3 times sizeof(struct ulog_entry_val). One for bitmap OR, second for bitmap + * AND, third for modification of the destination pointer. For future needs, + * this has been bumped up to 12 ulog entries. + * + * The remaining part has to be split between transactional redo and undo logs, + * and since by far the most space consuming operations are transactional + * snapshots, most of the space, 2 kilobytes, is assigned to the undo log. + * After that, the remainder, 640 bytes, or 40 ulog entries, is left for the + * transactional redo logs. 
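+ *
+ * For a 64-byte struct ulog the macros below work out as follows
+ * (a worked example, not additional requirements):
+ *
+ *	LANE_REDO_INTERNAL_SIZE = ALIGN_UP(256 - 64, 64)    =  192
+ *	LANE_REDO_EXTERNAL_SIZE = ALIGN_UP(704 - 64, 64)    =  640
+ *	LANE_UNDO_SIZE          = 3072 - 640 - 192 - 3 * 64 = 2048
+ *
+ * The three capacities plus the three 64-byte ulog headers add up to
+ * exactly LANE_TOTAL_SIZE: 192 + 640 + 2048 + 3 * 64 = 3072.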
+ * Thanks to this distribution, all small and medium transactions should be + * entirely performed without allocating any additional metadata. + * + * These values must be cacheline size aligned to be used for ulogs. Therefore + * they are parametrized for the size of the struct ulog changes between + * platforms. + */ +#define LANE_UNDO_SIZE (LANE_TOTAL_SIZE \ + - LANE_REDO_EXTERNAL_SIZE \ + - LANE_REDO_INTERNAL_SIZE \ + - 3 * sizeof(struct ulog)) /* 2048 for 64B ulog */ +#define LANE_REDO_EXTERNAL_SIZE ALIGN_UP(704 - sizeof(struct ulog), \ + CACHELINE_SIZE) /* 640 for 64B ulog */ +#define LANE_REDO_INTERNAL_SIZE ALIGN_UP(256 - sizeof(struct ulog), \ + CACHELINE_SIZE) /* 192 for 64B ulog */ + +struct lane_layout { + /* + * Redo log for self-contained and 'one-shot' allocator operations. + * Cannot be extended. + */ + struct ULOG(LANE_REDO_INTERNAL_SIZE) internal; + /* + * Redo log for large operations/transactions. + * Can be extended by the use of internal ulog. + */ + struct ULOG(LANE_REDO_EXTERNAL_SIZE) external; + /* + * Undo log for snapshots done in a transaction. + * Can be extended/shrunk by the use of internal ulog. + */ + struct ULOG(LANE_UNDO_SIZE) undo; +}; + +struct lane { + struct lane_layout *layout; /* pointer to persistent layout */ + struct operation_context *internal; /* context for internal ulog */ + struct operation_context *external; /* context for external ulog */ + struct operation_context *undo; /* context for undo ulog */ +}; + +struct lane_descriptor { + /* + * Number of lanes available at runtime must be <= total number of lanes + * available in the pool. Number of lanes can be limited by shortage of + * other resources e.g. available RNIC's submission queue sizes. + */ + unsigned runtime_nlanes; + unsigned next_lane_idx; + uint64_t *lane_locks; + struct lane *lane; +}; + +typedef int (*section_layout_op)(PMEMobjpool *pop, void *data, unsigned length); +typedef void *(*section_constr)(PMEMobjpool *pop, void *data); +typedef void (*section_destr)(PMEMobjpool *pop, void *rt); +typedef int (*section_global_op)(PMEMobjpool *pop); + +struct section_operations { + section_constr construct_rt; + section_destr destroy_rt; + section_layout_op check; + section_layout_op recover; + section_global_op boot; + section_global_op cleanup; +}; + +struct lane_info { + uint64_t pop_uuid_lo; + uint64_t lane_idx; + unsigned long nest_count; + + /* + * The index of the primary lane for the thread. A thread will always + * try to acquire the primary lane first, and only if that fails it will + * look for a different available lane. 
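+ *
+ * Once the primary lane has been found busy LANE_PRIMARY_ATTEMPTS
+ * times, get_lane() adopts the next lane it successfully acquires as
+ * the new primary, migrating the thread away from a contended lane.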
+ */ + uint64_t primary; + int primary_attempts; + + struct lane_info *prev, *next; +}; + +void lane_info_boot(void); +void lane_info_destroy(void); + +void lane_init_data(PMEMobjpool *pop); +int lane_boot(PMEMobjpool *pop); +void lane_cleanup(PMEMobjpool *pop); +int lane_recover_and_section_boot(PMEMobjpool *pop); +int lane_section_cleanup(PMEMobjpool *pop); +int lane_check(PMEMobjpool *pop); + +unsigned lane_hold(PMEMobjpool *pop, struct lane **lane); +void lane_release(PMEMobjpool *pop); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/libpmemobj.c b/src/pmdk/src/libpmemobj/libpmemobj.c new file mode 100644 index 000000000..36fb2128d --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2017, Intel Corporation */ + +/* + * libpmemobj.c -- pmem entry points for libpmemobj + */ + +#include "pmemcommon.h" +#include "obj.h" + +/* + * libpmemobj_init -- load-time initialization for obj + * + * Called automatically by the run-time loader. + */ +ATTR_CONSTRUCTOR +void +libpmemobj_init(void) +{ + common_init(PMEMOBJ_LOG_PREFIX, PMEMOBJ_LOG_LEVEL_VAR, + PMEMOBJ_LOG_FILE_VAR, PMEMOBJ_MAJOR_VERSION, + PMEMOBJ_MINOR_VERSION); + LOG(3, NULL); + obj_init(); +} + +/* + * libpmemobj_fini -- libpmemobj cleanup routine + * + * Called automatically when the process terminates. + */ +ATTR_DESTRUCTOR +void +libpmemobj_fini(void) +{ + LOG(3, NULL); + obj_fini(); + common_fini(); +} + +/* + * pmemobj_check_versionU -- see if lib meets application version requirements + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemobj_check_versionU(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != PMEMOBJ_MAJOR_VERSION) { + ERR("libpmemobj major version mismatch (need %u, found %u)", + major_required, PMEMOBJ_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > PMEMOBJ_MINOR_VERSION) { + ERR("libpmemobj minor version mismatch (need %u, found %u)", + minor_required, PMEMOBJ_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +#ifndef _WIN32 +/* + * pmemobj_check_version -- see if lib meets application version requirements + */ +const char * +pmemobj_check_version(unsigned major_required, unsigned minor_required) +{ + return pmemobj_check_versionU(major_required, minor_required); +} +#else +/* + * pmemobj_check_versionW -- see if lib meets application version requirements + */ +const wchar_t * +pmemobj_check_versionW(unsigned major_required, unsigned minor_required) +{ + if (pmemobj_check_versionU(major_required, minor_required) != NULL) + return out_get_errormsgW(); + else + return NULL; +} +#endif + +/* + * pmemobj_set_funcs -- allow overriding libpmemobj's call to malloc, etc. 
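+ *
+ * A minimal usage sketch; my_malloc, my_free, my_realloc and
+ * my_strdup are hypothetical application hooks matching the
+ * prototypes below, installed before any other libpmemobj call:
+ *
+ *	pmemobj_set_funcs(my_malloc, my_free, my_realloc, my_strdup);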
+ */ +void +pmemobj_set_funcs( + void *(*malloc_func)(size_t size), + void (*free_func)(void *ptr), + void *(*realloc_func)(void *ptr, size_t size), + char *(*strdup_func)(const char *s)) +{ + LOG(3, NULL); + + util_set_alloc_funcs(malloc_func, free_func, realloc_func, strdup_func); +} + +/* + * pmemobj_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmemobj_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmemobj_errormsg -- return last error message + */ +const char * +pmemobj_errormsg(void) +{ + return pmemobj_errormsgU(); +} +#else +/* + * pmemobj_errormsgW -- return last error message as wchar_t + */ +const wchar_t * +pmemobj_errormsgW(void) +{ + return out_get_errormsgW(); +} +#endif diff --git a/src/pmdk/src/libpmemobj/libpmemobj.def b/src/pmdk/src/libpmemobj/libpmemobj.def new file mode 100644 index 000000000..d20fa256d --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.def @@ -0,0 +1,124 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2015-2020, Intel Corporation +;;;; End Copyright Notice + +LIBRARY libpmemobj + +VERSION 1.0 + +EXPORTS + pmemobj_check_versionU + pmemobj_check_versionW + pmemobj_set_funcs + pmemobj_errormsgU + pmemobj_errormsgW + pmemobj_createU + pmemobj_createW + pmemobj_openU + pmemobj_openW + pmemobj_close + pmemobj_checkU + pmemobj_checkW + pmemobj_mutex_zero + pmemobj_mutex_lock + pmemobj_mutex_trylock + pmemobj_mutex_unlock + pmemobj_mutex_timedlock + pmemobj_rwlock_zero + pmemobj_rwlock_rdlock + pmemobj_rwlock_wrlock + pmemobj_rwlock_timedrdlock + pmemobj_rwlock_timedwrlock + pmemobj_rwlock_tryrdlock + pmemobj_rwlock_trywrlock + pmemobj_rwlock_unlock + pmemobj_cond_zero + pmemobj_cond_broadcast + pmemobj_cond_signal + pmemobj_cond_timedwait + pmemobj_cond_wait + pmemobj_ctl_execU; + pmemobj_ctl_execW; + pmemobj_ctl_getU; + pmemobj_ctl_getW; + pmemobj_ctl_setU; + pmemobj_ctl_setW; + pmemobj_pool_by_oid + pmemobj_pool_by_ptr + pmemobj_alloc + pmemobj_xalloc + pmemobj_zalloc + pmemobj_realloc + pmemobj_zrealloc + pmemobj_strdup + pmemobj_wcsdup + pmemobj_free + pmemobj_alloc_usable_size + pmemobj_type_num + pmemobj_root + pmemobj_root_construct + pmemobj_root_size + pmemobj_first + pmemobj_next + pmemobj_list_insert + pmemobj_list_insert_new + pmemobj_list_remove + pmemobj_list_move + pmemobj_tx_begin + pmemobj_tx_stage + pmemobj_tx_abort + pmemobj_tx_commit + pmemobj_tx_end + pmemobj_tx_process + pmemobj_tx_add_range + pmemobj_tx_add_range_direct + pmemobj_tx_alloc + pmemobj_tx_xadd_range + pmemobj_tx_xadd_range_direct + pmemobj_tx_xalloc + pmemobj_tx_zalloc + pmemobj_tx_realloc + pmemobj_tx_zrealloc + pmemobj_tx_strdup + pmemobj_tx_xstrdup + pmemobj_tx_wcsdup + pmemobj_tx_xwcsdup + pmemobj_tx_free + pmemobj_tx_xfree + pmemobj_tx_errno + pmemobj_tx_lock + pmemobj_tx_xlock + pmemobj_tx_log_append_buffer + pmemobj_tx_xlog_append_buffer + pmemobj_tx_log_auto_alloc + pmemobj_tx_log_snapshots_max_size + pmemobj_tx_log_intents_max_size + pmemobj_tx_set_user_data + pmemobj_tx_get_user_data + pmemobj_tx_set_failure_behavior + pmemobj_tx_get_failure_behavior + pmemobj_memcpy + pmemobj_memcpy_persist + pmemobj_memmove + pmemobj_memset + pmemobj_memset_persist + pmemobj_persist + pmemobj_flush + pmemobj_drain + pmemobj_direct + pmemobj_volatile + pmemobj_oid + pmemobj_reserve + pmemobj_xreserve + pmemobj_defer_free + pmemobj_set_value + pmemobj_publish + pmemobj_tx_publish + pmemobj_tx_xpublish + pmemobj_cancel + pmemobj_set_user_data + 
pmemobj_get_user_data + pmemobj_defrag + _pobj_debug_notice + DllMain diff --git a/src/pmdk/src/libpmemobj/libpmemobj.link.in b/src/pmdk/src/libpmemobj/libpmemobj.link.in new file mode 100644 index 000000000..5b730caa6 --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.link.in @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation +# +# +# src/libpmemobj.link -- linker link file for libpmemobj +# +LIBPMEMOBJ_1.0 { + global: + pmemobj_check_version; + pmemobj_set_funcs; + pmemobj_errormsg; + pmemobj_create; + pmemobj_open; + pmemobj_close; + pmemobj_check; + pmemobj_ctl_exec; + pmemobj_ctl_get; + pmemobj_ctl_set; + pmemobj_mutex_zero; + pmemobj_mutex_lock; + pmemobj_mutex_timedlock; + pmemobj_mutex_trylock; + pmemobj_mutex_unlock; + pmemobj_rwlock_zero; + pmemobj_rwlock_rdlock; + pmemobj_rwlock_wrlock; + pmemobj_rwlock_timedrdlock; + pmemobj_rwlock_timedwrlock; + pmemobj_rwlock_tryrdlock; + pmemobj_rwlock_trywrlock; + pmemobj_rwlock_unlock; + pmemobj_cond_zero; + pmemobj_cond_broadcast; + pmemobj_cond_signal; + pmemobj_cond_timedwait; + pmemobj_cond_wait; + pmemobj_pool_by_oid; + pmemobj_pool_by_ptr; + pmemobj_oid; + pmemobj_alloc; + pmemobj_xalloc; + pmemobj_zalloc; + pmemobj_realloc; + pmemobj_zrealloc; + pmemobj_strdup; + pmemobj_wcsdup; + pmemobj_free; + pmemobj_alloc_usable_size; + pmemobj_type_num; + pmemobj_root; + pmemobj_root_construct; + pmemobj_root_size; + pmemobj_first; + pmemobj_next; + pmemobj_list_insert; + pmemobj_list_insert_new; + pmemobj_list_remove; + pmemobj_list_move; + pmemobj_tx_begin; + pmemobj_tx_stage; + pmemobj_tx_abort; + pmemobj_tx_commit; + pmemobj_tx_end; + pmemobj_tx_errno; + pmemobj_tx_process; + pmemobj_tx_add_range; + pmemobj_tx_add_range_direct; + pmemobj_tx_xadd_range; + pmemobj_tx_xadd_range_direct; + pmemobj_tx_alloc; + pmemobj_tx_xalloc; + pmemobj_tx_zalloc; + pmemobj_tx_realloc; + pmemobj_tx_zrealloc; + pmemobj_tx_strdup; + pmemobj_tx_xstrdup; + pmemobj_tx_wcsdup; + pmemobj_tx_xwcsdup; + pmemobj_tx_free; + pmemobj_tx_xfree; + pmemobj_tx_lock; + pmemobj_tx_xlock; + pmemobj_tx_log_append_buffer; + pmemobj_tx_xlog_append_buffer; + pmemobj_tx_log_auto_alloc; + pmemobj_tx_log_snapshots_max_size; + pmemobj_tx_log_intents_max_size; + pmemobj_tx_set_user_data; + pmemobj_tx_get_user_data; + pmemobj_tx_set_failure_behavior; + pmemobj_tx_get_failure_behavior; + pmemobj_memcpy; + pmemobj_memcpy_persist; + pmemobj_memmove; + pmemobj_memset; + pmemobj_memset_persist; + pmemobj_persist; + pmemobj_flush; + pmemobj_drain; + pmemobj_xpersist; + pmemobj_xflush; + pmemobj_direct; + pmemobj_volatile; + pmemobj_reserve; + pmemobj_xreserve; + pmemobj_defer_free; + pmemobj_set_value; + pmemobj_publish; + pmemobj_tx_publish; + pmemobj_tx_xpublish; + pmemobj_cancel; + pmemobj_set_user_data; + pmemobj_get_user_data; + pmemobj_defrag; + _pobj_cached_pool; + _pobj_cache_invalidate; + _pobj_debug_notice; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/libpmemobj/libpmemobj.rc b/src/pmdk/src/libpmemobj/libpmemobj.rc new file mode 100644 index 000000000..fae9e2d94 --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016, Intel Corporation */ + +/* + * libpmemobj.rc -- libpmemobj resource file + */ + +#include +#define FILE_NAME "libpmemobj.dll" +#define DESCRIPTION "libpmemobj - persistent memory transactional object store" +#define TYPE VFT_DLL +#include \ No newline at end of file diff --git a/src/pmdk/src/libpmemobj/libpmemobj.vcxproj 
b/src/pmdk/src/libpmemobj/libpmemobj.vcxproj new file mode 100644 index 000000000..8f627bbc9 --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.vcxproj @@ -0,0 +1,187 @@
+ [MSBuild project XML; only text values survive: Debug|x64 and Release|x64 configurations; referenced projects {9e9e3d25-2139-4a5d-9200-18148ddead45} and {901f04db-e1a5-4a41-8b81-9d31c19acd59}; ProjectGuid {1BAA1617-93AE-4196-8A1A-BD492FB18AEF}; DynamicLibrary libpmemobj, en-US, ToolsVersion 14.0, Windows SDK 10.0.17134.0 / 10.0.10240.0; PlatformToolset v140 for both configurations.]
\ No newline at end of file diff --git a/src/pmdk/src/libpmemobj/libpmemobj.vcxproj.filters b/src/pmdk/src/libpmemobj/libpmemobj.vcxproj.filters new file mode 100644 index 000000000..e32a53882 --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj.vcxproj.filters @@ -0,0 +1,384 @@
+ [MSBuild filter XML; only text values survive: sources are grouped under "Source Files", headers under "Header Files" and "Header Files\libpmemobj"; the filter identifiers follow:]
{73806be6-053a-4dfd-92de-956b0480b5d9} + h + + + {60288a68-9214-4faa-b5c4-bf33b1020120} + c;def + + + {ab47d7d2-14e7-4ab2-af19-e7cf10e43fbf} + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmemobj/libpmemobj_main.c b/src/pmdk/src/libpmemobj/libpmemobj_main.c new file mode 100644 index 000000000..cf5bdae00 --- /dev/null +++ b/src/pmdk/src/libpmemobj/libpmemobj_main.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2017, Intel Corporation */ + +/* + * libpmemobj_main.c -- entry point for libpmemobj.dll + * + * XXX - This is a placeholder. All the library initialization/cleanup + * that is done in library ctors/dtors, as well as TLS initialization + * should be moved here. + */ + +void libpmemobj_init(void); +void libpmemobj_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmemobj_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + libpmemobj_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmemobj/list.c b/src/pmdk/src/libpmemobj/list.c new file mode 100644 index 000000000..9eb1b7070 --- /dev/null +++ b/src/pmdk/src/libpmemobj/list.c @@ -0,0 +1,939 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2019, Intel Corporation */ + +/* + * list.c -- implementation of persistent atomic lists module + */ +#include + +#include "list.h" +#include "obj.h" +#include "os_thread.h" +#include "out.h" +#include "sync.h" +#include "valgrind_internal.h" +#include "memops.h" + +#define PREV_OFF (offsetof(struct list_entry, pe_prev) + offsetof(PMEMoid, off)) +#define NEXT_OFF (offsetof(struct list_entry, pe_next) + offsetof(PMEMoid, off)) + +/* + * list_args_common -- common arguments for operations on list + * + * pe_offset - offset to list entry relative to user data + * obj_doffset - offset to element's data relative to pmemobj pool + * entry_ptr - list entry structure of element + */ +struct list_args_common { + ssize_t pe_offset; + uint64_t obj_doffset; + struct list_entry *entry_ptr; +}; + +/* + * list_args_insert -- arguments for inserting element to list + * + * head - list head + * dest - destination element OID + * dest_entry_ptr - list entry of destination element + * before - insert before or after destination element + */ +struct list_args_insert { + struct list_head *head; + PMEMoid dest; + struct list_entry *dest_entry_ptr; + int before; +}; + +/* + * list_args_reinsert -- arguments for reinserting element on list + * + * head - list head + * entry_ptr - list entry of old element + * obj_doffset - offset to element's data relative to pmemobj pool + */ +struct list_args_reinsert { + struct list_head *head; + struct list_entry *entry_ptr; + uint64_t obj_doffset; +}; + +/* + * list_args_remove -- arguments for removing element from list + * + * pe_offset - offset to list entry relative to user data + * obj_doffset - offset to element's data relative to pmemobj pool + * head - list head + * entry_ptr - list entry structure of element + */ +struct list_args_remove { + ssize_t pe_offset; + uint64_t obj_doffset; + struct list_head *head; + struct list_entry *entry_ptr; +}; + +/* + * list_mutexes_lock -- (internal) grab one or two locks in ascending + * address order + */ +static inline int +list_mutexes_lock(PMEMobjpool *pop, + struct list_head *head1, struct list_head *head2) +{ + ASSERTne(head1, NULL); + + if (!head2 || 
head1 == head2) + return pmemobj_mutex_lock(pop, &head1->lock); + + PMEMmutex *lock1; + PMEMmutex *lock2; + if ((uintptr_t)&head1->lock < (uintptr_t)&head2->lock) { + lock1 = &head1->lock; + lock2 = &head2->lock; + } else { + lock1 = &head2->lock; + lock2 = &head1->lock; + } + + int ret; + if ((ret = pmemobj_mutex_lock(pop, lock1))) + goto err; + if ((ret = pmemobj_mutex_lock(pop, lock2))) + goto err_unlock; + + return 0; + +err_unlock: + pmemobj_mutex_unlock(pop, lock1); +err: + return ret; +} + +/* + * list_mutexes_unlock -- (internal) release one or two locks + */ +static inline void +list_mutexes_unlock(PMEMobjpool *pop, + struct list_head *head1, struct list_head *head2) +{ + ASSERTne(head1, NULL); + + if (!head2 || head1 == head2) { + pmemobj_mutex_unlock_nofail(pop, &head1->lock); + return; + } + + pmemobj_mutex_unlock_nofail(pop, &head1->lock); + pmemobj_mutex_unlock_nofail(pop, &head2->lock); +} + +/* + * list_get_dest -- (internal) return destination object ID + * + * If the input dest is not OID_NULL returns dest. + * If the input dest is OID_NULL and before is set returns first element. + * If the input dest is OID_NULL and before is no set returns last element. + */ +static inline PMEMoid +list_get_dest(PMEMobjpool *pop, struct list_head *head, PMEMoid dest, + ssize_t pe_offset, int before) +{ + if (dest.off) + return dest; + + if (head->pe_first.off == 0 || !!before == POBJ_LIST_DEST_HEAD) + return head->pe_first; + + struct list_entry *first_ptr = (struct list_entry *)OBJ_OFF_TO_PTR(pop, + (uintptr_t)((ssize_t)head->pe_first.off + pe_offset)); + + return first_ptr->pe_prev; +} + +/* + * list_set_oid_redo_log -- (internal) set PMEMoid value using redo log + */ +static size_t +list_set_oid_redo_log(PMEMobjpool *pop, + struct operation_context *ctx, + PMEMoid *oidp, uint64_t obj_doffset, int oidp_inited) +{ + ASSERT(OBJ_PTR_IS_VALID(pop, oidp)); + + if (!oidp_inited || oidp->pool_uuid_lo != pop->uuid_lo) { + if (oidp_inited) + ASSERTeq(oidp->pool_uuid_lo, 0); + + operation_add_entry(ctx, &oidp->pool_uuid_lo, pop->uuid_lo, + ULOG_OPERATION_SET); + } + + operation_add_entry(ctx, &oidp->off, obj_doffset, + ULOG_OPERATION_SET); + return 0; +} + +/* + * list_update_head -- (internal) update pe_first entry in list head + */ +static size_t +list_update_head(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_head *head, uint64_t first_offset) +{ + LOG(15, NULL); + + operation_add_entry(ctx, &head->pe_first.off, first_offset, + ULOG_OPERATION_SET); + + if (head->pe_first.pool_uuid_lo == 0) { + operation_add_entry(ctx, &head->pe_first.pool_uuid_lo, + pop->uuid_lo, ULOG_OPERATION_SET); + } + + return 0; +} + +/* + * u64_add_offset -- (internal) add signed offset to unsigned integer and check + * for overflows + */ +static void +u64_add_offset(uint64_t *value, ssize_t off) +{ + uint64_t prev = *value; + if (off >= 0) { + *value += (size_t)off; + ASSERT(*value >= prev); /* detect overflow */ + } else { + *value -= (size_t)-off; + ASSERT(*value < prev); + } +} + +/* + * list_fill_entry_persist -- (internal) fill new entry using persist function + * + * Used for newly allocated objects. 
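+ *
+ * A freshly reserved object stays unreachable until palloc_publish()
+ * processes the publishing redo log, so its entry may be written with
+ * a plain persist -- an interruption before publication simply
+ * discards the reservation, entry contents included.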
+ */ +static void +list_fill_entry_persist(PMEMobjpool *pop, struct list_entry *entry_ptr, + uint64_t next_offset, uint64_t prev_offset) +{ + LOG(15, NULL); + + VALGRIND_ADD_TO_TX(entry_ptr, sizeof(*entry_ptr)); + entry_ptr->pe_next.pool_uuid_lo = pop->uuid_lo; + entry_ptr->pe_next.off = next_offset; + + entry_ptr->pe_prev.pool_uuid_lo = pop->uuid_lo; + entry_ptr->pe_prev.off = prev_offset; + VALGRIND_REMOVE_FROM_TX(entry_ptr, sizeof(*entry_ptr)); + + pmemops_persist(&pop->p_ops, entry_ptr, sizeof(*entry_ptr)); +} + +/* + * list_fill_entry_redo_log -- (internal) fill new entry using redo log + * + * Used to update entry in existing object. + */ +static size_t +list_fill_entry_redo_log(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_args_common *args, + uint64_t next_offset, uint64_t prev_offset, int set_uuid) +{ + LOG(15, NULL); + struct pmem_ops *ops = &pop->p_ops; + + ASSERTne(args->entry_ptr, NULL); + ASSERTne(args->obj_doffset, 0); + + if (set_uuid) { + VALGRIND_ADD_TO_TX(&(args->entry_ptr->pe_next.pool_uuid_lo), + sizeof(args->entry_ptr->pe_next.pool_uuid_lo)); + VALGRIND_ADD_TO_TX(&(args->entry_ptr->pe_prev.pool_uuid_lo), + sizeof(args->entry_ptr->pe_prev.pool_uuid_lo)); + /* don't need to fill pool uuid using redo log */ + args->entry_ptr->pe_next.pool_uuid_lo = pop->uuid_lo; + args->entry_ptr->pe_prev.pool_uuid_lo = pop->uuid_lo; + VALGRIND_REMOVE_FROM_TX( + &(args->entry_ptr->pe_next.pool_uuid_lo), + sizeof(args->entry_ptr->pe_next.pool_uuid_lo)); + VALGRIND_REMOVE_FROM_TX( + &(args->entry_ptr->pe_prev.pool_uuid_lo), + sizeof(args->entry_ptr->pe_prev.pool_uuid_lo)); + pmemops_persist(ops, args->entry_ptr, sizeof(*args->entry_ptr)); + } else { + ASSERTeq(args->entry_ptr->pe_next.pool_uuid_lo, pop->uuid_lo); + ASSERTeq(args->entry_ptr->pe_prev.pool_uuid_lo, pop->uuid_lo); + } + + /* set current->next and current->prev using redo log */ + uint64_t next_off_off = args->obj_doffset + NEXT_OFF; + uint64_t prev_off_off = args->obj_doffset + PREV_OFF; + u64_add_offset(&next_off_off, args->pe_offset); + u64_add_offset(&prev_off_off, args->pe_offset); + void *next_ptr = (char *)pop + next_off_off; + void *prev_ptr = (char *)pop + prev_off_off; + + operation_add_entry(ctx, next_ptr, next_offset, ULOG_OPERATION_SET); + operation_add_entry(ctx, prev_ptr, prev_offset, ULOG_OPERATION_SET); + + return 0; +} + +/* + * list_remove_single -- (internal) remove element from single list + */ +static size_t +list_remove_single(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_args_remove *args) +{ + LOG(15, NULL); + + if (args->entry_ptr->pe_next.off == args->obj_doffset) { + /* only one element on list */ + ASSERTeq(args->head->pe_first.off, args->obj_doffset); + ASSERTeq(args->entry_ptr->pe_prev.off, args->obj_doffset); + + return list_update_head(pop, ctx, args->head, 0); + } else { + /* set next->prev = prev and prev->next = next */ + uint64_t next_off = args->entry_ptr->pe_next.off; + uint64_t next_prev_off = next_off + PREV_OFF; + u64_add_offset(&next_prev_off, args->pe_offset); + uint64_t prev_off = args->entry_ptr->pe_prev.off; + uint64_t prev_next_off = prev_off + NEXT_OFF; + u64_add_offset(&prev_next_off, args->pe_offset); + + void *prev_ptr = (char *)pop + next_prev_off; + void *next_ptr = (char *)pop + prev_next_off; + + operation_add_entry(ctx, prev_ptr, prev_off, + ULOG_OPERATION_SET); + operation_add_entry(ctx, next_ptr, next_off, + ULOG_OPERATION_SET); + + if (args->head->pe_first.off == args->obj_doffset) { + /* removing element is the first one */ + 
return list_update_head(pop, ctx, + args->head, next_off); + } else { + return 0; + } + } +} + +/* + * list_insert_before -- (internal) insert element at offset before an element + */ +static size_t +list_insert_before(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_args_insert *args, struct list_args_common *args_common, + uint64_t *next_offset, uint64_t *prev_offset) +{ + LOG(15, NULL); + + /* current->next = dest and current->prev = dest->prev */ + *next_offset = args->dest.off; + *prev_offset = args->dest_entry_ptr->pe_prev.off; + + /* dest->prev = current and dest->prev->next = current */ + uint64_t dest_prev_off = args->dest.off + PREV_OFF; + u64_add_offset(&dest_prev_off, args_common->pe_offset); + uint64_t dest_prev_next_off = args->dest_entry_ptr->pe_prev.off + + NEXT_OFF; + u64_add_offset(&dest_prev_next_off, args_common->pe_offset); + + void *dest_prev_ptr = (char *)pop + dest_prev_off; + void *dest_prev_next_ptr = (char *)pop + dest_prev_next_off; + operation_add_entry(ctx, dest_prev_ptr, args_common->obj_doffset, + ULOG_OPERATION_SET); + operation_add_entry(ctx, dest_prev_next_ptr, args_common->obj_doffset, + ULOG_OPERATION_SET); + + return 0; +} + +/* + * list_insert_after -- (internal) insert element at offset after an element + */ +static size_t +list_insert_after(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_args_insert *args, struct list_args_common *args_common, + uint64_t *next_offset, uint64_t *prev_offset) +{ + LOG(15, NULL); + + /* current->next = dest->next and current->prev = dest */ + *next_offset = args->dest_entry_ptr->pe_next.off; + *prev_offset = args->dest.off; + + /* dest->next = current and dest->next->prev = current */ + uint64_t dest_next_off = args->dest.off + NEXT_OFF; + u64_add_offset(&dest_next_off, args_common->pe_offset); + uint64_t dest_next_prev_off = args->dest_entry_ptr->pe_next.off + + PREV_OFF; + u64_add_offset(&dest_next_prev_off, args_common->pe_offset); + + void *dest_next_ptr = (char *)pop + dest_next_off; + void *dest_next_prev_ptr = (char *)pop + dest_next_prev_off; + operation_add_entry(ctx, dest_next_ptr, args_common->obj_doffset, + ULOG_OPERATION_SET); + operation_add_entry(ctx, dest_next_prev_ptr, args_common->obj_doffset, + ULOG_OPERATION_SET); + + return 0; +} + +/* + * list_insert_user -- (internal) insert element at offset to a user list + */ +static size_t +list_insert_user(PMEMobjpool *pop, + struct operation_context *ctx, + struct list_args_insert *args, struct list_args_common *args_common, + uint64_t *next_offset, uint64_t *prev_offset) +{ + LOG(15, NULL); + if (args->dest.off == 0) { + /* inserting the first element on list */ + ASSERTeq(args->head->pe_first.off, 0); + + /* set loop on current element */ + *next_offset = args_common->obj_doffset; + *prev_offset = args_common->obj_doffset; + + /* update head */ + list_update_head(pop, ctx, args->head, + args_common->obj_doffset); + } else { + if (args->before) { + /* inserting before dest */ + list_insert_before(pop, ctx, args, args_common, + next_offset, prev_offset); + + if (args->dest.off == args->head->pe_first.off) { + /* current element at first position */ + list_update_head(pop, ctx, args->head, + args_common->obj_doffset); + } + } else { + /* inserting after dest */ + list_insert_after(pop, ctx, args, args_common, + next_offset, prev_offset); + } + } + + return 0; +} + +/* + * list_insert_new -- allocate and insert element to oob and user lists + * + * pop - pmemobj pool handle + * pe_offset - offset to list entry on user list 
relative to user data + * user_head - user list head, must be locked if not NULL + * dest - destination on user list + * before - insert before/after destination on user list + * size - size of allocation, will be increased by OBJ_OOB_SIZE + * constructor - object's constructor + * arg - argument for object's constructor + * oidp - pointer to target object ID + */ +static int +list_insert_new(PMEMobjpool *pop, + size_t pe_offset, struct list_head *user_head, PMEMoid dest, int before, + size_t size, uint64_t type_num, int (*constructor)(void *ctx, void *ptr, + size_t usable_size, void *arg), void *arg, PMEMoid *oidp) +{ + LOG(3, NULL); + ASSERT(user_head != NULL); + + int ret; + +#ifdef DEBUG + int r = pmemobj_mutex_assert_locked(pop, &user_head->lock); + ASSERTeq(r, 0); +#endif + struct lane *lane; + lane_hold(pop, &lane); + + struct pobj_action reserved; + if (palloc_reserve(&pop->heap, size, constructor, arg, + type_num, 0, 0, 0, &reserved) != 0) { + ERR("!palloc_reserve"); + ret = -1; + goto err_pmalloc; + } + uint64_t obj_doffset = reserved.heap.offset; + + struct operation_context *ctx = lane->external; + operation_start(ctx); + + ASSERT((ssize_t)pe_offset >= 0); + + dest = list_get_dest(pop, user_head, dest, + (ssize_t)pe_offset, before); + + struct list_entry *entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + obj_doffset + pe_offset); + + struct list_entry *dest_entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + dest.off + pe_offset); + + struct list_args_insert args = { + .dest = dest, + .dest_entry_ptr = dest_entry_ptr, + .head = user_head, + .before = before, + }; + + struct list_args_common args_common = { + .obj_doffset = obj_doffset, + .entry_ptr = entry_ptr, + .pe_offset = (ssize_t)pe_offset, + }; + + uint64_t next_offset; + uint64_t prev_offset; + + /* insert element to user list */ + list_insert_user(pop, + ctx, &args, &args_common, + &next_offset, &prev_offset); + + /* don't need to use redo log for filling new element */ + list_fill_entry_persist(pop, entry_ptr, + next_offset, prev_offset); + + if (oidp != NULL) { + if (OBJ_PTR_IS_VALID(pop, oidp)) { + list_set_oid_redo_log(pop, ctx, + oidp, obj_doffset, 0); + } else { + oidp->off = obj_doffset; + oidp->pool_uuid_lo = pop->uuid_lo; + } + } + + palloc_publish(&pop->heap, &reserved, 1, ctx); + + ret = 0; + +err_pmalloc: + lane_release(pop); + + ASSERT(ret == 0 || ret == -1); + return ret; +} + +/* + * list_insert_new_user -- allocate and insert element to oob and user lists + * + * pop - pmemobj pool handle + * oob_head - oob list head + * pe_offset - offset to list entry on user list relative to user data + * user_head - user list head + * dest - destination on user list + * before - insert before/after destination on user list + * size - size of allocation, will be increased by OBJ_OOB_SIZE + * constructor - object's constructor + * arg - argument for object's constructor + * oidp - pointer to target object ID + */ +int +list_insert_new_user(PMEMobjpool *pop, + size_t pe_offset, struct list_head *user_head, PMEMoid dest, int before, + size_t size, uint64_t type_num, int (*constructor)(void *ctx, void *ptr, + size_t usable_size, void *arg), void *arg, PMEMoid *oidp) +{ + int ret; + if ((ret = pmemobj_mutex_lock(pop, &user_head->lock))) { + errno = ret; + LOG(2, "pmemobj_mutex_lock failed"); + return -1; + } + + ret = list_insert_new(pop, pe_offset, user_head, + dest, before, size, type_num, constructor, arg, oidp); + + pmemobj_mutex_unlock_nofail(pop, &user_head->lock); + + ASSERT(ret == 0 || ret == -1); + 
return ret; +} + +/* + * list_insert -- insert object to a single list + * + * pop - pmemobj handle + * pe_offset - offset to list entry on user list relative to user data + * head - list head + * dest - destination object ID + * before - before/after destination + * oid - target object ID + */ +int +list_insert(PMEMobjpool *pop, + ssize_t pe_offset, struct list_head *head, + PMEMoid dest, int before, + PMEMoid oid) +{ + LOG(3, NULL); + ASSERTne(head, NULL); + + struct lane *lane; + lane_hold(pop, &lane); + + int ret; + + if ((ret = pmemobj_mutex_lock(pop, &head->lock))) { + errno = ret; + LOG(2, "pmemobj_mutex_lock failed"); + ret = -1; + goto err; + } + + struct operation_context *ctx = lane->external; + operation_start(ctx); + + dest = list_get_dest(pop, head, dest, pe_offset, before); + + struct list_entry *entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + (uintptr_t)((ssize_t)oid.off + pe_offset)); + + struct list_entry *dest_entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + (uintptr_t)((ssize_t)dest.off + pe_offset)); + + struct list_args_insert args = { + .dest = dest, + .dest_entry_ptr = dest_entry_ptr, + .head = head, + .before = before, + }; + + struct list_args_common args_common = { + .obj_doffset = oid.off, + .entry_ptr = entry_ptr, + .pe_offset = (ssize_t)pe_offset, + }; + + uint64_t next_offset; + uint64_t prev_offset; + + /* insert element to user list */ + list_insert_user(pop, ctx, + &args, &args_common, &next_offset, &prev_offset); + + /* fill entry of existing element using redo log */ + list_fill_entry_redo_log(pop, ctx, + &args_common, next_offset, prev_offset, 1); + + operation_process(ctx); + operation_finish(ctx, 0); + + pmemobj_mutex_unlock_nofail(pop, &head->lock); +err: + lane_release(pop); + + ASSERT(ret == 0 || ret == -1); + return ret; +} + +/* + * list_remove_free -- remove from two lists and free an object + * + * pop - pmemobj pool handle + * oob_head - oob list head + * pe_offset - offset to list entry on user list relative to user data + * user_head - user list head, *must* be locked if not NULL + * oidp - pointer to target object ID + */ +static void +list_remove_free(PMEMobjpool *pop, size_t pe_offset, + struct list_head *user_head, PMEMoid *oidp) +{ + LOG(3, NULL); + ASSERT(user_head != NULL); + +#ifdef DEBUG + int r = pmemobj_mutex_assert_locked(pop, &user_head->lock); + ASSERTeq(r, 0); +#endif + + struct lane *lane; + lane_hold(pop, &lane); + struct operation_context *ctx = lane->external; + operation_start(ctx); + + struct pobj_action deferred; + palloc_defer_free(&pop->heap, oidp->off, &deferred); + uint64_t obj_doffset = oidp->off; + + ASSERT((ssize_t)pe_offset >= 0); + + struct list_entry *entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + obj_doffset + pe_offset); + + struct list_args_remove args = { + .pe_offset = (ssize_t)pe_offset, + .head = user_head, + .entry_ptr = entry_ptr, + .obj_doffset = obj_doffset + }; + + /* remove from user list */ + list_remove_single(pop, ctx, &args); + + /* clear the oid */ + if (OBJ_PTR_IS_VALID(pop, oidp)) + list_set_oid_redo_log(pop, ctx, oidp, 0, 1); + else + oidp->off = 0; + + palloc_publish(&pop->heap, &deferred, 1, ctx); + + lane_release(pop); +} + +/* + * list_remove_free_user -- remove from two lists and free an object + * + * pop - pmemobj pool handle + * oob_head - oob list head + * pe_offset - offset to list entry on user list relative to user data + * user_head - user list head + * oidp - pointer to target object ID + */ +int +list_remove_free_user(PMEMobjpool *pop, size_t 
pe_offset, + struct list_head *user_head, PMEMoid *oidp) +{ + LOG(3, NULL); + + int ret; + if ((ret = pmemobj_mutex_lock(pop, &user_head->lock))) { + errno = ret; + LOG(2, "pmemobj_mutex_lock failed"); + return -1; + } + + list_remove_free(pop, pe_offset, user_head, oidp); + + pmemobj_mutex_unlock_nofail(pop, &user_head->lock); + + return 0; +} + +/* + * list_remove -- remove object from list + * + * pop - pmemobj handle + * pe_offset - offset to list entry on user list relative to user data + * head - list head + * oid - target object ID + */ +int +list_remove(PMEMobjpool *pop, + ssize_t pe_offset, struct list_head *head, + PMEMoid oid) +{ + LOG(3, NULL); + ASSERTne(head, NULL); + + int ret; + + struct lane *lane; + lane_hold(pop, &lane); + + if ((ret = pmemobj_mutex_lock(pop, &head->lock))) { + errno = ret; + LOG(2, "pmemobj_mutex_lock failed"); + ret = -1; + goto err; + } + + struct operation_context *ctx = lane->external; + operation_start(ctx); + + struct list_entry *entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + oid.off + (size_t)pe_offset); + + struct list_args_remove args = { + .pe_offset = (ssize_t)pe_offset, + .head = head, + .entry_ptr = entry_ptr, + .obj_doffset = oid.off, + }; + + struct list_args_common args_common = { + .obj_doffset = oid.off, + .entry_ptr = entry_ptr, + .pe_offset = (ssize_t)pe_offset, + }; + + /* remove element from user list */ + list_remove_single(pop, ctx, &args); + + /* clear next and prev offsets in removing element using redo log */ + list_fill_entry_redo_log(pop, ctx, + &args_common, 0, 0, 0); + + operation_process(ctx); + operation_finish(ctx, 0); + + pmemobj_mutex_unlock_nofail(pop, &head->lock); +err: + lane_release(pop); + + ASSERT(ret == 0 || ret == -1); + return ret; +} + +/* + * list_move -- move object between two lists + * + * pop - pmemobj handle + * pe_offset_old - offset to old list entry relative to user data + * head_old - old list head + * pe_offset_new - offset to new list entry relative to user data + * head_new - new list head + * dest - destination object ID + * before - before/after destination + * oid - target object ID + */ +int +list_move(PMEMobjpool *pop, + size_t pe_offset_old, struct list_head *head_old, + size_t pe_offset_new, struct list_head *head_new, + PMEMoid dest, int before, PMEMoid oid) +{ + LOG(3, NULL); + ASSERTne(head_old, NULL); + ASSERTne(head_new, NULL); + + int ret; + + struct lane *lane; + lane_hold(pop, &lane); + + /* + * Grab locks in specified order to avoid dead-locks. 
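+	 *
+	 * list_mutexes_lock() orders the two head locks by their
+	 * virtual address, so two threads moving objects between the
+	 * same pair of lists, e.g.:
+	 *
+	 *	thread A: list_move(pop, ..., head1, ..., head2, ...);
+	 *	thread B: list_move(pop, ..., head2, ..., head1, ...);
+	 *
+	 * both take the lower-addressed lock first; neither can wait on
+	 * a lock while holding the one its peer needs.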
+ * + * XXX performance improvement: initialize oob locks at pool opening + */ + if ((ret = list_mutexes_lock(pop, head_new, head_old))) { + errno = ret; + LOG(2, "list_mutexes_lock failed"); + ret = -1; + goto err; + } + + struct operation_context *ctx = lane->external; + operation_start(ctx); + + dest = list_get_dest(pop, head_new, dest, + (ssize_t)pe_offset_new, before); + + struct list_entry *entry_ptr_old = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + oid.off + pe_offset_old); + + struct list_entry *entry_ptr_new = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + oid.off + pe_offset_new); + + struct list_entry *dest_entry_ptr = + (struct list_entry *)OBJ_OFF_TO_PTR(pop, + dest.off + pe_offset_new); + + if (head_old == head_new) { + /* moving within the same list */ + + if (dest.off == oid.off) + goto unlock; + + if (before && dest_entry_ptr->pe_prev.off == oid.off) { + if (head_old->pe_first.off != dest.off) + goto unlock; + + list_update_head(pop, ctx, + head_old, oid.off); + + goto redo_last; + } + + if (!before && dest_entry_ptr->pe_next.off == oid.off) { + if (head_old->pe_first.off != oid.off) + goto unlock; + + list_update_head(pop, ctx, + head_old, entry_ptr_old->pe_next.off); + + goto redo_last; + } + } + + ASSERT((ssize_t)pe_offset_old >= 0); + struct list_args_remove args_remove = { + .pe_offset = (ssize_t)pe_offset_old, + .head = head_old, + .entry_ptr = entry_ptr_old, + .obj_doffset = oid.off, + }; + + struct list_args_insert args_insert = { + .head = head_new, + .dest = dest, + .dest_entry_ptr = dest_entry_ptr, + .before = before, + }; + + ASSERT((ssize_t)pe_offset_new >= 0); + struct list_args_common args_common = { + .obj_doffset = oid.off, + .entry_ptr = entry_ptr_new, + .pe_offset = (ssize_t)pe_offset_new, + }; + + uint64_t next_offset; + uint64_t prev_offset; + + /* remove element from user list */ + list_remove_single(pop, ctx, &args_remove); + + /* insert element to user list */ + list_insert_user(pop, ctx, &args_insert, + &args_common, &next_offset, &prev_offset); + + /* offsets differ, move is between different list entries - set uuid */ + int set_uuid = pe_offset_new != pe_offset_old ? 
1 : 0; + + /* fill next and prev offsets of moving element using redo log */ + list_fill_entry_redo_log(pop, ctx, + &args_common, next_offset, prev_offset, set_uuid); + +redo_last: +unlock: + operation_process(ctx); + operation_finish(ctx, 0); + list_mutexes_unlock(pop, head_new, head_old); +err: + lane_release(pop); + + ASSERT(ret == 0 || ret == -1); + return ret; +} diff --git a/src/pmdk/src/libpmemobj/list.h b/src/pmdk/src/libpmemobj/list.h new file mode 100644 index 000000000..bb8969140 --- /dev/null +++ b/src/pmdk/src/libpmemobj/list.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * list.h -- internal definitions for persistent atomic lists module + */ + +#ifndef LIBPMEMOBJ_LIST_H +#define LIBPMEMOBJ_LIST_H 1 + +#include +#include +#include + +#include "libpmemobj.h" +#include "lane.h" +#include "pmalloc.h" +#include "ulog.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct list_entry { + PMEMoid pe_next; + PMEMoid pe_prev; +}; + +struct list_head { + PMEMoid pe_first; + PMEMmutex lock; +}; + +int list_insert_new_user(PMEMobjpool *pop, + size_t pe_offset, struct list_head *user_head, PMEMoid dest, int before, + size_t size, uint64_t type_num, palloc_constr constructor, void *arg, + PMEMoid *oidp); + +int list_insert(PMEMobjpool *pop, + ssize_t pe_offset, struct list_head *head, PMEMoid dest, int before, + PMEMoid oid); + +int list_remove_free_user(PMEMobjpool *pop, + size_t pe_offset, struct list_head *user_head, + PMEMoid *oidp); + +int list_remove(PMEMobjpool *pop, + ssize_t pe_offset, struct list_head *head, + PMEMoid oid); + +int list_move(PMEMobjpool *pop, + size_t pe_offset_old, struct list_head *head_old, + size_t pe_offset_new, struct list_head *head_new, + PMEMoid dest, int before, PMEMoid oid); + +void list_move_oob(PMEMobjpool *pop, + struct list_head *head_old, struct list_head *head_new, + PMEMoid oid); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/memblock.c b/src/pmdk/src/libpmemobj/memblock.c new file mode 100644 index 000000000..0e1dd1468 --- /dev/null +++ b/src/pmdk/src/libpmemobj/memblock.c @@ -0,0 +1,1520 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * memblock.c -- implementation of memory block + * + * Memory block is a representation of persistent object that resides in the + * heap. A valid memory block must be either a huge (free or used) chunk or a + * block inside a run. + * + * Huge blocks are 1:1 correlated with the chunk headers in the zone whereas + * run blocks are represented by bits in corresponding chunk bitmap. + * + * This file contains implementations of abstract operations on memory blocks. + * Instead of storing the mbops structure inside each memory block the correct + * method implementation is chosen at runtime. 
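+ *
+ * For instance, reading a block's stored size dispatches through a
+ * static ops table keyed by the header type (a sketch built from the
+ * definitions in this file):
+ *
+ *	enum header_type t = memblock_header_type(m);
+ *	size_t user_size = memblock_header_ops[t].get_size(m);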
+ */ + +#include + +#include "obj.h" +#include "heap.h" +#include "memblock.h" +#include "out.h" +#include "valgrind_internal.h" +#include "alloc_class.h" + +/* calculates the size of the entire run, including any additional chunks */ +#define SIZEOF_RUN(runp, size_idx)\ + (sizeof(*(runp)) + (((size_idx) - 1) * CHUNKSIZE)) + +/* + * memblock_header_type -- determines the memory block's header type + */ +static enum header_type +memblock_header_type(const struct memory_block *m) +{ + struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m); + + if (hdr->flags & CHUNK_FLAG_COMPACT_HEADER) + return HEADER_COMPACT; + + if (hdr->flags & CHUNK_FLAG_HEADER_NONE) + return HEADER_NONE; + + return HEADER_LEGACY; +} + +/* + * memblock_header_legacy_get_size -- + * (internal) returns the size stored in a legacy header + */ +static size_t +memblock_header_legacy_get_size(const struct memory_block *m) +{ + struct allocation_header_legacy *hdr = m->m_ops->get_real_data(m); + + return hdr->size; +} + +/* + * memblock_header_compact_get_size -- + * (internal) returns the size stored in a compact header + */ +static size_t +memblock_header_compact_get_size(const struct memory_block *m) +{ + struct allocation_header_compact *hdr = m->m_ops->get_real_data(m); + + return hdr->size & ALLOC_HDR_FLAGS_MASK; +} + +/* + * memblock_header_none_get_size -- + * (internal) determines the sizes of an object without a header + */ +static size_t +memblock_header_none_get_size(const struct memory_block *m) +{ + return m->m_ops->block_size(m); +} + +/* + * memblock_header_legacy_get_extra -- + * (internal) returns the extra field stored in a legacy header + */ +static uint64_t +memblock_header_legacy_get_extra(const struct memory_block *m) +{ + struct allocation_header_legacy *hdr = m->m_ops->get_real_data(m); + + return hdr->type_num; +} + +/* + * memblock_header_compact_get_extra -- + * (internal) returns the extra field stored in a compact header + */ +static uint64_t +memblock_header_compact_get_extra(const struct memory_block *m) +{ + struct allocation_header_compact *hdr = m->m_ops->get_real_data(m); + + return hdr->extra; +} + +/* + * memblock_header_none_get_extra -- + * (internal) objects without a header don't have an extra field + */ +static uint64_t +memblock_header_none_get_extra(const struct memory_block *m) +{ + return 0; +} + +/* + * memblock_header_legacy_get_flags -- + * (internal) returns the flags stored in a legacy header + */ +static uint16_t +memblock_header_legacy_get_flags(const struct memory_block *m) +{ + struct allocation_header_legacy *hdr = m->m_ops->get_real_data(m); + + return (uint16_t)(hdr->root_size >> ALLOC_HDR_SIZE_SHIFT); +} + +/* + * memblock_header_compact_get_flags -- + * (internal) returns the flags stored in a compact header + */ +static uint16_t +memblock_header_compact_get_flags(const struct memory_block *m) +{ + struct allocation_header_compact *hdr = m->m_ops->get_real_data(m); + + return (uint16_t)(hdr->size >> ALLOC_HDR_SIZE_SHIFT); +} + +/* + * memblock_header_none_get_flags -- + * (internal) objects without a header do not support flags + */ +static uint16_t +memblock_header_none_get_flags(const struct memory_block *m) +{ + return 0; +} + +/* + * memblock_header_legacy_write -- + * (internal) writes a legacy header of an object + */ +static void +memblock_header_legacy_write(const struct memory_block *m, + size_t size, uint64_t extra, uint16_t flags) +{ + struct allocation_header_legacy hdr; + hdr.size = size; + hdr.type_num = extra; + hdr.root_size = ((uint64_t)flags << 
ALLOC_HDR_SIZE_SHIFT);
+
+	struct allocation_header_legacy *hdrp = m->m_ops->get_real_data(m);
+
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(hdrp, sizeof(*hdrp));
+
+	VALGRIND_ADD_TO_TX(hdrp, sizeof(*hdrp));
+	pmemops_memcpy(&m->heap->p_ops, hdrp, &hdr,
+		sizeof(hdr), /* legacy header is 64 bytes in size */
+		PMEMOBJ_F_MEM_WC | PMEMOBJ_F_MEM_NODRAIN | PMEMOBJ_F_RELAXED);
+	VALGRIND_REMOVE_FROM_TX(hdrp, sizeof(*hdrp));
+
+	/* unused fields of the legacy headers are used as a red zone */
+	VALGRIND_DO_MAKE_MEM_NOACCESS(hdrp->unused, sizeof(hdrp->unused));
+}
+
+/*
+ * memblock_header_compact_write --
+ *	(internal) writes a compact header of an object
+ */
+static void
+memblock_header_compact_write(const struct memory_block *m,
+	size_t size, uint64_t extra, uint16_t flags)
+{
+	COMPILE_ERROR_ON(ALLOC_HDR_COMPACT_SIZE > CACHELINE_SIZE);
+
+	struct {
+		struct allocation_header_compact hdr;
+		uint8_t padding[CACHELINE_SIZE - ALLOC_HDR_COMPACT_SIZE];
+	} padded;
+
+	padded.hdr.size = size | ((uint64_t)flags << ALLOC_HDR_SIZE_SHIFT);
+	padded.hdr.extra = extra;
+
+	struct allocation_header_compact *hdrp = m->m_ops->get_real_data(m);
+
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(hdrp, sizeof(*hdrp));
+
+	/*
+	 * If possible, write the entire header with a single memcpy; this
+	 * allows the copy implementation to avoid a cache miss on a partial
+	 * cache line write.
+	 */
+	size_t hdr_size = ALLOC_HDR_COMPACT_SIZE;
+	if ((uintptr_t)hdrp % CACHELINE_SIZE == 0 && size >= sizeof(padded))
+		hdr_size = sizeof(padded);
+
+	VALGRIND_ADD_TO_TX(hdrp, hdr_size);
+
+	pmemops_memcpy(&m->heap->p_ops, hdrp, &padded, hdr_size,
+		PMEMOBJ_F_MEM_WC | PMEMOBJ_F_MEM_NODRAIN | PMEMOBJ_F_RELAXED);
+	VALGRIND_DO_MAKE_MEM_UNDEFINED((char *)hdrp + ALLOC_HDR_COMPACT_SIZE,
+		hdr_size - ALLOC_HDR_COMPACT_SIZE);
+
+	VALGRIND_REMOVE_FROM_TX(hdrp, hdr_size);
+}
+
+/*
+ * memblock_header_none_write --
+ *	(internal) nothing to write
+ */
+static void
+memblock_header_none_write(const struct memory_block *m,
+	size_t size, uint64_t extra, uint16_t flags)
+{
+	/* NOP */
+}
+
+/*
+ * memblock_header_legacy_invalidate --
+ *	(internal) invalidates a legacy header
+ */
+static void
+memblock_header_legacy_invalidate(const struct memory_block *m)
+{
+	struct allocation_header_legacy *hdr = m->m_ops->get_real_data(m);
+	VALGRIND_SET_CLEAN(hdr, sizeof(*hdr));
+}
+
+/*
+ * memblock_header_compact_invalidate --
+ *	(internal) invalidates a compact header
+ */
+static void
+memblock_header_compact_invalidate(const struct memory_block *m)
+{
+	struct allocation_header_compact *hdr = m->m_ops->get_real_data(m);
+	VALGRIND_SET_CLEAN(hdr, sizeof(*hdr));
+}
+
+/*
+ * memblock_header_none_invalidate --
+ *	(internal) nothing to invalidate
+ */
+static void
+memblock_header_none_invalidate(const struct memory_block *m)
+{
+	/* NOP */
+}
+
+/*
+ * memblock_header_legacy_reinit --
+ *	(internal) reinitializes a legacy header after a heap restart
+ */
+static void
+memblock_header_legacy_reinit(const struct memory_block *m)
+{
+	struct allocation_header_legacy *hdr = m->m_ops->get_real_data(m);
+
+	VALGRIND_DO_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
+
+	/* unused fields of the legacy headers are used as a red zone */
+	VALGRIND_DO_MAKE_MEM_NOACCESS(hdr->unused, sizeof(hdr->unused));
+}
+
+/*
+ * memblock_header_compact_reinit --
+ *	(internal) reinitializes a compact header after a heap restart
+ */
+static void
+memblock_header_compact_reinit(const struct memory_block *m)
+{
+	struct allocation_header_compact *hdr = m->m_ops->get_real_data(m);
+
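+	/* make the whole compact header addressable and defined for memcheck */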
VALGRIND_DO_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
+}
+
+/*
+ * memblock_header_none_reinit --
+ *	(internal) nothing to reinitialize
+ */
+static void
+memblock_header_none_reinit(const struct memory_block *m)
+{
+	/* NOP */
+}
+
+static const struct {
+	/* determines the size of an object */
+	size_t (*get_size)(const struct memory_block *m);
+
+	/* returns the extra field (if available, 0 if not) */
+	uint64_t (*get_extra)(const struct memory_block *m);
+
+	/* returns the flags stored in a header (if available, 0 if not) */
+	uint16_t (*get_flags)(const struct memory_block *m);
+
+	/*
+	 * Stores size, extra info and flags in header of an object
+	 * (if available, does nothing otherwise).
+	 */
+	void (*write)(const struct memory_block *m,
+		size_t size, uint64_t extra, uint16_t flags);
+	void (*invalidate)(const struct memory_block *m);
+
+	/*
+	 * Reinitializes a header after a heap restart (if available, does
+	 * nothing otherwise) (VG).
+	 */
+	void (*reinit)(const struct memory_block *m);
+} memblock_header_ops[MAX_HEADER_TYPES] = {
+	[HEADER_LEGACY] = {
+		memblock_header_legacy_get_size,
+		memblock_header_legacy_get_extra,
+		memblock_header_legacy_get_flags,
+		memblock_header_legacy_write,
+		memblock_header_legacy_invalidate,
+		memblock_header_legacy_reinit,
+	},
+	[HEADER_COMPACT] = {
+		memblock_header_compact_get_size,
+		memblock_header_compact_get_extra,
+		memblock_header_compact_get_flags,
+		memblock_header_compact_write,
+		memblock_header_compact_invalidate,
+		memblock_header_compact_reinit,
+	},
+	[HEADER_NONE] = {
+		memblock_header_none_get_size,
+		memblock_header_none_get_extra,
+		memblock_header_none_get_flags,
+		memblock_header_none_write,
+		memblock_header_none_invalidate,
+		memblock_header_none_reinit,
+	}
+};
+
+/*
+ * memblock_run_default_nallocs -- returns the number of memory blocks
+ *	available in a run with the given parameters, using the default
+ *	fixed-bitmap algorithm
+ */
+static unsigned
+memblock_run_default_nallocs(uint32_t *size_idx, uint16_t flags,
+	uint64_t unit_size, uint64_t alignment)
+{
+	unsigned nallocs = (unsigned)
+		(RUN_DEFAULT_SIZE_BYTES(*size_idx) / unit_size);
+
+	while (nallocs > RUN_DEFAULT_BITMAP_NBITS) {
+		LOG(3, "tried to create a run (%lu) with number "
+			"of units (%u) exceeding the bitmap size (%u)",
+			unit_size, nallocs, RUN_DEFAULT_BITMAP_NBITS);
+		if (*size_idx > 1) {
+			*size_idx -= 1;
+			/* recalculate the number of allocations */
+			nallocs = (uint32_t)
+				(RUN_DEFAULT_SIZE_BYTES(*size_idx) / unit_size);
+			LOG(3, "run (%lu) was constructed with "
+				"fewer (%u) than requested chunks (%u)",
+				unit_size, *size_idx, *size_idx + 1);
+		} else {
+			LOG(3, "run (%lu) was constructed with "
+				"fewer units (%u) than optimal (%u), "
+				"this might lead to "
+				"inefficient memory utilization!",
+				unit_size,
+				RUN_DEFAULT_BITMAP_NBITS, nallocs);
+
+			nallocs = RUN_DEFAULT_BITMAP_NBITS;
+		}
+	}
+
+	return nallocs - (alignment ? 1 : 0);
+}
+
+/*
+ * memblock_run_bitmap -- calculate bitmap parameters for given arguments
+ */
+void
+memblock_run_bitmap(uint32_t *size_idx, uint16_t flags,
+	uint64_t unit_size, uint64_t alignment, void *content,
+	struct run_bitmap *b)
+{
+	ASSERTne(*size_idx, 0);
+
+	/*
+	 * Flexible bitmaps have a variably sized values array.
The size varies
+	 * depending on:
+	 *	alignment - initial run alignment might require up to a unit
+	 *	size idx - the larger the run, the more units it carries
+	 *	unit_size - the smaller the unit size, the more units per run
+	 *
+	 * The size of the bitmap also has to be calculated in such a way that
+	 * the beginning of the allocation data is cacheline aligned. This is
+	 * required to perform many optimizations throughout the codebase.
+	 * This alignment requirement means that some of the bitmap values might
+	 * remain unused and will serve only as padding for data.
+	 */
+	if (flags & CHUNK_FLAG_FLEX_BITMAP) {
+		/*
+		 * First calculate the number of values without accounting for
+		 * the bitmap size.
+		 */
+		size_t content_size = RUN_CONTENT_SIZE_BYTES(*size_idx);
+		b->nbits = (unsigned)(content_size / unit_size);
+		b->nvalues = util_div_ceil(b->nbits, RUN_BITS_PER_VALUE);
+
+		/*
+		 * Then, align the number of values up, so that the cacheline
+		 * alignment is preserved.
+		 */
+		b->nvalues = ALIGN_UP(b->nvalues + RUN_BASE_METADATA_VALUES,
+			(unsigned)(CACHELINE_SIZE / sizeof(*b->values)))
+			- RUN_BASE_METADATA_VALUES;
+
+		/*
+		 * This is the total number of bytes needed for the bitmap AND
+		 * padding.
+		 */
+		b->size = b->nvalues * sizeof(*b->values);
+
+		/*
+		 * Calculate the number of allocations again, but this time
+		 * accounting for the bitmap/padding.
+		 */
+		b->nbits = (unsigned)((content_size - b->size) / unit_size)
+			- (alignment ? 1U : 0U);
+
+		/*
+		 * The last step is to calculate how much of the padding
+		 * is left at the end of the bitmap.
+		 */
+		unsigned unused_bits = (b->nvalues * RUN_BITS_PER_VALUE)
+			- b->nbits;
+		unsigned unused_values = unused_bits / RUN_BITS_PER_VALUE;
+		b->nvalues -= unused_values;
+
+		b->values = (uint64_t *)content;
+
+		return;
+	}
+
+	b->size = RUN_DEFAULT_BITMAP_SIZE;
+	b->nbits = memblock_run_default_nallocs(size_idx, flags,
+		unit_size, alignment);
+
+	unsigned unused_bits = RUN_DEFAULT_BITMAP_NBITS - b->nbits;
+	unsigned unused_values = unused_bits / RUN_BITS_PER_VALUE;
+	b->nvalues = RUN_DEFAULT_BITMAP_VALUES - unused_values;
+
+	b->values = (uint64_t *)content;
+}
+
+/*
+ * run_get_bitmap -- initializes run bitmap information
+ */
+static void
+run_get_bitmap(const struct memory_block *m, struct run_bitmap *b)
+{
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+
+	if (m->cached_bitmap != NULL) {
+		*b = *m->cached_bitmap;
+		b->values = (uint64_t *)run->content;
+	} else {
+		struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+
+		uint32_t size_idx = hdr->size_idx;
+		memblock_run_bitmap(&size_idx, hdr->flags, run->hdr.block_size,
+			run->hdr.alignment, run->content, b);
+		ASSERTeq(size_idx, hdr->size_idx);
+	}
+}
+
+/*
+ * huge_block_size -- returns the compile-time constant which defines the
+ *	huge memory block size.
+ */
+static size_t
+huge_block_size(const struct memory_block *m)
+{
+	return CHUNKSIZE;
+}
+
+/*
+ * run_block_size -- looks for the right chunk and returns the block size
+ *	information that is attached to the run block metadata.
+ */
+static size_t
+run_block_size(const struct memory_block *m)
+{
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+
+	return run->hdr.block_size;
+}
+
+/*
+ * huge_get_real_data -- returns a pointer to the beginning of the data of
+ *	a huge block
+ */
+static void *
+huge_get_real_data(const struct memory_block *m)
+{
+	return heap_get_chunk(m->heap, m)->data;
+}
+
+/*
+ * run_get_data_start -- (internal) returns the pointer to the beginning of
+ *	allocations in a run
+ */
+static char *
+run_get_data_start(const struct memory_block *m)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	if (hdr->flags & CHUNK_FLAG_ALIGNED) {
+		/*
+		 * Alignment is a property of the user data in allocations.
+		 * Since objects have headers, we need to take them into
+		 * account when calculating the address.
+		 */
+		uintptr_t hsize = header_type_to_size[m->header_type];
+		uintptr_t base = (uintptr_t)run->content +
+			b.size + hsize;
+		return (char *)(ALIGN_UP(base, run->hdr.alignment) - hsize);
+	} else {
+		return (char *)&run->content + b.size;
+	}
+}
+
+/*
+ * run_get_data_offset -- (internal) returns the number of bytes between
+ *	run base metadata and data
+ */
+static size_t
+run_get_data_offset(const struct memory_block *m)
+{
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+	return (size_t)run_get_data_start(m) - (size_t)&run->content;
+}
+
+/*
+ * run_get_real_data -- returns a pointer to the beginning of the data of
+ *	a run block
+ */
+static void *
+run_get_real_data(const struct memory_block *m)
+{
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+	ASSERT(run->hdr.block_size != 0);
+
+	return run_get_data_start(m) + (run->hdr.block_size * m->block_off);
+}
+
+/*
+ * block_get_user_data -- returns pointer to the data of a block
+ */
+static void *
+block_get_user_data(const struct memory_block *m)
+{
+	return (char *)m->m_ops->get_real_data(m) +
+		header_type_to_size[m->header_type];
+}
+
+/*
+ * chunk_get_chunk_hdr_value -- (internal) get value of a header for redo log
+ */
+static uint64_t
+chunk_get_chunk_hdr_value(uint16_t type, uint16_t flags, uint32_t size_idx)
+{
+	uint64_t val;
+	COMPILE_ERROR_ON(sizeof(struct chunk_header) != sizeof(uint64_t));
+
+	struct chunk_header hdr;
+	hdr.type = type;
+	hdr.flags = flags;
+	hdr.size_idx = size_idx;
+	memcpy(&val, &hdr, sizeof(val));
+
+	return val;
+}
+
+/*
+ * huge_prep_operation_hdr -- prepares the new value of a chunk header that will
+ *	be set after the operation concludes.
+ */
+static void
+huge_prep_operation_hdr(const struct memory_block *m, enum memblock_state op,
+	struct operation_context *ctx)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+
+	/*
+	 * Depending on the operation that needs to be performed, a new chunk
+	 * header needs to be prepared with the new chunk state.
+	 */
+	uint64_t val = chunk_get_chunk_hdr_value(
+		op == MEMBLOCK_ALLOCATED ? CHUNK_TYPE_USED : CHUNK_TYPE_FREE,
+		hdr->flags,
+		m->size_idx);
+
+	if (ctx == NULL) {
+		util_atomic_store_explicit64((uint64_t *)hdr, val,
+			memory_order_relaxed);
+		pmemops_persist(&m->heap->p_ops, hdr, sizeof(*hdr));
+	} else {
+		operation_add_entry(ctx, hdr, val, ULOG_OPERATION_SET);
+	}
+
+	VALGRIND_DO_MAKE_MEM_NOACCESS(hdr + 1,
+		(hdr->size_idx - 1) * sizeof(struct chunk_header));
+
+	/*
+	 * In the case of chunks larger than one unit, the footer must be
+	 * created immediately AFTER the persistent state is safely updated.
+ */
+	if (m->size_idx == 1)
+		return;
+
+	struct chunk_header *footer = hdr + m->size_idx - 1;
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(footer, sizeof(*footer));
+
+	val = chunk_get_chunk_hdr_value(CHUNK_TYPE_FOOTER, 0, m->size_idx);
+
+	/*
+	 * It's only safe to write the footer AFTER the persistent part of
+	 * the operation has been successfully processed because the footer
+	 * pointer might point to a currently valid persistent state
+	 * of a different chunk.
+	 * The footer entry change is updated as transient because it will
+	 * be recreated at heap boot regardless - it's just needed for runtime
+	 * operations.
+	 */
+	if (ctx == NULL) {
+		util_atomic_store_explicit64((uint64_t *)footer, val,
+			memory_order_relaxed);
+		VALGRIND_SET_CLEAN(footer, sizeof(*footer));
+	} else {
+		operation_add_typed_entry(ctx,
+			footer, val, ULOG_OPERATION_SET, LOG_TRANSIENT);
+	}
+}
+
+/*
+ * run_prep_operation_hdr -- prepares the new value for a select few bytes of
+ *	a run bitmap that will be set after the operation concludes.
+ *
+ * It's VERY important to keep in mind that the particular value of the
+ * bitmap this method is modifying must not be changed after this function
+ * is called and before the operation is processed.
+ */
+static void
+run_prep_operation_hdr(const struct memory_block *m, enum memblock_state op,
+	struct operation_context *ctx)
+{
+	ASSERT(m->size_idx <= RUN_BITS_PER_VALUE);
+
+	/*
+	 * Free blocks are represented by clear bits and used blocks by set
+	 * bits - which is the reverse of the commonly used scheme.
+	 *
+	 * Here a bit mask is prepared that flips the bits that represent the
+	 * memory block provided by the caller - because both the size index
+	 * and the block offset are tied 1:1 to the bitmap, this operation is
+	 * relatively simple.
+	 */
+	uint64_t bmask;
+	if (m->size_idx == RUN_BITS_PER_VALUE) {
+		ASSERTeq(m->block_off % RUN_BITS_PER_VALUE, 0);
+		bmask = UINT64_MAX;
+	} else {
+		bmask = ((1ULL << m->size_idx) - 1ULL) <<
+				(m->block_off % RUN_BITS_PER_VALUE);
+	}
+
+	/*
+	 * The run bitmap is composed of several 8 byte values, so a proper
+	 * element of the bitmap array must be selected.
+	 */
+	unsigned bpos = m->block_off / RUN_BITS_PER_VALUE;
+
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	/* the bit mask is applied immediately by the add entry operations */
+	if (op == MEMBLOCK_ALLOCATED) {
+		operation_add_entry(ctx, &b.values[bpos],
+			bmask, ULOG_OPERATION_OR);
+	} else if (op == MEMBLOCK_FREE) {
+		operation_add_entry(ctx, &b.values[bpos],
+			~bmask, ULOG_OPERATION_AND);
+	} else {
+		ASSERT(0);
+	}
+}
+
+/*
+ * huge_get_lock -- because huge memory blocks are always allocated from a
+ *	single bucket, there's no reason to lock them - the bucket itself is
+ *	protected.
+ */
+static os_mutex_t *
+huge_get_lock(const struct memory_block *m)
+{
+	return NULL;
+}
+
+/*
+ * run_get_lock -- gets the runtime mutex from the heap.
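+ *
+ * A usage sketch (hypothetical caller, not code from this file):
+ *
+ *	os_mutex_t *lock = m->m_ops->get_lock(m);
+ *	if (lock != NULL)
+ *		util_mutex_lock(lock);
+ *
+ * Huge blocks return NULL (see huge_get_lock above), so callers must
+ * handle the lock-less case.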
+ */
+static os_mutex_t *
+run_get_lock(const struct memory_block *m)
+{
+	return heap_get_run_lock(m->heap, m->chunk_id);
+}
+
+/*
+ * huge_get_state -- returns whether a huge block is allocated or not
+ */
+static enum memblock_state
+huge_get_state(const struct memory_block *m)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+
+	if (hdr->type == CHUNK_TYPE_USED)
+		return MEMBLOCK_ALLOCATED;
+
+	if (hdr->type == CHUNK_TYPE_FREE)
+		return MEMBLOCK_FREE;
+
+	return MEMBLOCK_STATE_UNKNOWN;
+}
+
+/*
+ * run_get_state -- returns whether a block from a run is allocated or not
+ */
+static enum memblock_state
+run_get_state(const struct memory_block *m)
+{
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	unsigned v = m->block_off / RUN_BITS_PER_VALUE;
+	uint64_t bitmap = b.values[v];
+	unsigned bit = m->block_off % RUN_BITS_PER_VALUE;
+
+	unsigned bit_last = bit + m->size_idx;
+	ASSERT(bit_last <= RUN_BITS_PER_VALUE);
+
+	for (unsigned i = bit; i < bit_last; ++i) {
+		if (!BIT_IS_CLR(bitmap, i)) {
+			return MEMBLOCK_ALLOCATED;
+		}
+	}
+
+	return MEMBLOCK_FREE;
+}
+
+/*
+ * huge_ensure_header_type -- checks the header type of a chunk and modifies
+ *	it if necessary. This is fail-safe atomic.
+ */
+static void
+huge_ensure_header_type(const struct memory_block *m,
+	enum header_type t)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	ASSERTeq(hdr->type, CHUNK_TYPE_FREE);
+
+	if ((hdr->flags & header_type_to_flag[t]) == 0) {
+		VALGRIND_ADD_TO_TX(hdr, sizeof(*hdr));
+		uint16_t f = ((uint16_t)header_type_to_flag[t]);
+		hdr->flags |= f;
+		pmemops_persist(&m->heap->p_ops, hdr, sizeof(*hdr));
+		VALGRIND_REMOVE_FROM_TX(hdr, sizeof(*hdr));
+	}
+}
+
+/*
+ * run_ensure_header_type -- runs must be created with appropriate header type.
+ */
+static void
+run_ensure_header_type(const struct memory_block *m,
+	enum header_type t)
+{
+#ifdef DEBUG
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	ASSERTeq(hdr->type, CHUNK_TYPE_RUN);
+	ASSERT((hdr->flags & header_type_to_flag[t]) == header_type_to_flag[t]);
+#endif
+}
+
+/*
+ * block_get_real_size -- returns the size of a memory block that includes all
+ *	of the overhead (headers)
+ */
+static size_t
+block_get_real_size(const struct memory_block *m)
+{
+	/*
+	 * There are two valid ways to get a size. If the memory block was
+	 * initialized properly and the size index is set, the chunk unit size
+	 * can simply be multiplied by that index; otherwise we need to look at
+	 * the allocation header.
+	 */
+	if (m->size_idx != 0) {
+		return m->m_ops->block_size(m) * m->size_idx;
+	} else {
+		return memblock_header_ops[m->header_type].get_size(m);
+	}
+}
+
+/*
+ * block_get_user_size -- returns the size of a memory block without overheads;
+ *	this is the size of the data block that can be used.
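+ *
+ * For example, assuming a legacy header (64 bytes, as noted in
+ * memblock_header_legacy_write above): a block whose real size is 256
+ * bytes has a user size of 256 - 64 = 192 bytes; with HEADER_NONE the
+ * two sizes are equal.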
+ */
+static size_t
+block_get_user_size(const struct memory_block *m)
+{
+	return block_get_real_size(m) - header_type_to_size[m->header_type];
+}
+
+/*
+ * block_write_header -- writes a header of an allocation
+ */
+static void
+block_write_header(const struct memory_block *m,
+	uint64_t extra_field, uint16_t flags)
+{
+	memblock_header_ops[m->header_type].write(m,
+		block_get_real_size(m), extra_field, flags);
+}
+
+/*
+ * block_invalidate -- invalidates allocation data and header
+ */
+static void
+block_invalidate(const struct memory_block *m)
+{
+	void *data = m->m_ops->get_user_data(m);
+	size_t size = m->m_ops->get_user_size(m);
+	VALGRIND_SET_CLEAN(data, size);
+
+	memblock_header_ops[m->header_type].invalidate(m);
+}
+
+/*
+ * block_reinit_header -- reinitializes a block after a heap restart
+ */
+static void
+block_reinit_header(const struct memory_block *m)
+{
+	memblock_header_ops[m->header_type].reinit(m);
+}
+
+/*
+ * block_get_extra -- returns the extra field of an allocation
+ */
+static uint64_t
+block_get_extra(const struct memory_block *m)
+{
+	return memblock_header_ops[m->header_type].get_extra(m);
+}
+
+/*
+ * block_get_flags -- returns the flags of an allocation
+ */
+static uint16_t
+block_get_flags(const struct memory_block *m)
+{
+	return memblock_header_ops[m->header_type].get_flags(m);
+}
+
+/*
+ * run_process_bitmap_value -- (internal) looks for unset bits in the
+ *	value, creates a valid memory block out of them and calls the
+ *	provided callback for each such block.
+ */
+static int
+run_process_bitmap_value(const struct memory_block *m,
+	uint64_t value, uint32_t base_offset, object_callback cb, void *arg)
+{
+	int ret = 0;
+
+	uint64_t shift = 0; /* already processed bits */
+	struct memory_block s = *m;
+	do {
+		/*
+		 * Shift the value so that the next memory block starts on the
+		 * least significant position:
+		 *	..............0 (free block)
+		 * or	..............1 (used block)
+		 */
+		uint64_t shifted = value >> shift;
+
+		/* all clear or set bits indicate the end of traversal */
+		if (shifted == 0) {
+			/*
+			 * Insert the remaining blocks as free. Remember that
+			 * unsigned values are always zero-filled, so we must
+			 * take the current shift into account.
+			 */
+			s.block_off = (uint32_t)(base_offset + shift);
+			s.size_idx = (uint32_t)(RUN_BITS_PER_VALUE - shift);
+
+			if ((ret = cb(&s, arg)) != 0)
+				return ret;
+
+			break;
+		} else if (shifted == UINT64_MAX) {
+			break;
+		}
+
+		/*
+		 * Offset and size of the next free block; either of these
+		 * can be zero depending on where the free block is located
+		 * in the value.
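+		 *
+		 * For example (illustrative values only): with
+		 * shifted = ...000111 in binary (three used blocks at the
+		 * bottom), off is 3 and size is 0, so the code below merely
+		 * skips past the used blocks; with shifted = ...111000
+		 * (three free blocks at the bottom), off is 0 and size is 3,
+		 * and a free block of size 3 is reported via the callback.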
+ */
+		unsigned off = (unsigned)util_lssb_index64(~shifted);
+		unsigned size = (unsigned)util_lssb_index64(shifted);
+
+		shift += off + size;
+
+		if (size != 0) { /* zero size means skip to the next value */
+			s.block_off = (uint32_t)(base_offset + (shift - size));
+			s.size_idx = (uint32_t)(size);
+
+			memblock_rebuild_state(m->heap, &s);
+			if ((ret = cb(&s, arg)) != 0)
+				return ret;
+		}
+	} while (shift != RUN_BITS_PER_VALUE);
+
+	return 0;
+}
+
+/*
+ * run_iterate_free -- iterates over free blocks in a run
+ */
+static int
+run_iterate_free(const struct memory_block *m, object_callback cb, void *arg)
+{
+	int ret = 0;
+	uint32_t block_off = 0;
+
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	struct memory_block nm = *m;
+	for (unsigned i = 0; i < b.nvalues; ++i) {
+		uint64_t v = b.values[i];
+		ASSERT((uint64_t)RUN_BITS_PER_VALUE * (uint64_t)i
+			<= UINT32_MAX);
+		block_off = RUN_BITS_PER_VALUE * i;
+		ret = run_process_bitmap_value(&nm, v, block_off, cb, arg);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * run_iterate_used -- iterates over used blocks in a run
+ */
+static int
+run_iterate_used(const struct memory_block *m, object_callback cb, void *arg)
+{
+	uint32_t i = m->block_off / RUN_BITS_PER_VALUE;
+	uint32_t block_start = m->block_off % RUN_BITS_PER_VALUE;
+	uint32_t block_off;
+
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+
+	struct memory_block iter = *m;
+
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	for (; i < b.nvalues; ++i) {
+		uint64_t v = b.values[i];
+		block_off = (uint32_t)(RUN_BITS_PER_VALUE * i);
+
+		for (uint32_t j = block_start; j < RUN_BITS_PER_VALUE; ) {
+			if (block_off + j >= (uint32_t)b.nbits)
+				break;
+
+			if (!BIT_IS_CLR(v, j)) {
+				iter.block_off = (uint32_t)(block_off + j);
+
+				/*
+				 * The size index of this memory block cannot be
+				 * retrieved at this time because the header
+				 * might not be initialized in valgrind yet.
+				 */
+				iter.size_idx = 0;
+
+				if (cb(&iter, arg) != 0)
+					return 1;
+
+				iter.size_idx = CALC_SIZE_IDX(
+					run->hdr.block_size,
+					iter.m_ops->get_real_size(&iter));
+				j = (uint32_t)(j + iter.size_idx);
+			} else {
+				++j;
+			}
+		}
+		block_start = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * huge_iterate_free -- calls cb on memory block if it's free
+ */
+static int
+huge_iterate_free(const struct memory_block *m, object_callback cb, void *arg)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+
+	return hdr->type == CHUNK_TYPE_FREE ? cb(m, arg) : 0;
+}
+
+/*
+ * huge_iterate_used -- calls cb on memory block if it's used
+ */
+static int
+huge_iterate_used(const struct memory_block *m, object_callback cb, void *arg)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+
+	return hdr->type == CHUNK_TYPE_USED ? cb(m, arg) : 0;
+}
+
+/*
+ * huge_vg_init -- initializes chunk metadata in memcheck state
+ */
+static void
+huge_vg_init(const struct memory_block *m, int objects,
+	object_callback cb, void *arg)
+{
+	struct zone *z = ZID_TO_ZONE(m->heap->layout, m->zone_id);
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	struct chunk *chunk = heap_get_chunk(m->heap, m);
+	VALGRIND_DO_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
+
+	/*
+	 * Mark unused chunk headers as not accessible.
+ */
+	VALGRIND_DO_MAKE_MEM_NOACCESS(
+		&z->chunk_headers[m->chunk_id + 1],
+		(m->size_idx - 1) *
+		sizeof(struct chunk_header));
+
+	size_t size = block_get_real_size(m);
+	VALGRIND_DO_MAKE_MEM_NOACCESS(chunk, size);
+
+	if (objects && huge_get_state(m) == MEMBLOCK_ALLOCATED) {
+		if (cb(m, arg) != 0)
+			FATAL("failed to initialize valgrind state");
+	}
+}
+
+/*
+ * run_vg_init -- initializes run metadata in memcheck state
+ */
+static void
+run_vg_init(const struct memory_block *m, int objects,
+	object_callback cb, void *arg)
+{
+	struct zone *z = ZID_TO_ZONE(m->heap->layout, m->zone_id);
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	struct chunk_run *run = heap_get_chunk_run(m->heap, m);
+	VALGRIND_DO_MAKE_MEM_DEFINED(hdr, sizeof(*hdr));
+
+	/* set the run metadata as defined */
+	VALGRIND_DO_MAKE_MEM_DEFINED(run, RUN_BASE_METADATA_SIZE);
+
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+
+	/*
+	 * Mark run data headers as defined.
+	 */
+	for (unsigned j = 1; j < m->size_idx; ++j) {
+		struct chunk_header *data_hdr =
+			&z->chunk_headers[m->chunk_id + j];
+		VALGRIND_DO_MAKE_MEM_DEFINED(data_hdr,
+			sizeof(struct chunk_header));
+		ASSERTeq(data_hdr->type, CHUNK_TYPE_RUN_DATA);
+	}
+
+	VALGRIND_DO_MAKE_MEM_NOACCESS(run, SIZEOF_RUN(run, m->size_idx));
+
+	/* set the run bitmap as defined */
+	VALGRIND_DO_MAKE_MEM_DEFINED(run, b.size + RUN_BASE_METADATA_SIZE);
+
+	if (objects) {
+		if (run_iterate_used(m, cb, arg) != 0)
+			FATAL("failed to initialize valgrind state");
+	}
+}
+
+/*
+ * run_reinit_chunk -- run reinitialization on first zone traversal
+ */
+static void
+run_reinit_chunk(const struct memory_block *m)
+{
+	/* noop */
+}
+
+/*
+ * huge_write_footer -- (internal) writes a chunk footer
+ */
+static void
+huge_write_footer(struct chunk_header *hdr, uint32_t size_idx)
+{
+	if (size_idx == 1) /* that would overwrite the header */
+		return;
+
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(hdr + size_idx - 1, sizeof(*hdr));
+
+	struct chunk_header f = *hdr;
+	f.type = CHUNK_TYPE_FOOTER;
+	f.size_idx = size_idx;
+	*(hdr + size_idx - 1) = f;
+	/* no need to persist, footers are recreated in heap_populate_buckets */
+	VALGRIND_SET_CLEAN(hdr + size_idx - 1, sizeof(f));
+}
+
+/*
+ * huge_reinit_chunk -- chunk reinitialization on first zone traversal
+ */
+static void
+huge_reinit_chunk(const struct memory_block *m)
+{
+	struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m);
+	if (hdr->type == CHUNK_TYPE_USED)
+		huge_write_footer(hdr, hdr->size_idx);
+}
+
+/*
+ * run_calc_free -- calculates the number of free units in a run
+ */
+static void
+run_calc_free(const struct memory_block *m,
+	uint32_t *free_space, uint32_t *max_free_block)
+{
+	struct run_bitmap b;
+	run_get_bitmap(m, &b);
+	for (unsigned i = 0; i < b.nvalues; ++i) {
+		uint64_t value = ~b.values[i];
+		if (value == 0)
+			continue;
+
+		uint32_t free_in_value = util_popcount64(value);
+		*free_space = *free_space + free_in_value;
+
+		/*
+		 * If this value has fewer free blocks than the already found
+		 * max, there's no point in calculating.
+		 */
+		if (free_in_value < *max_free_block)
+			continue;
+
+		/* if the entire value is empty, no point in calculating */
+		if (free_in_value == RUN_BITS_PER_VALUE) {
+			*max_free_block = RUN_BITS_PER_VALUE;
+			continue;
+		}
+
+		/* if already at max, no point in calculating */
+		if (*max_free_block == RUN_BITS_PER_VALUE)
+			continue;
+
+		/*
+		 * Calculate the biggest free block in the bitmap.
+		 * This algorithm is not the most clever imaginable, but it's
+		 * easy to implement and fast enough.
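+		 *
+		 * The loop below relies on a standard bit trick:
+		 * value &= (value << 1) shortens every run of consecutive set
+		 * bits by one per iteration, so the number of iterations
+		 * needed to reach zero equals the length of the longest run.
+		 * For example, value = 0b0111010 becomes 0b0110000, then
+		 * 0b0100000, then 0, giving n = 3.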
+ */ + uint16_t n = 0; + while (value != 0) { + value &= (value << 1ULL); + n++; + } + + if (n > *max_free_block) + *max_free_block = n; + } +} + +/* + * huge_fill_pct -- huge blocks by definition use the entirety of a chunk + */ +static unsigned +huge_fill_pct(const struct memory_block *m) +{ + return 100; +} + +/* + * run_fill_pct -- calculates the percentage of allocated units inside of a run + */ +static unsigned +run_fill_pct(const struct memory_block *m) +{ + struct run_bitmap b; + run_get_bitmap(m, &b); + unsigned clearbits = 0; + for (unsigned i = 0; i < b.nvalues; ++i) { + uint64_t value = ~b.values[i]; + if (value == 0) + continue; + + clearbits += util_popcount64(value); + } + ASSERT(b.nbits >= clearbits); + unsigned setbits = b.nbits - clearbits; + + return (100 * setbits) / b.nbits; +} + +static const struct memory_block_ops mb_ops[MAX_MEMORY_BLOCK] = { + [MEMORY_BLOCK_HUGE] = { + .block_size = huge_block_size, + .prep_hdr = huge_prep_operation_hdr, + .get_lock = huge_get_lock, + .get_state = huge_get_state, + .get_user_data = block_get_user_data, + .get_real_data = huge_get_real_data, + .get_user_size = block_get_user_size, + .get_real_size = block_get_real_size, + .write_header = block_write_header, + .invalidate = block_invalidate, + .ensure_header_type = huge_ensure_header_type, + .reinit_header = block_reinit_header, + .vg_init = huge_vg_init, + .get_extra = block_get_extra, + .get_flags = block_get_flags, + .iterate_free = huge_iterate_free, + .iterate_used = huge_iterate_used, + .reinit_chunk = huge_reinit_chunk, + .calc_free = NULL, + .get_bitmap = NULL, + .fill_pct = huge_fill_pct, + }, + [MEMORY_BLOCK_RUN] = { + .block_size = run_block_size, + .prep_hdr = run_prep_operation_hdr, + .get_lock = run_get_lock, + .get_state = run_get_state, + .get_user_data = block_get_user_data, + .get_real_data = run_get_real_data, + .get_user_size = block_get_user_size, + .get_real_size = block_get_real_size, + .write_header = block_write_header, + .invalidate = block_invalidate, + .ensure_header_type = run_ensure_header_type, + .reinit_header = block_reinit_header, + .vg_init = run_vg_init, + .get_extra = block_get_extra, + .get_flags = block_get_flags, + .iterate_free = run_iterate_free, + .iterate_used = run_iterate_used, + .reinit_chunk = run_reinit_chunk, + .calc_free = run_calc_free, + .get_bitmap = run_get_bitmap, + .fill_pct = run_fill_pct, + } +}; + +/* + * memblock_huge_init -- initializes a new huge memory block + */ +struct memory_block +memblock_huge_init(struct palloc_heap *heap, + uint32_t chunk_id, uint32_t zone_id, uint32_t size_idx) +{ + struct memory_block m = MEMORY_BLOCK_NONE; + m.chunk_id = chunk_id; + m.zone_id = zone_id; + m.size_idx = size_idx; + m.heap = heap; + + struct chunk_header nhdr = { + .type = CHUNK_TYPE_FREE, + .flags = 0, + .size_idx = size_idx + }; + + struct chunk_header *hdr = heap_get_chunk_hdr(heap, &m); + + VALGRIND_DO_MAKE_MEM_UNDEFINED(hdr, sizeof(*hdr)); + VALGRIND_ANNOTATE_NEW_MEMORY(hdr, sizeof(*hdr)); + + *hdr = nhdr; /* write the entire header (8 bytes) at once */ + + pmemops_persist(&heap->p_ops, hdr, sizeof(*hdr)); + + huge_write_footer(hdr, size_idx); + + memblock_rebuild_state(heap, &m); + + return m; +} + +/* + * memblock_run_init -- initializes a new run memory block + */ +struct memory_block +memblock_run_init(struct palloc_heap *heap, + uint32_t chunk_id, uint32_t zone_id, struct run_descriptor *rdsc) +{ + uint32_t size_idx = rdsc->size_idx; + ASSERTne(size_idx, 0); + + struct memory_block m = MEMORY_BLOCK_NONE; + m.chunk_id = 
chunk_id;
+	m.zone_id = zone_id;
+	m.size_idx = size_idx;
+	m.heap = heap;
+
+	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
+
+	struct chunk_run *run = heap_get_chunk_run(heap, &m);
+	size_t runsize = SIZEOF_RUN(run, size_idx);
+
+	VALGRIND_DO_MAKE_MEM_UNDEFINED(run, runsize);
+
+	/* add/remove chunk_run and chunk_header to valgrind transaction */
+	VALGRIND_ADD_TO_TX(run, runsize);
+	run->hdr.block_size = rdsc->unit_size;
+	run->hdr.alignment = rdsc->alignment;
+
+	struct run_bitmap b = rdsc->bitmap;
+	b.values = (uint64_t *)run->content;
+
+	size_t bitmap_size = b.size;
+
+	/* set all the bits */
+	memset(b.values, 0xFF, bitmap_size);
+
+	/* clear only the bits available for allocations from this bucket */
+	memset(b.values, 0, sizeof(*b.values) * (b.nvalues - 1));
+
+	unsigned trailing_bits = b.nbits % RUN_BITS_PER_VALUE;
+	uint64_t last_value = UINT64_MAX << trailing_bits;
+	b.values[b.nvalues - 1] = last_value;
+
+	VALGRIND_REMOVE_FROM_TX(run, runsize);
+
+	pmemops_flush(&heap->p_ops, run,
+		sizeof(struct chunk_run_header) +
+		bitmap_size);
+
+	struct chunk_header run_data_hdr;
+	run_data_hdr.type = CHUNK_TYPE_RUN_DATA;
+	run_data_hdr.flags = 0;
+
+	VALGRIND_ADD_TO_TX(&z->chunk_headers[chunk_id],
+		sizeof(struct chunk_header) * size_idx);
+
+	struct chunk_header *data_hdr;
+	for (unsigned i = 1; i < size_idx; ++i) {
+		data_hdr = &z->chunk_headers[chunk_id + i];
+		VALGRIND_DO_MAKE_MEM_UNDEFINED(data_hdr, sizeof(*data_hdr));
+		VALGRIND_ANNOTATE_NEW_MEMORY(data_hdr, sizeof(*data_hdr));
+		run_data_hdr.size_idx = i;
+		*data_hdr = run_data_hdr;
+	}
+	pmemops_persist(&heap->p_ops,
+		&z->chunk_headers[chunk_id + 1],
+		sizeof(struct chunk_header) * (size_idx - 1));
+
+	struct chunk_header *hdr = &z->chunk_headers[chunk_id];
+	ASSERT(hdr->type == CHUNK_TYPE_FREE);
+
+	VALGRIND_ANNOTATE_NEW_MEMORY(hdr, sizeof(*hdr));
+
+	struct chunk_header run_hdr;
+	run_hdr.size_idx = hdr->size_idx;
+	run_hdr.type = CHUNK_TYPE_RUN;
+	run_hdr.flags = rdsc->flags;
+	*hdr = run_hdr;
+	pmemops_persist(&heap->p_ops, hdr, sizeof(*hdr));
+
+	VALGRIND_REMOVE_FROM_TX(&z->chunk_headers[chunk_id],
+		sizeof(struct chunk_header) * size_idx);
+
+	memblock_rebuild_state(heap, &m);
+	m.cached_bitmap = &rdsc->bitmap;
+
+	return m;
+}
+
+/*
+ * memblock_detect_type -- looks for the corresponding chunk header and,
+ *	depending on the chunk's type, returns the right memory block type
+ */
+static enum memory_block_type
+memblock_detect_type(struct palloc_heap *heap, const struct memory_block *m)
+{
+	enum memory_block_type ret;
+
+	switch (heap_get_chunk_hdr(heap, m)->type) {
+		case CHUNK_TYPE_RUN:
+		case CHUNK_TYPE_RUN_DATA:
+			ret = MEMORY_BLOCK_RUN;
+			break;
+		case CHUNK_TYPE_FREE:
+		case CHUNK_TYPE_USED:
+		case CHUNK_TYPE_FOOTER:
+			ret = MEMORY_BLOCK_HUGE;
+			break;
+		default:
+			/* unreachable */
+			FATAL("possible zone chunks metadata corruption");
+	}
+	return ret;
+}
+
+/*
+ * memblock_from_offset_opt -- resolves memory block data from an offset that
+ *	originates from the heap
+ */
+struct memory_block
+memblock_from_offset_opt(struct palloc_heap *heap, uint64_t off, int size)
+{
+	struct memory_block m = MEMORY_BLOCK_NONE;
+	m.heap = heap;
+
+	off -= HEAP_PTR_TO_OFF(heap, &heap->layout->zone0);
+	m.zone_id = (uint32_t)(off / ZONE_MAX_SIZE);
+
+	off -= (ZONE_MAX_SIZE * m.zone_id) + sizeof(struct zone);
+	m.chunk_id = (uint32_t)(off / CHUNKSIZE);
+
+	struct chunk_header *hdr = heap_get_chunk_hdr(heap, &m);
+
+	if (hdr->type == CHUNK_TYPE_RUN_DATA)
+		m.chunk_id -= hdr->size_idx;
+
+	off -= CHUNKSIZE * m.chunk_id;
+
m.header_type = memblock_header_type(&m);
+
+	off -= header_type_to_size[m.header_type];
+
+	m.type = off != 0 ? MEMORY_BLOCK_RUN : MEMORY_BLOCK_HUGE;
+	ASSERTeq(memblock_detect_type(heap, &m), m.type);
+
+	m.m_ops = &mb_ops[m.type];
+
+	uint64_t unit_size = m.m_ops->block_size(&m);
+
+	if (off != 0) { /* run */
+		off -= run_get_data_offset(&m);
+		off -= RUN_BASE_METADATA_SIZE;
+		m.block_off = (uint16_t)(off / unit_size);
+		off -= m.block_off * unit_size;
+	}
+
+	struct alloc_class_collection *acc = heap_alloc_classes(heap);
+	if (acc != NULL) {
+		struct alloc_class *ac = alloc_class_by_run(acc,
+			unit_size, hdr->flags, hdr->size_idx);
+		if (ac != NULL)
+			m.cached_bitmap = &ac->rdsc.bitmap;
+	}
+
+	m.size_idx = !size ? 0 : CALC_SIZE_IDX(unit_size,
+		memblock_header_ops[m.header_type].get_size(&m));
+
+	ASSERTeq(off, 0);
+
+	return m;
+}
+
+/*
+ * memblock_from_offset -- returns memory block with size
+ */
+struct memory_block
+memblock_from_offset(struct palloc_heap *heap, uint64_t off)
+{
+	return memblock_from_offset_opt(heap, off, 1);
+}
+
+/*
+ * memblock_rebuild_state -- fills in the runtime-state related fields of a
+ *	memory block structure
+ *
+ * This function must be called on all memory blocks that were created by hand
+ * (as opposed to retrieved from the memblock_from_offset function).
+ */
+void
+memblock_rebuild_state(struct palloc_heap *heap, struct memory_block *m)
+{
+	m->heap = heap;
+	m->header_type = memblock_header_type(m);
+	m->type = memblock_detect_type(heap, m);
+	m->m_ops = &mb_ops[m->type];
+	m->cached_bitmap = NULL;
+} diff --git a/src/pmdk/src/libpmemobj/memblock.h b/src/pmdk/src/libpmemobj/memblock.h new file mode 100644 index 000000000..653c0937d --- /dev/null +++ b/src/pmdk/src/libpmemobj/memblock.h @@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * memblock.h -- internal definitions for memory block
+ */
+
+#ifndef LIBPMEMOBJ_MEMBLOCK_H
+#define LIBPMEMOBJ_MEMBLOCK_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "os_thread.h"
+#include "heap_layout.h"
+#include "memops.h"
+#include "palloc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MEMORY_BLOCK_NONE \
+(struct memory_block)\
+{0, 0, 0, 0, NULL, NULL, MAX_HEADER_TYPES, MAX_MEMORY_BLOCK, NULL}
+
+#define MEMORY_BLOCK_IS_NONE(_m)\
+((_m).heap == NULL)
+
+#define MEMORY_BLOCK_EQUALS(lhs, rhs)\
+((lhs).zone_id == (rhs).zone_id && (lhs).chunk_id == (rhs).chunk_id &&\
+(lhs).block_off == (rhs).block_off && (lhs).heap == (rhs).heap)
+
+enum memory_block_type {
+	/*
+	 * Huge memory blocks are directly backed by memory chunks. A single
+	 * huge block can consist of several chunks.
+	 * The persistent representation of huge memory blocks can be thought
+	 * of as a doubly linked list with variable length elements.
+	 * That list is stored in the chunk headers array where one element
+	 * directly corresponds to one chunk.
+	 *
+	 * U - used, F - free, R - footer, . - empty
+	 * |U| represents a used chunk with a size index of 1, with type
+	 * information (CHUNK_TYPE_USED) stored in the corresponding header
+	 * array element - chunk_headers[chunk_id].
+	 *
+	 * |F...R| represents a free chunk with size index of 5. The empty
+	 * chunk headers have undefined values and shouldn't be used. All
+	 * chunks with size larger than 1 must have a footer in the last
+	 * corresponding header array element -
+	 * chunk_headers[chunk_id + size_idx - 1].
+	 *
+	 * The above representation of chunks will be used to describe the
+	 * way fail-safety is achieved during heap operations.
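+	 *
+	 * For reference, each element of that list is a single 8-byte chunk
+	 * header (a packed struct holding the type, flags and size_idx
+	 * fields, see chunk_get_chunk_hdr_value() in memblock.c), which is
+	 * what makes the single-write fail-safe atomic updates described
+	 * below possible.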
+	 *
+	 * Allocation of huge memory block with size index 5:
+	 * Initial heap state: |U| <> |F..R| <> |U| <> |F......R|
+	 *
+	 * The only block that matches that size is at the very end of the
+	 * chunks list: |F......R|
+	 *
+	 * As the request was for a memory block of size 5, and this one's size
+	 * is 7, the chunk first needs to be split in two.
+	 * 1) The last chunk header of the new allocation is marked as footer
+	 *	and the block after that one is marked as free: |F...RF.R|
+	 *	This is allowed and has no impact on the heap because this
+	 *	modification is to a chunk header that is otherwise unused; in
+	 *	other words, the linked list didn't change.
+	 *
+	 * 2) The size index of the first header is changed from previous value
+	 *	of 7 to 5: |F...R||F.R|
+	 *	This is a single fail-safe atomic operation and this is the
+	 *	first change that is noticeable by the heap operations.
+	 *	A single linked list element is split into two new ones.
+	 *
+	 * 3) The allocation process either uses a redo log or directly changes
+	 *	the chunk header type from free to used: |U...R| <> |F.R|
+	 *
+	 * In a similar fashion the reverse operation, free, is performed:
+	 * Initial heap state: |U| <> |F..R| <> |F| <> |U...R| <> |F.R|
+	 *
+	 * This is the heap after the previous example with the single chunk
+	 * in between changed from used to free.
+	 *
+	 * 1) Determine the neighbors of the memory block which is being
+	 *	freed.
+	 *
+	 * 2) Update the footer (if needed) information of the last chunk which
+	 *	is the memory block being freed or its neighbor to the right.
+	 *	|F| <> |U...R| <> |F.R << this one|
+	 *
+	 * 3) Update the size index and type of the left-most chunk header.
+	 *	And so this: |F << this one| <> |U...R| <> |F.R|
+	 *	becomes this: |F.......R|
+	 *	The entire chunk header can be updated in a single fail-safe
+	 *	atomic operation because its size is only 8 bytes (64 bits).
+	 */
+	MEMORY_BLOCK_HUGE,
+	/*
+	 * Run memory blocks are chunks with CHUNK_TYPE_RUN and size index of 1.
+	 * The entire chunk is subdivided into smaller blocks and has
+	 * additional metadata attached in the form of a bitmap - each bit
+	 * corresponds to a single block.
+	 * In this case there's no need to perform any coalescing or splitting
+	 * on the persistent metadata.
+	 * The bitmap is stored in a variable number of 64-bit values and,
+	 * because of the requirement of allocation fail-safe atomicity, the
+	 * maximum size index of a memory block from a run is 64, since that's
+	 * the limit of the atomic write guarantee.
+	 *
+	 * The allocation/deallocation process is a single 8 byte write that
+	 * sets/clears the corresponding bits. Depending on the user's choice,
+	 * it can be done either atomically or via the redo log when grouped
+	 * with other operations.
+	 * It's also important to note that in the case of a realloc it might
+	 * happen that a single 8 byte bitmap value needs its bits both set and
+	 * cleared - that's why run memory block metadata changes operate
+	 * by AND'ing or OR'ing a bitmask instead of directly setting the value.
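+	 *
+	 * A small illustration, mirroring run_prep_operation_hdr() in
+	 * memblock.c: to allocate a block of size_idx 3 at block offset 2 of
+	 * a bitmap value, the prepared mask is
+	 *
+	 *	bmask = ((1ULL << 3) - 1) << 2;	(binary 11100)
+	 *
+	 * and the allocation applies value |= bmask, while a free applies
+	 * value &= ~bmask, leaving all other bits of the value untouched.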
+	 */
+	MEMORY_BLOCK_RUN,
+
+	MAX_MEMORY_BLOCK
+};
+
+enum memblock_state {
+	MEMBLOCK_STATE_UNKNOWN,
+	MEMBLOCK_ALLOCATED,
+	MEMBLOCK_FREE,
+
+	MAX_MEMBLOCK_STATE,
+};
+
+/* runtime bitmap information for a run */
+struct run_bitmap {
+	unsigned nvalues; /* number of 8 byte values - size of values array */
+	unsigned nbits; /* number of valid bits */
+
+	size_t size; /* total size of the bitmap in bytes */
+
+	uint64_t *values; /* pointer to the bitmap's values array */
+};
+
+/* runtime information necessary to create a run */
+struct run_descriptor {
+	uint16_t flags; /* chunk flags for the run */
+	size_t unit_size; /* the size of a single unit in a run */
+	uint32_t size_idx; /* size index of a single run instance */
+	size_t alignment; /* required alignment of objects */
+	unsigned nallocs; /* number of allocs per run */
+	struct run_bitmap bitmap;
+};
+
+struct memory_block_ops {
+	/* returns memory block size */
+	size_t (*block_size)(const struct memory_block *m);
+
+	/* prepares header modification operation */
+	void (*prep_hdr)(const struct memory_block *m,
+		enum memblock_state dest_state, struct operation_context *ctx);
+
+	/* returns lock associated with memory block */
+	os_mutex_t *(*get_lock)(const struct memory_block *m);
+
+	/* returns whether a block is allocated or not */
+	enum memblock_state (*get_state)(const struct memory_block *m);
+
+	/* returns pointer to the data of a block */
+	void *(*get_user_data)(const struct memory_block *m);
+
+	/*
+	 * Returns the size of a memory block without overhead.
+	 * This is the size of a data block that can be used.
+	 */
+	size_t (*get_user_size)(const struct memory_block *m);
+
+	/* returns pointer to the beginning of data of a run block */
+	void *(*get_real_data)(const struct memory_block *m);
+
+	/* returns the size of a memory block, including headers */
+	size_t (*get_real_size)(const struct memory_block *m);
+
+	/* writes a header of an allocation */
+	void (*write_header)(const struct memory_block *m,
+		uint64_t extra_field, uint16_t flags);
+	void (*invalidate)(const struct memory_block *m);
+
+	/*
+	 * Checks that the header type of a chunk matches the expected type and
+	 * modifies it if necessary. This is fail-safe atomic.
+	 */
+	void (*ensure_header_type)(const struct memory_block *m,
+		enum header_type t);
+
+	/*
+	 * Reinitializes a block after a heap restart.
+	 * This is called for EVERY allocation, but *only* under Valgrind.
+	 */
+	void (*reinit_header)(const struct memory_block *m);
+
+	/* returns the extra field of an allocation */
+	uint64_t (*get_extra)(const struct memory_block *m);
+
+	/* returns the flags of an allocation */
+	uint16_t (*get_flags)(const struct memory_block *m);
+
+	/* initializes memblock in valgrind */
+	void (*vg_init)(const struct memory_block *m, int objects,
+		object_callback cb, void *arg);
+
+	/* iterates over every free block */
+	int (*iterate_free)(const struct memory_block *m,
+		object_callback cb, void *arg);
+
+	/* iterates over every used block */
+	int (*iterate_used)(const struct memory_block *m,
+		object_callback cb, void *arg);
+
+	/* calculates number of free units, valid only for runs */
+	void (*calc_free)(const struct memory_block *m,
+		uint32_t *free_space, uint32_t *max_free_block);
+
+	/* this is called exactly once for every existing chunk */
+	void (*reinit_chunk)(const struct memory_block *m);
+
+	/*
+	 * Initializes bitmap data for a run.
+	 * Do *not* use this function unless absolutely necessary; it breaks
+	 * the abstraction layer by exposing implementation details.
+	 */
+	void (*get_bitmap)(const struct memory_block *m, struct run_bitmap *b);
+
+	/* calculates the ratio between occupied and unoccupied space */
+	unsigned (*fill_pct)(const struct memory_block *m);
+};
+
+struct memory_block {
+	uint32_t chunk_id; /* index of the memory block in its zone */
+	uint32_t zone_id; /* index of this block's zone in the heap */
+
+	/*
+	 * Size index of the memory block, represented either as a multiple of
+	 * CHUNKSIZE in the case of a huge chunk or as a multiple of the run's
+	 * block size.
+	 */
+	uint32_t size_idx;
+
+	/*
+	 * Used only for run chunks, must be zeroed for huge.
+	 * Number of preceding blocks in the chunk. In other words, the
+	 * position of this memory block in the run bitmap.
+	 */
+	uint32_t block_off;
+
+	/*
+	 * The variables below are associated with the memory block and are
+	 * stored here for convenience. Those fields are filled in by either
+	 * memblock_from_offset or memblock_rebuild_state, and they should not
+	 * be modified manually.
+	 */
+	const struct memory_block_ops *m_ops;
+	struct palloc_heap *heap;
+	enum header_type header_type;
+	enum memory_block_type type;
+	struct run_bitmap *cached_bitmap;
+};
+
+/*
+ * This is a representation of a run memory block that is active in a bucket or
+ * is on a pending list in the recycler.
+ * This structure should never be passed around by value because the address of
+ * the nresv variable can be referenced by reservations made through
+ * palloc_reserve(). Only when the number of reservations equals 0 can the
+ * structure be moved/freed.
+ */
+struct memory_block_reserved {
+	struct memory_block m;
+
+	struct bucket *bucket;
+	/*
+	 * Number of reservations made from this run; the pointer to this value
+	 * is stored in a user-facing pobj_action structure. Decremented once
+	 * the reservation is published or canceled.
+	 */
+	int nresv;
+};
+
+struct memory_block memblock_from_offset(struct palloc_heap *heap,
+	uint64_t off);
+struct memory_block memblock_from_offset_opt(struct palloc_heap *heap,
+	uint64_t off, int size);
+void memblock_rebuild_state(struct palloc_heap *heap, struct memory_block *m);
+
+struct memory_block memblock_huge_init(struct palloc_heap *heap,
+	uint32_t chunk_id, uint32_t zone_id, uint32_t size_idx);
+
+struct memory_block memblock_run_init(struct palloc_heap *heap,
+	uint32_t chunk_id, uint32_t zone_id, struct run_descriptor *rdsc);
+
+void memblock_run_bitmap(uint32_t *size_idx, uint16_t flags,
+	uint64_t unit_size, uint64_t alignment, void *content,
+	struct run_bitmap *b);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif diff --git a/src/pmdk/src/libpmemobj/memops.c b/src/pmdk/src/libpmemobj/memops.c new file mode 100644 index 000000000..81464e663 --- /dev/null +++ b/src/pmdk/src/libpmemobj/memops.c @@ -0,0 +1,837 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * memops.c -- aggregated memory operations helper implementation
+ *
+ * The operation collects all of the required memory modifications that
+ * need to happen in an atomic way (all of them or none), and abstracts
+ * away the storage type (transient/persistent) and the underlying
+ * implementation of how it's actually performed - in some cases using
+ * the redo log is unnecessary and the allocation process can be sped up
+ * a bit by completely omitting that whole machinery.
+ *
+ * The modifications are not visible until the context is processed.
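+ *
+ * A typical usage sketch, mirroring the callers visible elsewhere in
+ * this patch (list.c, memblock.c):
+ *
+ *	operation_add_entry(ctx, ptr, value, ULOG_OPERATION_SET);
+ *	operation_process(ctx);
+ *	operation_finish(ctx, 0);
+ *
+ * Until operation_process() is called, the destination memory is left
+ * untouched; afterwards all collected entries have been applied as a
+ * single atomic set.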
+ */
+
+#include "memops.h"
+#include "obj.h"
+#include "out.h"
+#include "ravl.h"
+#include "valgrind_internal.h"
+#include "vecq.h"
+#include "sys_util.h"
+
+#define ULOG_BASE_SIZE 1024
+#define OP_MERGE_SEARCH 64
+
+enum operation_state {
+	OPERATION_IDLE,
+	OPERATION_IN_PROGRESS,
+	OPERATION_CLEANUP,
+};
+
+struct operation_log {
+	size_t capacity; /* capacity of the ulog log */
+	size_t offset; /* data offset inside of the log */
+	struct ulog *ulog; /* DRAM allocated log of modifications */
+};
+
+/*
+ * operation_context -- context of an ongoing palloc operation
+ */
+struct operation_context {
+	enum log_type type;
+
+	ulog_extend_fn extend; /* function to allocate next ulog */
+	ulog_free_fn ulog_free; /* function to free next ulogs */
+
+	const struct pmem_ops *p_ops;
+	struct pmem_ops t_ops; /* used for transient data processing */
+	struct pmem_ops s_ops; /* used for shadow copy data processing */
+
+	size_t ulog_curr_offset; /* offset in the log for buffer stores */
+	size_t ulog_curr_capacity; /* capacity of the current log */
+	size_t ulog_curr_gen_num; /* transaction counter in the current log */
+	struct ulog *ulog_curr; /* current persistent log */
+	size_t total_logged; /* total amount of buffer stores in the logs */
+
+	struct ulog *ulog; /* pointer to the persistent ulog log */
+	size_t ulog_base_nbytes; /* available bytes in initial ulog log */
+	size_t ulog_capacity; /* sum of capacity, incl all next ulog logs */
+	int ulog_auto_reserve; /* whether automatic ulog reservation is allowed */
+	int ulog_any_user_buffer; /* set if any user buffer is added */
+
+	struct ulog_next next; /* vector of 'next' fields of persistent ulog */
+
+	enum operation_state state; /* operation sanity check */
+
+	struct operation_log pshadow_ops; /* shadow copy of persistent ulog */
+	struct operation_log transient_ops; /* log of transient changes */
+
+	/* collection used to look for potential merge candidates */
+	VECQ(, struct ulog_entry_val *) merge_entries;
+};
+
+/*
+ * operation_log_transient_init -- (internal) initialize operation log
+ *	containing transient memory resident changes
+ */
+static int
+operation_log_transient_init(struct operation_log *log)
+{
+	log->capacity = ULOG_BASE_SIZE;
+	log->offset = 0;
+
+	struct ulog *src = Zalloc(sizeof(struct ulog) +
+		ULOG_BASE_SIZE);
+	if (src == NULL) {
+		ERR("!Zalloc");
+		return -1;
+	}
+
+	/* initialize underlying redo log structure */
+	src->capacity = ULOG_BASE_SIZE;
+
+	log->ulog = src;
+
+	return 0;
+}
+
+/*
+ * operation_log_persistent_init -- (internal) initialize operation log
+ *	containing persistent memory resident changes
+ */
+static int
+operation_log_persistent_init(struct operation_log *log,
+	size_t ulog_base_nbytes)
+{
+	log->capacity = ULOG_BASE_SIZE;
+	log->offset = 0;
+
+	struct ulog *src = Zalloc(sizeof(struct ulog) +
+		ULOG_BASE_SIZE);
+	if (src == NULL) {
+		ERR("!Zalloc");
+		return -1;
+	}
+
+	/* initialize underlying redo log structure */
+	src->capacity = ulog_base_nbytes;
+	memset(src->unused, 0, sizeof(src->unused));
+
+	log->ulog = src;
+
+	return 0;
+}
+
+/*
+ * operation_transient_clean -- cleans pmemcheck address state
+ */
+static int
+operation_transient_clean(void *base, const void *addr, size_t len,
+	unsigned flags)
+{
+	VALGRIND_SET_CLEAN(addr, len);
+
+	return 0;
+}
+
+/*
+ * operation_transient_drain -- noop
+ */
+static void
+operation_transient_drain(void *base)
+{
+}
+
+/*
+ * operation_transient_memcpy -- transient memcpy wrapper
+ */
+static void *
+operation_transient_memcpy(void *base, void
*dest, const void *src, size_t len, + unsigned flags) +{ + return memcpy(dest, src, len); +} + +/* + * operation_new -- creates new operation context + */ +struct operation_context * +operation_new(struct ulog *ulog, size_t ulog_base_nbytes, + ulog_extend_fn extend, ulog_free_fn ulog_free, + const struct pmem_ops *p_ops, enum log_type type) +{ + struct operation_context *ctx = Zalloc(sizeof(*ctx)); + if (ctx == NULL) { + ERR("!Zalloc"); + goto error_ctx_alloc; + } + + ctx->ulog = ulog; + ctx->ulog_base_nbytes = ulog_base_nbytes; + ctx->ulog_capacity = ulog_capacity(ulog, + ulog_base_nbytes, p_ops); + ctx->extend = extend; + ctx->ulog_free = ulog_free; + ctx->state = OPERATION_IDLE; + VEC_INIT(&ctx->next); + ulog_rebuild_next_vec(ulog, &ctx->next, p_ops); + ctx->p_ops = p_ops; + ctx->type = type; + ctx->ulog_any_user_buffer = 0; + + ctx->ulog_curr_offset = 0; + ctx->ulog_curr_capacity = 0; + ctx->ulog_curr = NULL; + + ctx->t_ops.base = NULL; + ctx->t_ops.flush = operation_transient_clean; + ctx->t_ops.memcpy = operation_transient_memcpy; + ctx->t_ops.drain = operation_transient_drain; + + ctx->s_ops.base = p_ops->base; + ctx->s_ops.flush = operation_transient_clean; + ctx->s_ops.memcpy = operation_transient_memcpy; + ctx->s_ops.drain = operation_transient_drain; + + VECQ_INIT(&ctx->merge_entries); + + if (operation_log_transient_init(&ctx->transient_ops) != 0) + goto error_ulog_alloc; + + if (operation_log_persistent_init(&ctx->pshadow_ops, + ulog_base_nbytes) != 0) + goto error_ulog_alloc; + + return ctx; + +error_ulog_alloc: + operation_delete(ctx); +error_ctx_alloc: + return NULL; +} + +/* + * operation_delete -- deletes operation context + */ +void +operation_delete(struct operation_context *ctx) +{ + VECQ_DELETE(&ctx->merge_entries); + VEC_DELETE(&ctx->next); + Free(ctx->pshadow_ops.ulog); + Free(ctx->transient_ops.ulog); + Free(ctx); +} + +/* + * operation_user_buffer_remove -- removes range from the tree and returns 0 + */ +static int +operation_user_buffer_remove(void *base, void *addr) +{ + PMEMobjpool *pop = base; + if (!pop->ulog_user_buffers.verify) + return 0; + + util_mutex_lock(&pop->ulog_user_buffers.lock); + + struct ravl *ravl = pop->ulog_user_buffers.map; + enum ravl_predicate predict = RAVL_PREDICATE_EQUAL; + + struct user_buffer_def range; + range.addr = addr; + range.size = 0; + + struct ravl_node *n = ravl_find(ravl, &range, predict); + ASSERTne(n, NULL); + ravl_remove(ravl, n); + + util_mutex_unlock(&pop->ulog_user_buffers.lock); + + return 0; +} + +/* + * operation_free_logs -- free all logs except first + */ +void +operation_free_logs(struct operation_context *ctx, uint64_t flags) +{ + int freed = ulog_free_next(ctx->ulog, ctx->p_ops, ctx->ulog_free, + operation_user_buffer_remove, flags); + if (freed) { + ctx->ulog_capacity = ulog_capacity(ctx->ulog, + ctx->ulog_base_nbytes, ctx->p_ops); + VEC_CLEAR(&ctx->next); + ulog_rebuild_next_vec(ctx->ulog, &ctx->next, ctx->p_ops); + } + + ASSERTeq(VEC_SIZE(&ctx->next), 0); +} + +/* + * operation_merge -- (internal) performs operation on a field + */ +static inline void +operation_merge(struct ulog_entry_base *entry, uint64_t value, + ulog_operation_type type) +{ + struct ulog_entry_val *e = (struct ulog_entry_val *)entry; + + switch (type) { + case ULOG_OPERATION_AND: + e->value &= value; + break; + case ULOG_OPERATION_OR: + e->value |= value; + break; + case ULOG_OPERATION_SET: + e->value = value; + break; + default: + ASSERT(0); /* unreachable */ + } +} + +/* + * operation_try_merge_entry -- tries to merge the incoming 
log entry with
+ *	existing entries
+ *
+ * Because this requires a reverse foreach, it cannot be implemented using
+ * the on-media ulog log structure since there's no way to find the
+ * previous entry in the log. Instead, the last N entries are stored
+ * in a collection and traversed backwards.
+ */
+static int
+operation_try_merge_entry(struct operation_context *ctx,
+	void *ptr, uint64_t value, ulog_operation_type type)
+{
+	int ret = 0;
+	uint64_t offset = OBJ_PTR_TO_OFF(ctx->p_ops->base, ptr);
+
+	struct ulog_entry_val *e;
+	VECQ_FOREACH_REVERSE(e, &ctx->merge_entries) {
+		if (ulog_entry_offset(&e->base) == offset) {
+			if (ulog_entry_type(&e->base) == type) {
+				operation_merge(&e->base, value, type);
+				return 1;
+			} else {
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * operation_merge_entry_add -- adds a new entry to the merge collection,
+ *	keeps capacity at OP_MERGE_SEARCH. Removes old entries in FIFO fashion.
+ */
+static void
+operation_merge_entry_add(struct operation_context *ctx,
+	struct ulog_entry_val *entry)
+{
+	if (VECQ_SIZE(&ctx->merge_entries) == OP_MERGE_SEARCH)
+		(void) VECQ_DEQUEUE(&ctx->merge_entries);
+
+	if (VECQ_ENQUEUE(&ctx->merge_entries, entry) != 0) {
+		/* this is fine, only runtime perf will get slower */
+		LOG(2, "out of memory - unable to track entries");
+	}
+}
+
+/*
+ * operation_add_typed_entry -- adds a new entry to the current operation; if
+ *	an entry with the same ptr address and operation type already exists in
+ *	the recent-entry window, the new value is merged into it instead of
+ *	being appended as a duplicate.
+ */
+int
+operation_add_typed_entry(struct operation_context *ctx,
+	void *ptr, uint64_t value,
+	ulog_operation_type type, enum operation_log_type log_type)
+{
+	struct operation_log *oplog = log_type == LOG_PERSISTENT ?
+		&ctx->pshadow_ops : &ctx->transient_ops;
+
+	/*
+	 * Always make sure to have one extra spare cacheline so that the
+	 * ulog log entry creation has enough room for zeroing.
+	 */
+	if (oplog->offset + CACHELINE_SIZE == oplog->capacity) {
+		size_t ncapacity = oplog->capacity + ULOG_BASE_SIZE;
+		struct ulog *ulog = Realloc(oplog->ulog,
+			SIZEOF_ULOG(ncapacity));
+		if (ulog == NULL)
+			return -1;
+		oplog->capacity += ULOG_BASE_SIZE;
+		oplog->ulog = ulog;
+		oplog->ulog->capacity = oplog->capacity;
+
+		/*
+		 * Realloc invalidated the ulog entries that are inside of this
+		 * vector, need to clear it to avoid use after free.
+		 */
+		VECQ_CLEAR(&ctx->merge_entries);
+	}
+
+	if (log_type == LOG_PERSISTENT &&
+		operation_try_merge_entry(ctx, ptr, value, type) != 0)
+		return 0;
+
+	struct ulog_entry_val *entry = ulog_entry_val_create(
+		oplog->ulog, oplog->offset, ptr, value, type,
+		log_type == LOG_TRANSIENT ? &ctx->t_ops : &ctx->s_ops);
+
+	if (log_type == LOG_PERSISTENT)
+		operation_merge_entry_add(ctx, entry);
+
+	oplog->offset += ulog_entry_size(&entry->base);
+
+	return 0;
+}
+
+/*
+ * operation_add_entry -- adds a new entry to the current operation with
+ *	the log type autodetected based on the memory location
+ */
+int
+operation_add_entry(struct operation_context *ctx, void *ptr, uint64_t value,
+	ulog_operation_type type)
+{
+	const struct pmem_ops *p_ops = ctx->p_ops;
+	PMEMobjpool *pop = (PMEMobjpool *)p_ops->base;
+
+	int from_pool = OBJ_OFF_IS_VALID(pop,
+		(uintptr_t)ptr - (uintptr_t)p_ops->base);
+
+	return operation_add_typed_entry(ctx, ptr, value, type,
+		from_pool ?
LOG_PERSISTENT : LOG_TRANSIENT); +} + +/* + * operation_add_buffer -- adds a buffer operation to the log + */ +int +operation_add_buffer(struct operation_context *ctx, + void *dest, void *src, size_t size, ulog_operation_type type) +{ + size_t real_size = size + sizeof(struct ulog_entry_buf); + + /* if there's no space left in the log, reserve some more */ + if (ctx->ulog_curr_capacity == 0) { + ctx->ulog_curr_gen_num = ctx->ulog->gen_num; + if (operation_reserve(ctx, ctx->total_logged + real_size) != 0) + return -1; + + ctx->ulog_curr = ctx->ulog_curr == NULL ? ctx->ulog : + ulog_next(ctx->ulog_curr, ctx->p_ops); + ASSERTne(ctx->ulog_curr, NULL); + ctx->ulog_curr_offset = 0; + ctx->ulog_curr_capacity = ctx->ulog_curr->capacity; + } + + size_t curr_size = MIN(real_size, ctx->ulog_curr_capacity); + size_t data_size = curr_size - sizeof(struct ulog_entry_buf); + size_t entry_size = ALIGN_UP(curr_size, CACHELINE_SIZE); + + /* + * To make sure that the log is consistent and contiguous, we need + * make sure that the header of the entry that would be located + * immediately after this one is zeroed. + */ + struct ulog_entry_base *next_entry = NULL; + if (entry_size == ctx->ulog_curr_capacity) { + struct ulog *u = ulog_next(ctx->ulog_curr, ctx->p_ops); + if (u != NULL) + next_entry = (struct ulog_entry_base *)u->data; + } else { + size_t next_entry_offset = ctx->ulog_curr_offset + entry_size; + next_entry = (struct ulog_entry_base *)(ctx->ulog_curr->data + + next_entry_offset); + } + if (next_entry != NULL) + ulog_clobber_entry(next_entry, ctx->p_ops); + + /* create a persistent log entry */ + struct ulog_entry_buf *e = ulog_entry_buf_create(ctx->ulog_curr, + ctx->ulog_curr_offset, + ctx->ulog_curr_gen_num, + dest, src, data_size, + type, ctx->p_ops); + ASSERT(entry_size == ulog_entry_size(&e->base)); + ASSERT(entry_size <= ctx->ulog_curr_capacity); + + ctx->total_logged += entry_size; + ctx->ulog_curr_offset += entry_size; + ctx->ulog_curr_capacity -= entry_size; + + /* + * Recursively add the data to the log until the entire buffer is + * processed. + */ + return size - data_size == 0 ? 
0 : operation_add_buffer(ctx, + (char *)dest + data_size, + (char *)src + data_size, + size - data_size, type); +} + +/* + * operation_user_buffer_range_cmp -- compares addresses of + * user buffers + */ +int +operation_user_buffer_range_cmp(const void *lhs, const void *rhs) +{ + const struct user_buffer_def *l = lhs; + const struct user_buffer_def *r = rhs; + + if (l->addr > r->addr) + return 1; + else if (l->addr < r->addr) + return -1; + + return 0; +} + +/* + * operation_user_buffer_try_insert -- adds a user buffer range to the tree, + * if the buffer already exists in the tree function returns -1, otherwise + * it returns 0 + */ +static int +operation_user_buffer_try_insert(PMEMobjpool *pop, + struct user_buffer_def *userbuf) +{ + int ret = 0; + + if (!pop->ulog_user_buffers.verify) + return ret; + + util_mutex_lock(&pop->ulog_user_buffers.lock); + + void *addr_end = (char *)userbuf->addr + userbuf->size; + struct user_buffer_def search; + search.addr = addr_end; + struct ravl_node *n = ravl_find(pop->ulog_user_buffers.map, + &search, RAVL_PREDICATE_LESS_EQUAL); + if (n != NULL) { + struct user_buffer_def *r = ravl_data(n); + void *r_end = (char *)r->addr + r->size; + + if (r_end > userbuf->addr && r->addr < addr_end) { + /* what was found overlaps with what is being added */ + ret = -1; + goto out; + } + } + + if (ravl_emplace_copy(pop->ulog_user_buffers.map, userbuf) == -1) { + ASSERTne(errno, EEXIST); + ret = -1; + } + +out: + util_mutex_unlock(&pop->ulog_user_buffers.lock); + return ret; +} + +/* + * operation_user_buffer_verify_align -- verify if the provided buffer can be + * used as a transaction log, and if so - perform necessary alignments + */ +int +operation_user_buffer_verify_align(struct operation_context *ctx, + struct user_buffer_def *userbuf) +{ + /* + * Address of the buffer has to be aligned up, and the size + * has to be aligned down, taking into account the number of bytes + * the address was incremented by. The remaining size has to be large + * enough to contain the header and at least one ulog entry. 
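+ * + * Illustrative numbers (assuming a 64-byte cacheline and a 64-byte + * struct ulog header): a 1024-byte buffer starting 24 bytes past a + * cacheline boundary is aligned up by 40 bytes, leaving 984; subtracting + * the header leaves 920, which aligns down to 896 bytes of entry + * capacity, so the buffer is registered as 896 plus the 64-byte header, + * i.e. 960 usable bytes.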
+ */ + uint64_t buffer_offset = OBJ_PTR_TO_OFF(ctx->p_ops->base, + userbuf->addr); + ptrdiff_t size_diff = (intptr_t)ulog_by_offset(buffer_offset, + ctx->p_ops) - (intptr_t)userbuf->addr; + ssize_t capacity_unaligned = (ssize_t)userbuf->size - size_diff + - (ssize_t)sizeof(struct ulog); + if (capacity_unaligned < (ssize_t)CACHELINE_SIZE) { + ERR("Capacity insufficient"); + return -1; + } + + size_t capacity_aligned = ALIGN_DOWN((size_t)capacity_unaligned, + CACHELINE_SIZE); + + userbuf->addr = ulog_by_offset(buffer_offset, ctx->p_ops); + userbuf->size = capacity_aligned + sizeof(struct ulog); + + if (operation_user_buffer_try_insert(ctx->p_ops->base, userbuf)) { + ERR("Buffer currently used"); + return -1; + } + + return 0; +} + +/* + * operation_add_user_buffer -- add user buffer to the ulog + */ +void +operation_add_user_buffer(struct operation_context *ctx, + struct user_buffer_def *userbuf) +{ + uint64_t buffer_offset = OBJ_PTR_TO_OFF(ctx->p_ops->base, + userbuf->addr); + size_t capacity = userbuf->size - sizeof(struct ulog); + + ulog_construct(buffer_offset, capacity, ctx->ulog->gen_num, + 1, ULOG_USER_OWNED, ctx->p_ops); + + struct ulog *last_log; + /* if there is only one log */ + if (!VEC_SIZE(&ctx->next)) + last_log = ctx->ulog; + else /* get last element from vector */ + last_log = ulog_by_offset(VEC_BACK(&ctx->next), ctx->p_ops); + + ASSERTne(last_log, NULL); + size_t next_size = sizeof(last_log->next); + VALGRIND_ADD_TO_TX(&last_log->next, next_size); + last_log->next = buffer_offset; + pmemops_persist(ctx->p_ops, &last_log->next, next_size); + + VEC_PUSH_BACK(&ctx->next, buffer_offset); + ctx->ulog_capacity += capacity; + operation_set_any_user_buffer(ctx, 1); +} + +/* + * operation_set_auto_reserve -- set auto reserve value for context + */ +void +operation_set_auto_reserve(struct operation_context *ctx, int auto_reserve) +{ + ctx->ulog_auto_reserve = auto_reserve; +} + +/* + * operation_set_any_user_buffer -- set ulog_any_user_buffer value for context + */ +void +operation_set_any_user_buffer(struct operation_context *ctx, + int any_user_buffer) +{ + ctx->ulog_any_user_buffer = any_user_buffer; +} + +/* + * operation_get_any_user_buffer -- get ulog_any_user_buffer value from context + */ +int +operation_get_any_user_buffer(struct operation_context *ctx) +{ + return ctx->ulog_any_user_buffer; +} + +/* + * operation_process_persistent_redo -- (internal) process using ulog + */ +static void +operation_process_persistent_redo(struct operation_context *ctx) +{ + ASSERTeq(ctx->pshadow_ops.capacity % CACHELINE_SIZE, 0); + + ulog_store(ctx->ulog, ctx->pshadow_ops.ulog, + ctx->pshadow_ops.offset, ctx->ulog_base_nbytes, + ctx->ulog_capacity, + &ctx->next, ctx->p_ops); + + ulog_process(ctx->pshadow_ops.ulog, OBJ_OFF_IS_VALID_FROM_CTX, + ctx->p_ops); + + ulog_clobber(ctx->ulog, &ctx->next, ctx->p_ops); +} + +/* + * operation_process_persistent_undo -- (internal) process using ulog + */ +static void +operation_process_persistent_undo(struct operation_context *ctx) +{ + ASSERTeq(ctx->pshadow_ops.capacity % CACHELINE_SIZE, 0); + + ulog_process(ctx->ulog, OBJ_OFF_IS_VALID_FROM_CTX, ctx->p_ops); +} + +/* + * operation_reserve -- (internal) reserves new capacity in persistent ulog log + */ +int +operation_reserve(struct operation_context *ctx, size_t new_capacity) +{ + if (new_capacity > ctx->ulog_capacity) { + if (ctx->extend == NULL) { + ERR("no extend function present"); + return -1; + } + + if (ulog_reserve(ctx->ulog, + ctx->ulog_base_nbytes, + ctx->ulog_curr_gen_num, + 
ctx->ulog_auto_reserve, + &new_capacity, ctx->extend, + &ctx->next, ctx->p_ops) != 0) + return -1; + ctx->ulog_capacity = new_capacity; + } + + return 0; +} + +/* + * operation_init -- initializes runtime state of an operation + */ +void +operation_init(struct operation_context *ctx) +{ + struct operation_log *plog = &ctx->pshadow_ops; + struct operation_log *tlog = &ctx->transient_ops; + + VALGRIND_ANNOTATE_NEW_MEMORY(ctx, sizeof(*ctx)); + VALGRIND_ANNOTATE_NEW_MEMORY(tlog->ulog, sizeof(struct ulog) + + tlog->capacity); + VALGRIND_ANNOTATE_NEW_MEMORY(plog->ulog, sizeof(struct ulog) + + plog->capacity); + tlog->offset = 0; + plog->offset = 0; + VECQ_REINIT(&ctx->merge_entries); + + ctx->ulog_curr_offset = 0; + ctx->ulog_curr_capacity = 0; + ctx->ulog_curr_gen_num = 0; + ctx->ulog_curr = NULL; + ctx->total_logged = 0; + ctx->ulog_auto_reserve = 1; + ctx->ulog_any_user_buffer = 0; +} + +/* + * operation_start -- initializes and starts a new operation + */ +void +operation_start(struct operation_context *ctx) +{ + operation_init(ctx); + ASSERTeq(ctx->state, OPERATION_IDLE); + ctx->state = OPERATION_IN_PROGRESS; +} + +void +operation_resume(struct operation_context *ctx) +{ + operation_start(ctx); + ctx->total_logged = ulog_base_nbytes(ctx->ulog); +} + +/* + * operation_cancel -- cancels a running operation + */ +void +operation_cancel(struct operation_context *ctx) +{ + ASSERTeq(ctx->state, OPERATION_IN_PROGRESS); + ctx->state = OPERATION_IDLE; +} + +/* + * operation_process -- processes registered operations + * + * The order of processing is important: persistent, transient. + * This is because the transient entries that reside on persistent memory might + * require write to a location that is currently occupied by a valid persistent + * state but becomes a transient state after operation is processed. + */ +void +operation_process(struct operation_context *ctx) +{ + /* + * If there's exactly one persistent entry there's no need to involve + * the redo log. We can simply assign the value, the operation will be + * atomic. 
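+ * + * This relies on the guarantee that an aligned 8-byte store to pmem is + * failure-atomic: a lone SET/AND/OR entry modifies a single 8-byte word, + * so applying it directly with ulog_entry_apply() either takes full + * effect or none, and no redo log is needed for recovery.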
+ */ + int redo_process = ctx->type == LOG_TYPE_REDO && + ctx->pshadow_ops.offset != 0; + if (redo_process && + ctx->pshadow_ops.offset == sizeof(struct ulog_entry_val)) { + struct ulog_entry_base *e = (struct ulog_entry_base *) + ctx->pshadow_ops.ulog->data; + ulog_operation_type t = ulog_entry_type(e); + if (t == ULOG_OPERATION_SET || t == ULOG_OPERATION_AND || + t == ULOG_OPERATION_OR) { + ulog_entry_apply(e, 1, ctx->p_ops); + redo_process = 0; + } + } + + if (redo_process) { + operation_process_persistent_redo(ctx); + ctx->state = OPERATION_CLEANUP; + } else if (ctx->type == LOG_TYPE_UNDO && ctx->total_logged != 0) { + operation_process_persistent_undo(ctx); + ctx->state = OPERATION_CLEANUP; + } + + /* process transient entries with transient memory ops */ + if (ctx->transient_ops.offset != 0) + ulog_process(ctx->transient_ops.ulog, NULL, &ctx->t_ops); +} + +/* + * operation_finish -- finalizes the operation + */ +void +operation_finish(struct operation_context *ctx, unsigned flags) +{ + ASSERTne(ctx->state, OPERATION_IDLE); + + if (ctx->type == LOG_TYPE_UNDO && ctx->total_logged != 0) + ctx->state = OPERATION_CLEANUP; + + if (ctx->ulog_any_user_buffer) { + flags |= ULOG_ANY_USER_BUFFER; + ctx->state = OPERATION_CLEANUP; + } + + if (ctx->state != OPERATION_CLEANUP) + goto out; + + if (ctx->type == LOG_TYPE_UNDO) { + int ret = ulog_clobber_data(ctx->ulog, + ctx->total_logged, ctx->ulog_base_nbytes, + &ctx->next, ctx->ulog_free, + operation_user_buffer_remove, + ctx->p_ops, flags); + if (ret == 0) + goto out; + } else if (ctx->type == LOG_TYPE_REDO) { + int ret = ulog_free_next(ctx->ulog, ctx->p_ops, + ctx->ulog_free, operation_user_buffer_remove, + flags); + if (ret == 0) + goto out; + } + + /* clobbering shrunk the ulog */ + ctx->ulog_capacity = ulog_capacity(ctx->ulog, + ctx->ulog_base_nbytes, ctx->p_ops); + VEC_CLEAR(&ctx->next); + ulog_rebuild_next_vec(ctx->ulog, &ctx->next, ctx->p_ops); + +out: + ctx->state = OPERATION_IDLE; +} diff --git a/src/pmdk/src/libpmemobj/memops.h b/src/pmdk/src/libpmemobj/memops.h new file mode 100644 index 000000000..b05948ed0 --- /dev/null +++ b/src/pmdk/src/libpmemobj/memops.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * memops.h -- aggregated memory operations helper definitions + */ + +#ifndef LIBPMEMOBJ_MEMOPS_H +#define LIBPMEMOBJ_MEMOPS_H 1 + +#include +#include + +#include "vec.h" +#include "pmemops.h" +#include "ulog.h" +#include "lane.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum operation_log_type { + LOG_PERSISTENT, /* log of persistent modifications */ + LOG_TRANSIENT, /* log of transient memory modifications */ + + MAX_OPERATION_LOG_TYPE +}; + +enum log_type { + LOG_TYPE_UNDO, + LOG_TYPE_REDO, + + MAX_LOG_TYPE, +}; + +struct user_buffer_def { + void *addr; + size_t size; +}; + +struct operation_context; + +struct operation_context * +operation_new(struct ulog *redo, size_t ulog_base_nbytes, + ulog_extend_fn extend, ulog_free_fn ulog_free, + const struct pmem_ops *p_ops, enum log_type type); + +void operation_init(struct operation_context *ctx); +void operation_start(struct operation_context *ctx); +void operation_resume(struct operation_context *ctx); + +void operation_delete(struct operation_context *ctx); +void operation_free_logs(struct operation_context *ctx, uint64_t flags); + +int operation_add_buffer(struct operation_context *ctx, + void *dest, void *src, size_t size, ulog_operation_type type); + +int operation_add_entry(struct operation_context *ctx, + 
void *ptr, uint64_t value, ulog_operation_type type); +int operation_add_typed_entry(struct operation_context *ctx, + void *ptr, uint64_t value, + ulog_operation_type type, enum operation_log_type log_type); +int operation_user_buffer_verify_align(struct operation_context *ctx, + struct user_buffer_def *userbuf); +void operation_add_user_buffer(struct operation_context *ctx, + struct user_buffer_def *userbuf); +void operation_set_auto_reserve(struct operation_context *ctx, + int auto_reserve); +void operation_set_any_user_buffer(struct operation_context *ctx, + int any_user_buffer); +int operation_get_any_user_buffer(struct operation_context *ctx); +int operation_user_buffer_range_cmp(const void *lhs, const void *rhs); + +int operation_reserve(struct operation_context *ctx, size_t new_capacity); +void operation_process(struct operation_context *ctx); +void operation_finish(struct operation_context *ctx, unsigned flags); +void operation_cancel(struct operation_context *ctx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/obj.c b/src/pmdk/src/libpmemobj/obj.c new file mode 100644 index 000000000..13abe2ac6 --- /dev/null +++ b/src/pmdk/src/libpmemobj/obj.c @@ -0,0 +1,3447 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * obj.c -- transactional object store implementation + */ +#include +#include +#include +#include + +#include "valgrind_internal.h" +#include "libpmem.h" +#include "memblock.h" +#include "critnib.h" +#include "list.h" +#include "mmap.h" +#include "obj.h" +#include "ctl_global.h" +#include "ravl.h" + +#include "heap_layout.h" +#include "os.h" +#include "os_thread.h" +#include "pmemops.h" +#include "set.h" +#include "sync.h" +#include "tx.h" +#include "sys_util.h" + +/* + * The variable from which the config is directly loaded. The string + * cannot contain any comments or extraneous white characters. + */ +#define OBJ_CONFIG_ENV_VARIABLE "PMEMOBJ_CONF" + +/* + * The variable that points to a config file from which the config is loaded. + */ +#define OBJ_CONFIG_FILE_ENV_VARIABLE "PMEMOBJ_CONF_FILE" + +/* + * The variable which overwrites a number of lanes available at runtime. + */ +#define OBJ_NLANES_ENV_VARIABLE "PMEMOBJ_NLANES" + +#define OBJ_X_VALID_FLAGS PMEMOBJ_F_RELAXED + +static const struct pool_attr Obj_create_attr = { + OBJ_HDR_SIG, + OBJ_FORMAT_MAJOR, + OBJ_FORMAT_FEAT_DEFAULT, + {0}, {0}, {0}, {0}, {0} +}; + +static const struct pool_attr Obj_open_attr = { + OBJ_HDR_SIG, + OBJ_FORMAT_MAJOR, + OBJ_FORMAT_FEAT_CHECK, + {0}, {0}, {0}, {0}, {0} +}; + +static struct critnib *pools_ht; /* hash table used for searching by UUID */ +static struct critnib *pools_tree; /* tree used for searching by address */ + +int _pobj_cache_invalidate; + +#ifndef _WIN32 + +__thread struct _pobj_pcache _pobj_cached_pool; + +/* + * pmemobj_direct -- returns the direct pointer of an object + */ +void * +pmemobj_direct(PMEMoid oid) +{ + return pmemobj_direct_inline(oid); +} + +#else /* _WIN32 */ + +/* + * XXX - this is a temporary implementation + * + * Seems like we could still use TLS and simply substitute "__thread" with + * "__declspec(thread)", however it's not clear if it would work correctly + * with Windows DLL's. + * Need to verify that once we have the multi-threaded tests ported. 
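+ * + * If it does turn out to be safe, the cache would presumably collapse + * into a one-line declaration, e.g.: + * + * static __declspec(thread) struct _pobj_pcache _pobj_cached_pool; + * + * Until that is verified, the os_tls_key_t implementation below stays.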
+ */ + +struct _pobj_pcache { + PMEMobjpool *pop; + uint64_t uuid_lo; + int invalidate; +}; + +static os_once_t Cached_pool_key_once = OS_ONCE_INIT; +static os_tls_key_t Cached_pool_key; + +/* + * _Cached_pool_key_alloc -- (internal) allocate pool cache pthread key + */ +static void +_Cached_pool_key_alloc(void) +{ + int pth_ret = os_tls_key_create(&Cached_pool_key, free); + if (pth_ret) + FATAL("!os_tls_key_create"); +} + +/* + * pmemobj_direct -- returns the direct pointer of an object + */ +void * +pmemobj_direct(PMEMoid oid) +{ + if (oid.off == 0 || oid.pool_uuid_lo == 0) + return NULL; + + struct _pobj_pcache *pcache = os_tls_get(Cached_pool_key); + if (pcache == NULL) { + pcache = calloc(sizeof(struct _pobj_pcache), 1); + if (pcache == NULL) + FATAL("!pcache malloc"); + int ret = os_tls_set(Cached_pool_key, pcache); + if (ret) + FATAL("!os_tls_set"); + } + + if (_pobj_cache_invalidate != pcache->invalidate || + pcache->uuid_lo != oid.pool_uuid_lo) { + pcache->invalidate = _pobj_cache_invalidate; + + if ((pcache->pop = pmemobj_pool_by_oid(oid)) == NULL) { + pcache->uuid_lo = 0; + return NULL; + } + + pcache->uuid_lo = oid.pool_uuid_lo; + } + + return (void *)((uintptr_t)pcache->pop + oid.off); +} + +#endif /* _WIN32 */ + +/* + * obj_ctl_init_and_load -- (static) initializes CTL and loads configuration + * from env variable and file + */ +static int +obj_ctl_init_and_load(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + if (pop != NULL && (pop->ctl = ctl_new()) == NULL) { + LOG(2, "!ctl_new"); + return -1; + } + + if (pop) { + tx_ctl_register(pop); + pmalloc_ctl_register(pop); + stats_ctl_register(pop); + debug_ctl_register(pop); + } + + char *env_config = os_getenv(OBJ_CONFIG_ENV_VARIABLE); + if (env_config != NULL) { + if (ctl_load_config_from_string(pop ? pop->ctl : NULL, + pop, env_config) != 0) { + LOG(2, "unable to parse config stored in %s " + "environment variable", + OBJ_CONFIG_ENV_VARIABLE); + goto err; + } + } + + char *env_config_file = os_getenv(OBJ_CONFIG_FILE_ENV_VARIABLE); + if (env_config_file != NULL && env_config_file[0] != '\0') { + if (ctl_load_config_from_file(pop ? pop->ctl : NULL, + pop, env_config_file) != 0) { + LOG(2, "unable to parse config stored in %s " + "file (from %s environment variable)", + env_config_file, + OBJ_CONFIG_FILE_ENV_VARIABLE); + goto err; + } + } + + return 0; +err: + if (pop) + ctl_delete(pop->ctl); + return -1; +} + +/* + * obj_pool_init -- (internal) allocate global structs holding all opened pools + * + * This is invoked on a first call to pmemobj_open() or pmemobj_create(). + * Memory is released in library destructor. + * + * This function needs to be threadsafe. + */ +static void +obj_pool_init(void) +{ + LOG(3, NULL); + + struct critnib *c; + + if (pools_ht == NULL) { + c = critnib_new(); + if (c == NULL) + FATAL("!critnib_new for pools_ht"); + if (!util_bool_compare_and_swap64(&pools_ht, NULL, c)) + critnib_delete(c); + } + + if (pools_tree == NULL) { + c = critnib_new(); + if (c == NULL) + FATAL("!critnib_new for pools_tree"); + if (!util_bool_compare_and_swap64(&pools_tree, NULL, c)) + critnib_delete(c); + } +} + +/* + * pmemobj_oid -- return a PMEMoid based on the virtual address + * + * If the address does not belong to any pool OID_NULL is returned. 
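+ * + * Example (illustrative; 'ptr' is any pointer previously returned by + * pmemobj_direct()): + * + * PMEMoid oid = pmemobj_oid(ptr); + * int in_pool = !OID_IS_NULL(oid);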
+ */ +PMEMoid +pmemobj_oid(const void *addr) +{ + PMEMobjpool *pop = pmemobj_pool_by_ptr(addr); + if (pop == NULL) + return OID_NULL; + + PMEMoid oid = {pop->uuid_lo, (uintptr_t)addr - (uintptr_t)pop}; + return oid; +} + +/* + * obj_init -- initialization of obj + * + * Called by constructor. + */ +void +obj_init(void) +{ + LOG(3, NULL); + + COMPILE_ERROR_ON(sizeof(struct pmemobjpool) != + POOL_HDR_SIZE + POOL_DESC_SIZE); + + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_NODRAIN != PMEM_F_MEM_NODRAIN); + + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_NONTEMPORAL != PMEM_F_MEM_NONTEMPORAL); + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_TEMPORAL != PMEM_F_MEM_TEMPORAL); + + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_WC != PMEM_F_MEM_WC); + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_WB != PMEM_F_MEM_WB); + + COMPILE_ERROR_ON(PMEMOBJ_F_MEM_NOFLUSH != PMEM_F_MEM_NOFLUSH); + +#ifdef _WIN32 + /* XXX - temporary implementation (see above) */ + os_once(&Cached_pool_key_once, _Cached_pool_key_alloc); +#endif + /* + * Load global config, ignore any issues. They will be caught on the + * subsequent call to this function for individual pools. + */ + ctl_global_register(); + + if (obj_ctl_init_and_load(NULL)) + FATAL("error: %s", pmemobj_errormsg()); + + lane_info_boot(); + + util_remote_init(); +} + +/* + * obj_fini -- cleanup of obj + * + * Called by destructor. + */ +void +obj_fini(void) +{ + LOG(3, NULL); + + if (pools_ht) + critnib_delete(pools_ht); + if (pools_tree) + critnib_delete(pools_tree); + lane_info_destroy(); + util_remote_fini(); + +#ifdef _WIN32 + (void) os_tls_key_delete(Cached_pool_key); +#endif +} + +/* + * obj_drain_empty -- (internal) empty function for drain on non-pmem memory + */ +static void +obj_drain_empty(void) +{ + /* do nothing */ +} + +/* + * obj_msync_nofail -- (internal) pmem_msync wrapper that never fails from + * caller's perspective + */ +static void +obj_msync_nofail(const void *addr, size_t size) +{ + if (pmem_msync(addr, size)) + FATAL("!pmem_msync"); +} + +/* + * obj_nopmem_memcpy -- (internal) memcpy followed by an msync + */ +static void * +obj_nopmem_memcpy(void *dest, const void *src, size_t len, unsigned flags) +{ + LOG(15, "dest %p src %p len %zu flags 0x%x", dest, src, len, flags); + + /* + * Use pmem_memcpy instead of memcpy, because pmemobj_memcpy is supposed + * to guarantee that multiple of 8 byte stores to 8 byte aligned + * addresses are fail safe atomic. pmem_memcpy guarantees that, while + * libc memcpy does not. 
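+ * + * In practice this means an 8-byte aligned, 8-byte sized copy is never + * torn into smaller stores, so code that depends on atomic 64-bit + * updates (e.g. redo log entries) stays correct even when the pool + * lives on non-pmem storage and takes this msync path.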
+ */ + pmem_memcpy(dest, src, len, PMEM_F_MEM_NOFLUSH); + obj_msync_nofail(dest, len); + return dest; +} + +/* + * obj_nopmem_memmove -- (internal) memmove followed by an msync + */ +static void * +obj_nopmem_memmove(void *dest, const void *src, size_t len, unsigned flags) +{ + LOG(15, "dest %p src %p len %zu flags 0x%x", dest, src, len, flags); + + /* see comment in obj_nopmem_memcpy */ + pmem_memmove(dest, src, len, PMEM_F_MEM_NOFLUSH); + obj_msync_nofail(dest, len); + return dest; +} + +/* + * obj_nopmem_memset -- (internal) memset followed by an msync + */ +static void * +obj_nopmem_memset(void *dest, int c, size_t len, unsigned flags) +{ + LOG(15, "dest %p c 0x%02x len %zu flags 0x%x", dest, c, len, flags); + + /* see comment in obj_nopmem_memcpy */ + pmem_memset(dest, c, len, PMEM_F_MEM_NOFLUSH); + obj_msync_nofail(dest, len); + return dest; +} + +/* + * obj_remote_persist -- (internal) remote persist function + */ +static int +obj_remote_persist(PMEMobjpool *pop, const void *addr, size_t len, + unsigned lane, unsigned flags) +{ + LOG(15, "pop %p addr %p len %zu lane %u flags %u", + pop, addr, len, lane, flags); + + ASSERTne(pop->rpp, NULL); + + uintptr_t offset = (uintptr_t)addr - pop->remote_base; + + unsigned rpmem_flags = 0; + if (flags & PMEMOBJ_F_RELAXED) + rpmem_flags |= RPMEM_PERSIST_RELAXED; + + int rv = Rpmem_persist(pop->rpp, offset, len, lane, rpmem_flags); + if (rv) { + ERR("!rpmem_persist(rpp %p offset %zu length %zu lane %u)" + " FATAL ERROR (returned value %i)", + pop->rpp, offset, len, lane, rv); + return -1; + } + + return 0; +} + +/* + * XXX - Consider removing obj_norep_*() wrappers to call *_local() + * functions directly. Alternatively, always use obj_rep_*(), even + * if there are no replicas. Verify the performance penalty. 
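+ * + * Either option costs one indirect call per memory operation: the + * obj_norep_*() wrappers below simply forward to the pop->*_local + * function pointers installed by obj_replica_init_local(), with the + * memcpy/memmove/memset variants additionally masking out invalid + * flag bits via PMEM_F_MEM_VALID_FLAGS.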
+ */ + +/* + * obj_norep_memcpy -- (internal) memcpy w/o replication + */ +static void * +obj_norep_memcpy(void *ctx, void *dest, const void *src, size_t len, + unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + flags); + + return pop->memcpy_local(dest, src, len, + flags & PMEM_F_MEM_VALID_FLAGS); +} + +/* + * obj_norep_memmove -- (internal) memmove w/o replication + */ +static void * +obj_norep_memmove(void *ctx, void *dest, const void *src, size_t len, + unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + flags); + + return pop->memmove_local(dest, src, len, + flags & PMEM_F_MEM_VALID_FLAGS); +} + +/* + * obj_norep_memset -- (internal) memset w/o replication + */ +static void * +obj_norep_memset(void *ctx, void *dest, int c, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p c 0x%02x len %zu flags 0x%x", pop, dest, c, len, + flags); + + return pop->memset_local(dest, c, len, flags & PMEM_F_MEM_VALID_FLAGS); +} + +/* + * obj_norep_persist -- (internal) persist w/o replication + */ +static int +obj_norep_persist(void *ctx, const void *addr, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + pop->persist_local(addr, len); + + return 0; +} + +/* + * obj_norep_flush -- (internal) flush w/o replication + */ +static int +obj_norep_flush(void *ctx, const void *addr, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + pop->flush_local(addr, len); + + return 0; +} + +/* + * obj_norep_drain -- (internal) drain w/o replication + */ +static void +obj_norep_drain(void *ctx) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p", pop); + + pop->drain_local(); +} + +static void obj_pool_cleanup(PMEMobjpool *pop); + +/* + * obj_handle_remote_persist_error -- (internal) handle remote persist + * fatal error + */ +static void +obj_handle_remote_persist_error(PMEMobjpool *pop) +{ + LOG(1, "pop %p", pop); + + ERR("error clean up..."); + obj_pool_cleanup(pop); + + FATAL("Fatal error of remote persist. 
Aborting..."); +} + +/* + * obj_rep_memcpy -- (internal) memcpy with replication + */ +static void * +obj_rep_memcpy(void *ctx, void *dest, const void *src, size_t len, + unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + flags); + + unsigned lane = UINT_MAX; + + if (pop->has_remote_replicas) + lane = lane_hold(pop, NULL); + + void *ret = pop->memcpy_local(dest, src, len, flags); + + PMEMobjpool *rep = pop->replica; + while (rep) { + void *rdest = (char *)rep + (uintptr_t)dest - (uintptr_t)pop; + if (rep->rpp == NULL) { + rep->memcpy_local(rdest, src, len, + flags & PMEM_F_MEM_VALID_FLAGS); + } else { + if (rep->persist_remote(rep, rdest, len, lane, flags)) + obj_handle_remote_persist_error(pop); + } + rep = rep->replica; + } + + if (pop->has_remote_replicas) + lane_release(pop); + + return ret; +} + +/* + * obj_rep_memmove -- (internal) memmove with replication + */ +static void * +obj_rep_memmove(void *ctx, void *dest, const void *src, size_t len, + unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + flags); + + unsigned lane = UINT_MAX; + + if (pop->has_remote_replicas) + lane = lane_hold(pop, NULL); + + void *ret = pop->memmove_local(dest, src, len, flags); + + PMEMobjpool *rep = pop->replica; + while (rep) { + void *rdest = (char *)rep + (uintptr_t)dest - (uintptr_t)pop; + if (rep->rpp == NULL) { + rep->memmove_local(rdest, src, len, + flags & PMEM_F_MEM_VALID_FLAGS); + } else { + if (rep->persist_remote(rep, rdest, len, lane, flags)) + obj_handle_remote_persist_error(pop); + } + rep = rep->replica; + } + + if (pop->has_remote_replicas) + lane_release(pop); + + return ret; +} + +/* + * obj_rep_memset -- (internal) memset with replication + */ +static void * +obj_rep_memset(void *ctx, void *dest, int c, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p dest %p c 0x%02x len %zu flags 0x%x", pop, dest, c, len, + flags); + + unsigned lane = UINT_MAX; + + if (pop->has_remote_replicas) + lane = lane_hold(pop, NULL); + + void *ret = pop->memset_local(dest, c, len, flags); + + PMEMobjpool *rep = pop->replica; + while (rep) { + void *rdest = (char *)rep + (uintptr_t)dest - (uintptr_t)pop; + if (rep->rpp == NULL) { + rep->memset_local(rdest, c, len, + flags & PMEM_F_MEM_VALID_FLAGS); + } else { + if (rep->persist_remote(rep, rdest, len, lane, flags)) + obj_handle_remote_persist_error(pop); + } + rep = rep->replica; + } + + if (pop->has_remote_replicas) + lane_release(pop); + + return ret; +} + +/* + * obj_rep_persist -- (internal) persist with replication + */ +static int +obj_rep_persist(void *ctx, const void *addr, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + unsigned lane = UINT_MAX; + + if (pop->has_remote_replicas) + lane = lane_hold(pop, NULL); + + pop->persist_local(addr, len); + + PMEMobjpool *rep = pop->replica; + while (rep) { + void *raddr = (char *)rep + (uintptr_t)addr - (uintptr_t)pop; + if (rep->rpp == NULL) { + rep->memcpy_local(raddr, addr, len, 0); + } else { + if (rep->persist_remote(rep, raddr, len, lane, flags)) + obj_handle_remote_persist_error(pop); + } + rep = rep->replica; + } + + if (pop->has_remote_replicas) + lane_release(pop); + + return 0; +} + +/* + * obj_rep_flush -- (internal) flush with replication + */ +static int +obj_rep_flush(void *ctx, const void *addr, size_t len, unsigned flags) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop 
%p addr %p len %zu", pop, addr, len); + + unsigned lane = UINT_MAX; + + if (pop->has_remote_replicas) + lane = lane_hold(pop, NULL); + + pop->flush_local(addr, len); + + PMEMobjpool *rep = pop->replica; + while (rep) { + void *raddr = (char *)rep + (uintptr_t)addr - (uintptr_t)pop; + if (rep->rpp == NULL) { + rep->memcpy_local(raddr, addr, len, + PMEM_F_MEM_NODRAIN); + } else { + if (rep->persist_remote(rep, raddr, len, lane, flags)) + obj_handle_remote_persist_error(pop); + } + rep = rep->replica; + } + + if (pop->has_remote_replicas) + lane_release(pop); + + return 0; +} + +/* + * obj_rep_drain -- (internal) drain with replication + */ +static void +obj_rep_drain(void *ctx) +{ + PMEMobjpool *pop = ctx; + LOG(15, "pop %p", pop); + + pop->drain_local(); + + PMEMobjpool *rep = pop->replica; + while (rep) { + if (rep->rpp == NULL) + rep->drain_local(); + rep = rep->replica; + } +} + +#if VG_MEMCHECK_ENABLED +/* + * Arbitrary value. When there's more undefined regions than MAX_UNDEFS, it's + * not worth reporting everything - developer should fix the code. + */ +#define MAX_UNDEFS 1000 + +/* + * obj_vg_check_no_undef -- (internal) check whether there are any undefined + * regions + */ +static void +obj_vg_check_no_undef(struct pmemobjpool *pop) +{ + LOG(4, "pop %p", pop); + + struct { + void *start, *end; + } undefs[MAX_UNDEFS]; + int num_undefs = 0; + + VALGRIND_DO_DISABLE_ERROR_REPORTING; + char *addr_start = pop->addr; + char *addr_end = addr_start + pop->set->poolsize; + + while (addr_start < addr_end) { + char *noaccess = (char *)VALGRIND_CHECK_MEM_IS_ADDRESSABLE( + addr_start, addr_end - addr_start); + if (noaccess == NULL) + noaccess = addr_end; + + while (addr_start < noaccess) { + char *undefined = + (char *)VALGRIND_CHECK_MEM_IS_DEFINED( + addr_start, noaccess - addr_start); + + if (undefined) { + addr_start = undefined; + +#ifdef VALGRIND_CHECK_MEM_IS_UNDEFINED + addr_start = (char *) + VALGRIND_CHECK_MEM_IS_UNDEFINED( + addr_start, noaccess - addr_start); + if (addr_start == NULL) + addr_start = noaccess; +#else + while (addr_start < noaccess && + VALGRIND_CHECK_MEM_IS_DEFINED( + addr_start, 1)) + addr_start++; +#endif + + if (num_undefs < MAX_UNDEFS) { + undefs[num_undefs].start = undefined; + undefs[num_undefs].end = addr_start - 1; + num_undefs++; + } + } else + addr_start = noaccess; + } + +#ifdef VALGRIND_CHECK_MEM_IS_UNADDRESSABLE + addr_start = (char *)VALGRIND_CHECK_MEM_IS_UNADDRESSABLE( + addr_start, addr_end - addr_start); + if (addr_start == NULL) + addr_start = addr_end; +#else + while (addr_start < addr_end && + (char *)VALGRIND_CHECK_MEM_IS_ADDRESSABLE( + addr_start, 1) == addr_start) + addr_start++; +#endif + } + VALGRIND_DO_ENABLE_ERROR_REPORTING; + + if (num_undefs) { + /* + * How to resolve this error: + * If it's part of the free space Valgrind should be told about + * it by VALGRIND_DO_MAKE_MEM_NOACCESS request. If it's + * allocated - initialize it or use VALGRIND_DO_MAKE_MEM_DEFINED + * request. + */ + + VALGRIND_PRINTF("Part of the pool is left in undefined state on" + " boot. This is pmemobj's bug.\nUndefined" + " regions: [pool address: %p]\n", pop); + for (int i = 0; i < num_undefs; ++i) + VALGRIND_PRINTF(" [%p, %p]\n", undefs[i].start, + undefs[i].end); + if (num_undefs == MAX_UNDEFS) + VALGRIND_PRINTF(" ...\n"); + + /* Trigger error. 
*/ + VALGRIND_CHECK_MEM_IS_DEFINED(undefs[0].start, 1); + } +} + +/* + * obj_vg_boot -- (internal) notify Valgrind about pool objects + */ +static void +obj_vg_boot(struct pmemobjpool *pop) +{ + if (!On_memcheck) + return; + + LOG(4, "pop %p", pop); + + if (os_getenv("PMEMOBJ_VG_CHECK_UNDEF")) + obj_vg_check_no_undef(pop); +} + +#endif + +/* + * obj_runtime_init_common -- (internal) runtime initialization + * + * Common routine for create/open and check. + */ +static int +obj_runtime_init_common(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + if ((errno = lane_boot(pop)) != 0) { + ERR("!lane_boot"); + return errno; + } + + if ((errno = lane_recover_and_section_boot(pop)) != 0) { + ERR("!lane_recover_and_section_boot"); + return errno; + } + + pop->conversion_flags = 0; + pmemops_persist(&pop->p_ops, + &pop->conversion_flags, sizeof(pop->conversion_flags)); + + return 0; +} + +/* + * obj_runtime_cleanup_common -- (internal) runtime cleanup + * + * Common routine for create/open and check + */ +static void +obj_runtime_cleanup_common(PMEMobjpool *pop) +{ + lane_section_cleanup(pop); + lane_cleanup(pop); +} + +/* + * obj_descr_create -- (internal) create obj pool descriptor + */ +static int +obj_descr_create(PMEMobjpool *pop, const char *layout, size_t poolsize) +{ + LOG(3, "pop %p layout %s poolsize %zu", pop, layout, poolsize); + + ASSERTeq(poolsize % Pagesize, 0); + + /* opaque info lives at the beginning of mapped memory pool */ + void *dscp = (void *)((uintptr_t)pop + sizeof(struct pool_hdr)); + + /* create the persistent part of pool's descriptor */ + memset(dscp, 0, OBJ_DSC_P_SIZE); + if (layout) + strncpy(pop->layout, layout, PMEMOBJ_MAX_LAYOUT - 1); + struct pmem_ops *p_ops = &pop->p_ops; + + pop->lanes_offset = OBJ_LANES_OFFSET; + pop->nlanes = OBJ_NLANES; + + /* zero all lanes */ + lane_init_data(pop); + + pop->heap_offset = pop->lanes_offset + + pop->nlanes * sizeof(struct lane_layout); + pop->heap_offset = (pop->heap_offset + Pagesize - 1) & ~(Pagesize - 1); + + size_t heap_size = pop->set->poolsize - pop->heap_offset; + + /* initialize heap prior to storing the checksum */ + errno = palloc_init((char *)pop + pop->heap_offset, heap_size, + &pop->heap_size, p_ops); + if (errno != 0) { + ERR("!palloc_init"); + return -1; + } + + util_checksum(dscp, OBJ_DSC_P_SIZE, &pop->checksum, 1, 0); + + /* + * store the persistent part of pool's descriptor (2kB) + * + * It's safe to use PMEMOBJ_F_RELAXED flag because the entire + * structure is protected by checksum. + */ + pmemops_xpersist(p_ops, dscp, OBJ_DSC_P_SIZE, PMEMOBJ_F_RELAXED); + + /* initialize run_id, it will be incremented later */ + pop->run_id = 0; + pmemops_persist(p_ops, &pop->run_id, sizeof(pop->run_id)); + + pop->root_offset = 0; + pmemops_persist(p_ops, &pop->root_offset, sizeof(pop->root_offset)); + pop->root_size = 0; + pmemops_persist(p_ops, &pop->root_size, sizeof(pop->root_size)); + + pop->conversion_flags = 0; + pmemops_persist(p_ops, &pop->conversion_flags, + sizeof(pop->conversion_flags)); + + /* + * It's safe to use PMEMOBJ_F_RELAXED flag because the reserved + * area must be entirely zeroed. 
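+ * + * PMEMOBJ_F_RELAXED only waives the 8-byte store atomicity guarantee; + * a torn memset of a region that must simply read as zeros is harmless + * here, because an interrupted pool creation is discarded and restarted + * rather than recovered.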
+ */ + pmemops_memset(p_ops, pop->pmem_reserved, 0, + sizeof(pop->pmem_reserved), PMEMOBJ_F_RELAXED); + + return 0; +} + +/* + * obj_descr_check -- (internal) validate obj pool descriptor + */ +static int +obj_descr_check(PMEMobjpool *pop, const char *layout, size_t poolsize) +{ + LOG(3, "pop %p layout %s poolsize %zu", pop, layout, poolsize); + + void *dscp = (void *)((uintptr_t)pop + sizeof(struct pool_hdr)); + + if (pop->rpp) { + /* read remote descriptor */ + if (obj_read_remote(pop->rpp, pop->remote_base, dscp, dscp, + OBJ_DSC_P_SIZE)) { + ERR("!obj_read_remote"); + return -1; + } + } + + if (!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop->checksum, 0, 0)) { + ERR("invalid checksum of pool descriptor"); + errno = EINVAL; + return -1; + } + + if (layout && + strncmp(pop->layout, layout, PMEMOBJ_MAX_LAYOUT)) { + ERR("wrong layout (\"%s\"), " + "pool created with layout \"%s\"", + layout, pop->layout); + errno = EINVAL; + return -1; + } + + if (pop->heap_offset % Pagesize) { + ERR("unaligned heap: off %" PRIu64, pop->heap_offset); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * obj_replica_init_local -- (internal) initialize runtime part + * of the local replicas + */ +static int +obj_replica_init_local(PMEMobjpool *rep, int is_pmem, size_t resvsize) +{ + LOG(3, "rep %p is_pmem %d resvsize %zu", rep, is_pmem, resvsize); + + /* + * Use some of the memory pool area for run-time info. This + * run-time state is never loaded from the file, it is always + * created here, so no need to worry about byte-order. + */ + rep->is_pmem = is_pmem; + + /* init hooks */ + rep->persist_remote = NULL; + + /* + * All replicas, except for master, are ignored as far as valgrind is + * concerned. This is to save CPU time and lessen the complexity of + * instrumentation. 
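+ * + * Note that this only disables instrumentation tracking for replica + * ranges; the data written to them is still fully propagated by the + * obj_rep_*() functions, so instrumenting the master replica alone + * already covers correctness.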
+ */ + if (!rep->is_master_replica) + VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(rep, resvsize); + + if (rep->is_pmem) { + rep->persist_local = pmem_persist; + rep->flush_local = pmem_flush; + rep->drain_local = pmem_drain; + rep->memcpy_local = pmem_memcpy; + rep->memmove_local = pmem_memmove; + rep->memset_local = pmem_memset; + } else { + rep->persist_local = obj_msync_nofail; + rep->flush_local = obj_msync_nofail; + rep->drain_local = obj_drain_empty; + rep->memcpy_local = obj_nopmem_memcpy; + rep->memmove_local = obj_nopmem_memmove; + rep->memset_local = obj_nopmem_memset; + } + + return 0; +} + +/* + * obj_replica_init_remote -- (internal) initialize runtime part + * of a remote replica + */ +static int +obj_replica_init_remote(PMEMobjpool *rep, struct pool_set *set, + unsigned repidx, int create) +{ + LOG(3, "rep %p set %p repidx %u", rep, set, repidx); + + struct pool_replica *repset = set->replica[repidx]; + + ASSERTne(repset->remote->rpp, NULL); + ASSERTne(repset->remote->node_addr, NULL); + ASSERTne(repset->remote->pool_desc, NULL); + + rep->node_addr = Strdup(repset->remote->node_addr); + if (rep->node_addr == NULL) + return -1; + rep->pool_desc = Strdup(repset->remote->pool_desc); + if (rep->pool_desc == NULL) { + Free(rep->node_addr); + return -1; + } + + rep->rpp = repset->remote->rpp; + + /* remote_base - beginning of the remote pool */ + rep->remote_base = (uintptr_t)rep->addr; + + /* init hooks */ + rep->persist_remote = obj_remote_persist; + rep->persist_local = NULL; + rep->flush_local = NULL; + rep->drain_local = NULL; + rep->memcpy_local = NULL; + rep->memmove_local = NULL; + rep->memset_local = NULL; + + rep->p_ops.remote.read = obj_read_remote; + rep->p_ops.remote.ctx = rep->rpp; + rep->p_ops.remote.base = rep->remote_base; + + return 0; +} + +/* + * obj_cleanup_remote -- (internal) clean up the remote pools data + */ +static void +obj_cleanup_remote(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + for (; pop != NULL; pop = pop->replica) { + if (pop->rpp != NULL) { + Free(pop->node_addr); + Free(pop->pool_desc); + pop->rpp = NULL; + } + } +} + +/* + * obj_replica_init -- (internal) initialize runtime part of the replica + */ +static int +obj_replica_init(PMEMobjpool *rep, struct pool_set *set, unsigned repidx, + int create) +{ + struct pool_replica *repset = set->replica[repidx]; + + if (repidx == 0) { + /* master replica */ + rep->is_master_replica = 1; + rep->has_remote_replicas = set->remote; + + if (set->nreplicas > 1) { + rep->p_ops.persist = obj_rep_persist; + rep->p_ops.flush = obj_rep_flush; + rep->p_ops.drain = obj_rep_drain; + rep->p_ops.memcpy = obj_rep_memcpy; + rep->p_ops.memmove = obj_rep_memmove; + rep->p_ops.memset = obj_rep_memset; + } else { + rep->p_ops.persist = obj_norep_persist; + rep->p_ops.flush = obj_norep_flush; + rep->p_ops.drain = obj_norep_drain; + rep->p_ops.memcpy = obj_norep_memcpy; + rep->p_ops.memmove = obj_norep_memmove; + rep->p_ops.memset = obj_norep_memset; + } + rep->p_ops.base = rep; + } else { + /* non-master replicas */ + rep->is_master_replica = 0; + rep->has_remote_replicas = 0; + + rep->p_ops.persist = NULL; + rep->p_ops.flush = NULL; + rep->p_ops.drain = NULL; + rep->p_ops.memcpy = NULL; + rep->p_ops.memmove = NULL; + rep->p_ops.memset = NULL; + + rep->p_ops.base = NULL; + } + + rep->is_dev_dax = set->replica[repidx]->part[0].is_dev_dax; + + int ret; + if (repset->remote) + ret = obj_replica_init_remote(rep, set, repidx, create); + else + ret = obj_replica_init_local(rep, repset->is_pmem, + set->resvsize); + if (ret) + return 
ret; + + return 0; +} + +/* + * obj_replica_fini -- (internal) deinitialize replica + */ +static void +obj_replica_fini(struct pool_replica *repset) +{ + PMEMobjpool *rep = repset->part[0].addr; + + if (repset->remote) + obj_cleanup_remote(rep); +} + +/* + * obj_runtime_init -- (internal) initialize runtime part of the pool header + */ +static int +obj_runtime_init(PMEMobjpool *pop, int rdonly, int boot, unsigned nlanes) +{ + LOG(3, "pop %p rdonly %d boot %d", pop, rdonly, boot); + struct pmem_ops *p_ops = &pop->p_ops; + + /* run_id is made unique by incrementing the previous value */ + pop->run_id += 2; + if (pop->run_id == 0) + pop->run_id += 2; + pmemops_persist(p_ops, &pop->run_id, sizeof(pop->run_id)); + + /* + * Use some of the memory pool area for run-time info. This + * run-time state is never loaded from the file, it is always + * created here, so no need to worry about byte-order. + */ + pop->rdonly = rdonly; + + pop->uuid_lo = pmemobj_get_uuid_lo(pop); + + pop->lanes_desc.runtime_nlanes = nlanes; + + pop->tx_params = tx_params_new(); + if (pop->tx_params == NULL) + goto err_tx_params; + + pop->stats = stats_new(pop); + if (pop->stats == NULL) + goto err_stat; + + pop->user_data = NULL; + + VALGRIND_REMOVE_PMEM_MAPPING(&pop->mutex_head, + sizeof(pop->mutex_head)); + VALGRIND_REMOVE_PMEM_MAPPING(&pop->rwlock_head, + sizeof(pop->rwlock_head)); + VALGRIND_REMOVE_PMEM_MAPPING(&pop->cond_head, + sizeof(pop->cond_head)); + pop->mutex_head = NULL; + pop->rwlock_head = NULL; + pop->cond_head = NULL; + + if (boot) { + if ((errno = obj_runtime_init_common(pop)) != 0) + goto err_boot; + +#if VG_MEMCHECK_ENABLED + if (On_memcheck) { + /* mark unused part of the pool as not accessible */ + void *end = palloc_heap_end(&pop->heap); + VALGRIND_DO_MAKE_MEM_NOACCESS(end, + (char *)pop + pop->set->poolsize - (char *)end); + } +#endif + + obj_pool_init(); + + if ((errno = critnib_insert(pools_ht, pop->uuid_lo, pop))) { + ERR("!critnib_insert to pools_ht"); + goto err_critnib_insert; + } + + if ((errno = critnib_insert(pools_tree, (uint64_t)pop, pop))) { + ERR("!critnib_insert to pools_tree"); + goto err_tree_insert; + } + } + + if (obj_ctl_init_and_load(pop) != 0) { + errno = EINVAL; + goto err_ctl; + } + + util_mutex_init(&pop->ulog_user_buffers.lock); + pop->ulog_user_buffers.map = ravl_new_sized( + operation_user_buffer_range_cmp, + sizeof(struct user_buffer_def)); + if (pop->ulog_user_buffers.map == NULL) { + ERR("!ravl_new_sized"); + goto err_user_buffers_map; + } + pop->ulog_user_buffers.verify = 0; + + /* + * If possible, turn off all permissions on the pool header page. + * + * The prototype PMFS doesn't allow this when large pages are in + * use. It is not considered an error if this fails. + */ + RANGE_NONE(pop->addr, sizeof(struct pool_hdr), pop->is_dev_dax); + + return 0; + +err_user_buffers_map: + util_mutex_destroy(&pop->ulog_user_buffers.lock); + ctl_delete(pop->ctl); +err_ctl:; + void *n = critnib_remove(pools_tree, (uint64_t)pop); + ASSERTne(n, NULL); +err_tree_insert: + critnib_remove(pools_ht, pop->uuid_lo); +err_critnib_insert: + obj_runtime_cleanup_common(pop); +err_boot: + stats_delete(pop, pop->stats); +err_stat: + tx_params_delete(pop->tx_params); +err_tx_params: + + return -1; +} + +/* + * obj_get_nlanes -- get a number of lanes available at runtime. If the value + * provided with the PMEMOBJ_NLANES environment variable is greater than 0 and + * smaller than OBJ_NLANES constant it returns PMEMOBJ_NLANES. Otherwise it + * returns OBJ_NLANES. 
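+ * + * E.g., assuming OBJ_NLANES is 1024: PMEMOBJ_NLANES=64 yields 64 lanes, + * PMEMOBJ_NLANES=4096 is capped to 1024, and an unset or non-positive + * value falls back to 1024.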
+ */ +static unsigned +obj_get_nlanes(void) +{ + LOG(3, NULL); + + char *env_nlanes = os_getenv(OBJ_NLANES_ENV_VARIABLE); + if (env_nlanes) { + int nlanes = atoi(env_nlanes); + if (nlanes <= 0) { + ERR("%s variable must be a positive integer", + OBJ_NLANES_ENV_VARIABLE); + errno = EINVAL; + goto no_valid_env; + } + + return (unsigned)(OBJ_NLANES < nlanes ? OBJ_NLANES : nlanes); + } + +no_valid_env: + return OBJ_NLANES; +} + +/* + * pmemobj_createU -- create a transactional memory pool (set) + */ +#ifndef _WIN32 +static inline +#endif +PMEMobjpool * +pmemobj_createU(const char *path, const char *layout, + size_t poolsize, mode_t mode) +{ + LOG(3, "path %s layout %s poolsize %zu mode %o", + path, layout, poolsize, mode); + + PMEMobjpool *pop; + struct pool_set *set; + + /* check length of layout */ + if (layout && (strlen(layout) >= PMEMOBJ_MAX_LAYOUT)) { + ERR("Layout too long"); + errno = EINVAL; + return NULL; + } + + /* + * A number of lanes available at runtime equals the lowest value + * from all reported by remote replicas hosts. In the single host mode + * the runtime number of lanes is equal to the total number of lanes + * available in the pool or the value provided with PMEMOBJ_NLANES + * environment variable whichever is lower. + */ + unsigned runtime_nlanes = obj_get_nlanes(); + + struct pool_attr adj_pool_attr = Obj_create_attr; + + /* force set SDS feature */ + if (SDS_at_create) + adj_pool_attr.features.incompat |= POOL_FEAT_SDS; + else + adj_pool_attr.features.incompat &= ~POOL_FEAT_SDS; + + if (util_pool_create(&set, path, poolsize, PMEMOBJ_MIN_POOL, + PMEMOBJ_MIN_PART, &adj_pool_attr, &runtime_nlanes, + REPLICAS_ENABLED) != 0) { + LOG(2, "cannot create pool or pool set"); + return NULL; + } + + ASSERT(set->nreplicas > 0); + + /* pop is master replica from now on */ + pop = set->replica[0]->part[0].addr; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *repset = set->replica[r]; + PMEMobjpool *rep = repset->part[0].addr; + + size_t rt_size = (uintptr_t)(rep + 1) - (uintptr_t)&rep->addr; + VALGRIND_REMOVE_PMEM_MAPPING(&rep->addr, rt_size); + + memset(&rep->addr, 0, rt_size); + + rep->addr = rep; + rep->replica = NULL; + rep->rpp = NULL; + + /* initialize replica runtime - is_pmem, funcs, ... */ + if (obj_replica_init(rep, set, r, 1 /* create */) != 0) { + ERR("initialization of replica #%u failed", r); + goto err; + } + + /* link replicas */ + if (r < set->nreplicas - 1) + rep->replica = set->replica[r + 1]->part[0].addr; + } + + pop->set = set; + + /* create pool descriptor */ + if (obj_descr_create(pop, layout, set->poolsize) != 0) { + LOG(2, "creation of pool descriptor failed"); + goto err; + } + + /* initialize runtime parts - lanes, obj stores, ... 
*/ + if (obj_runtime_init(pop, 0, 1 /* boot */, + runtime_nlanes) != 0) { + ERR("pool initialization failed"); + goto err; + } + + if (util_poolset_chmod(set, mode)) + goto err; + + util_poolset_fdclose(set); + + LOG(3, "pop %p", pop); + + return pop; + +err: + LOG(4, "error clean up"); + int oerrno = errno; + if (set->remote) + obj_cleanup_remote(pop); + util_poolset_close(set, DELETE_CREATED_PARTS); + errno = oerrno; + return NULL; +} + +#ifndef _WIN32 +/* + * pmemobj_create -- create a transactional memory pool (set) + */ +PMEMobjpool * +pmemobj_create(const char *path, const char *layout, + size_t poolsize, mode_t mode) +{ + PMEMOBJ_API_START(); + + PMEMobjpool *pop = pmemobj_createU(path, layout, poolsize, mode); + + PMEMOBJ_API_END(); + return pop; +} +#else +/* + * pmemobj_createW -- create a transactional memory pool (set) + */ +PMEMobjpool * +pmemobj_createW(const wchar_t *path, const wchar_t *layout, size_t poolsize, + mode_t mode) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + char *ulayout = NULL; + if (layout != NULL) { + ulayout = util_toUTF8(layout); + if (ulayout == NULL) { + util_free_UTF8(upath); + return NULL; + } + } + PMEMobjpool *ret = pmemobj_createU(upath, ulayout, poolsize, mode); + + util_free_UTF8(upath); + util_free_UTF8(ulayout); + + return ret; +} +#endif + +/* + * obj_check_basic_local -- (internal) basic pool consistency check + * of a local replica + */ +static int +obj_check_basic_local(PMEMobjpool *pop, size_t mapped_size) +{ + LOG(3, "pop %p mapped_size %zu", pop, mapped_size); + + ASSERTeq(pop->rpp, NULL); + + int consistent = 1; + + if (pop->run_id % 2) { + ERR("invalid run_id %" PRIu64, pop->run_id); + consistent = 0; + } + + if ((errno = lane_check(pop)) != 0) { + LOG(2, "!lane_check"); + consistent = 0; + } + + /* pop->heap_size can still be 0 at this point */ + size_t heap_size = mapped_size - pop->heap_offset; + errno = palloc_heap_check((char *)pop + pop->heap_offset, + heap_size); + if (errno != 0) { + LOG(2, "!heap_check"); + consistent = 0; + } + + return consistent; +} + +/* + * obj_read_remote -- read data from remote replica + * + * It reads data of size 'length' from the remote replica 'pop' + * from address 'addr' and saves it at address 'dest'. 
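+ * + * 'ctx' is the rpmem connection handle (pop->rpp) and 'base' is the base + * address of the remote pool; the offset handed to Rpmem_read is computed + * as addr - base, so 'addr' must point into the replicated range.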
+ */ +int +obj_read_remote(void *ctx, uintptr_t base, void *dest, void *addr, + size_t length) +{ + LOG(3, "ctx %p base 0x%lx dest %p addr %p length %zu", ctx, base, dest, + addr, length); + + ASSERTne(ctx, NULL); + ASSERT((uintptr_t)addr >= base); + + uintptr_t offset = (uintptr_t)addr - base; + if (Rpmem_read(ctx, dest, offset, length, RLANE_DEFAULT)) { + ERR("!rpmem_read"); + return -1; + } + + return 0; +} + +/* + * obj_check_basic_remote -- (internal) basic pool consistency check + * of a remote replica + */ +static int +obj_check_basic_remote(PMEMobjpool *pop, size_t mapped_size) +{ + LOG(3, "pop %p mapped_size %zu", pop, mapped_size); + + ASSERTne(pop->rpp, NULL); + + int consistent = 1; + + /* read pop->run_id */ + if (obj_read_remote(pop->rpp, pop->remote_base, &pop->run_id, + &pop->run_id, sizeof(pop->run_id))) { + ERR("!obj_read_remote"); + return -1; + } + + if (pop->run_id % 2) { + ERR("invalid run_id %" PRIu64, pop->run_id); + consistent = 0; + } + + /* XXX add lane_check_remote */ + + /* pop->heap_size can still be 0 at this point */ + size_t heap_size = mapped_size - pop->heap_offset; + if (palloc_heap_check_remote((char *)pop + pop->heap_offset, + heap_size, &pop->p_ops.remote)) { + LOG(2, "!heap_check_remote"); + consistent = 0; + } + + return consistent; +} + +/* + * obj_check_basic -- (internal) basic pool consistency check + * + * Used to check if all the replicas are consistent prior to pool recovery. + */ +static int +obj_check_basic(PMEMobjpool *pop, size_t mapped_size) +{ + LOG(3, "pop %p mapped_size %zu", pop, mapped_size); + + if (pop->rpp == NULL) + return obj_check_basic_local(pop, mapped_size); + else + return obj_check_basic_remote(pop, mapped_size); +} + +/* + * obj_pool_close -- (internal) close the pool set + */ +static void +obj_pool_close(struct pool_set *set) +{ + int oerrno = errno; + util_poolset_close(set, DO_NOT_DELETE_PARTS); + errno = oerrno; +} + +/* + * obj_pool_open -- (internal) open the given pool + */ +static int +obj_pool_open(struct pool_set **set, const char *path, unsigned flags, + unsigned *nlanes) +{ + if (util_pool_open(set, path, PMEMOBJ_MIN_PART, &Obj_open_attr, + nlanes, NULL, flags) != 0) { + LOG(2, "cannot open pool or pool set"); + return -1; + } + + ASSERT((*set)->nreplicas > 0); + + /* read-only mode is not supported in libpmemobj */ + if ((*set)->rdonly) { + ERR("read-only mode is not supported"); + errno = EINVAL; + goto err_rdonly; + } + + return 0; +err_rdonly: + obj_pool_close(*set); + return -1; +} + +/* + * obj_replicas_init -- (internal) initialize all replicas + */ +static int +obj_replicas_init(struct pool_set *set) +{ + unsigned r; + for (r = 0; r < set->nreplicas; r++) { + struct pool_replica *repset = set->replica[r]; + PMEMobjpool *rep = repset->part[0].addr; + + size_t rt_size = (uintptr_t)(rep + 1) - (uintptr_t)&rep->addr; + + VALGRIND_REMOVE_PMEM_MAPPING(&rep->addr, rt_size); + + memset(&rep->addr, 0, rt_size); + + rep->addr = rep; + rep->replica = NULL; + rep->rpp = NULL; + + /* initialize replica runtime - is_pmem, funcs, ... 
*/ + if (obj_replica_init(rep, set, r, 0 /* open */) != 0) { + ERR("initialization of replica #%u failed", r); + goto err; + } + + /* link replicas */ + if (r < set->nreplicas - 1) + rep->replica = set->replica[r + 1]->part[0].addr; + } + + return 0; +err: + for (unsigned p = 0; p < r; p++) + obj_replica_fini(set->replica[p]); + + return -1; +} + +/* + * obj_replicas_fini -- (internal) deinitialize all replicas + */ +static void +obj_replicas_fini(struct pool_set *set) +{ + int oerrno = errno; + for (unsigned r = 0; r < set->nreplicas; r++) + obj_replica_fini(set->replica[r]); + errno = oerrno; +} + +/* + * obj_replicas_check_basic -- (internal) perform basic consistency check + * for all replicas + */ +static int +obj_replicas_check_basic(PMEMobjpool *pop) +{ + PMEMobjpool *rep; + for (unsigned r = 0; r < pop->set->nreplicas; r++) { + rep = pop->set->replica[r]->part[0].addr; + if (obj_check_basic(rep, pop->set->poolsize) == 0) { + ERR("inconsistent replica #%u", r); + return -1; + } + } + + /* copy lanes */ + void *src = (void *)((uintptr_t)pop + pop->lanes_offset); + size_t len = pop->nlanes * sizeof(struct lane_layout); + + for (unsigned r = 1; r < pop->set->nreplicas; r++) { + rep = pop->set->replica[r]->part[0].addr; + void *dst = (void *)((uintptr_t)rep + pop->lanes_offset); + if (rep->rpp == NULL) { + rep->memcpy_local(dst, src, len, 0); + } else { + if (rep->persist_remote(rep, dst, len, + RLANE_DEFAULT, 0)) + obj_handle_remote_persist_error(pop); + } + } + + return 0; +} + +/* + * obj_open_common -- open a transactional memory pool (set) + * + * This routine takes flags and does all the work + * (flag POOL_OPEN_COW - internal calls can map a read-only pool if required). + */ +static PMEMobjpool * +obj_open_common(const char *path, const char *layout, unsigned flags, int boot) +{ + LOG(3, "path %s layout %s flags 0x%x", path, layout, flags); + + PMEMobjpool *pop = NULL; + struct pool_set *set; + + /* + * A number of lanes available at runtime equals the lowest value + * from all reported by remote replicas hosts. In the single host mode + * the runtime number of lanes is equal to the total number of lanes + * available in the pool or the value provided with PMEMOBJ_NLANES + * environment variable whichever is lower. + */ + unsigned runtime_nlanes = obj_get_nlanes(); + if (obj_pool_open(&set, path, flags, &runtime_nlanes)) + return NULL; + + /* pop is master replica from now on */ + pop = set->replica[0]->part[0].addr; + + if (obj_replicas_init(set)) + goto replicas_init; + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *repset = set->replica[r]; + PMEMobjpool *rep = repset->part[0].addr; + /* check descriptor */ + if (obj_descr_check(rep, layout, set->poolsize) != 0) { + LOG(2, "descriptor check of replica #%u failed", r); + goto err_descr_check; + } + } + + pop->set = set; + + if (boot) { + /* check consistency of 'master' replica */ + if (obj_check_basic(pop, pop->set->poolsize) == 0) { + goto err_check_basic; + } + } + + if (set->nreplicas > 1) { + if (obj_replicas_check_basic(pop)) + goto err_replicas_check_basic; + } + + /* + * before runtime initialization lanes are unavailable, remote persists + * should use RLANE_DEFAULT + */ + pop->lanes_desc.runtime_nlanes = 0; + +#if VG_MEMCHECK_ENABLED + pop->vg_boot = boot; +#endif + /* initialize runtime parts - lanes, obj stores, ... 
*/ + if (obj_runtime_init(pop, 0, boot, runtime_nlanes) != 0) { + ERR("pool initialization failed"); + goto err_runtime_init; + } + +#if VG_MEMCHECK_ENABLED + if (boot) + obj_vg_boot(pop); +#endif + + util_poolset_fdclose(set); + + LOG(3, "pop %p", pop); + + return pop; + +err_runtime_init: +err_replicas_check_basic: +err_check_basic: +err_descr_check: + obj_replicas_fini(set); +replicas_init: + obj_pool_close(set); + return NULL; +} + +/* + * pmemobj_openU -- open a transactional memory pool + */ +#ifndef _WIN32 +static inline +#endif +PMEMobjpool * +pmemobj_openU(const char *path, const char *layout) +{ + LOG(3, "path %s layout %s", path, layout); + + return obj_open_common(path, layout, + COW_at_open ? POOL_OPEN_COW : 0, 1); +} + +#ifndef _WIN32 +/* + * pmemobj_open -- open a transactional memory pool + */ +PMEMobjpool * +pmemobj_open(const char *path, const char *layout) +{ + PMEMOBJ_API_START(); + + PMEMobjpool *pop = pmemobj_openU(path, layout); + + PMEMOBJ_API_END(); + return pop; +} +#else +/* + * pmemobj_openW -- open a transactional memory pool + */ +PMEMobjpool * +pmemobj_openW(const wchar_t *path, const wchar_t *layout) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return NULL; + + char *ulayout = NULL; + if (layout != NULL) { + ulayout = util_toUTF8(layout); + if (ulayout == NULL) { + util_free_UTF8(upath); + return NULL; + } + } + + PMEMobjpool *ret = pmemobj_openU(upath, ulayout); + util_free_UTF8(upath); + util_free_UTF8(ulayout); + return ret; +} +#endif + +/* + * obj_replicas_cleanup -- (internal) free resources allocated for replicas + */ +static void +obj_replicas_cleanup(struct pool_set *set) +{ + LOG(3, "set %p", set); + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + + PMEMobjpool *pop = rep->part[0].addr; + + if (pop->rpp != NULL) { + /* + * remote replica will be closed in util_poolset_close + */ + pop->rpp = NULL; + + Free(pop->node_addr); + Free(pop->pool_desc); + } + } +} + +/* + * obj_pool_lock_cleanup -- (internal) Destroy any locks or condition + * variables that were allocated at run time + */ +static void +obj_pool_lock_cleanup(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + PMEMmutex_internal *nextm; + for (PMEMmutex_internal *m = pop->mutex_head; m != NULL; m = nextm) { + nextm = m->PMEMmutex_next; + LOG(4, "mutex %p *mutex %p", &m->PMEMmutex_lock, + m->PMEMmutex_bsd_mutex_p); + os_mutex_destroy(&m->PMEMmutex_lock); + m->PMEMmutex_next = NULL; + m->PMEMmutex_bsd_mutex_p = NULL; + } + pop->mutex_head = NULL; + + PMEMrwlock_internal *nextr; + for (PMEMrwlock_internal *r = pop->rwlock_head; r != NULL; r = nextr) { + nextr = r->PMEMrwlock_next; + LOG(4, "rwlock %p *rwlock %p", &r->PMEMrwlock_lock, + r->PMEMrwlock_bsd_rwlock_p); + os_rwlock_destroy(&r->PMEMrwlock_lock); + r->PMEMrwlock_next = NULL; + r->PMEMrwlock_bsd_rwlock_p = NULL; + } + pop->rwlock_head = NULL; + + PMEMcond_internal *nextc; + for (PMEMcond_internal *c = pop->cond_head; c != NULL; c = nextc) { + nextc = c->PMEMcond_next; + LOG(4, "cond %p *cond %p", &c->PMEMcond_cond, + c->PMEMcond_bsd_cond_p); + os_cond_destroy(&c->PMEMcond_cond); + c->PMEMcond_next = NULL; + c->PMEMcond_bsd_cond_p = NULL; + } + pop->cond_head = NULL; +} +/* + * obj_pool_cleanup -- (internal) cleanup the pool and unmap + */ +static void +obj_pool_cleanup(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + ravl_delete(pop->ulog_user_buffers.map); + util_mutex_destroy(&pop->ulog_user_buffers.lock); + + stats_delete(pop, pop->stats); + 
tx_params_delete(pop->tx_params); + ctl_delete(pop->ctl); + + obj_pool_lock_cleanup(pop); + + lane_section_cleanup(pop); + lane_cleanup(pop); + + /* unmap all the replicas */ + obj_replicas_cleanup(pop->set); + util_poolset_close(pop->set, DO_NOT_DELETE_PARTS); +} + +/* + * pmemobj_close -- close a transactional memory pool + */ +void +pmemobj_close(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + PMEMOBJ_API_START(); + + _pobj_cache_invalidate++; + + if (critnib_remove(pools_ht, pop->uuid_lo) != pop) { + ERR("critnib_remove for pools_ht"); + } + + if (critnib_remove(pools_tree, (uint64_t)pop) != pop) + ERR("critnib_remove for pools_tree"); + +#ifndef _WIN32 + + if (_pobj_cached_pool.pop == pop) { + _pobj_cached_pool.pop = NULL; + _pobj_cached_pool.uuid_lo = 0; + } + +#else /* _WIN32 */ + + struct _pobj_pcache *pcache = os_tls_get(Cached_pool_key); + if (pcache != NULL) { + if (pcache->pop == pop) { + pcache->pop = NULL; + pcache->uuid_lo = 0; + } + } + +#endif /* _WIN32 */ + + obj_pool_cleanup(pop); + PMEMOBJ_API_END(); +} + +/* + * pmemobj_checkU -- transactional memory pool consistency check + */ +#ifndef _WIN32 +static inline +#endif +int +pmemobj_checkU(const char *path, const char *layout) +{ + LOG(3, "path %s layout %s", path, layout); + + PMEMobjpool *pop = obj_open_common(path, layout, POOL_OPEN_COW, 0); + if (pop == NULL) + return -1; /* errno set by obj_open_common() */ + + int consistent = 1; + + /* + * For replicated pools, basic consistency check is performed + * in obj_open_common(). + */ + if (pop->replica == NULL) + consistent = obj_check_basic(pop, pop->set->poolsize); + + if (consistent && (errno = obj_runtime_init_common(pop)) != 0) { + LOG(3, "!obj_boot"); + consistent = 0; + } + + if (consistent) { + obj_pool_cleanup(pop); + } else { + stats_delete(pop, pop->stats); + tx_params_delete(pop->tx_params); + ctl_delete(pop->ctl); + + /* unmap all the replicas */ + obj_replicas_cleanup(pop->set); + util_poolset_close(pop->set, DO_NOT_DELETE_PARTS); + } + + if (consistent) + LOG(4, "pool consistency check OK"); + + return consistent; +} + +#ifndef _WIN32 +/* + * pmemobj_check -- transactional memory pool consistency check + */ +int +pmemobj_check(const char *path, const char *layout) +{ + PMEMOBJ_API_START(); + + int ret = pmemobj_checkU(path, layout); + + PMEMOBJ_API_END(); + return ret; +} +#else +/* + * pmemobj_checkW -- transactional memory pool consistency check + */ +int +pmemobj_checkW(const wchar_t *path, const wchar_t *layout) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) + return -1; + + char *ulayout = NULL; + if (layout != NULL) { + ulayout = util_toUTF8(layout); + if (ulayout == NULL) { + util_free_UTF8(upath); + return -1; + } + } + + int ret = pmemobj_checkU(upath, ulayout); + + util_free_UTF8(upath); + util_free_UTF8(ulayout); + + return ret; +} +#endif + +/* + * pmemobj_pool_by_oid -- returns the pool handle associated with the oid + */ +PMEMobjpool * +pmemobj_pool_by_oid(PMEMoid oid) +{ + LOG(3, "oid.off 0x%016" PRIx64, oid.off); + + /* XXX this is a temporary fix, to be fixed properly later */ + if (pools_ht == NULL) + return NULL; + + return critnib_get(pools_ht, oid.pool_uuid_lo); +} + +/* + * pmemobj_pool_by_ptr -- returns the pool handle associated with the address + */ +PMEMobjpool * +pmemobj_pool_by_ptr(const void *addr) +{ + LOG(3, "addr %p", addr); + + /* fast path for transactions */ + PMEMobjpool *pop = tx_get_pop(); + + if ((pop != NULL) && OBJ_PTR_FROM_POOL(pop, addr)) + return pop; + + /* XXX this is a temporary fix, to be fixed 
properly later */ + if (pools_tree == NULL) + return NULL; + + pop = critnib_find_le(pools_tree, (uint64_t)addr); + if (pop == NULL) + return NULL; + + size_t pool_size = pop->heap_offset + pop->heap_size; + if ((char *)addr >= (char *)pop + pool_size) + return NULL; + + return pop; +} + +/* + * pmemobj_set_user_data -- sets volatile pointer to the user data for specified + * pool + */ +void +pmemobj_set_user_data(PMEMobjpool *pop, void *data) +{ + LOG(3, "pop %p data %p", pop, data); + + pop->user_data = data; +} + +/* + * pmemobj_get_user_data -- gets volatile pointer to the user data associated + * with the specified pool + */ +void * +pmemobj_get_user_data(PMEMobjpool *pop) +{ + LOG(3, "pop %p", pop); + + return pop->user_data; +} + +/* arguments for constructor_alloc */ +struct constr_args { + int zero_init; + pmemobj_constr constructor; + void *arg; +}; + +/* + * constructor_alloc -- (internal) constructor for obj_alloc_construct + */ +static int +constructor_alloc(void *ctx, void *ptr, size_t usable_size, void *arg) +{ + PMEMobjpool *pop = ctx; + LOG(3, "pop %p ptr %p arg %p", pop, ptr, arg); + struct pmem_ops *p_ops = &pop->p_ops; + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + struct constr_args *carg = arg; + + if (carg->zero_init) + pmemops_memset(p_ops, ptr, 0, usable_size, 0); + + int ret = 0; + if (carg->constructor) + ret = carg->constructor(pop, ptr, carg->arg); + + return ret; +} + +/* + * obj_alloc_construct -- (internal) allocates a new object with constructor + */ +static int +obj_alloc_construct(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + type_num_t type_num, uint64_t flags, + pmemobj_constr constructor, void *arg) +{ + if (size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("requested size too large"); + errno = ENOMEM; + return -1; + } + + struct constr_args carg; + + carg.zero_init = flags & POBJ_FLAG_ZERO; + carg.constructor = constructor; + carg.arg = arg; + + struct operation_context *ctx = pmalloc_operation_hold(pop); + + if (oidp) + operation_add_entry(ctx, &oidp->pool_uuid_lo, pop->uuid_lo, + ULOG_OPERATION_SET); + + int ret = palloc_operation(&pop->heap, 0, + oidp != NULL ? 
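+		/* where palloc stores the new object's offset (via the redo log) */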
&oidp->off : NULL, size, + constructor_alloc, &carg, type_num, 0, + CLASS_ID_FROM_FLAG(flags), ARENA_ID_FROM_FLAG(flags), + ctx); + + pmalloc_operation_release(pop); + + return ret; +} + +/* + * pmemobj_alloc -- allocates a new object + */ +int +pmemobj_alloc(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + uint64_t type_num, pmemobj_constr constructor, void *arg) +{ + LOG(3, "pop %p oidp %p size %zu type_num %llx constructor %p arg %p", + pop, oidp, size, (unsigned long long)type_num, + constructor, arg); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (size == 0) { + ERR("allocation with size 0"); + errno = EINVAL; + return -1; + } + + PMEMOBJ_API_START(); + int ret = obj_alloc_construct(pop, oidp, size, type_num, + 0, constructor, arg); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_xalloc -- allocates with flags + */ +int +pmemobj_xalloc(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + uint64_t type_num, uint64_t flags, + pmemobj_constr constructor, void *arg) +{ + LOG(3, "pop %p oidp %p size %zu type_num %llx flags %llx " + "constructor %p arg %p", + pop, oidp, size, (unsigned long long)type_num, + (unsigned long long)flags, + constructor, arg); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (size == 0) { + ERR("allocation with size 0"); + errno = EINVAL; + return -1; + } + + if (flags & ~POBJ_TX_XALLOC_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_TX_XALLOC_VALID_FLAGS); + errno = EINVAL; + return -1; + } + + PMEMOBJ_API_START(); + int ret = obj_alloc_construct(pop, oidp, size, type_num, + flags, constructor, arg); + + PMEMOBJ_API_END(); + return ret; +} + +/* arguments for constructor_realloc and constructor_zrealloc */ +struct carg_realloc { + void *ptr; + size_t old_size; + size_t new_size; + int zero_init; + type_num_t user_type; + pmemobj_constr constructor; + void *arg; +}; + +/* + * pmemobj_zalloc -- allocates a new zeroed object + */ +int +pmemobj_zalloc(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + uint64_t type_num) +{ + LOG(3, "pop %p oidp %p size %zu type_num %llx", + pop, oidp, size, (unsigned long long)type_num); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (size == 0) { + ERR("allocation with size 0"); + errno = EINVAL; + return -1; + } + + PMEMOBJ_API_START(); + int ret = obj_alloc_construct(pop, oidp, size, type_num, POBJ_FLAG_ZERO, + NULL, NULL); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * obj_free -- (internal) free an object + */ +static void +obj_free(PMEMobjpool *pop, PMEMoid *oidp) +{ + ASSERTne(oidp, NULL); + + struct operation_context *ctx = pmalloc_operation_hold(pop); + + operation_add_entry(ctx, &oidp->pool_uuid_lo, 0, ULOG_OPERATION_SET); + + palloc_operation(&pop->heap, oidp->off, &oidp->off, 0, NULL, NULL, + 0, 0, 0, 0, ctx); + + pmalloc_operation_release(pop); +} + +/* + * constructor_realloc -- (internal) constructor for pmemobj_realloc + */ +static int +constructor_realloc(void *ctx, void *ptr, size_t usable_size, void *arg) +{ + PMEMobjpool *pop = ctx; + LOG(3, "pop %p ptr %p arg %p", pop, ptr, arg); + struct pmem_ops *p_ops = &pop->p_ops; + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + struct carg_realloc *carg = arg; + + if (!carg->zero_init) + return 0; + + if (usable_size > carg->old_size) { + size_t grow_len = usable_size - carg->old_size; + void *new_data_ptr = (void *)((uintptr_t)ptr + carg->old_size); + + pmemops_memset(p_ops, new_data_ptr, 0, grow_len, 0); + 
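+		/*
+		 * Note: usable_size may exceed the requested new size, so the
+		 * zeroed region can be larger than what the caller asked for.
+		 */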
} + + return 0; +} + +/* + * obj_realloc_common -- (internal) common routine for resizing + * existing objects + */ +static int +obj_realloc_common(PMEMobjpool *pop, + PMEMoid *oidp, size_t size, type_num_t type_num, int zero_init) +{ + /* if OID is NULL just allocate memory */ + if (OBJ_OID_IS_NULL(*oidp)) { + /* if size is 0 - do nothing */ + if (size == 0) + return 0; + + return obj_alloc_construct(pop, oidp, size, type_num, + POBJ_FLAG_ZERO, NULL, NULL); + } + + if (size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("requested size too large"); + errno = ENOMEM; + return -1; + } + + /* if size is 0 just free */ + if (size == 0) { + obj_free(pop, oidp); + return 0; + } + + struct carg_realloc carg; + carg.ptr = OBJ_OFF_TO_PTR(pop, oidp->off); + carg.new_size = size; + carg.old_size = pmemobj_alloc_usable_size(*oidp); + carg.user_type = type_num; + carg.constructor = NULL; + carg.arg = NULL; + carg.zero_init = zero_init; + + struct operation_context *ctx = pmalloc_operation_hold(pop); + + int ret = palloc_operation(&pop->heap, oidp->off, &oidp->off, + size, constructor_realloc, &carg, type_num, + 0, 0, 0, ctx); + + pmalloc_operation_release(pop); + + return ret; +} + +/* + * constructor_zrealloc_root -- (internal) constructor for pmemobj_root + */ +static int +constructor_zrealloc_root(void *ctx, void *ptr, size_t usable_size, void *arg) +{ + PMEMobjpool *pop = ctx; + LOG(3, "pop %p ptr %p arg %p", pop, ptr, arg); + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + VALGRIND_ADD_TO_TX(ptr, usable_size); + + struct carg_realloc *carg = arg; + + constructor_realloc(pop, ptr, usable_size, arg); + int ret = 0; + if (carg->constructor) + ret = carg->constructor(pop, ptr, carg->arg); + + VALGRIND_REMOVE_FROM_TX(ptr, usable_size); + + return ret; +} + +/* + * pmemobj_realloc -- resizes an existing object + */ +int +pmemobj_realloc(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + uint64_t type_num) +{ + ASSERTne(oidp, NULL); + + LOG(3, "pop %p oid.off 0x%016" PRIx64 " size %zu type_num %" PRIu64, + pop, oidp->off, size, type_num); + + PMEMOBJ_API_START(); + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + ASSERT(OBJ_OID_IS_VALID(pop, *oidp)); + + int ret = obj_realloc_common(pop, oidp, size, (type_num_t)type_num, 0); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_zrealloc -- resizes an existing object, any new space is zeroed. + */ +int +pmemobj_zrealloc(PMEMobjpool *pop, PMEMoid *oidp, size_t size, + uint64_t type_num) +{ + ASSERTne(oidp, NULL); + + LOG(3, "pop %p oid.off 0x%016" PRIx64 " size %zu type_num %" PRIu64, + pop, oidp->off, size, type_num); + + PMEMOBJ_API_START(); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + ASSERT(OBJ_OID_IS_VALID(pop, *oidp)); + + int ret = obj_realloc_common(pop, oidp, size, (type_num_t)type_num, 1); + + PMEMOBJ_API_END(); + return ret; +} + +/* arguments for constructor_strdup */ +struct carg_strdup { + size_t size; + const char *s; +}; + +/* + * constructor_strdup -- (internal) constructor of pmemobj_strdup + */ +static int +constructor_strdup(PMEMobjpool *pop, void *ptr, void *arg) +{ + LOG(3, "pop %p ptr %p arg %p", pop, ptr, arg); + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + struct carg_strdup *carg = arg; + + /* copy string */ + pmemops_memcpy(&pop->p_ops, ptr, carg->s, carg->size, 0); + + return 0; +} + +/* + * pmemobj_strdup -- allocates a new object with duplicate of the string s. 
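+ * (A hedged usage sketch; "pop" and the type number 42 below are
+ * hypothetical:
+ *
+ *	PMEMoid oid;
+ *	if (pmemobj_strdup(pop, &oid, "hello", 42) == 0)
+ *		assert(strcmp((char *)pmemobj_direct(oid), "hello") == 0);
+ *
+ * on success the new object holds a NUL-terminated copy of s.)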
+ */ +int +pmemobj_strdup(PMEMobjpool *pop, PMEMoid *oidp, const char *s, + uint64_t type_num) +{ + LOG(3, "pop %p oidp %p string %s type_num %" PRIu64, + pop, oidp, s, type_num); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (NULL == s) { + errno = EINVAL; + return -1; + } + + PMEMOBJ_API_START(); + struct carg_strdup carg; + carg.size = (strlen(s) + 1) * sizeof(char); + carg.s = s; + + int ret = obj_alloc_construct(pop, oidp, carg.size, + (type_num_t)type_num, 0, constructor_strdup, &carg); + + PMEMOBJ_API_END(); + return ret; +} + +/* arguments for constructor_wcsdup */ +struct carg_wcsdup { + size_t size; + const wchar_t *s; +}; + +/* + * constructor_wcsdup -- (internal) constructor of pmemobj_wcsdup + */ +static int +constructor_wcsdup(PMEMobjpool *pop, void *ptr, void *arg) +{ + LOG(3, "pop %p ptr %p arg %p", pop, ptr, arg); + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + struct carg_wcsdup *carg = arg; + + /* copy string */ + pmemops_memcpy(&pop->p_ops, ptr, carg->s, carg->size, 0); + + return 0; +} + +/* + * pmemobj_wcsdup -- allocates a new object with duplicate of the wide character + * string s. + */ +int +pmemobj_wcsdup(PMEMobjpool *pop, PMEMoid *oidp, const wchar_t *s, + uint64_t type_num) +{ + LOG(3, "pop %p oidp %p string %S type_num %" PRIu64, + pop, oidp, s, type_num); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (NULL == s) { + errno = EINVAL; + return -1; + } + + PMEMOBJ_API_START(); + struct carg_wcsdup carg; + carg.size = (wcslen(s) + 1) * sizeof(wchar_t); + carg.s = s; + + int ret = obj_alloc_construct(pop, oidp, carg.size, + (type_num_t)type_num, 0, constructor_wcsdup, &carg); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_free -- frees an existing object + */ +void +pmemobj_free(PMEMoid *oidp) +{ + ASSERTne(oidp, NULL); + + LOG(3, "oid.off 0x%016" PRIx64, oidp->off); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + if (oidp->off == 0) + return; + + PMEMOBJ_API_START(); + PMEMobjpool *pop = pmemobj_pool_by_oid(*oidp); + + ASSERTne(pop, NULL); + ASSERT(OBJ_OID_IS_VALID(pop, *oidp)); + + obj_free(pop, oidp); + PMEMOBJ_API_END(); +} + +/* + * pmemobj_alloc_usable_size -- returns usable size of object + */ +size_t +pmemobj_alloc_usable_size(PMEMoid oid) +{ + LOG(3, "oid.off 0x%016" PRIx64, oid.off); + + if (oid.off == 0) + return 0; + + PMEMobjpool *pop = pmemobj_pool_by_oid(oid); + + ASSERTne(pop, NULL); + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + + return (palloc_usable_size(&pop->heap, oid.off)); +} + +/* + * pmemobj_memcpy_persist -- pmemobj version of memcpy + */ +void * +pmemobj_memcpy_persist(PMEMobjpool *pop, void *dest, const void *src, + size_t len) +{ + LOG(15, "pop %p dest %p src %p len %zu", pop, dest, src, len); + PMEMOBJ_API_START(); + + void *ptr = pmemops_memcpy(&pop->p_ops, dest, src, len, 0); + + PMEMOBJ_API_END(); + return ptr; +} + +/* + * pmemobj_memset_persist -- pmemobj version of memset + */ +void * +pmemobj_memset_persist(PMEMobjpool *pop, void *dest, int c, size_t len) +{ + LOG(15, "pop %p dest %p c 0x%02x len %zu", pop, dest, c, len); + PMEMOBJ_API_START(); + + void *ptr = pmemops_memset(&pop->p_ops, dest, c, len, 0); + + PMEMOBJ_API_END(); + return ptr; +} + +/* + * pmemobj_memcpy -- pmemobj version of memcpy + */ +void * +pmemobj_memcpy(PMEMobjpool *pop, void *dest, const void *src, size_t len, + unsigned flags) +{ + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + 
flags); + + PMEMOBJ_API_START(); + + void *ptr = pmemops_memcpy(&pop->p_ops, dest, src, len, flags); + + PMEMOBJ_API_END(); + return ptr; +} + +/* + * pmemobj_memmove -- pmemobj version of memmove + */ +void * +pmemobj_memmove(PMEMobjpool *pop, void *dest, const void *src, size_t len, + unsigned flags) +{ + LOG(15, "pop %p dest %p src %p len %zu flags 0x%x", pop, dest, src, len, + flags); + + PMEMOBJ_API_START(); + + void *ptr = pmemops_memmove(&pop->p_ops, dest, src, len, flags); + + PMEMOBJ_API_END(); + return ptr; +} + +/* + * pmemobj_memset -- pmemobj version of memset + */ +void * +pmemobj_memset(PMEMobjpool *pop, void *dest, int c, size_t len, unsigned flags) +{ + LOG(15, "pop %p dest %p c 0x%02x len %zu flags 0x%x", pop, dest, c, len, + flags); + + PMEMOBJ_API_START(); + + void *ptr = pmemops_memset(&pop->p_ops, dest, c, len, flags); + + PMEMOBJ_API_END(); + return ptr; +} + +/* + * pmemobj_persist -- pmemobj version of pmem_persist + */ +void +pmemobj_persist(PMEMobjpool *pop, const void *addr, size_t len) +{ + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + pmemops_persist(&pop->p_ops, addr, len); +} + +/* + * pmemobj_flush -- pmemobj version of pmem_flush + */ +void +pmemobj_flush(PMEMobjpool *pop, const void *addr, size_t len) +{ + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + pmemops_flush(&pop->p_ops, addr, len); +} + +/* + * pmemobj_xpersist -- pmemobj version of pmem_persist with additional flags + * argument + */ +int +pmemobj_xpersist(PMEMobjpool *pop, const void *addr, size_t len, unsigned flags) +{ + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + if (flags & ~OBJ_X_VALID_FLAGS) { + errno = EINVAL; + ERR("invalid flags 0x%x", flags); + return -1; + } + + return pmemops_xpersist(&pop->p_ops, addr, len, flags); +} + +/* + * pmemobj_xflush -- pmemobj version of pmem_flush with additional flags + * argument + */ +int +pmemobj_xflush(PMEMobjpool *pop, const void *addr, size_t len, unsigned flags) +{ + LOG(15, "pop %p addr %p len %zu", pop, addr, len); + + if (flags & ~OBJ_X_VALID_FLAGS) { + errno = EINVAL; + ERR("invalid flags 0x%x", flags); + return -1; + } + + return pmemops_xflush(&pop->p_ops, addr, len, flags); +} + +/* + * pmemobj_drain -- pmemobj version of pmem_drain + */ +void +pmemobj_drain(PMEMobjpool *pop) +{ + LOG(15, "pop %p", pop); + + pmemops_drain(&pop->p_ops); +} + +/* + * pmemobj_type_num -- returns type number of object + */ +uint64_t +pmemobj_type_num(PMEMoid oid) +{ + LOG(3, "oid.off 0x%016" PRIx64, oid.off); + + ASSERT(!OID_IS_NULL(oid)); + + PMEMobjpool *pop = pmemobj_pool_by_oid(oid); + + ASSERTne(pop, NULL); + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + + return palloc_extra(&pop->heap, oid.off); +} + +/* arguments for constructor_alloc_root */ +struct carg_root { + size_t size; + pmemobj_constr constructor; + void *arg; +}; + +/* + * obj_realloc_root -- (internal) reallocate root object + */ +static int +obj_alloc_root(PMEMobjpool *pop, size_t size, + pmemobj_constr constructor, void *arg) +{ + LOG(3, "pop %p size %zu", pop, size); + + struct carg_realloc carg; + + carg.ptr = OBJ_OFF_TO_PTR(pop, pop->root_offset); + carg.old_size = pop->root_size; + carg.new_size = size; + carg.user_type = POBJ_ROOT_TYPE_NUM; + carg.constructor = constructor; + carg.zero_init = 1; + carg.arg = arg; + + struct operation_context *ctx = pmalloc_operation_hold(pop); + + operation_add_entry(ctx, &pop->root_size, size, ULOG_OPERATION_SET); + + int ret = palloc_operation(&pop->heap, pop->root_offset, + &pop->root_offset, size, + constructor_zrealloc_root, 
&carg,
+			POBJ_ROOT_TYPE_NUM, OBJ_INTERNAL_OBJECT_MASK,
+			0, 0, ctx);
+
+	pmalloc_operation_release(pop);
+
+	return ret;
+}
+
+/*
+ * pmemobj_root_size -- returns size of the root object
+ */
+size_t
+pmemobj_root_size(PMEMobjpool *pop)
+{
+	LOG(3, "pop %p", pop);
+
+	if (pop->root_offset && pop->root_size)
+		return pop->root_size;
+	else
+		return 0;
+}
+
+/*
+ * pmemobj_root_construct -- creates or resizes the root object and returns it
+ */
+PMEMoid
+pmemobj_root_construct(PMEMobjpool *pop, size_t size,
+	pmemobj_constr constructor, void *arg)
+{
+	LOG(3, "pop %p size %zu constructor %p args %p", pop, size, constructor,
+		arg);
+
+	if (size > PMEMOBJ_MAX_ALLOC_SIZE) {
+		ERR("requested size too large");
+		errno = ENOMEM;
+		return OID_NULL;
+	}
+
+	if (size == 0 && pop->root_offset == 0) {
+		ERR("requested size cannot equal zero");
+		errno = EINVAL;
+		return OID_NULL;
+	}
+
+	PMEMOBJ_API_START();
+
+	PMEMoid root;
+
+	pmemobj_mutex_lock_nofail(pop, &pop->rootlock);
+
+	if (size > pop->root_size &&
+		obj_alloc_root(pop, size, constructor, arg)) {
+		pmemobj_mutex_unlock_nofail(pop, &pop->rootlock);
+		LOG(2, "obj_alloc_root failed");
+		PMEMOBJ_API_END();
+		return OID_NULL;
+	}
+
+	root.pool_uuid_lo = pop->uuid_lo;
+	root.off = pop->root_offset;
+
+	pmemobj_mutex_unlock_nofail(pop, &pop->rootlock);
+
+	PMEMOBJ_API_END();
+	return root;
+}
+
+/*
+ * pmemobj_root -- returns the root object, allocating or resizing it if needed
+ */
+PMEMoid
+pmemobj_root(PMEMobjpool *pop, size_t size)
+{
+	LOG(3, "pop %p size %zu", pop, size);
+
+	PMEMOBJ_API_START();
+	PMEMoid oid = pmemobj_root_construct(pop, size, NULL, NULL);
+	PMEMOBJ_API_END();
+	return oid;
+}
+
+/*
+ * pmemobj_first -- returns the first non-internal object in the pool
+ */
+PMEMoid
+pmemobj_first(PMEMobjpool *pop)
+{
+	LOG(3, "pop %p", pop);
+
+	PMEMoid ret = {0, 0};
+
+	uint64_t off = palloc_first(&pop->heap);
+	if (off != 0) {
+		ret.off = off;
+		ret.pool_uuid_lo = pop->uuid_lo;
+
+		if (palloc_flags(&pop->heap, off) & OBJ_INTERNAL_OBJECT_MASK) {
+			return pmemobj_next(ret);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * pmemobj_next -- returns the next non-internal object in the pool
+ */
+PMEMoid
+pmemobj_next(PMEMoid oid)
+{
+	LOG(3, "oid.off 0x%016" PRIx64, oid.off);
+
+	PMEMoid curr = oid;
+	if (curr.off == 0)
+		return OID_NULL;
+
+	PMEMobjpool *pop = pmemobj_pool_by_oid(curr);
+	ASSERTne(pop, NULL);
+
+	do {
+		ASSERT(OBJ_OID_IS_VALID(pop, curr));
+		uint64_t next_off = palloc_next(&pop->heap, curr.off);
+
+		if (next_off == 0)
+			return OID_NULL;
+
+		/* next object exists */
+		curr.off = next_off;
+
+	} while (palloc_flags(&pop->heap, curr.off) & OBJ_INTERNAL_OBJECT_MASK);
+
+	return curr;
+}
+
+/*
+ * pmemobj_reserve -- reserves a single object
+ */
+PMEMoid
+pmemobj_reserve(PMEMobjpool *pop, struct pobj_action *act,
+	size_t size, uint64_t type_num)
+{
+	LOG(3, "pop %p act %p size %zu type_num %llx",
+		pop, act, size,
+		(unsigned long long)type_num);
+
+	PMEMOBJ_API_START();
+	PMEMoid oid = OID_NULL;
+
+	if (palloc_reserve(&pop->heap, size, NULL, NULL, type_num,
+		0, 0, 0, act) != 0) {
+		PMEMOBJ_API_END();
+		return oid;
+	}
+
+	oid.off = act->heap.offset;
+	oid.pool_uuid_lo = pop->uuid_lo;
+
+	PMEMOBJ_API_END();
+	return oid;
+}
+
+/*
+ * pmemobj_xreserve -- reserves a single object, with flags
+ */
+PMEMoid
+pmemobj_xreserve(PMEMobjpool *pop, struct pobj_action *act,
+	size_t size, uint64_t type_num, uint64_t flags)
+{
+	LOG(3, "pop %p act %p size %zu type_num %llx flags %llx",
+		pop, act, size,
+		(unsigned long long)type_num, (unsigned long long)flags);
+
+	PMEMoid oid = OID_NULL;
+
+	if (flags & ~POBJ_ACTION_XRESERVE_VALID_FLAGS) {
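+		/* reject any flag bits this entry point does not understand */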
ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_ACTION_XRESERVE_VALID_FLAGS); + errno = EINVAL; + return oid; + } + + PMEMOBJ_API_START(); + struct constr_args carg; + + carg.zero_init = flags & POBJ_FLAG_ZERO; + carg.constructor = NULL; + carg.arg = NULL; + + if (palloc_reserve(&pop->heap, size, constructor_alloc, &carg, + type_num, 0, CLASS_ID_FROM_FLAG(flags), + ARENA_ID_FROM_FLAG(flags), act) != 0) { + PMEMOBJ_API_END(); + return oid; + } + + oid.off = act->heap.offset; + oid.pool_uuid_lo = pop->uuid_lo; + + PMEMOBJ_API_END(); + return oid; +} + +/* + * pmemobj_set_value -- creates an action to set a value + */ +void +pmemobj_set_value(PMEMobjpool *pop, struct pobj_action *act, + uint64_t *ptr, uint64_t value) +{ + palloc_set_value(&pop->heap, act, ptr, value); +} + +/* + * pmemobj_defer_free -- creates a deferred free action + */ +void +pmemobj_defer_free(PMEMobjpool *pop, PMEMoid oid, struct pobj_action *act) +{ + ASSERT(!OID_IS_NULL(oid)); + palloc_defer_free(&pop->heap, oid.off, act); +} + +/* + * pmemobj_publish -- publishes a collection of actions + */ +int +pmemobj_publish(PMEMobjpool *pop, struct pobj_action *actv, size_t actvcnt) +{ + PMEMOBJ_API_START(); + struct operation_context *ctx = pmalloc_operation_hold(pop); + + size_t entries_size = actvcnt * sizeof(struct ulog_entry_val); + + if (operation_reserve(ctx, entries_size) != 0) { + PMEMOBJ_API_END(); + return -1; + } + + palloc_publish(&pop->heap, actv, actvcnt, ctx); + + pmalloc_operation_release(pop); + + PMEMOBJ_API_END(); + return 0; +} + +/* + * pmemobj_cancel -- cancels collection of actions + */ +void +pmemobj_cancel(PMEMobjpool *pop, struct pobj_action *actv, size_t actvcnt) +{ + PMEMOBJ_API_START(); + palloc_cancel(&pop->heap, actv, actvcnt); + PMEMOBJ_API_END(); +} + +/* + * pmemobj_defrag -- reallocates provided PMEMoids so that the underlying memory + * is efficiently arranged. 
+ */ +int +pmemobj_defrag(PMEMobjpool *pop, PMEMoid **oidv, size_t oidcnt, + struct pobj_defrag_result *result) +{ + PMEMOBJ_API_START(); + + if (result) { + result->relocated = 0; + result->total = 0; + } + + uint64_t **objv = Malloc(sizeof(uint64_t *) * oidcnt); + if (objv == NULL) + return -1; + + int ret = 0; + + size_t j = 0; + for (size_t i = 0; i < oidcnt; ++i) { + if (OID_IS_NULL(*oidv[i])) + continue; + if (oidv[i]->pool_uuid_lo != pop->uuid_lo) { + ret = -1; + ERR("Not all PMEMoids belong to the provided pool"); + goto out; + } + objv[j++] = &oidv[i]->off; + } + + struct operation_context *ctx = pmalloc_operation_hold(pop); + + ret = palloc_defrag(&pop->heap, objv, j, ctx, result); + + pmalloc_operation_release(pop); + +out: + Free(objv); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_list_insert -- adds object to a list + */ +int +pmemobj_list_insert(PMEMobjpool *pop, size_t pe_offset, void *head, + PMEMoid dest, int before, PMEMoid oid) +{ + LOG(3, "pop %p pe_offset %zu head %p dest.off 0x%016" PRIx64 + " before %d oid.off 0x%016" PRIx64, + pop, pe_offset, head, dest.off, before, oid.off); + PMEMOBJ_API_START(); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + ASSERT(OBJ_OID_IS_VALID(pop, dest)); + + ASSERT(pe_offset <= pmemobj_alloc_usable_size(dest) + - sizeof(struct list_entry)); + ASSERT(pe_offset <= pmemobj_alloc_usable_size(oid) + - sizeof(struct list_entry)); + + int ret = list_insert(pop, (ssize_t)pe_offset, head, dest, before, oid); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_list_insert_new -- adds new object to a list + */ +PMEMoid +pmemobj_list_insert_new(PMEMobjpool *pop, size_t pe_offset, void *head, + PMEMoid dest, int before, size_t size, + uint64_t type_num, + pmemobj_constr constructor, void *arg) +{ + LOG(3, "pop %p pe_offset %zu head %p dest.off 0x%016" PRIx64 + " before %d size %zu type_num %" PRIu64, + pop, pe_offset, head, dest.off, before, size, type_num); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + ASSERT(OBJ_OID_IS_VALID(pop, dest)); + + ASSERT(pe_offset <= pmemobj_alloc_usable_size(dest) + - sizeof(struct list_entry)); + ASSERT(pe_offset <= size - sizeof(struct list_entry)); + + if (size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("requested size too large"); + errno = ENOMEM; + return OID_NULL; + } + + PMEMOBJ_API_START(); + struct constr_args carg; + + carg.constructor = constructor; + carg.arg = arg; + carg.zero_init = 0; + + PMEMoid retoid = OID_NULL; + list_insert_new_user(pop, pe_offset, head, dest, before, size, type_num, + constructor_alloc, &carg, &retoid); + + PMEMOBJ_API_END(); + return retoid; +} + +/* + * pmemobj_list_remove -- removes object from a list + */ +int +pmemobj_list_remove(PMEMobjpool *pop, size_t pe_offset, void *head, + PMEMoid oid, int free) +{ + LOG(3, "pop %p pe_offset %zu head %p oid.off 0x%016" PRIx64 " free %d", + pop, pe_offset, head, oid.off, free); + PMEMOBJ_API_START(); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + + ASSERT(pe_offset <= pmemobj_alloc_usable_size(oid) + - sizeof(struct list_entry)); + + int ret; + if (free) + ret = list_remove_free_user(pop, pe_offset, head, &oid); + else + ret = list_remove(pop, (ssize_t)pe_offset, head, oid); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_list_move -- moves object between lists + */ +int +pmemobj_list_move(PMEMobjpool *pop, size_t 
pe_old_offset, void *head_old, + size_t pe_new_offset, void *head_new, + PMEMoid dest, int before, PMEMoid oid) +{ + LOG(3, "pop %p pe_old_offset %zu pe_new_offset %zu" + " head_old %p head_new %p dest.off 0x%016" PRIx64 + " before %d oid.off 0x%016" PRIx64 "", + pop, pe_old_offset, pe_new_offset, + head_old, head_new, dest.off, before, oid.off); + PMEMOBJ_API_START(); + + /* log notice message if used inside a transaction */ + _POBJ_DEBUG_NOTICE_IN_TX(); + + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + ASSERT(OBJ_OID_IS_VALID(pop, dest)); + + ASSERT(pe_old_offset <= pmemobj_alloc_usable_size(oid) + - sizeof(struct list_entry)); + ASSERT(pe_new_offset <= pmemobj_alloc_usable_size(oid) + - sizeof(struct list_entry)); + ASSERT(pe_old_offset <= pmemobj_alloc_usable_size(dest) + - sizeof(struct list_entry)); + ASSERT(pe_new_offset <= pmemobj_alloc_usable_size(dest) + - sizeof(struct list_entry)); + + int ret = list_move(pop, pe_old_offset, head_old, + pe_new_offset, head_new, + dest, before, oid); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_ctl_getU -- programmatically executes a read ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemobj_ctl_getU(PMEMobjpool *pop, const char *name, void *arg) +{ + LOG(3, "pop %p name %s arg %p", pop, name, arg); + return ctl_query(pop == NULL ? NULL : pop->ctl, pop, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_READ, arg); +} + +/* + * pmemobj_ctl_setU -- programmatically executes a write ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemobj_ctl_setU(PMEMobjpool *pop, const char *name, void *arg) +{ + LOG(3, "pop %p name %s arg %p", pop, name, arg); + return ctl_query(pop == NULL ? NULL : pop->ctl, pop, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_WRITE, arg); +} + +/* + * pmemobj_ctl_execU -- programmatically executes a runnable ctl query + */ +#ifndef _WIN32 +static inline +#endif +int +pmemobj_ctl_execU(PMEMobjpool *pop, const char *name, void *arg) +{ + LOG(3, "pop %p name %s arg %p", pop, name, arg); + return ctl_query(pop == NULL ? 
NULL : pop->ctl, pop, + CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_RUNNABLE, arg); +} + +#ifndef _WIN32 +/* + * pmemobj_ctl_get -- programmatically executes a read ctl query + */ +int +pmemobj_ctl_get(PMEMobjpool *pop, const char *name, void *arg) +{ + return pmemobj_ctl_getU(pop, name, arg); +} + +/* + * pmemobj_ctl_set -- programmatically executes a write ctl query + */ +int +pmemobj_ctl_set(PMEMobjpool *pop, const char *name, void *arg) +{ + PMEMOBJ_API_START(); + + int ret = pmemobj_ctl_setU(pop, name, arg); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_ctl_exec -- programmatically executes a runnable ctl query + */ +int +pmemobj_ctl_exec(PMEMobjpool *pop, const char *name, void *arg) +{ + PMEMOBJ_API_START(); + + int ret = pmemobj_ctl_execU(pop, name, arg); + + PMEMOBJ_API_END(); + return ret; +} +#else +/* + * pmemobj_ctl_getW -- programmatically executes a read ctl query + */ +int +pmemobj_ctl_getW(PMEMobjpool *pop, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemobj_ctl_getU(pop, uname, arg); + util_free_UTF8(uname); + + return ret; +} + +/* + * pmemobj_ctl_setW -- programmatically executes a write ctl query + */ +int +pmemobj_ctl_setW(PMEMobjpool *pop, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemobj_ctl_setU(pop, uname, arg); + util_free_UTF8(uname); + + return ret; +} + +/* + * pmemobj_ctl_execW -- programmatically executes a runnable ctl query + */ +int +pmemobj_ctl_execW(PMEMobjpool *pop, const wchar_t *name, void *arg) +{ + char *uname = util_toUTF8(name); + if (uname == NULL) + return -1; + + int ret = pmemobj_ctl_execU(pop, uname, arg); + util_free_UTF8(uname); + + return ret; +} +#endif + +/* + * _pobj_debug_notice -- logs notice message if used inside a transaction + */ +void +_pobj_debug_notice(const char *api_name, const char *file, int line) +{ +#ifdef DEBUG + if (pmemobj_tx_stage() != TX_STAGE_NONE) { + if (file) + LOG(4, "Notice: non-transactional API" + " used inside a transaction (%s in %s:%d)", + api_name, file, line); + else + LOG(4, "Notice: non-transactional API" + " used inside a transaction (%s)", api_name); + } +#endif /* DEBUG */ +} + +#if VG_PMEMCHECK_ENABLED +/* + * pobj_emit_log -- logs library and function names to pmemcheck store log + */ +void +pobj_emit_log(const char *func, int order) +{ + util_emit_log("libpmemobj", func, order); +} +#endif + +#if FAULT_INJECTION +void +pmemobj_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + core_inject_fault_at(type, nth, at); +} + +int +pmemobj_fault_injection_enabled(void) +{ + return core_fault_injection_enabled(); +} +#endif diff --git a/src/pmdk/src/libpmemobj/obj.h b/src/pmdk/src/libpmemobj/obj.h new file mode 100644 index 000000000..5269ead33 --- /dev/null +++ b/src/pmdk/src/libpmemobj/obj.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * obj.h -- internal definitions for obj module + */ + +#ifndef LIBPMEMOBJ_OBJ_H +#define LIBPMEMOBJ_OBJ_H 1 + +#include +#include + +#include "lane.h" +#include "pool_hdr.h" +#include "pmalloc.h" +#include "ctl.h" +#include "sync.h" +#include "stats.h" +#include "ctl_debug.h" +#include "page_size.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "alloc.h" +#include "fault_injection.h" + +#define PMEMOBJ_LOG_PREFIX "libpmemobj" +#define PMEMOBJ_LOG_LEVEL_VAR "PMEMOBJ_LOG_LEVEL" +#define PMEMOBJ_LOG_FILE_VAR 
"PMEMOBJ_LOG_FILE" + +/* attributes of the obj memory pool format for the pool header */ +#define OBJ_HDR_SIG "PMEMOBJ" /* must be 8 bytes including '\0' */ +#define OBJ_FORMAT_MAJOR 6 + +#define OBJ_FORMAT_FEAT_DEFAULT \ + {POOL_FEAT_COMPAT_DEFAULT, POOL_FEAT_INCOMPAT_DEFAULT, 0x0000} + +#define OBJ_FORMAT_FEAT_CHECK \ + {POOL_FEAT_COMPAT_VALID, POOL_FEAT_INCOMPAT_VALID, 0x0000} + +static const features_t obj_format_feat_default = OBJ_FORMAT_FEAT_CHECK; + +/* size of the persistent part of PMEMOBJ pool descriptor */ +#define OBJ_DSC_P_SIZE 2048 +/* size of unused part of the persistent part of PMEMOBJ pool descriptor */ +#define OBJ_DSC_P_UNUSED (OBJ_DSC_P_SIZE - PMEMOBJ_MAX_LAYOUT - 40) + +#define OBJ_LANES_OFFSET (sizeof(struct pmemobjpool)) /* lanes offset */ +#define OBJ_NLANES 1024 /* number of lanes */ + +#define OBJ_OFF_TO_PTR(pop, off) ((void *)((uintptr_t)(pop) + (off))) +#define OBJ_PTR_TO_OFF(pop, ptr) ((uintptr_t)(ptr) - (uintptr_t)(pop)) +#define OBJ_OID_IS_NULL(oid) ((oid).off == 0) +#define OBJ_LIST_EMPTY(head) OBJ_OID_IS_NULL((head)->pe_first) +#define OBJ_OFF_FROM_HEAP(pop, off)\ + ((off) >= (pop)->heap_offset &&\ + (off) < (pop)->heap_offset + (pop)->heap_size) +#define OBJ_OFF_FROM_LANES(pop, off)\ + ((off) >= (pop)->lanes_offset &&\ + (off) < (pop)->lanes_offset +\ + (pop)->nlanes * sizeof(struct lane_layout)) + +#define OBJ_PTR_FROM_POOL(pop, ptr)\ + ((uintptr_t)(ptr) >= (uintptr_t)(pop) &&\ + (uintptr_t)(ptr) < (uintptr_t)(pop) +\ + (pop)->heap_offset + (pop)->heap_size) + +#define OBJ_OFF_IS_VALID(pop, off)\ + (OBJ_OFF_FROM_HEAP(pop, off) ||\ + (OBJ_PTR_TO_OFF(pop, &(pop)->root_offset) == (off)) ||\ + (OBJ_PTR_TO_OFF(pop, &(pop)->root_size) == (off)) ||\ + (OBJ_OFF_FROM_LANES(pop, off))) + +#define OBJ_PTR_IS_VALID(pop, ptr)\ + OBJ_OFF_IS_VALID(pop, OBJ_PTR_TO_OFF(pop, ptr)) + +typedef void (*persist_local_fn)(const void *, size_t); +typedef void (*flush_local_fn)(const void *, size_t); +typedef void (*drain_local_fn)(void); + +typedef void *(*memcpy_local_fn)(void *dest, const void *src, size_t len, + unsigned flags); +typedef void *(*memmove_local_fn)(void *dest, const void *src, size_t len, + unsigned flags); +typedef void *(*memset_local_fn)(void *dest, int c, size_t len, unsigned flags); + +typedef int (*persist_remote_fn)(PMEMobjpool *pop, const void *addr, + size_t len, unsigned lane, unsigned flags); + +typedef uint64_t type_num_t; + +#define CONVERSION_FLAG_OLD_SET_CACHE ((1ULL) << 0) + +/* PMEM_OBJ_POOL_HEAD_SIZE Without the unused and unused2 arrays */ +#define PMEM_OBJ_POOL_HEAD_SIZE 2196 +#define PMEM_OBJ_POOL_UNUSED2_SIZE (PMEM_PAGESIZE \ + - OBJ_DSC_P_UNUSED\ + - PMEM_OBJ_POOL_HEAD_SIZE) + +struct pmemobjpool { + struct pool_hdr hdr; /* memory pool header */ + + /* persistent part of PMEMOBJ pool descriptor (2kB) */ + char layout[PMEMOBJ_MAX_LAYOUT]; + uint64_t lanes_offset; + uint64_t nlanes; + uint64_t heap_offset; + uint64_t unused3; + unsigned char unused[OBJ_DSC_P_UNUSED]; /* must be zero */ + uint64_t checksum; /* checksum of above fields */ + + uint64_t root_offset; + + /* unique runID for this program run - persistent but not checksummed */ + uint64_t run_id; + + uint64_t root_size; + + /* + * These flags can be set from a conversion tool and are set only for + * the first recovery of the pool. + */ + uint64_t conversion_flags; + + uint64_t heap_size; + + struct stats_persistent stats_persistent; + + char pmem_reserved[496]; /* must be zeroed */ + + /* some run-time state, allocated out of memory pool... 
*/ + void *addr; /* mapped region */ + int is_pmem; /* true if pool is PMEM */ + int rdonly; /* true if pool is opened read-only */ + struct palloc_heap heap; + struct lane_descriptor lanes_desc; + uint64_t uuid_lo; + int is_dev_dax; /* true if mapped on device dax */ + + struct ctl *ctl; /* top level node of the ctl tree structure */ + struct stats *stats; + + struct pool_set *set; /* pool set info */ + struct pmemobjpool *replica; /* next replica */ + + /* per-replica functions: pmem or non-pmem */ + persist_local_fn persist_local; /* persist function */ + flush_local_fn flush_local; /* flush function */ + drain_local_fn drain_local; /* drain function */ + memcpy_local_fn memcpy_local; /* persistent memcpy function */ + memmove_local_fn memmove_local; /* persistent memmove function */ + memset_local_fn memset_local; /* persistent memset function */ + + /* for 'master' replica: with or without data replication */ + struct pmem_ops p_ops; + + PMEMmutex rootlock; /* root object lock */ + int is_master_replica; + int has_remote_replicas; + + /* remote replica section */ + void *rpp; /* RPMEMpool opaque handle if it is a remote replica */ + uintptr_t remote_base; /* beginning of the remote pool */ + char *node_addr; /* address of a remote node */ + char *pool_desc; /* descriptor of a poolset */ + + persist_remote_fn persist_remote; /* remote persist function */ + + int vg_boot; + int tx_debug_skip_expensive_checks; + + struct tx_parameters *tx_params; + + /* + * Locks are dynamically allocated on FreeBSD. Keep track so + * we can free them on pmemobj_close. + */ + PMEMmutex_internal *mutex_head; + PMEMrwlock_internal *rwlock_head; + PMEMcond_internal *cond_head; + + struct { + struct ravl *map; + os_mutex_t lock; + int verify; + } ulog_user_buffers; + + void *user_data; + + /* padding to align size of this structure to page boundary */ + /* sizeof(unused2) == 8192 - offsetof(struct pmemobjpool, unused2) */ + char unused2[PMEM_OBJ_POOL_UNUSED2_SIZE]; +}; + +/* + * Stored in the 'size' field of oobh header, determines whether the object + * is internal or not. Internal objects are skipped in pmemobj iteration + * functions. + */ +#define OBJ_INTERNAL_OBJECT_MASK ((1ULL) << 15) + +#define CLASS_ID_FROM_FLAG(flag)\ +((uint16_t)((flag) >> 48)) + +#define ARENA_ID_FROM_FLAG(flag)\ +((uint16_t)((flag) >> 32)) + +/* + * pmemobj_get_uuid_lo -- (internal) evaluates XOR sum of least significant + * 8 bytes with most significant 8 bytes. 
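+ * For example, byte i of the result is poolset_uuid[i] ^ poolset_uuid[8 + i],
+ * folded in most-significant-byte-first order, so the 16-byte UUID collapses
+ * into the single 64-bit identifier used as the pool's runtime key.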
+ */ +static inline uint64_t +pmemobj_get_uuid_lo(PMEMobjpool *pop) +{ + uint64_t uuid_lo = 0; + + for (int i = 0; i < 8; i++) { + uuid_lo = (uuid_lo << 8) | + (pop->hdr.poolset_uuid[i] ^ + pop->hdr.poolset_uuid[8 + i]); + } + + return uuid_lo; +} + +/* + * OBJ_OID_IS_VALID -- (internal) checks if 'oid' is valid + */ +static inline int +OBJ_OID_IS_VALID(PMEMobjpool *pop, PMEMoid oid) +{ + return OBJ_OID_IS_NULL(oid) || + (oid.pool_uuid_lo == pop->uuid_lo && + oid.off >= pop->heap_offset && + oid.off < pop->heap_offset + pop->heap_size); +} + +static inline int +OBJ_OFF_IS_VALID_FROM_CTX(void *ctx, uint64_t offset) +{ + PMEMobjpool *pop = (PMEMobjpool *)ctx; + return OBJ_OFF_IS_VALID(pop, offset); +} + +void obj_init(void); +void obj_fini(void); +int obj_read_remote(void *ctx, uintptr_t base, void *dest, void *addr, + size_t length); + +/* + * (debug helper macro) logs notice message if used inside a transaction + */ +#ifdef DEBUG +#define _POBJ_DEBUG_NOTICE_IN_TX()\ + _pobj_debug_notice(__func__, NULL, 0) +#else +#define _POBJ_DEBUG_NOTICE_IN_TX() do {} while (0) +#endif + +#if FAULT_INJECTION +void +pmemobj_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +pmemobj_fault_injection_enabled(void); +#else +static inline void +pmemobj_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +pmemobj_fault_injection_enabled(void) +{ + return 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/palloc.c b/src/pmdk/src/libpmemobj/palloc.c new file mode 100644 index 000000000..856aba6be --- /dev/null +++ b/src/pmdk/src/libpmemobj/palloc.c @@ -0,0 +1,1336 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * palloc.c -- implementation of pmalloc POSIX-like API + * + * This is the front-end part of the persistent memory allocator. It uses both + * transient and persistent representation of the heap to provide memory blocks + * in a reasonable time and with an acceptable common-case fragmentation. + * + * Lock ordering in the entirety of the allocator is simple, but might be hard + * to follow at times because locks are, by necessity, externalized. + * There are two sets of locks that need to be taken into account: + * - runtime state locks, represented by buckets. + * - persistent state locks, represented by memory block mutexes. + * + * To properly use them, follow these rules: + * - When nesting, always lock runtime state first. + * Doing the reverse might cause deadlocks in other parts of the code. + * + * - When introducing functions that would require runtime state locks, + * always try to move the lock acquiring to the upper most layer. This + * usually means that the functions will simply take "struct bucket" as + * their argument. By doing so most of the locking can happen in + * the frontend part of the allocator and it's easier to follow the first + * rule because all functions in the backend can safely use the persistent + * state locks - the runtime lock, if it is needed, will be already taken + * by the upper layer. + * + * General lock ordering: + * 1. arenas.lock + * 2. buckets (sorted by ID) + * 3. 
memory blocks (sorted by lock address) + */ + +#include "valgrind_internal.h" +#include "heap_layout.h" +#include "heap.h" +#include "alloc_class.h" +#include "out.h" +#include "sys_util.h" +#include "palloc.h" +#include "ravl.h" +#include "vec.h" + +struct pobj_action_internal { + /* type of operation (alloc/free vs set) */ + enum pobj_action_type type; + + /* not used */ + uint32_t padding; + + /* + * Action-specific lock that needs to be taken for the duration of + * an action. + */ + os_mutex_t *lock; + + /* action-specific data */ + union { + /* valid only when type == POBJ_ACTION_TYPE_HEAP */ + struct { + uint64_t offset; + uint64_t usable_size; + enum memblock_state new_state; + struct memory_block m; + struct memory_block_reserved *mresv; + }; + + /* valid only when type == POBJ_ACTION_TYPE_MEM */ + struct { + uint64_t *ptr; + uint64_t value; + }; + + /* padding, not used */ + uint64_t data2[14]; + }; +}; + +/* + * palloc_set_value -- creates a new set memory action + */ +void +palloc_set_value(struct palloc_heap *heap, struct pobj_action *act, + uint64_t *ptr, uint64_t value) +{ + act->type = POBJ_ACTION_TYPE_MEM; + + struct pobj_action_internal *actp = (struct pobj_action_internal *)act; + actp->ptr = ptr; + actp->value = value; + actp->lock = NULL; +} + +/* + * alloc_prep_block -- (internal) prepares a memory block for allocation + * + * Once the block is fully reserved and it's guaranteed that no one else will + * be able to write to this memory region it is safe to write the allocation + * header and call the object construction function. + * + * Because the memory block at this stage is only reserved in transient state + * there's no need to worry about fail-safety of this method because in case + * of a crash the memory will be back in the free blocks collection. + */ +static int +alloc_prep_block(struct palloc_heap *heap, const struct memory_block *m, + palloc_constr constructor, void *arg, + uint64_t extra_field, uint16_t object_flags, + struct pobj_action_internal *out) +{ + void *uptr = m->m_ops->get_user_data(m); + size_t usize = m->m_ops->get_user_size(m); + + VALGRIND_DO_MEMPOOL_ALLOC(heap->layout, uptr, usize); + VALGRIND_DO_MAKE_MEM_UNDEFINED(uptr, usize); + VALGRIND_ANNOTATE_NEW_MEMORY(uptr, usize); + + m->m_ops->write_header(m, extra_field, object_flags); + + /* + * Set allocated memory with pattern, if debug.heap.alloc_pattern CTL + * parameter had been set. + */ + if (unlikely(heap->alloc_pattern > PALLOC_CTL_DEBUG_NO_PATTERN)) { + pmemops_memset(&heap->p_ops, uptr, heap->alloc_pattern, + usize, 0); + VALGRIND_DO_MAKE_MEM_UNDEFINED(uptr, usize); + } + + int ret; + if (constructor != NULL && + (ret = constructor(heap->base, uptr, usize, arg)) != 0) { + + /* + * If canceled, revert the block back to the free state in vg + * machinery. + */ + VALGRIND_DO_MEMPOOL_FREE(heap->layout, uptr); + + return ret; + } + + /* + * To avoid determining the user data pointer twice this method is also + * responsible for calculating the offset of the object in the pool that + * will be used to set the offset destination pointer provided by the + * caller. + */ + out->offset = HEAP_PTR_TO_OFF(heap, uptr); + out->usable_size = usize; + + return 0; +} + +/* + * palloc_reservation_create -- creates a volatile reservation of a + * memory block. + * + * The first step in the allocation of a new block is reserving it in + * the transient heap - which is represented by the bucket abstraction. 
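+ * (Nothing is made persistent at this stage: a reservation only mutates
+ * transient state and takes effect once the action is published, e.g.
+ * via palloc_publish().)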
+ * + * To provide optimal scaling for multi-threaded applications and reduce + * fragmentation the appropriate bucket is chosen depending on the + * current thread context and to which allocation class the requested + * size falls into. + * + * Once the bucket is selected, just enough memory is reserved for the + * requested size. The underlying block allocation algorithm + * (best-fit, next-fit, ...) varies depending on the bucket container. + */ +static int +palloc_reservation_create(struct palloc_heap *heap, size_t size, + palloc_constr constructor, void *arg, + uint64_t extra_field, uint16_t object_flags, + uint16_t class_id, uint16_t arena_id, + struct pobj_action_internal *out) +{ + int err = 0; + + struct memory_block *new_block = &out->m; + out->type = POBJ_ACTION_TYPE_HEAP; + + ASSERT(class_id < UINT8_MAX); + struct alloc_class *c = class_id == 0 ? + heap_get_best_class(heap, size) : + alloc_class_by_id(heap_alloc_classes(heap), + (uint8_t)class_id); + + if (c == NULL) { + ERR("no allocation class for size %lu bytes", size); + errno = EINVAL; + return -1; + } + + /* + * The caller provided size in bytes, but buckets operate in + * 'size indexes' which are multiples of the block size in the + * bucket. + * + * For example, to allocate 500 bytes from a bucket that + * provides 256 byte blocks two memory 'units' are required. + */ + ssize_t size_idx = alloc_class_calc_size_idx(c, size); + if (size_idx < 0) { + ERR("allocation class not suitable for size %lu bytes", + size); + errno = EINVAL; + return -1; + } + ASSERT(size_idx <= UINT32_MAX); + *new_block = MEMORY_BLOCK_NONE; + new_block->size_idx = (uint32_t)size_idx; + + struct bucket *b = heap_bucket_acquire(heap, c->id, arena_id); + + err = heap_get_bestfit_block(heap, b, new_block); + if (err != 0) + goto out; + + if (alloc_prep_block(heap, new_block, constructor, arg, + extra_field, object_flags, out) != 0) { + /* + * Constructor returned non-zero value which means + * the memory block reservation has to be rolled back. + */ + if (new_block->type == MEMORY_BLOCK_HUGE) { + bucket_insert_block(b, new_block); + } + err = ECANCELED; + goto out; + } + + /* + * Each as of yet unfulfilled reservation needs to be tracked in the + * runtime state. + * The memory block cannot be put back into the global state unless + * there are no active reservations. + */ + if ((out->mresv = b->active_memory_block) != NULL) + util_fetch_and_add64(&out->mresv->nresv, 1); + + out->lock = new_block->m_ops->get_lock(new_block); + out->new_state = MEMBLOCK_ALLOCATED; + +out: + heap_bucket_release(heap, b); + + if (err == 0) + return 0; + + errno = err; + return -1; +} + +/* + * palloc_heap_action_exec -- executes a single heap action (alloc, free) + */ +static void +palloc_heap_action_exec(struct palloc_heap *heap, + const struct pobj_action_internal *act, + struct operation_context *ctx) +{ +#ifdef DEBUG + if (act->m.m_ops->get_state(&act->m) == act->new_state) { + ERR("invalid operation or heap corruption"); + ASSERT(0); + } +#endif /* DEBUG */ + + /* + * The actual required metadata modifications are chunk-type + * dependent, but it always is a modification of a single 8 byte + * value - either modification of few bits in a bitmap or + * changing a chunk type from free to used or vice versa. + */ + act->m.m_ops->prep_hdr(&act->m, act->new_state, ctx); +} + +/* + * palloc_restore_free_chunk_state -- updates the runtime state of a free chunk. + * + * This function also takes care of coalescing of huge chunks. 
+ */ +static void +palloc_restore_free_chunk_state(struct palloc_heap *heap, + struct memory_block *m) +{ + if (m->type == MEMORY_BLOCK_HUGE) { + struct bucket *b = heap_bucket_acquire(heap, + DEFAULT_ALLOC_CLASS_ID, + HEAP_ARENA_PER_THREAD); + if (heap_free_chunk_reuse(heap, b, m) != 0) { + if (errno == EEXIST) { + FATAL( + "duplicate runtime chunk state, possible double free"); + } else { + LOG(2, "unable to track runtime chunk state"); + } + } + heap_bucket_release(heap, b); + } +} + +/* + * palloc_mem_action_noop -- empty handler for unused memory action funcs + */ +static void +palloc_mem_action_noop(struct palloc_heap *heap, + struct pobj_action_internal *act) +{ + +} + +/* + * palloc_reservation_clear -- clears the reservation state of the block, + * discards the associated memory block if possible + */ +static void +palloc_reservation_clear(struct palloc_heap *heap, + struct pobj_action_internal *act, int publish) +{ + if (act->mresv == NULL) + return; + + struct memory_block_reserved *mresv = act->mresv; + struct bucket *b = mresv->bucket; + + if (!publish) { + util_mutex_lock(&b->lock); + struct memory_block *am = &b->active_memory_block->m; + + /* + * If a memory block used for the action is the currently active + * memory block of the bucket it can be inserted back to the + * bucket. This way it will be available for future allocation + * requests, improving performance. + */ + if (b->is_active && + am->chunk_id == act->m.chunk_id && + am->zone_id == act->m.zone_id) { + ASSERTeq(b->active_memory_block, mresv); + bucket_insert_block(b, &act->m); + } + + util_mutex_unlock(&b->lock); + } + + if (util_fetch_and_sub64(&mresv->nresv, 1) == 1) { + VALGRIND_ANNOTATE_HAPPENS_AFTER(&mresv->nresv); + /* + * If the memory block used for the action is not currently used + * in any bucket nor action it can be discarded (given back to + * the heap). + */ + heap_discard_run(heap, &mresv->m); + Free(mresv); + } else { + VALGRIND_ANNOTATE_HAPPENS_BEFORE(&mresv->nresv); + } +} + +/* + * palloc_heap_action_on_cancel -- restores the state of the heap + */ +static void +palloc_heap_action_on_cancel(struct palloc_heap *heap, + struct pobj_action_internal *act) +{ + if (act->new_state == MEMBLOCK_FREE) + return; + + VALGRIND_DO_MEMPOOL_FREE(heap->layout, + act->m.m_ops->get_user_data(&act->m)); + + act->m.m_ops->invalidate(&act->m); + palloc_restore_free_chunk_state(heap, &act->m); + + palloc_reservation_clear(heap, act, 0 /* publish */); +} + +/* + * palloc_heap_action_on_process -- performs finalization steps under a lock + * on the persistent state + */ +static void +palloc_heap_action_on_process(struct palloc_heap *heap, + struct pobj_action_internal *act) +{ + if (act->new_state == MEMBLOCK_ALLOCATED) { + STATS_INC(heap->stats, persistent, heap_curr_allocated, + act->m.m_ops->get_real_size(&act->m)); + if (act->m.type == MEMORY_BLOCK_RUN) { + STATS_INC(heap->stats, transient, heap_run_allocated, + act->m.m_ops->get_real_size(&act->m)); + } + } else if (act->new_state == MEMBLOCK_FREE) { + if (On_memcheck) { + void *ptr = act->m.m_ops->get_user_data(&act->m); + VALGRIND_DO_MEMPOOL_FREE(heap->layout, ptr); + } else if (On_pmemcheck) { + /* + * The sync module, responsible for implementations of + * persistent memory resident volatile variables, + * de-registers the pmemcheck pmem mapping at the time + * of initialization. This is done so that usage of + * pmem locks is not reported as an error due to + * missing flushes/stores outside of transaction. 
But,
+			 * after we freed an object, we need to reestablish
+			 * the pmem mapping, otherwise pmemcheck might miss bugs
+			 * that occur in newly allocated memory locations that
+			 * once were occupied by a lock/volatile variable.
+			 */
+			void *ptr = act->m.m_ops->get_user_data(&act->m);
+			size_t size = act->m.m_ops->get_real_size(&act->m);
+			VALGRIND_REGISTER_PMEM_MAPPING(ptr, size);
+		}
+
+		STATS_SUB(heap->stats, persistent, heap_curr_allocated,
+			act->m.m_ops->get_real_size(&act->m));
+		if (act->m.type == MEMORY_BLOCK_RUN) {
+			STATS_SUB(heap->stats, transient, heap_run_allocated,
+				act->m.m_ops->get_real_size(&act->m));
+		}
+		heap_memblock_on_free(heap, &act->m);
+	}
+}
+
+/*
+ * palloc_heap_action_on_unlock -- performs finalization steps that need to be
+ * performed without a lock on persistent state
+ */
+static void
+palloc_heap_action_on_unlock(struct palloc_heap *heap,
+	struct pobj_action_internal *act)
+{
+	if (act->new_state == MEMBLOCK_ALLOCATED) {
+		palloc_reservation_clear(heap, act, 1 /* publish */);
+	} else if (act->new_state == MEMBLOCK_FREE) {
+		palloc_restore_free_chunk_state(heap, &act->m);
+	}
+}
+
+/*
+ * palloc_mem_action_exec -- executes a single memory action (set, and, or)
+ */
+static void
+palloc_mem_action_exec(struct palloc_heap *heap,
+	const struct pobj_action_internal *act,
+	struct operation_context *ctx)
+{
+	operation_add_entry(ctx, act->ptr, act->value, ULOG_OPERATION_SET);
+}
+
+static const struct {
+	/*
+	 * Translate the action into some number of operation entries.
+	 */
+	void (*exec)(struct palloc_heap *heap,
+		const struct pobj_action_internal *act,
+		struct operation_context *ctx);
+
+	/*
+	 * Cancel any runtime state changes. Can be called only when the action
+	 * has not been translated to a persistent operation yet.
+	 */
+	void (*on_cancel)(struct palloc_heap *heap,
+		struct pobj_action_internal *act);
+
+	/*
+	 * Final steps after persistent state has been modified. Performed
+	 * under the action-specific lock.
+	 */
+	void (*on_process)(struct palloc_heap *heap,
+		struct pobj_action_internal *act);
+
+	/*
+	 * Final steps after persistent state has been modified. Performed
+	 * after the action-specific lock has been dropped.
+	 */
+	void (*on_unlock)(struct palloc_heap *heap,
+		struct pobj_action_internal *act);
+} action_funcs[POBJ_MAX_ACTION_TYPE] = {
+	[POBJ_ACTION_TYPE_HEAP] = {
+		.exec = palloc_heap_action_exec,
+		.on_cancel = palloc_heap_action_on_cancel,
+		.on_process = palloc_heap_action_on_process,
+		.on_unlock = palloc_heap_action_on_unlock,
+	},
+	[POBJ_ACTION_TYPE_MEM] = {
+		.exec = palloc_mem_action_exec,
+		.on_cancel = palloc_mem_action_noop,
+		.on_process = palloc_mem_action_noop,
+		.on_unlock = palloc_mem_action_noop,
+	}
+};
+
+/*
+ * palloc_action_compare -- compares two actions based on lock address
+ */
+static int
+palloc_action_compare(const void *lhs, const void *rhs)
+{
+	const struct pobj_action_internal *mlhs = lhs;
+	const struct pobj_action_internal *mrhs = rhs;
+	uintptr_t vlhs = (uintptr_t)(mlhs->lock);
+	uintptr_t vrhs = (uintptr_t)(mrhs->lock);
+
+	if (vlhs < vrhs)
+		return -1;
+	if (vlhs > vrhs)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * palloc_exec_actions -- perform the provided free/alloc operations
+ */
+static void
+palloc_exec_actions(struct palloc_heap *heap,
+	struct operation_context *ctx,
+	struct pobj_action_internal *actv,
+	size_t actvcnt)
+{
+	/*
+	 * The operations array is sorted so that proper lock ordering is
+	 * ensured.
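+	 * (Sorting by lock address also lets each lock be taken only once
+	 * per run of actions that share it -- see the act->lock checks in
+	 * the processing loops below.)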
+	 */
+	if (actv) {
+		qsort(actv, actvcnt, sizeof(struct pobj_action_internal),
+			palloc_action_compare);
+	} else {
+		ASSERTeq(actvcnt, 0);
+	}
+
+	struct pobj_action_internal *act;
+	for (size_t i = 0; i < actvcnt; ++i) {
+		act = &actv[i];
+
+		/*
+		 * This lock must be held for the duration between the creation
+		 * of the allocation metadata updates in the operation context
+		 * and the operation processing. This is because a different
+		 * thread might operate on the same 8-byte value of the run
+		 * bitmap and override allocation performed by this thread.
+		 */
+		if (i == 0 || act->lock != actv[i - 1].lock) {
+			if (act->lock)
+				util_mutex_lock(act->lock);
+		}
+
+		/* translate action to some number of operation_entry'ies */
+		action_funcs[act->type].exec(heap, act, ctx);
+	}
+
+	/* wait for all allocated object headers to be persistent */
+	pmemops_drain(&heap->p_ops);
+
+	/* perform all persistent memory operations */
+	operation_process(ctx);
+
+	for (size_t i = 0; i < actvcnt; ++i) {
+		act = &actv[i];
+
+		action_funcs[act->type].on_process(heap, act);
+
+		if (i == actvcnt - 1 || act->lock != actv[i + 1].lock) {
+			if (act->lock)
+				util_mutex_unlock(act->lock);
+		}
+	}
+
+	for (size_t i = 0; i < actvcnt; ++i) {
+		act = &actv[i];
+
+		action_funcs[act->type].on_unlock(heap, act);
+	}
+
+	operation_finish(ctx, 0);
+}
+
+/*
+ * palloc_reserve -- creates a single reservation
+ */
+int
+palloc_reserve(struct palloc_heap *heap, size_t size,
+	palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags,
+	uint16_t class_id, uint16_t arena_id,
+	struct pobj_action *act)
+{
+	COMPILE_ERROR_ON(sizeof(struct pobj_action) !=
+		sizeof(struct pobj_action_internal));
+
+	return palloc_reservation_create(heap, size, constructor, arg,
+		extra_field, object_flags, class_id, arena_id,
+		(struct pobj_action_internal *)act);
+}
+
+/*
+ * palloc_defer_free_create -- creates an internal deferred free action
+ */
+static void
+palloc_defer_free_create(struct palloc_heap *heap, uint64_t off,
+	struct pobj_action_internal *out)
+{
+	COMPILE_ERROR_ON(sizeof(struct pobj_action) !=
+		sizeof(struct pobj_action_internal));
+
+	out->type = POBJ_ACTION_TYPE_HEAP;
+	out->offset = off;
+	out->m = memblock_from_offset(heap, off);
+
+	/*
+	 * For the duration of free we may need to protect surrounding
+	 * metadata from being modified.
+	 */
+	out->lock = out->m.m_ops->get_lock(&out->m);
+	out->mresv = NULL;
+	out->new_state = MEMBLOCK_FREE;
+}
+
+/*
+ * palloc_defer_free -- creates a deferred free action
+ */
+void
+palloc_defer_free(struct palloc_heap *heap, uint64_t off,
+	struct pobj_action *act)
+{
+	COMPILE_ERROR_ON(sizeof(struct pobj_action) !=
+		sizeof(struct pobj_action_internal));
+
+	palloc_defer_free_create(heap, off, (struct pobj_action_internal *)act);
+}
+
+/*
+ * palloc_cancel -- cancels all reservations in the array
+ */
+void
+palloc_cancel(struct palloc_heap *heap,
+	struct pobj_action *actv, size_t actvcnt)
+{
+	struct pobj_action_internal *act;
+	for (size_t i = 0; i < actvcnt; ++i) {
+		act = (struct pobj_action_internal *)&actv[i];
+		action_funcs[act->type].on_cancel(heap, act);
+	}
+}
+
+/*
+ * palloc_publish -- publishes all reservations in the array
+ */
+void
+palloc_publish(struct palloc_heap *heap,
+	struct pobj_action *actv, size_t actvcnt,
+	struct operation_context *ctx)
+{
+	palloc_exec_actions(heap, ctx,
+		(struct pobj_action_internal *)actv, actvcnt);
+}
+
+/*
+ * palloc_operation -- persistent memory operation. Takes a NULL pointer
+ * or an existing memory block and modifies it to occupy at least 'size'
+ * bytes.
+ *
+ * The malloc, free and realloc routines are implemented in the context of this
+ * common operation which encompasses all of the functionality usually done
+ * separately in those methods.
+ *
+ * The first thing that needs to be done is determining which memory blocks
+ * will be affected by the operation - this varies depending on whether the
+ * operation will need to modify or free an existing block and/or allocate
+ * a new one.
+ *
+ * Simplified allocation process flow is as follows:
+ *	- reserve a new block in the transient heap
+ *	- prepare the new block
+ *	- create redo log of required modifications
+ *		- chunk metadata
+ *		- offset of the new object
+ *	- commit and process the redo log
+ *
+ * And similarly, the deallocation process:
+ *	- create redo log of required modifications
+ *		- reverse the chunk metadata back to the 'free' state
+ *		- set the destination of the object offset to zero
+ *	- commit and process the redo log
+ * There's an important distinction in the deallocation process - it does not
+ * return the memory block to the transient container. That is done once no
+ * more memory is available.
+ *
+ * Reallocation is a combination of the above, with one additional step
+ * of copying the old content.
+ */
+int
+palloc_operation(struct palloc_heap *heap,
+	uint64_t off, uint64_t *dest_off, size_t size,
+	palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags,
+	uint16_t class_id, uint16_t arena_id,
+	struct operation_context *ctx)
+{
+	size_t user_size = 0;
+
+	size_t nops = 0;
+	struct pobj_action_internal ops[2];
+	struct pobj_action_internal *alloc = NULL;
+	struct pobj_action_internal *dealloc = NULL;
+
+	/*
+	 * The offset of an existing block can be nonzero, which means this
+	 * operation is either a free or a realloc - either way the offset of
+	 * the object needs to be translated into a memory block, which is
+	 * a structure that all of the heap methods expect.
+	 */
+	if (off != 0) {
+		dealloc = &ops[nops++];
+		palloc_defer_free_create(heap, off, dealloc);
+		user_size = dealloc->m.m_ops->get_user_size(&dealloc->m);
+		if (user_size == size) {
+			operation_cancel(ctx);
+			return 0;
+		}
+	}
+
+	/* alloc or realloc */
+	if (size != 0) {
+		alloc = &ops[nops++];
+		if (palloc_reservation_create(heap, size, constructor, arg,
+			extra_field, object_flags,
+			class_id, arena_id, alloc) != 0) {
+			operation_cancel(ctx);
+			return -1;
+		}
+	}
+
+	/* realloc */
+	if (alloc != NULL && dealloc != NULL) {
+		/* copy data to newly allocated memory */
+		size_t old_size = user_size;
+		size_t to_cpy = old_size > size ? size : old_size;
+		VALGRIND_ADD_TO_TX(
+			HEAP_OFF_TO_PTR(heap, alloc->offset),
+			to_cpy);
+		pmemops_memcpy(&heap->p_ops,
+			HEAP_OFF_TO_PTR(heap, alloc->offset),
+			HEAP_OFF_TO_PTR(heap, off),
+			to_cpy,
+			0);
+		VALGRIND_REMOVE_FROM_TX(
+			HEAP_OFF_TO_PTR(heap, alloc->offset),
+			to_cpy);
+	}
+
+	/*
+	 * If the caller provided a destination value to update, it needs to
+	 * be modified atomically alongside the heap metadata, and so the
+	 * operation context must be used.
+	 */
+	if (dest_off) {
+		operation_add_entry(ctx, dest_off,
+			alloc ? alloc->offset : 0, ULOG_OPERATION_SET);
+	}
+
+	/* and now actually perform the requested operation! */
+	palloc_exec_actions(heap, ctx, ops, nops);
+
+	return 0;
+}
+
+/*
+ * palloc_offset_compare -- (internal) comparator for sorting by the offset of
+ * an object.
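+ *
+ * A small worked example: qsort with this comparator (note the inverted
+ * return values) orders the offsets {8, 32, 16} as {32, 16, 8}, i.e. in
+ * descending order, as the defragmentation logic below expects.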
+ */ +static int +palloc_offset_compare(const void *lhs, const void *rhs) +{ + const uint64_t * const * mlhs = lhs; + const uint64_t * const * mrhs = rhs; + uintptr_t vlhs = **mlhs; + uintptr_t vrhs = **mrhs; + + if (vlhs < vrhs) + return 1; + if (vlhs > vrhs) + return -1; + + return 0; +} + +struct palloc_defrag_entry { + uint64_t **offsetp; +}; + +/* + * palloc_pointer_compare -- (internal) comparator for sorting by the + * pointer of an offset in the tree. + */ +static int +palloc_pointer_compare(const void *lhs, const void *rhs) +{ + const struct palloc_defrag_entry *mlhs = lhs; + const struct palloc_defrag_entry *mrhs = rhs; + uintptr_t vlhs = (uintptr_t)*mlhs->offsetp; + uintptr_t vrhs = (uintptr_t)*mrhs->offsetp; + + if (vlhs > vrhs) + return 1; + if (vlhs < vrhs) + return -1; + + return 0; +} + +VEC(pobj_actions, struct pobj_action); + +/* + * pobj_actions_add -- add a new action to the end of the vector and return + * its slot. Vector must be able to hold the new value. Reallocation is + * forbidden. + */ +static struct pobj_action * +pobj_actions_add(struct pobj_actions *actv) +{ + /* + * This shouldn't happen unless there's a bug in the calculation + * of the maximum number of actions. + */ + if (VEC_SIZE(actv) == VEC_CAPACITY(actv)) + abort(); + + actv->size++; + + return &VEC_BACK(actv); +} + +/* + * palloc_defrag -- forces recycling of all available memory, and reallocates + * provided objects so that they have the lowest possible address. + */ +int +palloc_defrag(struct palloc_heap *heap, uint64_t **objv, size_t objcnt, + struct operation_context *ctx, struct pobj_defrag_result *result) +{ + int ret = -1; + /* + * Offsets pointers need to be sorted by the offset of the object in + * descending order. This gives us two things, a) the defragmentation + * process is more likely to move objects to a lower offset, improving + * locality and tentatively enabling the heap to shrink, and b) pointers + * to the same object are next to each other in the array, so it's easy + * to reallocate the object once and simply update all remaining + * pointers. + */ + qsort(objv, objcnt, sizeof(uint64_t *), palloc_offset_compare); + + /* + * We also need to store pointers to objects in a tree, so that it's + * possible to update pointers to other objects on the provided list + * that reside in the objects that were already reallocated or + * will be reallocated later on in the process. + */ + struct ravl *objvp = ravl_new_sized(palloc_pointer_compare, + sizeof(struct palloc_defrag_entry)); + if (objvp == NULL) + goto err_ravl; + + /* + * We need to calculate how many pointers to the same object we will + * need to update during defrag. This will be used to calculate capacity + * for the action vector and the redo log. + */ + size_t longest_object_sequence = 1; + size_t current_object_sequence = 1; + for (size_t i = 0; i < objcnt; ++i) { + if (i != 0 && *objv[i - 1] == *objv[i]) { + current_object_sequence += 1; + } else { + if (current_object_sequence > longest_object_sequence) + longest_object_sequence = + current_object_sequence; + current_object_sequence = 1; + } + + struct palloc_defrag_entry e = {&objv[i]}; + if (ravl_emplace_copy(objvp, &e) != 0) + goto err_objvp; + } + + if (current_object_sequence > longest_object_sequence) + longest_object_sequence = current_object_sequence; + + heap_force_recycle(heap); + + /* + * The number of actions at which the action vector will be processed. 
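+	 *
+	 * As a hypothetical worked example: with a 2048-byte external redo
+	 * log and 16-byte ulog entries, max_actions would be
+	 * 2048 / 16 - 3 = 125, so the vector is published roughly every
+	 * 125 accumulated actions.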
+ */ + const size_t actions_per_realloc = 3; /* alloc + free + set */ + const size_t max_actions = + LANE_REDO_EXTERNAL_SIZE / sizeof(struct ulog_entry_val) + - actions_per_realloc; + + struct pobj_actions actv; + VEC_INIT(&actv); + + /* + * Vector needs enough capacity to handle the largest + * possible sequence of actions. Given that the actions are published + * once the max_actions threshold is crossed AND the sequence for the + * current object is finished, worst-case capacity is a sum of + * max_actions and the largest object sequence - because that sequence + * might happen to begin when current object number i == max_action. + */ + size_t actv_required_capacity = + max_actions + longest_object_sequence + actions_per_realloc; + + if (VEC_RESERVE(&actv, actv_required_capacity) != 0) + goto err; + + /* + * Do NOT reallocate action vector after this line, because + * prev_reserve can point to the slot in the original vector. + */ + + struct pobj_action *prev_reserve = NULL; + uint64_t prev_offset = 0; + for (size_t i = 0; i < objcnt; ++i) { + uint64_t *offsetp = objv[i]; + uint64_t offset = *offsetp; + + /* + * We want to keep our redo logs relatively small, and so + * actions vector is processed on a regular basis. + */ + if (prev_offset != offset && VEC_SIZE(&actv) >= max_actions) { + /* + * If there are any pointers on the tree to the + * memory actions that are being applied, they need to + * be removed. Future reallocations will already have + * these modifications applied. + */ + struct pobj_action *iter; + VEC_FOREACH_BY_PTR(iter, &actv) { + if (iter->type != POBJ_ACTION_TYPE_MEM) + continue; + struct pobj_action_internal *iteri = + (struct pobj_action_internal *)iter; + struct palloc_defrag_entry e = {&iteri->ptr}; + struct ravl_node *n = ravl_find(objvp, &e, + RAVL_PREDICATE_EQUAL); + if (n != NULL) + ravl_remove(objvp, n); + } + + size_t entries_size = + VEC_SIZE(&actv) * sizeof(struct ulog_entry_val); + if (operation_reserve(ctx, entries_size) != 0) + goto err; + + palloc_publish(heap, VEC_ARR(&actv), VEC_SIZE(&actv), + ctx); + + operation_start(ctx); + VEC_CLEAR(&actv); + } + + /* + * If the previous pointer of this offset was skipped, + * skip all pointers for that object. + */ + if (prev_reserve == NULL && prev_offset == offset) + continue; + + /* + * If this is an offset to an object that was already + * reallocated in the previous iteration, we need to only update + * the pointer to the new offset. + */ + if (prev_reserve && prev_offset == offset) { + struct pobj_action *set = pobj_actions_add(&actv); + + palloc_set_value(heap, set, + offsetp, prev_reserve->heap.offset); + struct pobj_action_internal *seti = + (struct pobj_action_internal *)set; + + /* + * Since this pointer can reside in an object that will + * be reallocated later on we need to be able to + * find and update it when that happens. + */ + struct palloc_defrag_entry e = {&seti->ptr}; + struct ravl_node *n = ravl_find(objvp, &e, + RAVL_PREDICATE_EQUAL); + if (n != NULL) + ravl_remove(objvp, n); + /* + * Notice that the tree is ordered by the content of the + * pointer, not the pointer itself. This might look odd, + * but we are inserting a *different* pointer to the + * same pointer to an offset. 
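+		 *
+		 * A hypothetical example: if both the stale tree key and
+		 * seti->ptr hold the same user pointer P, the remove/emplace
+		 * pair swaps the stored entry from {&objv[i]} to
+		 * {&seti->ptr}, so a later relocation of the object
+		 * containing P updates the unpublished action's target
+		 * rather than the already-consumed array slot.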
+ */ + if (ravl_emplace_copy(objvp, &e) != 0) + goto err; + + continue; + } + + if (result) + result->total++; + + prev_reserve = NULL; + prev_offset = offset; + + struct memory_block m = memblock_from_offset(heap, offset); + + if (m.type == MEMORY_BLOCK_HUGE) + continue; + + os_mutex_t *mlock = m.m_ops->get_lock(&m); + os_mutex_lock(mlock); + unsigned original_fillpct = m.m_ops->fill_pct(&m); + os_mutex_unlock(mlock); + + /* + * Empirically, 50% fill rate is the sweetspot for moving + * objects between runs. Other values tend to produce worse + * results. + */ + if (original_fillpct > 50) + continue; + + size_t user_size = m.m_ops->get_user_size(&m); + + struct pobj_action *reserve = pobj_actions_add(&actv); + + if (palloc_reservation_create(heap, user_size, + NULL, NULL, + m.m_ops->get_extra(&m), m.m_ops->get_flags(&m), + 0, HEAP_ARENA_PER_THREAD, + (struct pobj_action_internal *)reserve) != 0) { + VEC_POP_BACK(&actv); + continue; + } + + uint64_t new_offset = reserve->heap.offset; + + VALGRIND_ADD_TO_TX( + HEAP_OFF_TO_PTR(heap, new_offset), + user_size); + pmemops_memcpy(&heap->p_ops, + HEAP_OFF_TO_PTR(heap, new_offset), + HEAP_OFF_TO_PTR(heap, *offsetp), + user_size, + 0); + VALGRIND_REMOVE_FROM_TX( + HEAP_OFF_TO_PTR(heap, new_offset), + user_size); + + /* + * If there is a pointer provided by the user inside of the + * object we are in the process of reallocating, we need to + * find that pointer and update it to reflect the new location + * of PMEMoid. + */ + ptrdiff_t diff = (ptrdiff_t)(new_offset - offset); + uint64_t *objptr = (uint64_t *)((uint64_t)heap->base + offset); + uint64_t objend = ((uint64_t)objptr + user_size); + struct ravl_node *nptr = NULL; + enum ravl_predicate p = RAVL_PREDICATE_GREATER_EQUAL; + struct palloc_defrag_entry search_entry = {&objptr}; + + while ((nptr = ravl_find(objvp, &search_entry, p)) != NULL) { + p = RAVL_PREDICATE_GREATER; + struct palloc_defrag_entry *e = ravl_data(nptr); + uint64_t poffset = (uint64_t)(*e->offsetp); + + if (poffset >= objend) + break; + + struct palloc_defrag_entry ne = *e; + ravl_remove(objvp, nptr); + + objptr = (uint64_t *)poffset; + + poffset = (uint64_t)((ptrdiff_t)poffset + diff); + + *ne.offsetp = (uint64_t *)poffset; + } + offsetp = objv[i]; + + struct pobj_action *set = pobj_actions_add(&actv); + + /* + * We need to change the pointer in the tree to the pointer + * of this new unpublished action, so that it can be updated + * later on if needed. 
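+		 * (palloc_set_value() creates a POBJ_ACTION_TYPE_MEM action
+		 * here; it is published later in the same batch as the
+		 * reservation and the deferred free.)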
+ */ + palloc_set_value(heap, set, offsetp, new_offset); + struct pobj_action_internal *seti = + (struct pobj_action_internal *)set; + struct palloc_defrag_entry e = {&seti->ptr}; + struct ravl_node *n = ravl_find(objvp, &e, + RAVL_PREDICATE_EQUAL); + if (n != NULL) + ravl_remove(objvp, n); + + /* same as above, this is a different pointer to same content */ + if (ravl_emplace_copy(objvp, &e) != 0) + goto err; + + struct pobj_action *dfree = pobj_actions_add(&actv); + + palloc_defer_free(heap, offset, dfree); + + if (result) + result->relocated++; + + prev_reserve = reserve; + prev_offset = offset; + } + + if (VEC_SIZE(&actv) != 0) { + size_t entries_size = + VEC_SIZE(&actv) * sizeof(struct ulog_entry_val); + if (operation_reserve(ctx, entries_size) != 0) + goto err; + palloc_publish(heap, VEC_ARR(&actv), VEC_SIZE(&actv), ctx); + } else { + operation_cancel(ctx); + } + + ret = 0; + +err: + if (ret != 0) + palloc_cancel(heap, VEC_ARR(&actv), VEC_SIZE(&actv)); + VEC_DELETE(&actv); +err_objvp: + ravl_delete(objvp); +err_ravl: + if (ret != 0) + operation_cancel(ctx); + + return ret; +} + +/* + * palloc_usable_size -- returns the number of bytes in the memory block + */ +size_t +palloc_usable_size(struct palloc_heap *heap, uint64_t off) +{ + struct memory_block m = memblock_from_offset(heap, off); + + return m.m_ops->get_user_size(&m); +} + +/* + * palloc_extra -- returns allocation extra field + */ +uint64_t +palloc_extra(struct palloc_heap *heap, uint64_t off) +{ + struct memory_block m = memblock_from_offset(heap, off); + + return m.m_ops->get_extra(&m); +} + +/* + * palloc_flags -- returns allocation flags + */ +uint16_t +palloc_flags(struct palloc_heap *heap, uint64_t off) +{ + struct memory_block m = memblock_from_offset(heap, off); + + return m.m_ops->get_flags(&m); +} + +/* + * pmalloc_search_cb -- (internal) foreach callback. + */ +static int +pmalloc_search_cb(const struct memory_block *m, void *arg) +{ + struct memory_block *out = arg; + + if (MEMORY_BLOCK_EQUALS(*m, *out)) + return 0; /* skip the same object */ + + *out = *m; + + return 1; +} + +/* + * palloc_first -- returns the first object from the heap. + */ +uint64_t +palloc_first(struct palloc_heap *heap) +{ + struct memory_block search = MEMORY_BLOCK_NONE; + + heap_foreach_object(heap, pmalloc_search_cb, + &search, MEMORY_BLOCK_NONE); + + if (MEMORY_BLOCK_IS_NONE(search)) + return 0; + + void *uptr = search.m_ops->get_user_data(&search); + + return HEAP_PTR_TO_OFF(heap, uptr); +} + +/* + * palloc_next -- returns the next object relative to 'off'. 
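+ *
+ * A minimal iteration sketch (visit() is a hypothetical callback):
+ *
+ *	for (uint64_t off = palloc_first(heap); off != 0;
+ *			off = palloc_next(heap, off))
+ *		visit(heap, off);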
+ */
+uint64_t
+palloc_next(struct palloc_heap *heap, uint64_t off)
+{
+	struct memory_block m = memblock_from_offset(heap, off);
+	struct memory_block search = m;
+
+	heap_foreach_object(heap, pmalloc_search_cb, &search, m);
+
+	if (MEMORY_BLOCK_IS_NONE(search) ||
+		MEMORY_BLOCK_EQUALS(search, m))
+		return 0;
+
+	void *uptr = search.m_ops->get_user_data(&search);
+
+	return HEAP_PTR_TO_OFF(heap, uptr);
+}
+
+/*
+ * palloc_boot -- initializes allocator section
+ */
+int
+palloc_boot(struct palloc_heap *heap, void *heap_start,
+	uint64_t heap_size, uint64_t *sizep,
+	void *base, struct pmem_ops *p_ops, struct stats *stats,
+	struct pool_set *set)
+{
+	return heap_boot(heap, heap_start, heap_size, sizep,
+		base, p_ops, stats, set);
+}
+
+/*
+ * palloc_buckets_init -- initializes buckets
+ */
+int
+palloc_buckets_init(struct palloc_heap *heap)
+{
+	return heap_buckets_init(heap);
+}
+
+/*
+ * palloc_init -- initializes palloc heap
+ */
+int
+palloc_init(void *heap_start, uint64_t heap_size, uint64_t *sizep,
+	struct pmem_ops *p_ops)
+{
+	return heap_init(heap_start, heap_size, sizep, p_ops);
+}
+
+/*
+ * palloc_heap_end -- returns first address after heap
+ */
+void *
+palloc_heap_end(struct palloc_heap *h)
+{
+	return heap_end(h);
+}
+
+/*
+ * palloc_heap_check -- verifies heap state
+ */
+int
+palloc_heap_check(void *heap_start, uint64_t heap_size)
+{
+	return heap_check(heap_start, heap_size);
+}
+
+/*
+ * palloc_heap_check_remote -- verifies state of remote replica
+ */
+int
+palloc_heap_check_remote(void *heap_start, uint64_t heap_size,
+	struct remote_ops *ops)
+{
+	return heap_check_remote(heap_start, heap_size, ops);
+}
+
+/*
+ * palloc_heap_cleanup -- cleans up the volatile heap state
+ */
+void
+palloc_heap_cleanup(struct palloc_heap *heap)
+{
+	heap_cleanup(heap);
+}
+
+#if VG_MEMCHECK_ENABLED
+/*
+ * palloc_vg_register_alloc -- (internal) registers allocation header
+ * in Valgrind
+ */
+static int
+palloc_vg_register_alloc(const struct memory_block *m, void *arg)
+{
+	struct palloc_heap *heap = arg;
+
+	m->m_ops->reinit_header(m);
+
+	void *uptr = m->m_ops->get_user_data(m);
+	size_t usize = m->m_ops->get_user_size(m);
+	VALGRIND_DO_MEMPOOL_ALLOC(heap->layout, uptr, usize);
+	VALGRIND_DO_MAKE_MEM_DEFINED(uptr, usize);
+
+	return 0;
+}
+
+/*
+ * palloc_heap_vg_open -- notifies Valgrind about heap layout
+ */
+void
+palloc_heap_vg_open(struct palloc_heap *heap, int objects)
+{
+	heap_vg_open(heap, palloc_vg_register_alloc, heap, objects);
+}
+#endif
diff --git a/src/pmdk/src/libpmemobj/palloc.h b/src/pmdk/src/libpmemobj/palloc.h
new file mode 100644
index 000000000..f433cd4f5
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/palloc.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * palloc.h -- internal definitions for persistent allocator
+ */
+
+#ifndef LIBPMEMOBJ_PALLOC_H
+#define LIBPMEMOBJ_PALLOC_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libpmemobj.h"
+#include "memops.h"
+#include "ulog.h"
+#include "valgrind_internal.h"
+#include "stats.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PALLOC_CTL_DEBUG_NO_PATTERN (-1)
+
+struct palloc_heap {
+	struct pmem_ops p_ops;
+	struct heap_layout *layout;
+	struct heap_rt *rt;
+	uint64_t *sizep;
+	uint64_t growsize;
+
+	struct stats *stats;
+	struct pool_set *set;
+
+	void *base;
+
+	int alloc_pattern;
+};
+
+struct memory_block;
+
+typedef int (*palloc_constr)(void *base, void *ptr,
+	size_t usable_size, void *arg);
+
+int palloc_operation(struct palloc_heap *heap,
+	uint64_t off, uint64_t *dest_off,
+	size_t size, palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags,
+	uint16_t class_id, uint16_t arena_id,
+	struct operation_context *ctx);
+
+int
+palloc_reserve(struct palloc_heap *heap, size_t size,
+	palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags,
+	uint16_t class_id, uint16_t arena_id,
+	struct pobj_action *act);
+
+void
+palloc_defer_free(struct palloc_heap *heap, uint64_t off,
+	struct pobj_action *act);
+
+void
+palloc_cancel(struct palloc_heap *heap,
+	struct pobj_action *actv, size_t actvcnt);
+
+void
+palloc_publish(struct palloc_heap *heap,
+	struct pobj_action *actv, size_t actvcnt,
+	struct operation_context *ctx);
+
+void
+palloc_set_value(struct palloc_heap *heap, struct pobj_action *act,
+	uint64_t *ptr, uint64_t value);
+
+uint64_t palloc_first(struct palloc_heap *heap);
+uint64_t palloc_next(struct palloc_heap *heap, uint64_t off);
+
+size_t palloc_usable_size(struct palloc_heap *heap, uint64_t off);
+uint64_t palloc_extra(struct palloc_heap *heap, uint64_t off);
+uint16_t palloc_flags(struct palloc_heap *heap, uint64_t off);
+
+int palloc_boot(struct palloc_heap *heap, void *heap_start,
+	uint64_t heap_size, uint64_t *sizep,
+	void *base, struct pmem_ops *p_ops,
+	struct stats *stats, struct pool_set *set);
+
+int palloc_buckets_init(struct palloc_heap *heap);
+
+int palloc_init(void *heap_start, uint64_t heap_size, uint64_t *sizep,
+	struct pmem_ops *p_ops);
+void *palloc_heap_end(struct palloc_heap *h);
+int palloc_heap_check(void *heap_start, uint64_t heap_size);
+int palloc_heap_check_remote(void *heap_start, uint64_t heap_size,
+	struct remote_ops *ops);
+void palloc_heap_cleanup(struct palloc_heap *heap);
+size_t palloc_heap(void *heap_start);
+
+int palloc_defrag(struct palloc_heap *heap, uint64_t **objv, size_t objcnt,
+	struct operation_context *ctx, struct pobj_defrag_result *result);
+
+/* foreach callback, terminates iteration if return value is non-zero */
+typedef int (*object_callback)(const struct memory_block *m, void *arg);
+
+#if VG_MEMCHECK_ENABLED
+void palloc_heap_vg_open(struct palloc_heap *heap, int objects);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/libpmemobj/pmalloc.c b/src/pmdk/src/libpmemobj/pmalloc.c
new file mode 100644
index 000000000..aa0523120
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/pmalloc.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * pmalloc.c -- implementation of pmalloc POSIX-like API
+ *
+ * This is the front-end part of the persistent memory allocator. It uses both
+ * transient and persistent representation of the heap to provide memory blocks
+ * in a reasonable time and with an acceptable common-case fragmentation.
+ */
+
+#include <inttypes.h>
+
+#include "valgrind_internal.h"
+#include "heap.h"
+#include "lane.h"
+#include "memblock.h"
+#include "memops.h"
+#include "obj.h"
+#include "out.h"
+#include "palloc.h"
+#include "pmalloc.h"
+#include "alloc_class.h"
+#include "set.h"
+#include "mmap.h"
+
+enum pmalloc_operation_type {
+	OPERATION_INTERNAL, /* used only for single, one-off operations */
+	OPERATION_EXTERNAL, /* used for everything else, incl. large redos */
+
+	MAX_OPERATION_TYPE,
+};
+
+struct lane_alloc_runtime {
+	struct operation_context *ctx[MAX_OPERATION_TYPE];
+};
+
+/*
+ * pmalloc_operation_hold_type -- acquires allocator lane section and returns a
+ * pointer to its operation context
+ */
+static struct operation_context *
+pmalloc_operation_hold_type(PMEMobjpool *pop, enum pmalloc_operation_type type,
+	int start)
+{
+	struct lane *lane;
+	lane_hold(pop, &lane);
+	struct operation_context *ctx = type == OPERATION_INTERNAL ?
+		lane->internal : lane->external;
+
+	if (start)
+		operation_start(ctx);
+
+	return ctx;
+}
+
+/*
+ * pmalloc_operation_hold_no_start -- acquires allocator lane section and
+ * returns a pointer to its operation context without starting
+ */
+struct operation_context *
+pmalloc_operation_hold_no_start(PMEMobjpool *pop)
+{
+	return pmalloc_operation_hold_type(pop, OPERATION_EXTERNAL, 0);
+}
+
+/*
+ * pmalloc_operation_hold -- acquires allocator lane section and returns a
+ * pointer to its redo log
+ */
+struct operation_context *
+pmalloc_operation_hold(PMEMobjpool *pop)
+{
+	return pmalloc_operation_hold_type(pop, OPERATION_EXTERNAL, 1);
+}
+
+/*
+ * pmalloc_operation_release -- releases allocator lane section
+ */
+void
+pmalloc_operation_release(PMEMobjpool *pop)
+{
+	lane_release(pop);
+}
+
+/*
+ * pmalloc -- allocates a new block of memory
+ *
+ * The pool offset is written persistently into the off variable.
+ *
+ * If successful, the function returns zero. Otherwise, an error number is
+ * returned.
+ */
+int
+pmalloc(PMEMobjpool *pop, uint64_t *off, size_t size,
+	uint64_t extra_field, uint16_t object_flags)
+{
+	struct operation_context *ctx =
+		pmalloc_operation_hold_type(pop, OPERATION_INTERNAL, 1);
+
+	int ret = palloc_operation(&pop->heap, 0, off, size, NULL, NULL,
+		extra_field, object_flags, 0, 0, ctx);
+
+	pmalloc_operation_release(pop);
+
+	return ret;
+}
+
+/*
+ * pmalloc_construct -- allocates a new block of memory with a constructor
+ *
+ * The block offset is written persistently into the off variable, but only
+ * after the constructor function has been called.
+ *
+ * If successful, the function returns zero. Otherwise, an error number is
+ * returned.
+ */
+int
+pmalloc_construct(PMEMobjpool *pop, uint64_t *off, size_t size,
+	palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags, uint16_t class_id)
+{
+	struct operation_context *ctx =
+		pmalloc_operation_hold_type(pop, OPERATION_INTERNAL, 1);
+
+	int ret = palloc_operation(&pop->heap, 0, off, size, constructor, arg,
+		extra_field, object_flags, class_id, 0, ctx);
+
+	pmalloc_operation_release(pop);
+
+	return ret;
+}
+
+/*
+ * prealloc -- resizes in-place a previously allocated memory block
+ *
+ * The block offset is written persistently into the off variable.
+ *
+ * If successful, the function returns zero. Otherwise, an error number is
+ * returned.
+ */
+int
+prealloc(PMEMobjpool *pop, uint64_t *off, size_t size,
+	uint64_t extra_field, uint16_t object_flags)
+{
+	struct operation_context *ctx =
+		pmalloc_operation_hold_type(pop, OPERATION_INTERNAL, 1);
+
+	int ret = palloc_operation(&pop->heap, *off, off, size, NULL, NULL,
+		extra_field, object_flags, 0, 0, ctx);
+
+	pmalloc_operation_release(pop);
+
+	return ret;
+}
+
+/*
+ * pfree -- deallocates a memory block previously allocated by pmalloc
+ *
+ * A zero value is written persistently into the off variable.
+ *
+ * This operation cannot fail; an internal failure is treated as fatal.
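+ *
+ * A hypothetical round trip:
+ *
+ *	uint64_t off = 0;
+ *	if (pmalloc(pop, &off, 64, 0, 0) == 0)
+ *		pfree(pop, &off);	(off is persistently zeroed again)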
+ */
+void
+pfree(PMEMobjpool *pop, uint64_t *off)
+{
+	struct operation_context *ctx =
+		pmalloc_operation_hold_type(pop, OPERATION_INTERNAL, 1);
+
+	int ret = palloc_operation(&pop->heap, *off, off, 0, NULL, NULL,
+		0, 0, 0, 0, ctx);
+	ASSERTeq(ret, 0);
+
+	pmalloc_operation_release(pop);
+}
+
+/*
+ * pmalloc_boot -- global runtime init routine of allocator section
+ */
+int
+pmalloc_boot(PMEMobjpool *pop)
+{
+	int ret = palloc_boot(&pop->heap, (char *)pop + pop->heap_offset,
+		pop->set->poolsize - pop->heap_offset, &pop->heap_size,
+		pop, &pop->p_ops,
+		pop->stats, pop->set);
+	if (ret)
+		return ret;
+
+#if VG_MEMCHECK_ENABLED
+	if (On_memcheck)
+		palloc_heap_vg_open(&pop->heap, pop->vg_boot);
+#endif
+
+	ret = palloc_buckets_init(&pop->heap);
+	if (ret)
+		palloc_heap_cleanup(&pop->heap);
+
+	return ret;
+}
+
+/*
+ * pmalloc_cleanup -- global cleanup routine of allocator section
+ */
+int
+pmalloc_cleanup(PMEMobjpool *pop)
+{
+	palloc_heap_cleanup(&pop->heap);
+
+	return 0;
+}
+
+/*
+ * CTL_WRITE_HANDLER(desc) -- creates a new allocation class
+ */
+static int
+CTL_WRITE_HANDLER(desc)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	uint8_t id;
+	struct alloc_class_collection *ac = heap_alloc_classes(&pop->heap);
+	struct pobj_alloc_class_desc *p = arg;
+
+	if (p->unit_size <= 0 || p->unit_size > PMEMOBJ_MAX_ALLOC_SIZE ||
+		p->units_per_block <= 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (p->alignment != 0 && p->unit_size % p->alignment != 0) {
+		ERR("unit size must be evenly divisible by alignment");
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (p->alignment > (MEGABYTE * 2)) {
+		ERR("alignment cannot be larger than 2 megabytes");
+		errno = EINVAL;
+		return -1;
+	}
+
+	enum header_type lib_htype = MAX_HEADER_TYPES;
+	switch (p->header_type) {
+	case POBJ_HEADER_LEGACY:
+		lib_htype = HEADER_LEGACY;
+		break;
+	case POBJ_HEADER_COMPACT:
+		lib_htype = HEADER_COMPACT;
+		break;
+	case POBJ_HEADER_NONE:
+		lib_htype = HEADER_NONE;
+		break;
+	case MAX_POBJ_HEADER_TYPES:
+	default:
+		ERR("invalid header type");
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (PMDK_SLIST_EMPTY(indexes)) {
+		if (alloc_class_find_first_free_slot(ac, &id) != 0) {
+			ERR("no available free allocation class identifier");
+			errno = EINVAL;
+			return -1;
+		}
+	} else {
+		struct ctl_index *idx = PMDK_SLIST_FIRST(indexes);
+		ASSERTeq(strcmp(idx->name, "class_id"), 0);
+
+		if (idx->value < 0 || idx->value >= MAX_ALLOCATION_CLASSES) {
+			ERR("class id outside of the allowed range");
+			errno = ERANGE;
+			return -1;
+		}
+
+		id = (uint8_t)idx->value;
+
+		if (alloc_class_reserve(ac, id) != 0) {
+			ERR("attempted to overwrite an allocation class");
+			errno = EEXIST;
+			return -1;
+		}
+	}
+
+	size_t runsize_bytes =
+		CHUNK_ALIGN_UP((p->units_per_block * p->unit_size) +
+		RUN_BASE_METADATA_SIZE);
+
+	/* aligning the buffer might require up to 'alignment' bytes */
+	if (p->alignment != 0)
+		runsize_bytes += p->alignment;
+
+	uint32_t size_idx = (uint32_t)(runsize_bytes / CHUNKSIZE);
+	if (size_idx > UINT16_MAX)
+		size_idx = UINT16_MAX;
+
+	struct alloc_class *c = alloc_class_new(id,
+		heap_alloc_classes(&pop->heap), CLASS_RUN,
+		lib_htype, p->unit_size, p->alignment, size_idx);
+	if (c == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (heap_create_alloc_class_buckets(&pop->heap, c) != 0) {
+		alloc_class_delete(ac, c);
+		return -1;
+	}
+
+	p->class_id = c->id;
+	p->units_per_block = c->rdsc.nallocs;
+
+	return 0;
+}
+
+/*
+ * pmalloc_header_type_parser -- parses the alloc
header type argument + */ +static int +pmalloc_header_type_parser(const void *arg, void *dest, size_t dest_size) +{ + const char *vstr = arg; + enum pobj_header_type *htype = dest; + ASSERTeq(dest_size, sizeof(enum pobj_header_type)); + + if (strcmp(vstr, "none") == 0) { + *htype = POBJ_HEADER_NONE; + } else if (strcmp(vstr, "compact") == 0) { + *htype = POBJ_HEADER_COMPACT; + } else if (strcmp(vstr, "legacy") == 0) { + *htype = POBJ_HEADER_LEGACY; + } else { + ERR("invalid header type"); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * CTL_READ_HANDLER(desc) -- reads the information about allocation class + */ +static int +CTL_READ_HANDLER(desc)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + uint8_t id; + + struct ctl_index *idx = PMDK_SLIST_FIRST(indexes); + ASSERTeq(strcmp(idx->name, "class_id"), 0); + + if (idx->value < 0 || idx->value >= MAX_ALLOCATION_CLASSES) { + ERR("class id outside of the allowed range"); + errno = ERANGE; + return -1; + } + + id = (uint8_t)idx->value; + + struct alloc_class *c = alloc_class_by_id( + heap_alloc_classes(&pop->heap), id); + + if (c == NULL) { + ERR("class with the given id does not exist"); + errno = ENOENT; + return -1; + } + + enum pobj_header_type user_htype = MAX_POBJ_HEADER_TYPES; + switch (c->header_type) { + case HEADER_LEGACY: + user_htype = POBJ_HEADER_LEGACY; + break; + case HEADER_COMPACT: + user_htype = POBJ_HEADER_COMPACT; + break; + case HEADER_NONE: + user_htype = POBJ_HEADER_NONE; + break; + default: + ASSERT(0); /* unreachable */ + break; + } + + struct pobj_alloc_class_desc *p = arg; + p->units_per_block = c->type == CLASS_HUGE ? 0 : c->rdsc.nallocs; + p->header_type = user_htype; + p->unit_size = c->unit_size; + p->class_id = c->id; + p->alignment = c->flags & CHUNK_FLAG_ALIGNED ? c->rdsc.alignment : 0; + + return 0; +} + +static const struct ctl_argument CTL_ARG(desc) = { + .dest_size = sizeof(struct pobj_alloc_class_desc), + .parsers = { + CTL_ARG_PARSER_STRUCT(struct pobj_alloc_class_desc, + unit_size, ctl_arg_integer), + CTL_ARG_PARSER_STRUCT(struct pobj_alloc_class_desc, + alignment, ctl_arg_integer), + CTL_ARG_PARSER_STRUCT(struct pobj_alloc_class_desc, + units_per_block, ctl_arg_integer), + CTL_ARG_PARSER_STRUCT(struct pobj_alloc_class_desc, + header_type, pmalloc_header_type_parser), + CTL_ARG_PARSER_END + } +}; + +static const struct ctl_node CTL_NODE(class_id)[] = { + CTL_LEAF_RW(desc), + + CTL_NODE_END +}; + +static const struct ctl_node CTL_NODE(new)[] = { + CTL_LEAF_WO(desc), + + CTL_NODE_END +}; + +static const struct ctl_node CTL_NODE(alloc_class)[] = { + CTL_INDEXED(class_id), + CTL_INDEXED(new), + + CTL_NODE_END +}; + +/* + * CTL_RUNNABLE_HANDLER(extend) -- extends the pool by the given size + */ +static int +CTL_RUNNABLE_HANDLER(extend)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + ssize_t arg_in = *(ssize_t *)arg; + if (arg_in < (ssize_t)PMEMOBJ_MIN_PART) { + ERR("incorrect size for extend, must be larger than %" PRIu64, + PMEMOBJ_MIN_PART); + return -1; + } + + struct palloc_heap *heap = &pop->heap; + struct bucket *defb = heap_bucket_acquire(heap, + DEFAULT_ALLOC_CLASS_ID, + HEAP_ARENA_PER_THREAD); + + int ret = heap_extend(heap, defb, (size_t)arg_in) < 0 ? 
-1 : 0;
+
+	heap_bucket_release(heap, defb);
+
+	return ret;
+}
+
+/*
+ * CTL_READ_HANDLER(granularity) -- reads the current heap grow size
+ */
+static int
+CTL_READ_HANDLER(granularity)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+
+	ssize_t *arg_out = arg;
+
+	*arg_out = (ssize_t)pop->heap.growsize;
+
+	return 0;
+}
+
+/*
+ * CTL_WRITE_HANDLER(granularity) -- changes the heap grow size
+ */
+static int
+CTL_WRITE_HANDLER(granularity)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+
+	ssize_t arg_in = *(int *)arg;
+	if (arg_in != 0 && arg_in < (ssize_t)PMEMOBJ_MIN_PART) {
+		ERR("incorrect grow size, must be 0 or larger than %" PRIu64,
+			PMEMOBJ_MIN_PART);
+		return -1;
+	}
+
+	pop->heap.growsize = (size_t)arg_in;
+
+	return 0;
+}
+
+static const struct ctl_argument CTL_ARG(granularity) = CTL_ARG_LONG_LONG;
+
+/*
+ * CTL_READ_HANDLER(total) -- reads the total number of arenas
+ */
+static int
+CTL_READ_HANDLER(total)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned *narenas = arg;
+
+	*narenas = heap_get_narenas_total(&pop->heap);
+
+	return 0;
+}
+
+/*
+ * CTL_READ_HANDLER(max) -- reads the maximum number of arenas
+ */
+static int
+CTL_READ_HANDLER(max)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned *max = arg;
+
+	*max = heap_get_narenas_max(&pop->heap);
+
+	return 0;
+}
+
+/*
+ * CTL_WRITE_HANDLER(max) -- writes the maximum number of arenas
+ */
+static int
+CTL_WRITE_HANDLER(max)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned size = *(unsigned *)arg;
+
+	int ret = heap_set_narenas_max(&pop->heap, size);
+	if (ret) {
+		LOG(1, "cannot change max arena number");
+		return -1;
+	}
+
+	return 0;
+}
+
+static const struct ctl_argument CTL_ARG(max) = CTL_ARG_LONG_LONG;
+
+/*
+ * CTL_READ_HANDLER(automatic) -- reads the number of automatic arenas
+ */
+static int
+CTL_READ_HANDLER(automatic, narenas)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned *narenas = arg;
+
+	*narenas = heap_get_narenas_auto(&pop->heap);
+
+	return 0;
+}
+
+/*
+ * CTL_READ_HANDLER(arena_id) -- reads the id of the arena
+ * assigned to the calling thread
+ */
+static int
+CTL_READ_HANDLER(arena_id)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned *arena_id = arg;
+
+	*arena_id = heap_get_thread_arena_id(&pop->heap);
+
+	return 0;
+}
+
+/*
+ * CTL_WRITE_HANDLER(arena_id) -- assigns the arena to the calling thread
+ */
+static int
+CTL_WRITE_HANDLER(arena_id)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned arena_id = *(unsigned *)arg;
+
+	unsigned narenas = heap_get_narenas_total(&pop->heap);
+
+	/*
+	 * check that the arena id is in the valid range: it must be
+	 * nonzero and not greater than the number of arenas
+	 */
+	if (arena_id < 1 || arena_id > narenas) {
+		LOG(1, "arena id outside of the allowed range: <1,%u>",
+			narenas);
+		errno = ERANGE;
+		return -1;
+	}
+
+	heap_set_arena_thread(&pop->heap, arena_id);
+
+	return 0;
+}
+
+static const struct ctl_argument CTL_ARG(arena_id) = CTL_ARG_LONG_LONG;
+
+/*
+ * CTL_WRITE_HANDLER(automatic) -- updates automatic status of the arena
+ */
+static int
+CTL_WRITE_HANDLER(automatic)(void *ctx, enum ctl_query_source source,
+	void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	int arg_in = *(int *)arg;
+	unsigned arena_id;
+
+	struct ctl_index *idx = PMDK_SLIST_FIRST(indexes);
+	ASSERTeq(strcmp(idx->name, "arena_id"), 0);
+	arena_id = (unsigned)idx->value;
+
+	unsigned narenas = heap_get_narenas_total(&pop->heap);
+
+	/*
+	 * check that the arena id is in the valid range: it must be
+	 * nonzero and not greater than the number of arenas
+	 */
+	if (arena_id < 1 || arena_id > narenas) {
+		LOG(1, "arena id outside of the allowed range: <1,%u>",
+			narenas);
+		errno = ERANGE;
+		return -1;
+	}
+
+	if (arg_in != 0 && arg_in != 1) {
+		LOG(1, "incorrect arena state, must be 0 or 1");
+		return -1;
+	}
+
+	return heap_set_arena_auto(&pop->heap, arena_id, arg_in);
+}
+
+/*
+ * CTL_READ_HANDLER(automatic) -- reads automatic status of the arena
+ */
+static int
+CTL_READ_HANDLER(automatic)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	int *arg_out = arg;
+	unsigned arena_id;
+
+	struct ctl_index *idx = PMDK_SLIST_FIRST(indexes);
+	ASSERTeq(strcmp(idx->name, "arena_id"), 0);
+	arena_id = (unsigned)idx->value;
+
+	unsigned narenas = heap_get_narenas_total(&pop->heap);
+
+	/*
+	 * check that the arena id is in the valid range: it must be
+	 * nonzero and not greater than the number of arenas
+	 */
+	if (arena_id < 1 || arena_id > narenas) {
+		LOG(1, "arena id outside of the allowed range: <1,%u>",
+			narenas);
+		errno = ERANGE;
+		return -1;
+	}
+
+	*arg_out = heap_get_arena_auto(&pop->heap, arena_id);
+
+	return 0;
+}
+
+static struct ctl_argument CTL_ARG(automatic) = CTL_ARG_BOOLEAN;
+
+static const struct ctl_node CTL_NODE(size)[] = {
+	CTL_LEAF_RW(granularity),
+	CTL_LEAF_RUNNABLE(extend),
+
+	CTL_NODE_END
+};
+
+/*
+ * CTL_READ_HANDLER(size) -- reads usable size of specified arena
+ */
+static int
+CTL_READ_HANDLER(size)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned arena_id;
+	unsigned narenas;
+	size_t *arena_size = arg;
+
+	struct ctl_index *idx = PMDK_SLIST_FIRST(indexes);
+	ASSERTeq(strcmp(idx->name, "arena_id"), 0);
+
+	/* take the index of the arena */
+	arena_id = (unsigned)idx->value;
+	/* take the number of arenas */
+	narenas = heap_get_narenas_total(&pop->heap);
+
+	/*
+	 * check that the arena id is in the valid range: it must be
+	 * nonzero and not greater than the number of arenas
+	 */
+	if (arena_id < 1 || arena_id > narenas) {
+		LOG(1, "arena id outside of the allowed range: <1,%u>",
+			narenas);
+		errno = ERANGE;
+		return -1;
+	}
+
+	/* take the buckets assigned to the arena */
+	struct bucket **buckets;
+	buckets = heap_get_arena_buckets(&pop->heap, arena_id);
+
+	/* calculate the number of chunks reserved by the arena's buckets */
+	unsigned size = 0;
+	for (int i = 0; i < MAX_ALLOCATION_CLASSES; ++i) {
+		if (buckets[i] != NULL && buckets[i]->is_active)
+			size += buckets[i]->active_memory_block->m.size_idx;
+	}
+
+	*arena_size = size * CHUNKSIZE;
+
+	return 0;
+}
+
+/*
+ * CTL_RUNNABLE_HANDLER(create) -- creates a new arena in the heap
+ */
+static int
+CTL_RUNNABLE_HANDLER(create)(void *ctx,
+	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
+{
+	PMEMobjpool *pop = ctx;
+	unsigned *arena_id = arg;
+	struct palloc_heap *heap = &pop->heap;
+
+	int ret = heap_arena_create(heap);
+	if (ret < 0)
+		return -1;
+
+	*arena_id = (unsigned)ret;
+
+	return 0;
+}
+
+static const struct ctl_node CTL_NODE(arena_id)[] = {
+	CTL_LEAF_RO(size),
+	CTL_LEAF_RW(automatic),
+
+	CTL_NODE_END
+};
+
+static const struct ctl_node CTL_NODE(arena)[] = {
+	CTL_INDEXED(arena_id),
+	CTL_LEAF_RUNNABLE(create),
+
+	CTL_NODE_END
+};
+
+static const struct ctl_node CTL_NODE(narenas)[] = {
+	CTL_LEAF_RO(automatic, narenas),
+	CTL_LEAF_RO(total),
+	CTL_LEAF_RW(max),
+
+	CTL_NODE_END
+};
+
+static const struct ctl_node CTL_NODE(thread)[] = {
+	CTL_LEAF_RW(arena_id),
+
+	CTL_NODE_END
+};
+
+static const struct ctl_node CTL_NODE(heap)[] = {
+	CTL_CHILD(alloc_class),
+	CTL_CHILD(arena),
+	CTL_CHILD(size),
+	CTL_CHILD(thread),
+	CTL_CHILD(narenas),
+
+	CTL_NODE_END
+};
+
+/*
+ * pmalloc_ctl_register -- registers ctl nodes for "heap" module
+ */
+void
+pmalloc_ctl_register(PMEMobjpool *pop)
+{
+	CTL_REGISTER_MODULE(pop->ctl, heap);
+}
diff --git a/src/pmdk/src/libpmemobj/pmalloc.h b/src/pmdk/src/libpmemobj/pmalloc.h
new file mode 100644
index 000000000..23ebe5ba8
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/pmalloc.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * pmalloc.h -- internal definitions for persistent malloc
+ */
+
+#ifndef LIBPMEMOBJ_PMALLOC_H
+#define LIBPMEMOBJ_PMALLOC_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libpmemobj.h"
+#include "memops.h"
+#include "palloc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* single operations done in the internal context of the lane */
+
+int pmalloc(PMEMobjpool *pop, uint64_t *off, size_t size,
+	uint64_t extra_field, uint16_t object_flags);
+int pmalloc_construct(PMEMobjpool *pop, uint64_t *off, size_t size,
+	palloc_constr constructor, void *arg,
+	uint64_t extra_field, uint16_t object_flags, uint16_t class_id);
+
+int prealloc(PMEMobjpool *pop, uint64_t *off, size_t size,
+	uint64_t extra_field, uint16_t object_flags);
+
+void pfree(PMEMobjpool *pop, uint64_t *off);
+
+/* external operation to be used together with context-aware palloc funcs */
+
+struct operation_context *pmalloc_operation_hold(PMEMobjpool *pop);
+struct operation_context *pmalloc_operation_hold_no_start(PMEMobjpool *pop);
+void pmalloc_operation_release(PMEMobjpool *pop);
+
+void pmalloc_ctl_register(PMEMobjpool *pop);
+
+int pmalloc_cleanup(PMEMobjpool *pop);
+int pmalloc_boot(PMEMobjpool *pop);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/libpmemobj/pmemops.h b/src/pmdk/src/libpmemobj/pmemops.h
new file mode 100644
index 000000000..21619039b
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/pmemops.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+#ifndef LIBPMEMOBJ_PMEMOPS_H
+#define LIBPMEMOBJ_PMEMOPS_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*persist_fn)(void *base, const void *, size_t, unsigned);
+typedef int (*flush_fn)(void *base, const void *, size_t, unsigned);
+typedef void (*drain_fn)(void *base);
+
+typedef void *(*memcpy_fn)(void *base, void *dest, const void *src, size_t len,
+	unsigned flags);
+typedef void *(*memmove_fn)(void *base, void *dest, const void *src, size_t len,
+	unsigned flags);
+typedef void *(*memset_fn)(void *base, void *dest, int c, size_t len,
+	unsigned flags);
+
+typedef int (*remote_read_fn)(void *ctx, uintptr_t base, void *dest, void *addr,
+	size_t length);
+
+struct pmem_ops {
+	/* for 'master' replica: with or without data replication */
+	persist_fn persist;	/* persist function */
+	flush_fn flush;		/* flush function */
+	drain_fn drain;		/* drain function */
+	memcpy_fn memcpy;	/* persistent memcpy function */
memmove_fn memmove; /* persistent memmove function */ + memset_fn memset; /* persistent memset function */ + void *base; + + struct remote_ops { + remote_read_fn read; + + void *ctx; + uintptr_t base; + } remote; +}; + +static force_inline int +pmemops_xpersist(const struct pmem_ops *p_ops, const void *d, size_t s, + unsigned flags) +{ + return p_ops->persist(p_ops->base, d, s, flags); +} + +static force_inline void +pmemops_persist(const struct pmem_ops *p_ops, const void *d, size_t s) +{ + (void) pmemops_xpersist(p_ops, d, s, 0); +} + +static force_inline int +pmemops_xflush(const struct pmem_ops *p_ops, const void *d, size_t s, + unsigned flags) +{ + return p_ops->flush(p_ops->base, d, s, flags); +} + +static force_inline void +pmemops_flush(const struct pmem_ops *p_ops, const void *d, size_t s) +{ + (void) pmemops_xflush(p_ops, d, s, 0); +} + +static force_inline void +pmemops_drain(const struct pmem_ops *p_ops) +{ + p_ops->drain(p_ops->base); +} + +static force_inline void * +pmemops_memcpy(const struct pmem_ops *p_ops, void *dest, + const void *src, size_t len, unsigned flags) +{ + return p_ops->memcpy(p_ops->base, dest, src, len, flags); +} + +static force_inline void * +pmemops_memmove(const struct pmem_ops *p_ops, void *dest, + const void *src, size_t len, unsigned flags) +{ + return p_ops->memmove(p_ops->base, dest, src, len, flags); +} + +static force_inline void * +pmemops_memset(const struct pmem_ops *p_ops, void *dest, int c, + size_t len, unsigned flags) +{ + return p_ops->memset(p_ops->base, dest, c, len, flags); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/recycler.c b/src/pmdk/src/libpmemobj/recycler.c new file mode 100644 index 000000000..3827489e8 --- /dev/null +++ b/src/pmdk/src/libpmemobj/recycler.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * recycler.c -- implementation of run recycler + */ + +#include "heap.h" +#include "recycler.h" +#include "vec.h" +#include "out.h" +#include "util.h" +#include "sys_util.h" +#include "ravl.h" +#include "valgrind_internal.h" + +#define THRESHOLD_MUL 4 + +/* + * recycler_element_cmp -- compares two recycler elements + */ +static int +recycler_element_cmp(const void *lhs, const void *rhs) +{ + const struct recycler_element *l = lhs; + const struct recycler_element *r = rhs; + + int64_t diff = (int64_t)l->max_free_block - (int64_t)r->max_free_block; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->free_space - (int64_t)r->free_space; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->zone_id - (int64_t)r->zone_id; + if (diff != 0) + return diff > 0 ? 1 : -1; + + diff = (int64_t)l->chunk_id - (int64_t)r->chunk_id; + if (diff != 0) + return diff > 0 ? 1 : -1; + + return 0; +} + +struct recycler { + struct ravl *runs; + struct palloc_heap *heap; + + /* + * How many unaccounted units there *might* be inside of the memory + * blocks stored in the recycler. + * The value is not meant to be accurate, but rather a rough measure on + * how often should the memory block scores be recalculated. + * + * Per-chunk unaccounted units are shared for all zones, which might + * lead to some unnecessary recalculations. 
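+	 *
+	 * As a hypothetical example: with 4 peak arenas and runs of 128
+	 * allocations each, a recalculation is considered only once at
+	 * least THRESHOLD_MUL * 4 * 128 = 2048 units are unaccounted for
+	 * (see recycler_recalc()).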
+ */ + size_t unaccounted_units[MAX_CHUNK]; + size_t unaccounted_total; + size_t nallocs; + size_t *peak_arenas; + + VEC(, struct recycler_element) recalc; + + os_mutex_t lock; +}; + +/* + * recycler_new -- creates new recycler instance + */ +struct recycler * +recycler_new(struct palloc_heap *heap, size_t nallocs, size_t *peak_arenas) +{ + struct recycler *r = Malloc(sizeof(struct recycler)); + if (r == NULL) + goto error_alloc_recycler; + + r->runs = ravl_new_sized(recycler_element_cmp, + sizeof(struct recycler_element)); + if (r->runs == NULL) + goto error_alloc_tree; + + r->heap = heap; + r->nallocs = nallocs; + r->peak_arenas = peak_arenas; + r->unaccounted_total = 0; + memset(&r->unaccounted_units, 0, sizeof(r->unaccounted_units)); + + VEC_INIT(&r->recalc); + + util_mutex_init(&r->lock); + + return r; + +error_alloc_tree: + Free(r); +error_alloc_recycler: + return NULL; +} + +/* + * recycler_delete -- deletes recycler instance + */ +void +recycler_delete(struct recycler *r) +{ + VEC_DELETE(&r->recalc); + + util_mutex_destroy(&r->lock); + ravl_delete(r->runs); + Free(r); +} + +/* + * recycler_element_new -- calculates how many free bytes does a run have and + * what's the largest request that the run can handle, returns that as + * recycler element struct + */ +struct recycler_element +recycler_element_new(struct palloc_heap *heap, const struct memory_block *m) +{ + /* + * Counting of the clear bits can race with a concurrent deallocation + * that operates on the same run. This race is benign and has absolutely + * no effect on the correctness of this algorithm. Ideally, we would + * avoid grabbing the lock, but helgrind gets very confused if we + * try to disable reporting for this function. + */ + os_mutex_t *lock = m->m_ops->get_lock(m); + util_mutex_lock(lock); + + struct recycler_element e = { + .free_space = 0, + .max_free_block = 0, + .chunk_id = m->chunk_id, + .zone_id = m->zone_id, + }; + m->m_ops->calc_free(m, &e.free_space, &e.max_free_block); + + util_mutex_unlock(lock); + + return e; +} + +/* + * recycler_put -- inserts new run into the recycler + */ +int +recycler_put(struct recycler *r, const struct memory_block *m, + struct recycler_element element) +{ + int ret = 0; + + util_mutex_lock(&r->lock); + + ret = ravl_emplace_copy(r->runs, &element); + + util_mutex_unlock(&r->lock); + + return ret; +} + +/* + * recycler_get -- retrieves a chunk from the recycler + */ +int +recycler_get(struct recycler *r, struct memory_block *m) +{ + int ret = 0; + + util_mutex_lock(&r->lock); + + struct recycler_element e = { .max_free_block = m->size_idx, 0, 0, 0}; + struct ravl_node *n = ravl_find(r->runs, &e, + RAVL_PREDICATE_GREATER_EQUAL); + if (n == NULL) { + ret = ENOMEM; + goto out; + } + + struct recycler_element *ne = ravl_data(n); + m->chunk_id = ne->chunk_id; + m->zone_id = ne->zone_id; + + ravl_remove(r->runs, n); + + struct chunk_header *hdr = heap_get_chunk_hdr(r->heap, m); + m->size_idx = hdr->size_idx; + + memblock_rebuild_state(r->heap, m); + +out: + util_mutex_unlock(&r->lock); + + return ret; +} + +/* + * recycler_recalc -- recalculates the scores of runs in the recycler to match + * the updated persistent state + */ +struct empty_runs +recycler_recalc(struct recycler *r, int force) +{ + struct empty_runs runs; + VEC_INIT(&runs); + + uint64_t units = r->unaccounted_total; + + size_t peak_arenas; + util_atomic_load64(r->peak_arenas, &peak_arenas); + + uint64_t recalc_threshold = + THRESHOLD_MUL * peak_arenas * r->nallocs; + + if (!force && units < recalc_threshold) + 
return runs; + + if (util_mutex_trylock(&r->lock) != 0) + return runs; + + /* If the search is forced, recalculate everything */ + uint64_t search_limit = force ? UINT64_MAX : units; + + uint64_t found_units = 0; + struct memory_block nm = MEMORY_BLOCK_NONE; + struct ravl_node *n; + struct recycler_element next = {0, 0, 0, 0}; + enum ravl_predicate p = RAVL_PREDICATE_GREATER_EQUAL; + do { + if ((n = ravl_find(r->runs, &next, p)) == NULL) + break; + + p = RAVL_PREDICATE_GREATER; + + struct recycler_element *ne = ravl_data(n); + next = *ne; + + uint64_t chunk_units = r->unaccounted_units[ne->chunk_id]; + if (!force && chunk_units == 0) + continue; + + uint32_t existing_free_space = ne->free_space; + + nm.chunk_id = ne->chunk_id; + nm.zone_id = ne->zone_id; + memblock_rebuild_state(r->heap, &nm); + + struct recycler_element e = recycler_element_new(r->heap, &nm); + + ASSERT(e.free_space >= existing_free_space); + uint64_t free_space_diff = e.free_space - existing_free_space; + found_units += free_space_diff; + + if (free_space_diff == 0) + continue; + + /* + * Decrease the per chunk_id counter by the number of nallocs + * found, increased by the blocks potentially freed in the + * active memory block. Cap the sub value to prevent overflow. + */ + util_fetch_and_sub64(&r->unaccounted_units[nm.chunk_id], + MIN(chunk_units, free_space_diff + r->nallocs)); + + ravl_remove(r->runs, n); + + if (e.free_space == r->nallocs) { + memblock_rebuild_state(r->heap, &nm); + if (VEC_PUSH_BACK(&runs, nm) != 0) + ASSERT(0); /* XXX: fix after refactoring */ + } else { + VEC_PUSH_BACK(&r->recalc, e); + } + } while (found_units < search_limit); + + struct recycler_element *e; + VEC_FOREACH_BY_PTR(e, &r->recalc) { + ravl_emplace_copy(r->runs, e); + } + + VEC_CLEAR(&r->recalc); + + util_mutex_unlock(&r->lock); + + util_fetch_and_sub64(&r->unaccounted_total, units); + + return runs; +} + +/* + * recycler_inc_unaccounted -- increases the number of unaccounted units in the + * recycler + */ +void +recycler_inc_unaccounted(struct recycler *r, const struct memory_block *m) +{ + util_fetch_and_add64(&r->unaccounted_total, m->size_idx); + util_fetch_and_add64(&r->unaccounted_units[m->chunk_id], + m->size_idx); +} diff --git a/src/pmdk/src/libpmemobj/recycler.h b/src/pmdk/src/libpmemobj/recycler.h new file mode 100644 index 000000000..c8c824cc6 --- /dev/null +++ b/src/pmdk/src/libpmemobj/recycler.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * recycler.h -- internal definitions of run recycler + * + * This is a container that stores runs that are currently not used by any of + * the buckets. 
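+ *
+ * A hypothetical lifecycle sketch (m is a struct memory_block of a run):
+ *
+ *	struct recycler *r = recycler_new(heap, nallocs, &peak_arenas);
+ *	recycler_put(r, &m, recycler_element_new(heap, &m));
+ *	...
+ *	if (recycler_get(r, &m) == 0)
+ *		reuse_run(&m);	(hypothetical consumer)
+ *	recycler_delete(r);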
+ */ + +#ifndef LIBPMEMOBJ_RECYCLER_H +#define LIBPMEMOBJ_RECYCLER_H 1 + +#include "memblock.h" +#include "vec.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct recycler; +VEC(empty_runs, struct memory_block); + +struct recycler_element { + uint32_t max_free_block; + uint32_t free_space; + + uint32_t chunk_id; + uint32_t zone_id; +}; + +struct recycler *recycler_new(struct palloc_heap *layout, + size_t nallocs, size_t *peak_arenas); +void recycler_delete(struct recycler *r); +struct recycler_element recycler_element_new(struct palloc_heap *heap, + const struct memory_block *m); + +int recycler_put(struct recycler *r, const struct memory_block *m, + struct recycler_element element); + +int recycler_get(struct recycler *r, struct memory_block *m); + +struct empty_runs recycler_recalc(struct recycler *r, int force); + +void recycler_inc_unaccounted(struct recycler *r, + const struct memory_block *m); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/stats.c b/src/pmdk/src/libpmemobj/stats.c new file mode 100644 index 000000000..a0fc25428 --- /dev/null +++ b/src/pmdk/src/libpmemobj/stats.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2019, Intel Corporation */ + +/* + * stats.c -- implementation of statistics + */ + +#include "obj.h" +#include "stats.h" + +STATS_CTL_HANDLER(persistent, curr_allocated, heap_curr_allocated); + +STATS_CTL_HANDLER(transient, run_allocated, heap_run_allocated); +STATS_CTL_HANDLER(transient, run_active, heap_run_active); + +static const struct ctl_node CTL_NODE(heap)[] = { + STATS_CTL_LEAF(persistent, curr_allocated), + STATS_CTL_LEAF(transient, run_allocated), + STATS_CTL_LEAF(transient, run_active), + + CTL_NODE_END +}; + +/* + * CTL_READ_HANDLER(enabled) -- returns whether or not statistics are enabled + */ +static int +CTL_READ_HANDLER(enabled)(void *ctx, + enum ctl_query_source source, void *arg, + struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + enum pobj_stats_enabled *arg_out = arg; + + *arg_out = pop->stats->enabled; + + return 0; +} + +/* + * stats_enabled_parser -- parses the stats enabled type + */ +static int +stats_enabled_parser(const void *arg, void *dest, size_t dest_size) +{ + const char *vstr = arg; + enum pobj_stats_enabled *enabled = dest; + ASSERTeq(dest_size, sizeof(enum pobj_stats_enabled)); + + int bool_out; + if (ctl_arg_boolean(arg, &bool_out, sizeof(bool_out)) == 0) { + *enabled = bool_out ? 
+ POBJ_STATS_ENABLED_BOTH : POBJ_STATS_DISABLED; + return 0; + } + + if (strcmp(vstr, "disabled") == 0) { + *enabled = POBJ_STATS_DISABLED; + } else if (strcmp(vstr, "both") == 0) { + *enabled = POBJ_STATS_ENABLED_BOTH; + } else if (strcmp(vstr, "persistent") == 0) { + *enabled = POBJ_STATS_ENABLED_PERSISTENT; + } else if (strcmp(vstr, "transient") == 0) { + *enabled = POBJ_STATS_ENABLED_TRANSIENT; + } else { + ERR("invalid enable type"); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * CTL_WRITE_HANDLER(enabled) -- enables or disables statistics counting + */ +static int +CTL_WRITE_HANDLER(enabled)(void *ctx, + enum ctl_query_source source, void *arg, + struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + pop->stats->enabled = *(enum pobj_stats_enabled *)arg; + + return 0; +} + +static const struct ctl_argument CTL_ARG(enabled) = { + .dest_size = sizeof(enum pobj_stats_enabled), + .parsers = { + CTL_ARG_PARSER(sizeof(enum pobj_stats_enabled), + stats_enabled_parser), + CTL_ARG_PARSER_END + } +}; + +static const struct ctl_node CTL_NODE(stats)[] = { + CTL_CHILD(heap), + CTL_LEAF_RW(enabled), + + CTL_NODE_END +}; + +/* + * stats_new -- allocates and initializes statistics instance + */ +struct stats * +stats_new(PMEMobjpool *pop) +{ + struct stats *s = Malloc(sizeof(*s)); + if (s == NULL) { + ERR("!Malloc"); + return NULL; + } + + s->enabled = POBJ_STATS_ENABLED_TRANSIENT; + s->persistent = &pop->stats_persistent; + VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(s->persistent, sizeof(*s->persistent)); + s->transient = Zalloc(sizeof(struct stats_transient)); + if (s->transient == NULL) + goto error_transient_alloc; + + return s; + +error_transient_alloc: + Free(s); + return NULL; +} + +/* + * stats_delete -- deletes statistics instance + */ +void +stats_delete(PMEMobjpool *pop, struct stats *s) +{ + pmemops_persist(&pop->p_ops, s->persistent, + sizeof(struct stats_persistent)); + Free(s->transient); + Free(s); +} + +/* + * stats_ctl_register -- registers ctl nodes for statistics + */ +void +stats_ctl_register(PMEMobjpool *pop) +{ + CTL_REGISTER_MODULE(pop->ctl, stats); +} diff --git a/src/pmdk/src/libpmemobj/stats.h b/src/pmdk/src/libpmemobj/stats.h new file mode 100644 index 000000000..c66a1e8ea --- /dev/null +++ b/src/pmdk/src/libpmemobj/stats.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * stats.h -- definitions of statistics + */ + +#ifndef LIBPMEMOBJ_STATS_H +#define LIBPMEMOBJ_STATS_H 1 + +#include "ctl.h" +#include "libpmemobj/ctl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct stats_transient { + uint64_t heap_run_allocated; + uint64_t heap_run_active; +}; + +struct stats_persistent { + uint64_t heap_curr_allocated; +}; + +struct stats { + enum pobj_stats_enabled enabled; + struct stats_transient *transient; + struct stats_persistent *persistent; +}; + +#define STATS_INC(stats, type, name, value) do {\ + STATS_INC_##type(stats, name, value);\ +} while (0) + +#define STATS_INC_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_add64((&(stats)->transient->name), (value));\ +} while (0) + +#define STATS_INC_persistent(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_add64((&(stats)->persistent->name), (value));\ +} while (0) + +#define STATS_SUB(stats, type, name, value) do {\ + 
STATS_SUB_##type(stats, name, value);\ +} while (0) + +#define STATS_SUB_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_sub64((&(stats)->transient->name), (value));\ +} while (0) + +#define STATS_SUB_persistent(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_sub64((&(stats)->persistent->name), (value));\ +} while (0) + +#define STATS_SET(stats, type, name, value) do {\ + STATS_SET_##type(stats, name, value);\ +} while (0) + +#define STATS_SET_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_atomic_store_explicit64((&(stats)->transient->name),\ + (value), memory_order_release);\ +} while (0) + +#define STATS_SET_persistent(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_atomic_store_explicit64((&(stats)->persistent->name),\ + (value), memory_order_release);\ +} while (0) + +#define STATS_CTL_LEAF(type, name)\ +{CTL_STR(name), CTL_NODE_LEAF,\ +{CTL_READ_HANDLER(type##_##name), NULL, NULL},\ +NULL, NULL} + +#define STATS_CTL_HANDLER(type, name, varname)\ +static int CTL_READ_HANDLER(type##_##name)(void *ctx,\ + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)\ +{\ + PMEMobjpool *pop = ctx;\ + uint64_t *argv = arg;\ + util_atomic_load_explicit64(&pop->stats->type->varname,\ + argv, memory_order_acquire);\ + return 0;\ +} + +void stats_ctl_register(PMEMobjpool *pop); + +struct stats *stats_new(PMEMobjpool *pop); +void stats_delete(PMEMobjpool *pop, struct stats *stats); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/sync.c b/src/pmdk/src/libpmemobj/sync.c new file mode 100644 index 000000000..7a1e89762 --- /dev/null +++ b/src/pmdk/src/libpmemobj/sync.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2018, Intel Corporation */ + +/* + * sync.c -- persistent memory resident synchronization primitives + */ + +#include <errno.h> + +#include "obj.h" +#include "out.h" +#include "util.h" +#include "sync.h" +#include "sys_util.h" +#include "util.h" +#include "valgrind_internal.h" + +#ifdef __FreeBSD__ +#define RECORD_LOCK(init, type, p) \ + if (init) {\ + PMEM##type##_internal *head = pop->type##_head;\ + while (!util_bool_compare_and_swap64(&pop->type##_head, head,\ + p)) {\ + head = pop->type##_head;\ + }\ + p->PMEM##type##_next = head;\ + } +#else +#define RECORD_LOCK(init, type, p) +#endif + +/* + * _get_value -- (internal) atomically initialize and return a value. + * Returns -1 on error, 0 if the caller is not the value + * initializer, 1 if the caller is the value initializer.
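+ *
+ * The runid field acts as a small state machine (editor's summary of the
+ * loop below):
+ *	*runid == pop_runid	- value already initialized for this run
+ *	*runid == pop_runid - 1	- another thread is initializing it; spin
+ *	anything else		- CAS to pop_runid - 1 and call init_value()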
+ */ +static int +_get_value(uint64_t pop_runid, volatile uint64_t *runid, void *value, void *arg, + int (*init_value)(void *value, void *arg)) +{ + uint64_t tmp_runid; + int initializer = 0; + + while ((tmp_runid = *runid) != pop_runid) { + if (tmp_runid == pop_runid - 1) + continue; + + if (!util_bool_compare_and_swap64(runid, tmp_runid, + pop_runid - 1)) + continue; + + initializer = 1; + + if (init_value(value, arg)) { + ERR("error initializing lock"); + util_fetch_and_and64(runid, 0); + return -1; + } + + if (util_bool_compare_and_swap64(runid, pop_runid - 1, + pop_runid) == 0) { + ERR("error setting lock runid"); + return -1; + } + } + + return initializer; +} + +/* + * get_mutex -- (internal) atomically initialize, record and return a mutex + */ +static inline os_mutex_t * +get_mutex(PMEMobjpool *pop, PMEMmutex_internal *imp) +{ + if (likely(imp->pmemmutex.runid == pop->run_id)) + return &imp->PMEMmutex_lock; + + volatile uint64_t *runid = &imp->pmemmutex.runid; + + LOG(5, "PMEMmutex %p pop->run_id %" PRIu64 " pmemmutex.runid %" PRIu64, + imp, pop->run_id, *runid); + + ASSERTeq((uintptr_t)runid % util_alignof(uint64_t), 0); + + COMPILE_ERROR_ON(sizeof(PMEMmutex) != sizeof(PMEMmutex_internal)); + COMPILE_ERROR_ON(util_alignof(PMEMmutex) != util_alignof(os_mutex_t)); + + VALGRIND_REMOVE_PMEM_MAPPING(imp, _POBJ_CL_SIZE); + + int initializer = _get_value(pop->run_id, runid, &imp->PMEMmutex_lock, + NULL, (void *)os_mutex_init); + if (initializer == -1) { + return NULL; + } + + RECORD_LOCK(initializer, mutex, imp); + + return &imp->PMEMmutex_lock; +} + +/* + * get_rwlock -- (internal) atomically initialize, record and return a rwlock + */ +static inline os_rwlock_t * +get_rwlock(PMEMobjpool *pop, PMEMrwlock_internal *irp) +{ + if (likely(irp->pmemrwlock.runid == pop->run_id)) + return &irp->PMEMrwlock_lock; + + volatile uint64_t *runid = &irp->pmemrwlock.runid; + + LOG(5, "PMEMrwlock %p pop->run_id %"\ + PRIu64 " pmemrwlock.runid %" PRIu64, + irp, pop->run_id, *runid); + + ASSERTeq((uintptr_t)runid % util_alignof(uint64_t), 0); + + COMPILE_ERROR_ON(sizeof(PMEMrwlock) != sizeof(PMEMrwlock_internal)); + COMPILE_ERROR_ON(util_alignof(PMEMrwlock) + != util_alignof(os_rwlock_t)); + + VALGRIND_REMOVE_PMEM_MAPPING(irp, _POBJ_CL_SIZE); + + int initializer = _get_value(pop->run_id, runid, &irp->PMEMrwlock_lock, + NULL, (void *)os_rwlock_init); + if (initializer == -1) { + return NULL; + } + + RECORD_LOCK(initializer, rwlock, irp); + + return &irp->PMEMrwlock_lock; +} + +/* + * get_cond -- (internal) atomically initialize, record and return a + * condition variable + */ +static inline os_cond_t * +get_cond(PMEMobjpool *pop, PMEMcond_internal *icp) +{ + if (likely(icp->pmemcond.runid == pop->run_id)) + return &icp->PMEMcond_cond; + + volatile uint64_t *runid = &icp->pmemcond.runid; + + LOG(5, "PMEMcond %p pop->run_id %" PRIu64 " pmemcond.runid %" PRIu64, + icp, pop->run_id, *runid); + + ASSERTeq((uintptr_t)runid % util_alignof(uint64_t), 0); + + COMPILE_ERROR_ON(sizeof(PMEMcond) != sizeof(PMEMcond_internal)); + COMPILE_ERROR_ON(util_alignof(PMEMcond) != util_alignof(os_cond_t)); + + VALGRIND_REMOVE_PMEM_MAPPING(icp, _POBJ_CL_SIZE); + + int initializer = _get_value(pop->run_id, runid, &icp->PMEMcond_cond, + NULL, (void *)os_cond_init); + if (initializer == -1) { + return NULL; + } + + RECORD_LOCK(initializer, cond, icp); + + return &icp->PMEMcond_cond; +} + +/* + * pmemobj_mutex_zero -- zero-initialize a pmem resident mutex + * + * This function is not MT safe. 
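+ *
+ * Typical use (editor's sketch; struct foo and foo_oid are hypothetical):
+ * reset the lock inside a freshly allocated object, before any other
+ * thread can reach it, so the next lock operation reinitializes it for
+ * the current run:
+ *
+ *	struct foo *f = pmemobj_direct(foo_oid);
+ *	pmemobj_mutex_zero(pop, &f->lock);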
+ */ +void +pmemobj_mutex_zero(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + mutexip->pmemmutex.runid = 0; + pmemops_persist(&pop->p_ops, &mutexip->pmemmutex.runid, + sizeof(mutexip->pmemmutex.runid)); +} + +/* + * pmemobj_mutex_lock -- lock a pmem resident mutex + * + * Atomically initializes and locks a PMEMmutex, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_mutex_lock(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_mutex_t *mutex = get_mutex(pop, mutexip); + + if (mutex == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + + return os_mutex_lock(mutex); +} + +/* + * pmemobj_mutex_assert_locked -- checks whether mutex is locked. + * + * Returns 0 when mutex is locked. + */ +int +pmemobj_mutex_assert_locked(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_mutex_t *mutex = get_mutex(pop, mutexip); + if (mutex == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + + int ret = os_mutex_trylock(mutex); + if (ret == EBUSY) + return 0; + if (ret == 0) { + util_mutex_unlock(mutex); + /* + * There's no good error code for this case. EINVAL is used for + * something else here. + */ + return ENODEV; + } + return ret; +} + +/* + * pmemobj_mutex_timedlock -- lock a pmem resident mutex + * + * Atomically initializes and locks a PMEMmutex, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_mutex_timedlock(PMEMobjpool *pop, PMEMmutex *__restrict mutexp, + const struct timespec *__restrict abs_timeout) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_mutex_t *mutex = get_mutex(pop, mutexip); + if (mutex == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + + return os_mutex_timedlock(mutex, abs_timeout); +} + +/* + * pmemobj_mutex_trylock -- trylock a pmem resident mutex + * + * Atomically initializes and trylocks a PMEMmutex, otherwise behaves as its + * POSIX counterpart. 
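+ *
+ * As with pthread_mutex_trylock(), the expected results are 0 on success
+ * and EBUSY when the mutex is already locked; EINVAL is returned when the
+ * lock could not be initialized (editor's note, based on the body below
+ * and the POSIX semantics of the underlying lock).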
+ */ +int +pmemobj_mutex_trylock(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_mutex_t *mutex = get_mutex(pop, mutexip); + if (mutex == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + + return os_mutex_trylock(mutex); +} + +/* + * pmemobj_mutex_unlock -- unlock a pmem resident mutex + */ +int +pmemobj_mutex_unlock(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + LOG(3, "pop %p mutex %p", pop, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + + /* XXX potential performance improvement - move GET to debug version */ + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_mutex_t *mutex = get_mutex(pop, mutexip); + if (mutex == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + + return os_mutex_unlock(mutex); +} + +/* + * pmemobj_rwlock_zero -- zero-initialize a pmem resident rwlock + * + * This function is not MT safe. + */ +void +pmemobj_rwlock_zero(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + rwlockip->pmemrwlock.runid = 0; + pmemops_persist(&pop->p_ops, &rwlockip->pmemrwlock.runid, + sizeof(rwlockip->pmemrwlock.runid)); +} + +/* + * pmemobj_rwlock_rdlock -- rdlock a pmem resident rwlock + * + * Atomically initializes and rdlocks a PMEMrwlock, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_rwlock_rdlock(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_rdlock(rwlock); +} + +/* + * pmemobj_rwlock_wrlock -- wrlock a pmem resident rwlock + * + * Atomically initializes and wrlocks a PMEMrwlock, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_rwlock_wrlock(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_wrlock(rwlock); +} + +/* + * pmemobj_rwlock_timedrdlock -- timedrdlock a pmem resident rwlock + * + * Atomically initializes and timedrdlocks a PMEMrwlock, otherwise behaves as + * its POSIX counterpart.
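+ *
+ * As in POSIX, abs_timeout is an absolute point in time (editor's sketch
+ * of a one-second deadline; error handling omitted):
+ *
+ *	struct timespec ts;
+ *	clock_gettime(CLOCK_REALTIME, &ts);
+ *	ts.tv_sec += 1;
+ *	int ret = pmemobj_rwlock_timedrdlock(pop, rwlockp, &ts);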
+ */ +int +pmemobj_rwlock_timedrdlock(PMEMobjpool *pop, PMEMrwlock *__restrict rwlockp, + const struct timespec *__restrict abs_timeout) +{ + LOG(3, "pop %p rwlock %p timeout sec %ld nsec %ld", pop, rwlockp, + abs_timeout->tv_sec, abs_timeout->tv_nsec); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_timedrdlock(rwlock, abs_timeout); +} + +/* + * pmemobj_rwlock_timedwrlock -- timedwrlock a pmem resident rwlock + * + * Atomically initializes and timedwrlocks a PMEMrwlock, otherwise behaves as + * its POSIX counterpart. + */ +int +pmemobj_rwlock_timedwrlock(PMEMobjpool *pop, PMEMrwlock *__restrict rwlockp, + const struct timespec *__restrict abs_timeout) +{ + LOG(3, "pop %p rwlock %p timeout sec %ld nsec %ld", pop, rwlockp, + abs_timeout->tv_sec, abs_timeout->tv_nsec); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_timedwrlock(rwlock, abs_timeout); +} + +/* + * pmemobj_rwlock_tryrdlock -- tryrdlock a pmem resident rwlock + * + * Atomically initializes and tryrdlocks a PMEMrwlock, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_rwlock_tryrdlock(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_tryrdlock(rwlock); +} + +/* + * pmemobj_rwlock_trywrlock -- trywrlock a pmem resident rwlock + * + * Atomically initializes and trywrlocks a PMEMrwlock, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_rwlock_trywrlock(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_trywrlock(rwlock); +} + +/* + * pmemobj_rwlock_unlock -- unlock a pmem resident rwlock + */ +int +pmemobj_rwlock_unlock(PMEMobjpool *pop, PMEMrwlock *rwlockp) +{ + LOG(3, "pop %p rwlock %p", pop, rwlockp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(rwlockp)); + + /* XXX potential performance improvement - move GET to debug version */ + PMEMrwlock_internal *rwlockip = (PMEMrwlock_internal *)rwlockp; + os_rwlock_t *rwlock = get_rwlock(pop, rwlockip); + if (rwlock == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)rwlock % util_alignof(os_rwlock_t), 0); + + return os_rwlock_unlock(rwlock); +} + +/* + * pmemobj_cond_zero -- zero-initialize a pmem resident condition variable + * + * This function is not MT safe.
+ */ +void +pmemobj_cond_zero(PMEMobjpool *pop, PMEMcond *condp) +{ + LOG(3, "pop %p cond %p", pop, condp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(condp)); + + PMEMcond_internal *condip = (PMEMcond_internal *)condp; + condip->pmemcond.runid = 0; + pmemops_persist(&pop->p_ops, &condip->pmemcond.runid, + sizeof(condip->pmemcond.runid)); +} + +/* + * pmemobj_cond_broadcast -- broadcast a pmem resident condition variable + * + * Atomically initializes and broadcasts a PMEMcond, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_cond_broadcast(PMEMobjpool *pop, PMEMcond *condp) +{ + LOG(3, "pop %p cond %p", pop, condp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(condp)); + + PMEMcond_internal *condip = (PMEMcond_internal *)condp; + os_cond_t *cond = get_cond(pop, condip); + if (cond == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)cond % util_alignof(os_cond_t), 0); + + return os_cond_broadcast(cond); +} + +/* + * pmemobj_cond_signal -- signal a pmem resident condition variable + * + * Atomically initializes and signals a PMEMcond, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_cond_signal(PMEMobjpool *pop, PMEMcond *condp) +{ + LOG(3, "pop %p cond %p", pop, condp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(condp)); + + PMEMcond_internal *condip = (PMEMcond_internal *)condp; + os_cond_t *cond = get_cond(pop, condip); + if (cond == NULL) + return EINVAL; + + ASSERTeq((uintptr_t)cond % util_alignof(os_cond_t), 0); + + return os_cond_signal(cond); +} + +/* + * pmemobj_cond_timedwait -- timedwait on a pmem resident condition variable + * + * Atomically initializes and timedwaits on a PMEMcond, otherwise behaves as its + * POSIX counterpart. + */ +int +pmemobj_cond_timedwait(PMEMobjpool *pop, PMEMcond *__restrict condp, + PMEMmutex *__restrict mutexp, + const struct timespec *__restrict abs_timeout) +{ + LOG(3, "pop %p cond %p mutex %p abstime sec %ld nsec %ld", pop, condp, + mutexp, abs_timeout->tv_sec, abs_timeout->tv_nsec); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + ASSERTeq(pop, pmemobj_pool_by_ptr(condp)); + + PMEMcond_internal *condip = (PMEMcond_internal *)condp; + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_cond_t *cond = get_cond(pop, condip); + os_mutex_t *mutex = get_mutex(pop, mutexip); + if ((cond == NULL) || (mutex == NULL)) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + ASSERTeq((uintptr_t)cond % util_alignof(os_cond_t), 0); + + return os_cond_timedwait(cond, mutex, abs_timeout); +} + +/* + * pmemobj_cond_wait -- wait on a pmem resident condition variable + * + * Atomically initializes and waits on a PMEMcond, otherwise behaves as its + * POSIX counterpart.
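+ *
+ * As with pthread_cond_wait(), spurious wakeups are possible, so the
+ * predicate should be re-checked in a loop (editor's sketch; the f->ready
+ * flag and its containing struct are hypothetical):
+ *
+ *	pmemobj_mutex_lock(pop, &f->mutex);
+ *	while (!f->ready)
+ *		pmemobj_cond_wait(pop, &f->cond, &f->mutex);
+ *	pmemobj_mutex_unlock(pop, &f->mutex);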
+ */ +int +pmemobj_cond_wait(PMEMobjpool *pop, PMEMcond *condp, + PMEMmutex *__restrict mutexp) +{ + LOG(3, "pop %p cond %p mutex %p", pop, condp, mutexp); + + ASSERTeq(pop, pmemobj_pool_by_ptr(mutexp)); + ASSERTeq(pop, pmemobj_pool_by_ptr(condp)); + + PMEMcond_internal *condip = (PMEMcond_internal *)condp; + PMEMmutex_internal *mutexip = (PMEMmutex_internal *)mutexp; + os_cond_t *cond = get_cond(pop, condip); + os_mutex_t *mutex = get_mutex(pop, mutexip); + if ((cond == NULL) || (mutex == NULL)) + return EINVAL; + + ASSERTeq((uintptr_t)mutex % util_alignof(os_mutex_t), 0); + ASSERTeq((uintptr_t)cond % util_alignof(os_cond_t), 0); + + return os_cond_wait(cond, mutex); +} + +/* + * pmemobj_volatile -- atomically initialize, record and return a + * generic value + */ +void * +pmemobj_volatile(PMEMobjpool *pop, struct pmemvlt *vlt, + void *ptr, size_t size, + int (*constr)(void *ptr, void *arg), void *arg) +{ + LOG(3, "pop %p vlt %p ptr %p constr %p arg %p", pop, vlt, ptr, + constr, arg); + + if (likely(vlt->runid == pop->run_id)) + return ptr; + + VALGRIND_REMOVE_PMEM_MAPPING(ptr, size); + + VALGRIND_ADD_TO_TX(vlt, sizeof(*vlt)); + if (_get_value(pop->run_id, &vlt->runid, ptr, arg, constr) < 0) { + VALGRIND_REMOVE_FROM_TX(vlt, sizeof(*vlt)); + return NULL; + } + + VALGRIND_REMOVE_FROM_TX(vlt, sizeof(*vlt)); + VALGRIND_SET_CLEAN(vlt, sizeof(*vlt)); + + return ptr; +} diff --git a/src/pmdk/src/libpmemobj/sync.h b/src/pmdk/src/libpmemobj/sync.h new file mode 100644 index 000000000..46aa1bb22 --- /dev/null +++ b/src/pmdk/src/libpmemobj/sync.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * sync.h -- internal to obj synchronization API + */ + +#ifndef LIBPMEMOBJ_SYNC_H +#define LIBPMEMOBJ_SYNC_H 1 + +#include <errno.h> +#include <stdint.h> + +#include "libpmemobj.h" +#include "out.h" +#include "os_thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * internal definitions of PMEM-locks + */ +typedef union padded_pmemmutex { + char padding[_POBJ_CL_SIZE]; + struct { + uint64_t runid; + union { + os_mutex_t mutex; + struct { + void *bsd_mutex_p; + union padded_pmemmutex *next; + } bsd_u; + } mutex_u; + } pmemmutex; +} PMEMmutex_internal; +#define PMEMmutex_lock pmemmutex.mutex_u.mutex +#define PMEMmutex_bsd_mutex_p pmemmutex.mutex_u.bsd_u.bsd_mutex_p +#define PMEMmutex_next pmemmutex.mutex_u.bsd_u.next + +typedef union padded_pmemrwlock { + char padding[_POBJ_CL_SIZE]; + struct { + uint64_t runid; + union { + os_rwlock_t rwlock; + struct { + void *bsd_rwlock_p; + union padded_pmemrwlock *next; + } bsd_u; + } rwlock_u; + } pmemrwlock; +} PMEMrwlock_internal; +#define PMEMrwlock_lock pmemrwlock.rwlock_u.rwlock +#define PMEMrwlock_bsd_rwlock_p pmemrwlock.rwlock_u.bsd_u.bsd_rwlock_p +#define PMEMrwlock_next pmemrwlock.rwlock_u.bsd_u.next + +typedef union padded_pmemcond { + char padding[_POBJ_CL_SIZE]; + struct { + uint64_t runid; + union { + os_cond_t cond; + struct { + void *bsd_cond_p; + union padded_pmemcond *next; + } bsd_u; + } cond_u; + } pmemcond; +} PMEMcond_internal; +#define PMEMcond_cond pmemcond.cond_u.cond +#define PMEMcond_bsd_cond_p pmemcond.cond_u.bsd_u.bsd_cond_p +#define PMEMcond_next pmemcond.cond_u.bsd_u.next + +/* + * pmemobj_mutex_lock_nofail -- pmemobj_mutex_lock variant that never + * fails from the caller's perspective. If pmemobj_mutex_lock fails, this + * function aborts the program.
+ */ +static inline void +pmemobj_mutex_lock_nofail(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + int ret = pmemobj_mutex_lock(pop, mutexp); + if (ret) { + errno = ret; + FATAL("!pmemobj_mutex_lock"); + } +} + +/* + * pmemobj_mutex_unlock_nofail -- pmemobj_mutex_unlock variant that never + * fails from the caller's perspective. If pmemobj_mutex_unlock fails, this + * function aborts the program. + */ +static inline void +pmemobj_mutex_unlock_nofail(PMEMobjpool *pop, PMEMmutex *mutexp) +{ + int ret = pmemobj_mutex_unlock(pop, mutexp); + if (ret) { + errno = ret; + FATAL("!pmemobj_mutex_unlock"); + } +} + +int pmemobj_mutex_assert_locked(PMEMobjpool *pop, PMEMmutex *mutexp); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmemobj/tx.c b/src/pmdk/src/libpmemobj/tx.c new file mode 100644 index 000000000..2213dd04a --- /dev/null +++ b/src/pmdk/src/libpmemobj/tx.c @@ -0,0 +1,2375 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2020, Intel Corporation */ + +/* + * tx.c -- transactions implementation + */ + +#include <inttypes.h> +#include <wchar.h> + +#include "queue.h" +#include "ravl.h" +#include "obj.h" +#include "out.h" +#include "pmalloc.h" +#include "tx.h" +#include "valgrind_internal.h" +#include "memops.h" + +struct tx_data { + PMDK_SLIST_ENTRY(tx_data) tx_entry; + jmp_buf env; + enum pobj_tx_failure_behavior failure_behavior; +}; + +struct tx { + PMEMobjpool *pop; + enum pobj_tx_stage stage; + int last_errnum; + struct lane *lane; + PMDK_SLIST_HEAD(txl, tx_lock_data) tx_locks; + PMDK_SLIST_HEAD(txd, tx_data) tx_entries; + + struct ravl *ranges; + + VEC(, struct pobj_action) actions; + VEC(, struct user_buffer_def) redo_userbufs; + size_t redo_userbufs_capacity; + + pmemobj_tx_callback stage_callback; + void *stage_callback_arg; + + int first_snapshot; + + void *user_data; +}; + +/* + * get_tx -- (internal) returns current transaction + * + * This function should be used only in high-level functions. + */ +static struct tx * +get_tx() +{ + static __thread struct tx tx; + return &tx; +} + +struct tx_lock_data { + union { + PMEMmutex *mutex; + PMEMrwlock *rwlock; + } lock; + enum pobj_tx_param lock_type; + PMDK_SLIST_ENTRY(tx_lock_data) tx_lock; +}; + +struct tx_alloc_args { + uint64_t flags; + const void *copy_ptr; + size_t copy_size; +}; + +#define COPY_ARGS(flags, copy_ptr, copy_size)\ +(struct tx_alloc_args){flags, copy_ptr, copy_size} + +#define ALLOC_ARGS(flags)\ +(struct tx_alloc_args){flags, NULL, 0} + +struct tx_range_def { + uint64_t offset; + uint64_t size; + uint64_t flags; +}; + +/* + * tx_range_def_cmp -- compares two snapshot ranges + */ +static int +tx_range_def_cmp(const void *lhs, const void *rhs) +{ + const struct tx_range_def *l = lhs; + const struct tx_range_def *r = rhs; + + if (l->offset > r->offset) + return 1; + else if (l->offset < r->offset) + return -1; + + return 0; +} + +/* + * tx_params_new -- creates a new transactional parameters instance and fills it + * with default values.
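+ * The only tunable at the moment is cache_size, which starts out as
+ * TX_DEFAULT_RANGE_CACHE_SIZE (editor's note, based on the body below).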
+ */ +struct tx_parameters * +tx_params_new(void) +{ + struct tx_parameters *tx_params = Malloc(sizeof(*tx_params)); + if (tx_params == NULL) + return NULL; + + tx_params->cache_size = TX_DEFAULT_RANGE_CACHE_SIZE; + + return tx_params; +} + +/* + * tx_params_delete -- deletes transactional parameters instance + */ +void +tx_params_delete(struct tx_parameters *tx_params) +{ + Free(tx_params); +} + +static void +obj_tx_abort(int errnum, int user); + +/* + * obj_tx_fail_err -- (internal) pmemobj_tx_abort variant that returns + * error code + */ +static inline int +obj_tx_fail_err(int errnum, uint64_t flags) +{ + if ((flags & POBJ_FLAG_TX_NO_ABORT) == 0) + obj_tx_abort(errnum, 0); + errno = errnum; + return errnum; +} + +/* + * obj_tx_fail_null -- (internal) pmemobj_tx_abort variant that returns + * null PMEMoid + */ +static inline PMEMoid +obj_tx_fail_null(int errnum, uint64_t flags) +{ + if ((flags & POBJ_FLAG_TX_NO_ABORT) == 0) + obj_tx_abort(errnum, 0); + errno = errnum; + return OID_NULL; +} + +/* ASSERT_IN_TX -- checks whether there's open transaction */ +#define ASSERT_IN_TX(tx) do {\ + if ((tx)->stage == TX_STAGE_NONE)\ + FATAL("%s called outside of transaction", __func__);\ +} while (0) + +/* ASSERT_TX_STAGE_WORK -- checks whether current transaction stage is WORK */ +#define ASSERT_TX_STAGE_WORK(tx) do {\ + if ((tx)->stage != TX_STAGE_WORK)\ + FATAL("%s called in invalid stage %d", __func__, (tx)->stage);\ +} while (0) + +/* + * tx_action_reserve -- (internal) reserve space for the given number of actions + */ +static int +tx_action_reserve(struct tx *tx, size_t n) +{ + size_t entries_size = (VEC_SIZE(&tx->actions) + n) * + sizeof(struct ulog_entry_val); + + /* take the provided user buffers into account when reserving */ + entries_size -= MIN(tx->redo_userbufs_capacity, entries_size); + + if (operation_reserve(tx->lane->external, entries_size) != 0) + return -1; + + return 0; +} + +/* + * tx_action_add -- (internal) reserve space and add a new tx action + */ +static struct pobj_action * +tx_action_add(struct tx *tx) +{ + if (tx_action_reserve(tx, 1) != 0) + return NULL; + + VEC_INC_BACK(&tx->actions); + + return &VEC_BACK(&tx->actions); +} + +/* + * tx_action_remove -- (internal) remove last tx action + */ +static void +tx_action_remove(struct tx *tx) +{ + VEC_POP_BACK(&tx->actions); +} + +/* + * constructor_tx_alloc -- (internal) constructor for normal alloc + */ +static int +constructor_tx_alloc(void *ctx, void *ptr, size_t usable_size, void *arg) +{ + LOG(5, NULL); + + ASSERTne(ptr, NULL); + ASSERTne(arg, NULL); + + struct tx_alloc_args *args = arg; + + /* do not report changes to the new object */ + VALGRIND_ADD_TO_TX(ptr, usable_size); + + if (args->flags & POBJ_FLAG_ZERO) + memset(ptr, 0, usable_size); + + if (args->copy_ptr && args->copy_size != 0) { + memcpy(ptr, args->copy_ptr, args->copy_size); + } + + return 0; +} + +struct tx_range_data { + void *begin; + void *end; + PMDK_SLIST_ENTRY(tx_range_data) tx_range; +}; + +PMDK_SLIST_HEAD(txr, tx_range_data); + +/* + * tx_remove_range -- (internal) removes specified range from ranges list + */ +static void +tx_remove_range(struct txr *tx_ranges, void *begin, void *end) +{ + struct tx_range_data *txr = PMDK_SLIST_FIRST(tx_ranges); + + while (txr) { + if (begin >= txr->end || end < txr->begin) { + txr = PMDK_SLIST_NEXT(txr, tx_range); + continue; + } + + LOG(4, "detected PMEM lock in undo log; " + "range %p-%p, lock %p-%p", + txr->begin, txr->end, begin, end); + + /* split the range into new ones */ + if (begin > txr->begin) { + 
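/* keep the part of the old range that lies to the left of the lock (editor's note) */ +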
struct tx_range_data *txrn = Malloc(sizeof(*txrn)); + if (txrn == NULL) + /* we can't do it any other way */ + FATAL("!Malloc"); + + txrn->begin = txr->begin; + txrn->end = begin; + LOG(4, "range split; %p-%p", txrn->begin, txrn->end); + PMDK_SLIST_INSERT_HEAD(tx_ranges, txrn, tx_range); + } + + if (end < txr->end) { + struct tx_range_data *txrn = Malloc(sizeof(*txrn)); + if (txrn == NULL) + /* we can't do it any other way */ + FATAL("!Malloc"); + + txrn->begin = end; + txrn->end = txr->end; + LOG(4, "range split; %p-%p", txrn->begin, txrn->end); + PMDK_SLIST_INSERT_HEAD(tx_ranges, txrn, tx_range); + } + + struct tx_range_data *next = PMDK_SLIST_NEXT(txr, tx_range); + /* remove the original range from the list */ + PMDK_SLIST_REMOVE(tx_ranges, txr, tx_range_data, tx_range); + Free(txr); + + txr = next; + } +} +/* + * tx_restore_range -- (internal) restore a single range from undo log + * + * If the snapshot contains any PMEM locks that are held by the current + * transaction, they won't be overwritten with the saved data to avoid changing + * their state. Those locks will be released in tx_end(). + */ +static void +tx_restore_range(PMEMobjpool *pop, struct tx *tx, struct ulog_entry_buf *range) +{ + COMPILE_ERROR_ON(sizeof(PMEMmutex) != _POBJ_CL_SIZE); + COMPILE_ERROR_ON(sizeof(PMEMrwlock) != _POBJ_CL_SIZE); + COMPILE_ERROR_ON(sizeof(PMEMcond) != _POBJ_CL_SIZE); + + struct txr tx_ranges; + PMDK_SLIST_INIT(&tx_ranges); + + struct tx_range_data *txr; + txr = Malloc(sizeof(*txr)); + if (txr == NULL) { + /* we can't do it any other way */ + FATAL("!Malloc"); + } + + uint64_t range_offset = ulog_entry_offset(&range->base); + + txr->begin = OBJ_OFF_TO_PTR(pop, range_offset); + txr->end = (char *)txr->begin + range->size; + PMDK_SLIST_INSERT_HEAD(&tx_ranges, txr, tx_range); + + struct tx_lock_data *txl; + + /* check if there are any locks within given memory range */ + PMDK_SLIST_FOREACH(txl, &tx->tx_locks, tx_lock) { + void *lock_begin = txl->lock.mutex; + /* all PMEM locks have the same size */ + void *lock_end = (char *)lock_begin + _POBJ_CL_SIZE; + + tx_remove_range(&tx_ranges, lock_begin, lock_end); + } + + ASSERT(!PMDK_SLIST_EMPTY(&tx_ranges)); + + void *dst_ptr = OBJ_OFF_TO_PTR(pop, range_offset); + + while (!PMDK_SLIST_EMPTY(&tx_ranges)) { + txr = PMDK_SLIST_FIRST(&tx_ranges); + PMDK_SLIST_REMOVE_HEAD(&tx_ranges, tx_range); + /* restore partial range data from snapshot */ + ASSERT((char *)txr->begin >= (char *)dst_ptr); + uint8_t *src = &range->data[ + (char *)txr->begin - (char *)dst_ptr]; + ASSERT((char *)txr->end >= (char *)txr->begin); + size_t size = (size_t)((char *)txr->end - (char *)txr->begin); + pmemops_memcpy(&pop->p_ops, txr->begin, src, size, 0); + Free(txr); + } +} + +/* + * tx_undo_entry_apply -- applies modifications of a single ulog entry + */ +static int +tx_undo_entry_apply(struct ulog_entry_base *e, void *arg, + const struct pmem_ops *p_ops) +{ + struct ulog_entry_buf *eb; + + switch (ulog_entry_type(e)) { + case ULOG_OPERATION_BUF_CPY: + eb = (struct ulog_entry_buf *)e; + + tx_restore_range(p_ops->base, get_tx(), eb); + break; + case ULOG_OPERATION_AND: + case ULOG_OPERATION_OR: + case ULOG_OPERATION_SET: + case ULOG_OPERATION_BUF_SET: + default: + ASSERT(0); + } + + return 0; +} + +/* + * tx_abort_set -- (internal) abort all set operations + */ +static void +tx_abort_set(PMEMobjpool *pop, struct lane *lane) +{ + LOG(7, NULL); + + ulog_foreach_entry((struct ulog *)&lane->layout->undo, + tx_undo_entry_apply, NULL, &pop->p_ops); + pmemops_drain(&pop->p_ops); + 
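/* bumping the generation number invalidates all the undo log entries that were just applied (editor's note) */ +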
operation_finish(lane->undo, ULOG_INC_FIRST_GEN_NUM); +} + +/* + * tx_flush_range -- (internal) flush one range + */ +static void +tx_flush_range(void *data, void *ctx) +{ + PMEMobjpool *pop = ctx; + struct tx_range_def *range = data; + if (!(range->flags & POBJ_FLAG_NO_FLUSH)) { + pmemops_xflush(&pop->p_ops, OBJ_OFF_TO_PTR(pop, range->offset), + range->size, PMEMOBJ_F_RELAXED); + } + VALGRIND_REMOVE_FROM_TX(OBJ_OFF_TO_PTR(pop, range->offset), + range->size); +} + +/* + * tx_clean_range -- (internal) clean one range + */ +static void +tx_clean_range(void *data, void *ctx) +{ + PMEMobjpool *pop = ctx; + struct tx_range_def *range = data; + VALGRIND_REMOVE_FROM_TX(OBJ_OFF_TO_PTR(pop, range->offset), + range->size); + VALGRIND_SET_CLEAN(OBJ_OFF_TO_PTR(pop, range->offset), range->size); +} + +/* + * tx_pre_commit -- (internal) do pre-commit operations + */ +static void +tx_pre_commit(struct tx *tx) +{ + LOG(5, NULL); + + /* Flush all regions and destroy the whole tree. */ + ravl_delete_cb(tx->ranges, tx_flush_range, tx->pop); + tx->ranges = NULL; +} + +/* + * tx_abort -- (internal) abort all allocated objects + */ +static void +tx_abort(PMEMobjpool *pop, struct lane *lane) +{ + LOG(7, NULL); + + struct tx *tx = get_tx(); + + tx_abort_set(pop, lane); + + ravl_delete_cb(tx->ranges, tx_clean_range, pop); + palloc_cancel(&pop->heap, + VEC_ARR(&tx->actions), VEC_SIZE(&tx->actions)); + tx->ranges = NULL; +} + +/* + * tx_get_pop -- returns the current transaction's pool handle, NULL if not + * within a transaction. + */ +PMEMobjpool * +tx_get_pop(void) +{ + return get_tx()->pop; +} + +/* + * add_to_tx_and_lock -- (internal) add lock to the transaction and acquire it + */ +static int +add_to_tx_and_lock(struct tx *tx, enum pobj_tx_param type, void *lock) +{ + LOG(15, NULL); + + int retval = 0; + struct tx_lock_data *txl; + /* check if the lock is already on the list */ + PMDK_SLIST_FOREACH(txl, &tx->tx_locks, tx_lock) { + if (memcmp(&txl->lock, &lock, sizeof(lock)) == 0) + return 0; + } + + txl = Malloc(sizeof(*txl)); + if (txl == NULL) + return ENOMEM; + + txl->lock_type = type; + switch (txl->lock_type) { + case TX_PARAM_MUTEX: + txl->lock.mutex = lock; + retval = pmemobj_mutex_lock(tx->pop, + txl->lock.mutex); + if (retval) { + ERR("!pmemobj_mutex_lock"); + goto err; + } + break; + case TX_PARAM_RWLOCK: + txl->lock.rwlock = lock; + retval = pmemobj_rwlock_wrlock(tx->pop, + txl->lock.rwlock); + if (retval) { + ERR("!pmemobj_rwlock_wrlock"); + goto err; + } + break; + default: + ERR("Unrecognized lock type"); + ASSERT(0); + break; + } + + PMDK_SLIST_INSERT_HEAD(&tx->tx_locks, txl, tx_lock); + return 0; + +err: + errno = retval; + Free(txl); + + return retval; +} + +/* + * release_and_free_tx_locks -- (internal) release and remove all locks from the + * transaction + */ +static void +release_and_free_tx_locks(struct tx *tx) +{ + LOG(15, NULL); + + while (!PMDK_SLIST_EMPTY(&tx->tx_locks)) { + struct tx_lock_data *tx_lock = PMDK_SLIST_FIRST(&tx->tx_locks); + PMDK_SLIST_REMOVE_HEAD(&tx->tx_locks, tx_lock); + switch (tx_lock->lock_type) { + case TX_PARAM_MUTEX: + pmemobj_mutex_unlock(tx->pop, + tx_lock->lock.mutex); + break; + case TX_PARAM_RWLOCK: + pmemobj_rwlock_unlock(tx->pop, + tx_lock->lock.rwlock); + break; + default: + ERR("Unrecognized lock type"); + ASSERT(0); + break; + } + Free(tx_lock); + } +} + +/* + * tx_lane_ranges_insert_def -- (internal) allocates and inserts a new range + * definition into the ranges tree + */ +static int +tx_lane_ranges_insert_def(PMEMobjpool *pop, struct tx *tx, + 
const struct tx_range_def *rdef) +{ + LOG(3, "rdef->offset %"PRIu64" rdef->size %"PRIu64, + rdef->offset, rdef->size); + + int ret = ravl_emplace_copy(tx->ranges, rdef); + if (ret && errno == EEXIST) + FATAL("invalid state of ranges tree"); + return ret; +} + +/* + * tx_alloc_common -- (internal) common function for alloc and zalloc + */ +static PMEMoid +tx_alloc_common(struct tx *tx, size_t size, type_num_t type_num, + palloc_constr constructor, struct tx_alloc_args args) +{ + LOG(3, NULL); + + if (size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("requested size too large"); + return obj_tx_fail_null(ENOMEM, args.flags); + } + + PMEMobjpool *pop = tx->pop; + + struct pobj_action *action = tx_action_add(tx); + if (action == NULL) + return obj_tx_fail_null(ENOMEM, args.flags); + + if (palloc_reserve(&pop->heap, size, constructor, &args, type_num, 0, + CLASS_ID_FROM_FLAG(args.flags), + ARENA_ID_FROM_FLAG(args.flags), action) != 0) + goto err_oom; + + /* allocate object to undo log */ + PMEMoid retoid = OID_NULL; + retoid.off = action->heap.offset; + retoid.pool_uuid_lo = pop->uuid_lo; + size = action->heap.usable_size; + + const struct tx_range_def r = {retoid.off, size, args.flags}; + if (tx_lane_ranges_insert_def(pop, tx, &r) != 0) + goto err_oom; + + return retoid; + +err_oom: + tx_action_remove(tx); + ERR("out of memory"); + return obj_tx_fail_null(ENOMEM, args.flags); +} + +/* + * tx_realloc_common -- (internal) common function for tx realloc + */ +static PMEMoid +tx_realloc_common(struct tx *tx, PMEMoid oid, size_t size, uint64_t type_num, + palloc_constr constructor_alloc, + palloc_constr constructor_realloc, + uint64_t flags) +{ + LOG(3, NULL); + + if (size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("requested size too large"); + return obj_tx_fail_null(ENOMEM, flags); + } + + /* if oid is NULL just alloc */ + if (OBJ_OID_IS_NULL(oid)) + return tx_alloc_common(tx, size, (type_num_t)type_num, + constructor_alloc, ALLOC_ARGS(flags)); + + ASSERT(OBJ_OID_IS_VALID(tx->pop, oid)); + + /* if size is 0 just free */ + if (size == 0) { + if (pmemobj_tx_free(oid)) { + ERR("pmemobj_tx_free failed"); + return oid; + } else { + return OID_NULL; + } + } + + /* oid is not NULL and size is not 0 so do realloc by alloc and free */ + void *ptr = OBJ_OFF_TO_PTR(tx->pop, oid.off); + size_t old_size = palloc_usable_size(&tx->pop->heap, oid.off); + + size_t copy_size = old_size < size ? old_size : size; + + PMEMoid new_obj = tx_alloc_common(tx, size, (type_num_t)type_num, + constructor_realloc, COPY_ARGS(flags, ptr, copy_size)); + + if (!OBJ_OID_IS_NULL(new_obj)) { + if (pmemobj_tx_free(oid)) { + ERR("pmemobj_tx_free failed"); + VEC_POP_BACK(&tx->actions); + return OID_NULL; + } + } + + return new_obj; +} + +/* + * tx_construct_user_buffer -- add user buffer to the ulog + */ +static int +tx_construct_user_buffer(struct tx *tx, void *addr, size_t size, + enum pobj_log_type type, int outer_tx, uint64_t flags) +{ + if (tx->pop != pmemobj_pool_by_ptr(addr)) { + ERR("Buffer from a different pool"); + goto err; + } + + /* + * We want to extend a log of a specified type, but if it is + * an outer transaction and the first user buffer we need to + * free all logs except the first at the beginning. + */ + struct operation_context *ctx = type == TX_LOG_TYPE_INTENT ? 
+ tx->lane->external : tx->lane->undo; + + if (outer_tx && !operation_get_any_user_buffer(ctx)) + operation_free_logs(ctx, ULOG_ANY_USER_BUFFER); + + struct user_buffer_def userbuf = {addr, size}; + if (operation_user_buffer_verify_align(ctx, &userbuf) != 0) + goto err; + + if (type == TX_LOG_TYPE_INTENT) { + /* + * Redo log context is not used until transaction commit and + * cannot be used until then, and so the user buffers have to + * be stored and added to the operation at commit time. + * This is because atomic operations can be executed + * independently in the same lane as a running transaction. + */ + if (VEC_PUSH_BACK(&tx->redo_userbufs, userbuf) != 0) + goto err; + tx->redo_userbufs_capacity += + userbuf.size - TX_INTENT_LOG_BUFFER_OVERHEAD; + } else { + operation_add_user_buffer(ctx, &userbuf); + } + + return 0; + +err: + return obj_tx_fail_err(EINVAL, flags); +} + +/* + * pmemobj_tx_begin -- initializes a new transaction + */ +int +pmemobj_tx_begin(PMEMobjpool *pop, jmp_buf env, ...) +{ + LOG(3, NULL); + + int err = 0; + struct tx *tx = get_tx(); + + enum pobj_tx_failure_behavior failure_behavior = POBJ_TX_FAILURE_ABORT; + + if (tx->stage == TX_STAGE_WORK) { + ASSERTne(tx->lane, NULL); + if (tx->pop != pop) { + ERR("nested transaction for different pool"); + return obj_tx_fail_err(EINVAL, 0); + } + + /* inherits this value from the parent transaction */ + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + failure_behavior = txd->failure_behavior; + + VALGRIND_START_TX; + } else if (tx->stage == TX_STAGE_NONE) { + VALGRIND_START_TX; + + lane_hold(pop, &tx->lane); + operation_start(tx->lane->undo); + + VEC_INIT(&tx->actions); + VEC_INIT(&tx->redo_userbufs); + tx->redo_userbufs_capacity = 0; + PMDK_SLIST_INIT(&tx->tx_entries); + PMDK_SLIST_INIT(&tx->tx_locks); + + tx->ranges = ravl_new_sized(tx_range_def_cmp, + sizeof(struct tx_range_def)); + + tx->pop = pop; + + tx->first_snapshot = 1; + + tx->user_data = NULL; + } else { + FATAL("Invalid stage %d to begin new transaction", tx->stage); + } + + struct tx_data *txd = Malloc(sizeof(*txd)); + if (txd == NULL) { + err = errno; + ERR("!Malloc"); + goto err_abort; + } + + tx->last_errnum = 0; + if (env != NULL) + memcpy(txd->env, env, sizeof(jmp_buf)); + else + memset(txd->env, 0, sizeof(jmp_buf)); + + txd->failure_behavior = failure_behavior; + + PMDK_SLIST_INSERT_HEAD(&tx->tx_entries, txd, tx_entry); + + tx->stage = TX_STAGE_WORK; + + /* handle locks */ + va_list argp; + va_start(argp, env); + enum pobj_tx_param param_type; + + while ((param_type = va_arg(argp, enum pobj_tx_param)) != + TX_PARAM_NONE) { + if (param_type == TX_PARAM_CB) { + pmemobj_tx_callback cb = + va_arg(argp, pmemobj_tx_callback); + void *arg = va_arg(argp, void *); + + if (tx->stage_callback && + (tx->stage_callback != cb || + tx->stage_callback_arg != arg)) { + FATAL("transaction callback is already set, " + "old %p new %p old_arg %p new_arg %p", + tx->stage_callback, cb, + tx->stage_callback_arg, arg); + } + + tx->stage_callback = cb; + tx->stage_callback_arg = arg; + } else { + err = add_to_tx_and_lock(tx, param_type, + va_arg(argp, void *)); + if (err) { + va_end(argp); + goto err_abort; + } + } + } + va_end(argp); + + ASSERT(err == 0); + return 0; + +err_abort: + if (tx->stage == TX_STAGE_WORK) + obj_tx_abort(err, 0); + else + tx->stage = TX_STAGE_ONABORT; + return err; +} + +/* + * tx_abort_on_failure_flag -- (internal) return 0 or POBJ_FLAG_TX_NO_ABORT + * based on transaction setting + */ +static uint64_t +tx_abort_on_failure_flag(struct tx *tx) +{ +
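/* the failure behavior is inherited from the innermost open transaction (editor's note) */ +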
struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + if (txd->failure_behavior == POBJ_TX_FAILURE_RETURN) + return POBJ_FLAG_TX_NO_ABORT; + return 0; +} + +/* + * pmemobj_tx_xlock -- get lane from pool and add lock to transaction, + * with no_abort option + */ +int +pmemobj_tx_xlock(enum pobj_tx_param type, void *lockp, uint64_t flags) +{ + struct tx *tx = get_tx(); + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XLOCK_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_XLOCK_VALID_FLAGS); + return obj_tx_fail_err(EINVAL, flags); + } + + int ret = add_to_tx_and_lock(tx, type, lockp); + if (ret) + return obj_tx_fail_err(ret, flags); + return 0; +} + +/* + * pmemobj_tx_lock -- get lane from pool and add lock to transaction. + */ +int +pmemobj_tx_lock(enum pobj_tx_param type, void *lockp) +{ + return pmemobj_tx_xlock(type, lockp, POBJ_XLOCK_NO_ABORT); +} + +/* + * obj_tx_callback -- (internal) executes callback associated with current stage + */ +static void +obj_tx_callback(struct tx *tx) +{ + if (!tx->stage_callback) + return; + + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + /* is this the outermost transaction? */ + if (PMDK_SLIST_NEXT(txd, tx_entry) == NULL) + tx->stage_callback(tx->pop, tx->stage, tx->stage_callback_arg); +} + +/* + * pmemobj_tx_stage -- returns current transaction stage + */ +enum pobj_tx_stage +pmemobj_tx_stage(void) +{ + LOG(3, NULL); + + return get_tx()->stage; +} + +/* + * obj_tx_abort -- aborts current transaction + */ +static void +obj_tx_abort(int errnum, int user) +{ + LOG(3, NULL); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + ASSERT(tx->lane != NULL); + + if (errnum == 0) + errnum = ECANCELED; + + tx->stage = TX_STAGE_ONABORT; + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + if (PMDK_SLIST_NEXT(txd, tx_entry) == NULL) { + /* this is the outermost transaction */ + + /* process the undo log */ + tx_abort(tx->pop, tx->lane); + + lane_release(tx->pop); + tx->lane = NULL; + } + + tx->last_errnum = errnum; + errno = errnum; + if (user) + ERR("!explicit transaction abort"); + + /* ONABORT */ + obj_tx_callback(tx); + + if (!util_is_zeroed(txd->env, sizeof(jmp_buf))) + longjmp(txd->env, errnum); +} + +/* + * pmemobj_tx_abort -- aborts current transaction + * + * Note: this function should not be called from inside of pmemobj. 
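+ *
+ * When the transaction was started with a jmp_buf (e.g. via the TX_BEGIN
+ * macro), this call does not return to the caller -- control jumps to the
+ * nearest TX_ONABORT block (editor's sketch):
+ *
+ *	TX_BEGIN(pop) {
+ *		if (something_is_wrong)
+ *			pmemobj_tx_abort(EINVAL);
+ *	} TX_ONABORT {
+ *		fprintf(stderr, "tx aborted: %s\n", pmemobj_errormsg());
+ *	} TX_END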
+ */ +void +pmemobj_tx_abort(int errnum) +{ + PMEMOBJ_API_START(); + obj_tx_abort(errnum, 1); + PMEMOBJ_API_END(); +} + +/* + * pmemobj_tx_errno -- returns last transaction error code + */ +int +pmemobj_tx_errno(void) +{ + LOG(3, NULL); + + return get_tx()->last_errnum; +} + +static void +tx_post_commit(struct tx *tx) +{ + operation_finish(tx->lane->undo, 0); +} + +/* + * pmemobj_tx_commit -- commits current transaction + */ +void +pmemobj_tx_commit(void) +{ + LOG(3, NULL); + + PMEMOBJ_API_START(); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + /* WORK */ + obj_tx_callback(tx); + + ASSERT(tx->lane != NULL); + + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + if (PMDK_SLIST_NEXT(txd, tx_entry) == NULL) { + /* this is the outermost transaction */ + + PMEMobjpool *pop = tx->pop; + + /* pre-commit phase */ + tx_pre_commit(tx); + + pmemops_drain(&pop->p_ops); + + operation_start(tx->lane->external); + + struct user_buffer_def *userbuf; + VEC_FOREACH_BY_PTR(userbuf, &tx->redo_userbufs) + operation_add_user_buffer(tx->lane->external, userbuf); + + palloc_publish(&pop->heap, VEC_ARR(&tx->actions), + VEC_SIZE(&tx->actions), tx->lane->external); + + tx_post_commit(tx); + + lane_release(pop); + + tx->lane = NULL; + } + + tx->stage = TX_STAGE_ONCOMMIT; + + /* ONCOMMIT */ + obj_tx_callback(tx); + PMEMOBJ_API_END(); +} + +/* + * pmemobj_tx_end -- ends current transaction + */ +int +pmemobj_tx_end(void) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + if (tx->stage == TX_STAGE_WORK) + FATAL("pmemobj_tx_end called without pmemobj_tx_commit"); + + if (tx->pop == NULL) + FATAL("pmemobj_tx_end called without pmemobj_tx_begin"); + + if (tx->stage_callback && + (tx->stage == TX_STAGE_ONCOMMIT || + tx->stage == TX_STAGE_ONABORT)) { + tx->stage = TX_STAGE_FINALLY; + obj_tx_callback(tx); + } + + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + PMDK_SLIST_REMOVE_HEAD(&tx->tx_entries, tx_entry); + + Free(txd); + + VALGRIND_END_TX; + + if (PMDK_SLIST_EMPTY(&tx->tx_entries)) { + ASSERTeq(tx->lane, NULL); + + release_and_free_tx_locks(tx); + tx->pop = NULL; + tx->stage = TX_STAGE_NONE; + VEC_DELETE(&tx->actions); + VEC_DELETE(&tx->redo_userbufs); + + if (tx->stage_callback) { + pmemobj_tx_callback cb = tx->stage_callback; + void *arg = tx->stage_callback_arg; + + tx->stage_callback = NULL; + tx->stage_callback_arg = NULL; + + cb(tx->pop, TX_STAGE_NONE, arg); + } + } else { + /* resume the next transaction */ + tx->stage = TX_STAGE_WORK; + + /* abort called within inner transaction, waterfall the error */ + if (tx->last_errnum) + obj_tx_abort(tx->last_errnum, 0); + } + + return tx->last_errnum; +} + +/* + * pmemobj_tx_process -- processes current transaction stage + */ +void +pmemobj_tx_process(void) +{ + LOG(5, NULL); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + + switch (tx->stage) { + case TX_STAGE_NONE: + break; + case TX_STAGE_WORK: + pmemobj_tx_commit(); + break; + case TX_STAGE_ONABORT: + case TX_STAGE_ONCOMMIT: + tx->stage = TX_STAGE_FINALLY; + obj_tx_callback(tx); + break; + case TX_STAGE_FINALLY: + tx->stage = TX_STAGE_NONE; + break; + default: + ASSERT(0); + } +} + +/* + * vg_verify_initialized -- when executed under Valgrind verifies that + * the buffer has been initialized; explicit check at snapshotting time, + * because Valgrind may find it much later when it's impossible to tell + * for which snapshot it triggered + */ +static void +vg_verify_initialized(PMEMobjpool *pop, const struct tx_range_def *def) +{ +#if VG_MEMCHECK_ENABLED + if 
(!On_memcheck) + return; + + VALGRIND_DO_DISABLE_ERROR_REPORTING; + char *start = (char *)pop + def->offset; + char *uninit = (char *)VALGRIND_CHECK_MEM_IS_DEFINED(start, def->size); + if (uninit) { + VALGRIND_PRINTF( + "Snapshotting uninitialized data in range <%p,%p> (<offset:%lu>, <size:%lu>)\n", + start, start + def->size, def->offset, def->size); + + if (uninit != start) + VALGRIND_PRINTF("Uninitialized data starts at: %p\n", + uninit); + + VALGRIND_DO_ENABLE_ERROR_REPORTING; + VALGRIND_CHECK_MEM_IS_DEFINED(start, def->size); + } else { + VALGRIND_DO_ENABLE_ERROR_REPORTING; + } +#endif +} + +/* + * pmemobj_tx_add_snapshot -- (internal) creates a variably sized snapshot + */ +static int +pmemobj_tx_add_snapshot(struct tx *tx, struct tx_range_def *snapshot) +{ + /* + * Depending on the size of the block, either allocate an + * entire new object or use cache. + */ + void *ptr = OBJ_OFF_TO_PTR(tx->pop, snapshot->offset); + + VALGRIND_ADD_TO_TX(ptr, snapshot->size); + + /* do nothing */ + if (snapshot->flags & POBJ_XADD_NO_SNAPSHOT) + return 0; + + if (!(snapshot->flags & POBJ_XADD_ASSUME_INITIALIZED)) + vg_verify_initialized(tx->pop, snapshot); + + /* + * If we are creating the first snapshot, set up a redo log action to + * increment the counter in the undo log, so that the log becomes + * invalid once the redo log is processed. + */ + if (tx->first_snapshot) { + struct pobj_action *action = tx_action_add(tx); + if (action == NULL) + return -1; + + uint64_t *n = &tx->lane->layout->undo.gen_num; + palloc_set_value(&tx->pop->heap, action, + n, *n + 1); + + tx->first_snapshot = 0; + } + + return operation_add_buffer(tx->lane->undo, ptr, ptr, snapshot->size, + ULOG_OPERATION_BUF_CPY); +} + +/* + * pmemobj_tx_merge_flags -- (internal) common code for merging flags between + * two ranges to ensure resultant behavior is correct + */ +static void +pmemobj_tx_merge_flags(struct tx_range_def *dest, struct tx_range_def *merged) +{ + /* + * POBJ_XADD_NO_FLUSH should only be set in merged range if set in + * both ranges + */ + if ((dest->flags & POBJ_XADD_NO_FLUSH) && + !(merged->flags & POBJ_XADD_NO_FLUSH)) { + dest->flags = dest->flags & (~POBJ_XADD_NO_FLUSH); + } +} + +/* + * pmemobj_tx_add_common -- (internal) common code for adding persistent memory + * into the transaction + */ +static int +pmemobj_tx_add_common(struct tx *tx, struct tx_range_def *args) +{ + LOG(15, NULL); + + if (args->size > PMEMOBJ_MAX_ALLOC_SIZE) { + ERR("snapshot size too large"); + return obj_tx_fail_err(EINVAL, args->flags); + } + + if (args->offset < tx->pop->heap_offset || + (args->offset + args->size) > + (tx->pop->heap_offset + tx->pop->heap_size)) { + ERR("object outside of heap"); + return obj_tx_fail_err(EINVAL, args->flags); + } + + int ret = 0; + + /* + * Search existing ranges backwards starting from the end of the + * snapshot. + */ + struct tx_range_def r = *args; + struct tx_range_def search = {0, 0, 0}; + /* + * If the range is directly adjacent to an existing one, + * they can be merged, so search for less or equal elements. + */ + enum ravl_predicate p = RAVL_PREDICATE_LESS_EQUAL; + struct ravl_node *nprev = NULL; + while (r.size != 0) { + search.offset = r.offset + r.size; + struct ravl_node *n = ravl_find(tx->ranges, &search, p); + /* + * We have to skip searching for LESS_EQUAL because + * the snapshot we would find is the one that was just + * created. + */ + p = RAVL_PREDICATE_LESS; + + struct tx_range_def *f = n ? ravl_data(n) : NULL; + + size_t fend = f == NULL ? 0 : f->offset + f->size; + size_t rend = r.offset + r.size; + if (fend == 0 || fend < r.offset) { + /* + * If no range was found, or the found range is not + * overlapping or adjacent on the left side, we can just + * create the entire r.offset + r.size snapshot. + * + * Snapshot: + * --+- + * Existing ranges: + * ---- (no ranges) + * or +--- (no overlap) + * or ---+ (adjacent on the right side) + */ + if (nprev != NULL) { + /* + * But, if we have an existing adjacent snapshot + * on the right side, we can just extend it to + * include the desired range. + */ + struct tx_range_def *fprev = ravl_data(nprev); + ASSERTeq(rend, fprev->offset); + fprev->offset -= r.size; + fprev->size += r.size; + } else { + /* + * If we don't have anything adjacent, create + * a new range in the tree. + */ + ret = tx_lane_ranges_insert_def(tx->pop, + tx, &r); + if (ret != 0) + break; + } + ret = pmemobj_tx_add_snapshot(tx, &r); + break; + } else if (fend <= rend) { + /* + * If the found range has its end inside of the desired + * snapshot range, we can extend the found range by the + * size leftover on the left side. + * + * Snapshot: + * --+++-- + * Existing ranges: + * +++---- (overlap on left) + * or ---+--- (found snapshot is inside) + * or ---+-++ (inside, and adjacent on the right) + * or +++++-- (desired snapshot is inside) + * + */ + struct tx_range_def snapshot = *args; + snapshot.offset = fend; + /* the side not yet covered by an existing snapshot */ + snapshot.size = rend - fend; + + /* the number of bytes intersecting in both ranges */ + size_t intersection = fend - MAX(f->offset, r.offset); + r.size -= intersection + snapshot.size; + f->size += snapshot.size; + pmemobj_tx_merge_flags(f, args); + + if (snapshot.size != 0) { + ret = pmemobj_tx_add_snapshot(tx, &snapshot); + if (ret != 0) + break; + } + + /* + * If there's a snapshot adjacent on right side, merge + * the two ranges together. + */ + if (nprev != NULL) { + struct tx_range_def *fprev = ravl_data(nprev); + ASSERTeq(rend, fprev->offset); + f->size += fprev->size; + pmemobj_tx_merge_flags(f, fprev); + ravl_remove(tx->ranges, nprev); + } + } else if (fend >= r.offset) { + /* + * If the found range has its end extending beyond the + * desired snapshot. + * + * Snapshot: + * --+++-- + * Existing ranges: + * -----++ (adjacent on the right) + * or ----++- (overlapping on the right) + * or ----+++ (overlapping and adjacent on the right) + * or --+++++ (desired snapshot is inside) + * + * Notice that we cannot create a snapshot based solely + * on this information without risking overwriting an + * existing one. We have to continue iterating, but we + * keep the information about adjacent snapshots in the + * nprev variable.
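+ *
+ * Worked example (editor's illustration): with an existing snapshot
+ * [8,16) and a request for [0,12), this branch trims the request to
+ * [0,8) and remembers [8,16) in nprev; the next iteration finds no
+ * further range on the left, so the first branch above extends the
+ * existing range to cover [0,16) and snapshots only the fresh [0,8)
+ * part.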
+ */ + size_t overlap = rend - MAX(f->offset, r.offset); + r.size -= overlap; + pmemobj_tx_merge_flags(f, args); + } else { + ASSERT(0); + } + + nprev = n; + } + + if (ret != 0) { + ERR("out of memory"); + return obj_tx_fail_err(ENOMEM, args->flags); + } + + return 0; +} + +/* + * pmemobj_tx_add_range_direct -- adds persistent memory range into the + * transaction + */ +int +pmemobj_tx_add_range_direct(const void *ptr, size_t size) +{ + LOG(3, NULL); + + PMEMOBJ_API_START(); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + int ret; + + uint64_t flags = tx_abort_on_failure_flag(tx); + + if (!OBJ_PTR_FROM_POOL(tx->pop, ptr)) { + ERR("object outside of pool"); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + + struct tx_range_def args = { + .offset = (uint64_t)((char *)ptr - (char *)tx->pop), + .size = size, + .flags = flags, + }; + + ret = pmemobj_tx_add_common(tx, &args); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_tx_xadd_range_direct -- adds persistent memory range into the + * transaction + */ +int +pmemobj_tx_xadd_range_direct(const void *ptr, size_t size, uint64_t flags) +{ + LOG(3, NULL); + + PMEMOBJ_API_START(); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + int ret; + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XADD_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, flags + & ~POBJ_XADD_VALID_FLAGS); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + + if (!OBJ_PTR_FROM_POOL(tx->pop, ptr)) { + ERR("object outside of pool"); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + + struct tx_range_def args = { + .offset = (uint64_t)((char *)ptr - (char *)tx->pop), + .size = size, + .flags = flags, + }; + + ret = pmemobj_tx_add_common(tx, &args); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_tx_add_range -- adds persistent memory range into the transaction + */ +int +pmemobj_tx_add_range(PMEMoid oid, uint64_t hoff, size_t size) +{ + LOG(3, NULL); + + PMEMOBJ_API_START(); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + int ret; + + uint64_t flags = tx_abort_on_failure_flag(tx); + + if (oid.pool_uuid_lo != tx->pop->uuid_lo) { + ERR("invalid pool uuid"); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + ASSERT(OBJ_OID_IS_VALID(tx->pop, oid)); + + struct tx_range_def args = { + .offset = oid.off + hoff, + .size = size, + .flags = flags, + }; + + ret = pmemobj_tx_add_common(tx, &args); + + PMEMOBJ_API_END(); + return ret; +} + +/* + * pmemobj_tx_xadd_range -- adds persistent memory range into the transaction + */ +int +pmemobj_tx_xadd_range(PMEMoid oid, uint64_t hoff, size_t size, uint64_t flags) +{ + LOG(3, NULL); + + PMEMOBJ_API_START(); + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + int ret; + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XADD_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, flags + & ~POBJ_XADD_VALID_FLAGS); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + + if (oid.pool_uuid_lo != tx->pop->uuid_lo) { + ERR("invalid pool uuid"); + ret = obj_tx_fail_err(EINVAL, flags); + PMEMOBJ_API_END(); + return ret; + } + ASSERT(OBJ_OID_IS_VALID(tx->pop, oid)); + + struct tx_range_def args = { + .offset = oid.off + hoff, + .size = size, + .flags = flags, + }; + + ret = pmemobj_tx_add_common(tx, &args); + + PMEMOBJ_API_END(); + return ret; +} 
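+
+/*
+ * Usage sketch (illustrative only): the range API above is typically
+ * driven through the public libpmemobj transaction macros. The pool
+ * handle pop, the object id oid and struct entry are assumptions of
+ * this example, not part of this file.
+ *
+ *	TX_BEGIN(pop) {
+ *		struct entry *e = pmemobj_direct(oid);
+ *		pmemobj_tx_add_range(oid, 0, sizeof(struct entry));
+ *		e->value = 42;
+ *	} TX_END
+ *
+ * The snapshot taken by pmemobj_tx_add_range() before the store makes
+ * the update transactional: it is rolled back on abort or crash.
+ */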
+
+/*
+ * pmemobj_tx_alloc -- allocates a new object
+ */
+PMEMoid
+pmemobj_tx_alloc(size_t size, uint64_t type_num)
+{
+ LOG(3, NULL);
+
+ PMEMOBJ_API_START();
+ struct tx *tx = get_tx();
+
+ ASSERT_IN_TX(tx);
+ ASSERT_TX_STAGE_WORK(tx);
+
+ uint64_t flags = tx_abort_on_failure_flag(tx);
+
+ PMEMoid oid;
+ if (size == 0) {
+ ERR("allocation with size 0");
+ oid = obj_tx_fail_null(EINVAL, flags);
+ PMEMOBJ_API_END();
+ return oid;
+ }
+
+ oid = tx_alloc_common(tx, size, (type_num_t)type_num,
+ constructor_tx_alloc, ALLOC_ARGS(flags));
+
+ PMEMOBJ_API_END();
+ return oid;
+}
+
+/*
+ * pmemobj_tx_zalloc -- allocates a new zeroed object
+ */
+PMEMoid
+pmemobj_tx_zalloc(size_t size, uint64_t type_num)
+{
+ LOG(3, NULL);
+ struct tx *tx = get_tx();
+
+ ASSERT_IN_TX(tx);
+ ASSERT_TX_STAGE_WORK(tx);
+
+ uint64_t flags = POBJ_FLAG_ZERO;
+ flags |= tx_abort_on_failure_flag(tx);
+
+ PMEMOBJ_API_START();
+ PMEMoid oid;
+ if (size == 0) {
+ ERR("allocation with size 0");
+ oid = obj_tx_fail_null(EINVAL, flags);
+ PMEMOBJ_API_END();
+ return oid;
+ }
+
+ oid = tx_alloc_common(tx, size, (type_num_t)type_num,
+ constructor_tx_alloc, ALLOC_ARGS(flags));
+
+ PMEMOBJ_API_END();
+ return oid;
+}
+
+/*
+ * pmemobj_tx_xalloc -- allocates a new object
+ */
+PMEMoid
+pmemobj_tx_xalloc(size_t size, uint64_t type_num, uint64_t flags)
+{
+ LOG(3, NULL);
+ struct tx *tx = get_tx();
+
+ ASSERT_IN_TX(tx);
+ ASSERT_TX_STAGE_WORK(tx);
+
+ flags |= tx_abort_on_failure_flag(tx);
+
+ PMEMOBJ_API_START();
+
+ PMEMoid oid;
+ if (size == 0) {
+ ERR("allocation with size 0");
+ oid = obj_tx_fail_null(EINVAL, flags);
+ PMEMOBJ_API_END();
+ return oid;
+ }
+
+ if (flags & ~POBJ_TX_XALLOC_VALID_FLAGS) {
+ ERR("unknown flags 0x%" PRIx64, flags
+ & ~(POBJ_TX_XALLOC_VALID_FLAGS));
+ oid = obj_tx_fail_null(EINVAL, flags);
+ PMEMOBJ_API_END();
+ return oid;
+ }
+
+ oid = tx_alloc_common(tx, size, (type_num_t)type_num,
+ constructor_tx_alloc, ALLOC_ARGS(flags));
+
+ PMEMOBJ_API_END();
+ return oid;
+}
+
+/*
+ * pmemobj_tx_realloc -- resizes an existing object
+ */
+PMEMoid
+pmemobj_tx_realloc(PMEMoid oid, size_t size, uint64_t type_num)
+{
+ LOG(3, NULL);
+ struct tx *tx = get_tx();
+
+ ASSERT_IN_TX(tx);
+ ASSERT_TX_STAGE_WORK(tx);
+
+ PMEMOBJ_API_START();
+ PMEMoid ret = tx_realloc_common(tx, oid, size, type_num,
+ constructor_tx_alloc, constructor_tx_alloc, 0);
+ PMEMOBJ_API_END();
+ return ret;
+}
+
+/*
+ * pmemobj_tx_zrealloc -- resizes an existing object, any new space is zeroed.
+ */
+PMEMoid
+pmemobj_tx_zrealloc(PMEMoid oid, size_t size, uint64_t type_num)
+{
+ LOG(3, NULL);
+ struct tx *tx = get_tx();
+
+ ASSERT_IN_TX(tx);
+ ASSERT_TX_STAGE_WORK(tx);
+
+ PMEMOBJ_API_START();
+ PMEMoid ret = tx_realloc_common(tx, oid, size, type_num,
+ constructor_tx_alloc, constructor_tx_alloc,
+ POBJ_FLAG_ZERO);
+ PMEMOBJ_API_END();
+ return ret;
+}
+
+/*
+ * pmemobj_tx_xstrdup -- allocates a new object with duplicate of the string s.
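+ * An empty string is duplicated as a single zeroed character, so the
+ * returned object is never of size 0.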
+ */ +PMEMoid +pmemobj_tx_xstrdup(const char *s, uint64_t type_num, uint64_t flags) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_TX_XALLOC_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_TX_XALLOC_VALID_FLAGS); + return obj_tx_fail_null(EINVAL, flags); + } + + PMEMOBJ_API_START(); + PMEMoid oid; + if (NULL == s) { + ERR("cannot duplicate NULL string"); + oid = obj_tx_fail_null(EINVAL, flags); + PMEMOBJ_API_END(); + return oid; + } + + size_t len = strlen(s); + + if (len == 0) { + oid = tx_alloc_common(tx, sizeof(char), (type_num_t)type_num, + constructor_tx_alloc, + ALLOC_ARGS(POBJ_XALLOC_ZERO)); + PMEMOBJ_API_END(); + return oid; + } + + size_t size = (len + 1) * sizeof(char); + + oid = tx_alloc_common(tx, size, (type_num_t)type_num, + constructor_tx_alloc, COPY_ARGS(flags, s, size)); + + PMEMOBJ_API_END(); + return oid; +} + +/* + * pmemobj_tx_strdup -- allocates a new object with duplicate of the string s. + */ +PMEMoid +pmemobj_tx_strdup(const char *s, uint64_t type_num) +{ + return pmemobj_tx_xstrdup(s, type_num, 0); +} +/* + * pmemobj_tx_xwcsdup -- allocates a new object with duplicate of the wide + * character string s. + */ +PMEMoid +pmemobj_tx_xwcsdup(const wchar_t *s, uint64_t type_num, uint64_t flags) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_TX_XALLOC_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_TX_XALLOC_VALID_FLAGS); + return obj_tx_fail_null(EINVAL, flags); + } + + PMEMOBJ_API_START(); + PMEMoid oid; + if (NULL == s) { + ERR("cannot duplicate NULL string"); + oid = obj_tx_fail_null(EINVAL, flags); + PMEMOBJ_API_END(); + return oid; + } + + size_t len = wcslen(s); + + if (len == 0) { + oid = tx_alloc_common(tx, sizeof(wchar_t), + (type_num_t)type_num, constructor_tx_alloc, + ALLOC_ARGS(POBJ_XALLOC_ZERO)); + PMEMOBJ_API_END(); + return oid; + } + + size_t size = (len + 1) * sizeof(wchar_t); + + oid = tx_alloc_common(tx, size, (type_num_t)type_num, + constructor_tx_alloc, COPY_ARGS(flags, s, size)); + + PMEMOBJ_API_END(); + return oid; +} + +/* + * pmemobj_tx_wcsdup -- allocates a new object with duplicate of the wide + * character string s. + */ +PMEMoid +pmemobj_tx_wcsdup(const wchar_t *s, uint64_t type_num) +{ + return pmemobj_tx_xwcsdup(s, type_num, 0); +} + +/* + * pmemobj_tx_xfree -- frees an existing object, with no_abort option + */ +int +pmemobj_tx_xfree(PMEMoid oid, uint64_t flags) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XFREE_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_XFREE_VALID_FLAGS); + return obj_tx_fail_err(EINVAL, flags); + } + + if (OBJ_OID_IS_NULL(oid)) + return 0; + + PMEMobjpool *pop = tx->pop; + + if (pop->uuid_lo != oid.pool_uuid_lo) { + ERR("invalid pool uuid"); + return obj_tx_fail_err(EINVAL, flags); + } + + ASSERT(OBJ_OID_IS_VALID(pop, oid)); + + PMEMOBJ_API_START(); + + struct pobj_action *action; + + struct tx_range_def range = {oid.off, 0, 0}; + struct ravl_node *n = ravl_find(tx->ranges, &range, + RAVL_PREDICATE_EQUAL); + + /* + * If attempting to free an object allocated within the same + * transaction, simply cancel the alloc and remove it from the actions. 
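+ * This avoids snapshotting the object entirely: the reservation is
+ * revoked with palloc_cancel() and the range is dropped from the
+ * transaction's action vector.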
+ */ + if (n != NULL) { + VEC_FOREACH_BY_PTR(action, &tx->actions) { + if (action->type == POBJ_ACTION_TYPE_HEAP && + action->heap.offset == oid.off) { + struct tx_range_def *r = ravl_data(n); + void *ptr = OBJ_OFF_TO_PTR(pop, r->offset); + VALGRIND_SET_CLEAN(ptr, r->size); + VALGRIND_REMOVE_FROM_TX(ptr, r->size); + ravl_remove(tx->ranges, n); + palloc_cancel(&pop->heap, action, 1); + VEC_ERASE_BY_PTR(&tx->actions, action); + PMEMOBJ_API_END(); + return 0; + } + } + } + + action = tx_action_add(tx); + if (action == NULL) { + int ret = obj_tx_fail_err(errno, flags); + PMEMOBJ_API_END(); + return ret; + } + + palloc_defer_free(&pop->heap, oid.off, action); + + PMEMOBJ_API_END(); + return 0; +} + +/* + * pmemobj_tx_free -- frees an existing object + */ +int +pmemobj_tx_free(PMEMoid oid) +{ + return pmemobj_tx_xfree(oid, 0); +} + +/* + * pmemobj_tx_xpublish -- publishes actions inside of a transaction, + * with no_abort option + */ +int +pmemobj_tx_xpublish(struct pobj_action *actv, size_t actvcnt, uint64_t flags) +{ + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XPUBLISH_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_XPUBLISH_VALID_FLAGS); + return obj_tx_fail_err(EINVAL, flags); + } + + PMEMOBJ_API_START(); + + if (tx_action_reserve(tx, actvcnt) != 0) { + int ret = obj_tx_fail_err(ENOMEM, flags); + PMEMOBJ_API_END(); + return ret; + } + + for (size_t i = 0; i < actvcnt; ++i) { + VEC_PUSH_BACK(&tx->actions, actv[i]); + } + + PMEMOBJ_API_END(); + return 0; +} + +/* + * pmemobj_tx_publish -- publishes actions inside of a transaction + */ +int +pmemobj_tx_publish(struct pobj_action *actv, size_t actvcnt) +{ + return pmemobj_tx_xpublish(actv, actvcnt, 0); +} + +/* + * pmemobj_tx_xlog_append_buffer -- append user allocated buffer to the ulog + */ +int +pmemobj_tx_xlog_append_buffer(enum pobj_log_type type, void *addr, size_t size, + uint64_t flags) +{ + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + flags |= tx_abort_on_failure_flag(tx); + + if (flags & ~POBJ_XLOG_APPEND_BUFFER_VALID_FLAGS) { + ERR("unknown flags 0x%" PRIx64, + flags & ~POBJ_XLOG_APPEND_BUFFER_VALID_FLAGS); + return obj_tx_fail_err(EINVAL, flags); + } + + PMEMOBJ_API_START(); + int err; + + struct tx_data *td = PMDK_SLIST_FIRST(&tx->tx_entries); + err = tx_construct_user_buffer(tx, addr, size, type, + PMDK_SLIST_NEXT(td, tx_entry) == NULL, flags); + + PMEMOBJ_API_END(); + return err; +} + +/* + * pmemobj_tx_log_append_buffer -- append user allocated buffer to the ulog + */ +int +pmemobj_tx_log_append_buffer(enum pobj_log_type type, void *addr, size_t size) +{ + return pmemobj_tx_xlog_append_buffer(type, addr, size, 0); +} + +/* + * pmemobj_tx_log_auto_alloc -- enable/disable automatic ulog allocation + */ +int +pmemobj_tx_log_auto_alloc(enum pobj_log_type type, int on_off) +{ + struct tx *tx = get_tx(); + ASSERT_TX_STAGE_WORK(tx); + + struct operation_context *ctx = type == TX_LOG_TYPE_INTENT ? 
+ tx->lane->external : tx->lane->undo; + + operation_set_auto_reserve(ctx, on_off); + + return 0; +} + +/* + * pmemobj_tx_log_snapshots_max_size -- calculates the maximum + * size of a buffer which will be able to hold nsizes snapshots, + * each of size from sizes array + */ +size_t +pmemobj_tx_log_snapshots_max_size(size_t *sizes, size_t nsizes) +{ + LOG(3, NULL); + + /* each buffer has its header */ + size_t result = TX_SNAPSHOT_LOG_BUFFER_OVERHEAD; + for (size_t i = 0; i < nsizes; ++i) { + /* check for overflow */ + if (sizes[i] + TX_SNAPSHOT_LOG_ENTRY_OVERHEAD + + TX_SNAPSHOT_LOG_ENTRY_ALIGNMENT < sizes[i]) + goto err_overflow; + /* each entry has its header */ + size_t size = + ALIGN_UP(sizes[i] + TX_SNAPSHOT_LOG_ENTRY_OVERHEAD, + TX_SNAPSHOT_LOG_ENTRY_ALIGNMENT); + /* check for overflow */ + if (result + size < result) + goto err_overflow; + /* sum up */ + result += size; + } + + /* + * if the result is bigger than a single allocation it must be divided + * into multiple allocations where each of them will have its own buffer + * header and entry header + */ + size_t allocs_overhead = (result / PMEMOBJ_MAX_ALLOC_SIZE) * + (TX_SNAPSHOT_LOG_BUFFER_OVERHEAD + TX_SNAPSHOT_LOG_ENTRY_OVERHEAD); + /* check for overflow */ + if (result + allocs_overhead < result) + goto err_overflow; + result += allocs_overhead; + + /* SIZE_MAX is a special value */ + if (result == SIZE_MAX) + goto err_overflow; + + return result; + +err_overflow: + errno = ERANGE; + return SIZE_MAX; +} + +/* + * pmemobj_tx_log_intents_max_size -- calculates the maximum size of a buffer + * which will be able to hold nintents + */ +size_t +pmemobj_tx_log_intents_max_size(size_t nintents) +{ + LOG(3, NULL); + + /* check for overflow */ + if (nintents > SIZE_MAX / TX_INTENT_LOG_ENTRY_OVERHEAD) + goto err_overflow; + /* each entry has its header */ + size_t entries_overhead = nintents * TX_INTENT_LOG_ENTRY_OVERHEAD; + /* check for overflow */ + if (entries_overhead + TX_INTENT_LOG_BUFFER_ALIGNMENT + < entries_overhead) + goto err_overflow; + /* the whole buffer is aligned */ + size_t result = + ALIGN_UP(entries_overhead, TX_INTENT_LOG_BUFFER_ALIGNMENT); + + /* check for overflow */ + if (result + TX_INTENT_LOG_BUFFER_OVERHEAD < result) + goto err_overflow; + /* add a buffer overhead */ + result += TX_INTENT_LOG_BUFFER_OVERHEAD; + + /* + * if the result is bigger than a single allocation it must be divided + * into multiple allocations where each of them will have its own buffer + * header and entry header + */ + size_t allocs_overhead = (result / PMEMOBJ_MAX_ALLOC_SIZE) * + (TX_INTENT_LOG_BUFFER_OVERHEAD + TX_INTENT_LOG_ENTRY_OVERHEAD); + /* check for overflow */ + if (result + allocs_overhead < result) + goto err_overflow; + result += allocs_overhead; + + /* SIZE_MAX is a special value */ + if (result == SIZE_MAX) + goto err_overflow; + + return result; + +err_overflow: + errno = ERANGE; + return SIZE_MAX; +} + +/* + * pmemobj_tx_set_user_data -- sets volatile pointer to the user data for the + * current transaction + */ +void +pmemobj_tx_set_user_data(void *data) +{ + LOG(3, "data %p", data); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + + tx->user_data = data; +} + +/* + * pmemobj_tx_get_user_data -- gets volatile pointer to the user data associated + * with the current transaction + */ +void * +pmemobj_tx_get_user_data(void) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + + return tx->user_data; +} + +/* + * pmemobj_tx_set_failure_behavior -- enables or disables automatic transaction + * 
abort in case of an error + */ +void +pmemobj_tx_set_failure_behavior(enum pobj_tx_failure_behavior behavior) +{ + LOG(3, "behavior %d", behavior); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + txd->failure_behavior = behavior; +} + +/* + * pmemobj_tx_get_failure_behavior -- returns enum specifying failure event + * for the current transaction. + */ +enum pobj_tx_failure_behavior +pmemobj_tx_get_failure_behavior(void) +{ + LOG(3, NULL); + + struct tx *tx = get_tx(); + + ASSERT_IN_TX(tx); + ASSERT_TX_STAGE_WORK(tx); + + struct tx_data *txd = PMDK_SLIST_FIRST(&tx->tx_entries); + + return txd->failure_behavior; +} + +/* + * CTL_READ_HANDLER(size) -- gets the cache size transaction parameter + */ +static int +CTL_READ_HANDLER(size)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + ssize_t *arg_out = arg; + + *arg_out = (ssize_t)pop->tx_params->cache_size; + + return 0; +} + +/* + * CTL_WRITE_HANDLER(size) -- sets the cache size transaction parameter + */ +static int +CTL_WRITE_HANDLER(size)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + ssize_t arg_in = *(int *)arg; + + if (arg_in < 0 || arg_in > (ssize_t)PMEMOBJ_MAX_ALLOC_SIZE) { + errno = EINVAL; + ERR("invalid cache size, must be between 0 and max alloc size"); + return -1; + } + + size_t argu = (size_t)arg_in; + + pop->tx_params->cache_size = argu; + + return 0; +} + +static const struct ctl_argument CTL_ARG(size) = CTL_ARG_LONG_LONG; + +/* + * CTL_READ_HANDLER(threshold) -- gets the cache threshold transaction parameter + */ +static int +CTL_READ_HANDLER(threshold)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + LOG(1, "tx.cache.threshold parameter is deprecated"); + + return 0; +} + +/* + * CTL_WRITE_HANDLER(threshold) -- deprecated + */ +static int +CTL_WRITE_HANDLER(threshold)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + LOG(1, "tx.cache.threshold parameter is deprecated"); + + return 0; +} + +static const struct ctl_argument CTL_ARG(threshold) = CTL_ARG_LONG_LONG; + +static const struct ctl_node CTL_NODE(cache)[] = { + CTL_LEAF_RW(size), + CTL_LEAF_RW(threshold), + + CTL_NODE_END +}; + +/* + * CTL_READ_HANDLER(skip_expensive_checks) -- returns "skip_expensive_checks" + * var from pool ctl + */ +static int +CTL_READ_HANDLER(skip_expensive_checks)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + int *arg_out = arg; + + *arg_out = pop->tx_debug_skip_expensive_checks; + + return 0; +} + +/* + * CTL_WRITE_HANDLER(skip_expensive_checks) -- stores "skip_expensive_checks" + * var in pool ctl + */ +static int +CTL_WRITE_HANDLER(skip_expensive_checks)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + int arg_in = *(int *)arg; + + pop->tx_debug_skip_expensive_checks = arg_in; + return 0; +} + +static const struct ctl_argument CTL_ARG(skip_expensive_checks) = + CTL_ARG_BOOLEAN; + +/* + * CTL_READ_HANDLER(verify_user_buffers) -- returns "ulog_user_buffers.verify" + * variable from the pool + */ +static int +CTL_READ_HANDLER(verify_user_buffers)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + int *arg_out = arg; + + *arg_out = 
pop->ulog_user_buffers.verify; + + return 0; +} + +/* + * CTL_WRITE_HANDLER(verify_user_buffers) -- sets "ulog_user_buffers.verify" + * variable in the pool + */ +static int +CTL_WRITE_HANDLER(verify_user_buffers)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + PMEMobjpool *pop = ctx; + + int arg_in = *(int *)arg; + + pop->ulog_user_buffers.verify = arg_in; + return 0; +} + +static const struct ctl_argument CTL_ARG(verify_user_buffers) = + CTL_ARG_BOOLEAN; + +static const struct ctl_node CTL_NODE(debug)[] = { + CTL_LEAF_RW(skip_expensive_checks), + CTL_LEAF_RW(verify_user_buffers), + + CTL_NODE_END +}; + +/* + * CTL_READ_HANDLER(queue_depth) -- returns the depth of the post commit queue + */ +static int +CTL_READ_HANDLER(queue_depth)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + return 0; +} + +/* + * CTL_WRITE_HANDLER(queue_depth) -- sets the depth of the post commit queue + */ +static int +CTL_WRITE_HANDLER(queue_depth)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + return 0; +} + +static const struct ctl_argument CTL_ARG(queue_depth) = CTL_ARG_INT; + +/* + * CTL_READ_HANDLER(worker) -- launches the post commit worker thread function + */ +static int +CTL_READ_HANDLER(worker)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + return 0; +} + +/* + * CTL_READ_HANDLER(stop) -- stops all post commit workers + */ +static int +CTL_READ_HANDLER(stop)(void *ctx, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) +{ + return 0; +} + +static const struct ctl_node CTL_NODE(post_commit)[] = { + CTL_LEAF_RW(queue_depth), + CTL_LEAF_RO(worker), + CTL_LEAF_RO(stop), + + CTL_NODE_END +}; + +static const struct ctl_node CTL_NODE(tx)[] = { + CTL_CHILD(debug), + CTL_CHILD(cache), + CTL_CHILD(post_commit), + + CTL_NODE_END +}; + +/* + * tx_ctl_register -- registers ctl nodes for "tx" module + */ +void +tx_ctl_register(PMEMobjpool *pop) +{ + CTL_REGISTER_MODULE(pop->ctl, tx); +} diff --git a/src/pmdk/src/libpmemobj/tx.h b/src/pmdk/src/libpmemobj/tx.h new file mode 100644 index 000000000..c6a204e64 --- /dev/null +++ b/src/pmdk/src/libpmemobj/tx.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * tx.h -- internal definitions for transactions + */ + +#ifndef LIBPMEMOBJ_INTERNAL_TX_H +#define LIBPMEMOBJ_INTERNAL_TX_H 1 + +#include +#include "obj.h" +#include "ulog.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define TX_DEFAULT_RANGE_CACHE_SIZE (1 << 15) +#define TX_DEFAULT_RANGE_CACHE_THRESHOLD (1 << 12) + +#define TX_RANGE_MASK (8ULL - 1) +#define TX_RANGE_MASK_LEGACY (32ULL - 1) + +#define TX_ALIGN_SIZE(s, amask) (((s) + (amask)) & ~(amask)) + +#define TX_SNAPSHOT_LOG_ENTRY_ALIGNMENT CACHELINE_SIZE +#define TX_SNAPSHOT_LOG_BUFFER_OVERHEAD sizeof(struct ulog) +#define TX_SNAPSHOT_LOG_ENTRY_OVERHEAD sizeof(struct ulog_entry_buf) + +#define TX_INTENT_LOG_BUFFER_ALIGNMENT CACHELINE_SIZE +#define TX_INTENT_LOG_BUFFER_OVERHEAD sizeof(struct ulog) +#define TX_INTENT_LOG_ENTRY_OVERHEAD sizeof(struct ulog_entry_val) + +struct tx_parameters { + size_t cache_size; +}; + +/* + * Returns the current transaction's pool handle, NULL if not within + * a transaction. 
+ */
+PMEMobjpool *tx_get_pop(void);
+
+void tx_ctl_register(PMEMobjpool *pop);
+
+struct tx_parameters *tx_params_new(void);
+void tx_params_delete(struct tx_parameters *tx_params);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pmdk/src/libpmemobj/ulog.c b/src/pmdk/src/libpmemobj/ulog.c
new file mode 100644
index 000000000..1f03e2c01
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/ulog.c
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * ulog.c -- unified log implementation
+ */
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "libpmemobj.h"
+#include "pmemops.h"
+#include "ulog.h"
+#include "obj.h"
+#include "out.h"
+#include "util.h"
+#include "valgrind_internal.h"
+
+/*
+ * Operation flag at the three most significant bits
+ */
+#define ULOG_OPERATION(op) ((uint64_t)(op))
+#define ULOG_OPERATION_MASK ((uint64_t)(0b111ULL << 61ULL))
+#define ULOG_OPERATION_FROM_OFFSET(off) (ulog_operation_type)\
+ ((off) & ULOG_OPERATION_MASK)
+#define ULOG_OFFSET_MASK (~(ULOG_OPERATION_MASK))
+
+#define CACHELINE_ALIGN(size) ALIGN_UP(size, CACHELINE_SIZE)
+#define IS_CACHELINE_ALIGNED(ptr)\
+ (((uintptr_t)(ptr) & (CACHELINE_SIZE - 1)) == 0)
+
+/*
+ * ulog_by_offset -- calculates the ulog pointer
+ */
+struct ulog *
+ulog_by_offset(size_t offset, const struct pmem_ops *p_ops)
+{
+ if (offset == 0)
+ return NULL;
+
+ size_t aligned_offset = CACHELINE_ALIGN(offset);
+
+ return (struct ulog *)((char *)p_ops->base + aligned_offset);
+}
+
+/*
+ * ulog_next -- retrieves the pointer to the next ulog
+ */
+struct ulog *
+ulog_next(struct ulog *ulog, const struct pmem_ops *p_ops)
+{
+ return ulog_by_offset(ulog->next, p_ops);
+}
+
+/*
+ * ulog_entry_type -- returns the type of entry operation
+ */
+ulog_operation_type
+ulog_entry_type(const struct ulog_entry_base *entry)
+{
+ return ULOG_OPERATION_FROM_OFFSET(entry->offset);
+}
+
+/*
+ * ulog_entry_offset -- returns the offset of a ulog entry
+ */
+uint64_t
+ulog_entry_offset(const struct ulog_entry_base *entry)
+{
+ return entry->offset & ULOG_OFFSET_MASK;
+}
+
+/*
+ * ulog_entry_size -- returns the size of a ulog entry
+ */
+size_t
+ulog_entry_size(const struct ulog_entry_base *entry)
+{
+ struct ulog_entry_buf *eb;
+
+ switch (ulog_entry_type(entry)) {
+ case ULOG_OPERATION_AND:
+ case ULOG_OPERATION_OR:
+ case ULOG_OPERATION_SET:
+ return sizeof(struct ulog_entry_val);
+ case ULOG_OPERATION_BUF_SET:
+ case ULOG_OPERATION_BUF_CPY:
+ eb = (struct ulog_entry_buf *)entry;
+ return CACHELINE_ALIGN(
+ sizeof(struct ulog_entry_buf) + eb->size);
+ default:
+ ASSERT(0);
+ }
+
+ return 0;
+}
+
+/*
+ * ulog_entry_valid -- (internal) checks if a ulog entry is valid
+ * Returns 1 if the entry is valid, otherwise 0 is returned.
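+ * For buffer entries the stored checksum is recomputed over the entry
+ * and the log's generation number; any mismatch invalidates the entry.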
+ */ +static int +ulog_entry_valid(struct ulog *ulog, const struct ulog_entry_base *entry) +{ + if (entry->offset == 0) + return 0; + + size_t size; + struct ulog_entry_buf *b; + + switch (ulog_entry_type(entry)) { + case ULOG_OPERATION_BUF_CPY: + case ULOG_OPERATION_BUF_SET: + size = ulog_entry_size(entry); + b = (struct ulog_entry_buf *)entry; + + uint64_t csum = util_checksum_compute(b, size, + &b->checksum, 0); + csum = util_checksum_seq(&ulog->gen_num, + sizeof(ulog->gen_num), csum); + + if (b->checksum != csum) + return 0; + break; + default: + break; + } + + return 1; +} + +/* + * ulog_construct -- initializes the ulog structure + */ +void +ulog_construct(uint64_t offset, size_t capacity, uint64_t gen_num, + int flush, uint64_t flags, const struct pmem_ops *p_ops) +{ + struct ulog *ulog = ulog_by_offset(offset, p_ops); + ASSERTne(ulog, NULL); + + size_t diff = OBJ_PTR_TO_OFF(p_ops->base, ulog) - offset; + if (diff > 0) + capacity = ALIGN_DOWN(capacity - diff, CACHELINE_SIZE); + + VALGRIND_ADD_TO_TX(ulog, SIZEOF_ULOG(capacity)); + + ulog->capacity = capacity; + ulog->checksum = 0; + ulog->next = 0; + ulog->gen_num = gen_num; + ulog->flags = flags; + memset(ulog->unused, 0, sizeof(ulog->unused)); + + /* we only need to zero out the header of ulog's first entry */ + size_t zeroed_data = CACHELINE_ALIGN(sizeof(struct ulog_entry_base)); + + if (flush) { + pmemops_xflush(p_ops, ulog, sizeof(*ulog), + PMEMOBJ_F_RELAXED); + pmemops_memset(p_ops, ulog->data, 0, zeroed_data, + PMEMOBJ_F_MEM_NONTEMPORAL | + PMEMOBJ_F_MEM_NODRAIN | + PMEMOBJ_F_RELAXED); + } else { + /* + * We want to avoid replicating zeroes for every ulog of every + * lane, to do that, we need to use plain old memset. + */ + memset(ulog->data, 0, zeroed_data); + } + + VALGRIND_REMOVE_FROM_TX(ulog, SIZEOF_ULOG(capacity)); +} + +/* + * ulog_foreach_entry -- iterates over every existing entry in the ulog + */ +int +ulog_foreach_entry(struct ulog *ulog, + ulog_entry_cb cb, void *arg, const struct pmem_ops *ops) +{ + struct ulog_entry_base *e; + int ret = 0; + + for (struct ulog *r = ulog; r != NULL; r = ulog_next(r, ops)) { + for (size_t offset = 0; offset < r->capacity; ) { + e = (struct ulog_entry_base *)(r->data + offset); + if (!ulog_entry_valid(ulog, e)) + return ret; + + if ((ret = cb(e, arg, ops)) != 0) + return ret; + + offset += ulog_entry_size(e); + } + } + + return ret; +} + +/* + * ulog_capacity -- (internal) returns the total capacity of the ulog + */ +size_t +ulog_capacity(struct ulog *ulog, size_t ulog_base_bytes, + const struct pmem_ops *p_ops) +{ + size_t capacity = ulog_base_bytes; + + /* skip the first one, we count it in 'ulog_base_bytes' */ + while ((ulog = ulog_next(ulog, p_ops)) != NULL) { + capacity += ulog->capacity; + } + + return capacity; +} + +/* + * ulog_rebuild_next_vec -- rebuilds the vector of next entries + */ +void +ulog_rebuild_next_vec(struct ulog *ulog, struct ulog_next *next, + const struct pmem_ops *p_ops) +{ + do { + if (ulog->next != 0) + VEC_PUSH_BACK(next, ulog->next); + } while ((ulog = ulog_next(ulog, p_ops)) != NULL); +} + +/* + * ulog_reserve -- reserves new capacity in the ulog + */ +int +ulog_reserve(struct ulog *ulog, + size_t ulog_base_nbytes, size_t gen_num, + int auto_reserve, size_t *new_capacity, + ulog_extend_fn extend, struct ulog_next *next, + const struct pmem_ops *p_ops) +{ + if (!auto_reserve) { + LOG(1, "cannot auto reserve next ulog"); + return -1; + } + + size_t capacity = ulog_base_nbytes; + + uint64_t offset; + VEC_FOREACH(offset, next) { + ulog = 
ulog_by_offset(offset, p_ops); + ASSERTne(ulog, NULL); + + capacity += ulog->capacity; + } + + while (capacity < *new_capacity) { + if (extend(p_ops->base, &ulog->next, gen_num) != 0) + return -1; + VEC_PUSH_BACK(next, ulog->next); + ulog = ulog_next(ulog, p_ops); + ASSERTne(ulog, NULL); + + capacity += ulog->capacity; + } + *new_capacity = capacity; + + return 0; +} + +/* + * ulog_checksum -- (internal) calculates ulog checksum + */ +static int +ulog_checksum(struct ulog *ulog, size_t ulog_base_bytes, int insert) +{ + return util_checksum(ulog, SIZEOF_ULOG(ulog_base_bytes), + &ulog->checksum, insert, 0); +} + +/* + * ulog_store -- stores the transient src ulog in the + * persistent dest ulog + * + * The source and destination ulogs must be cacheline aligned. + */ +void +ulog_store(struct ulog *dest, struct ulog *src, size_t nbytes, + size_t ulog_base_nbytes, size_t ulog_total_capacity, + struct ulog_next *next, const struct pmem_ops *p_ops) +{ + /* + * First, store all entries over the base capacity of the ulog in + * the next logs. + * Because the checksum is only in the first part, we don't have to + * worry about failsafety here. + */ + struct ulog *ulog = dest; + size_t offset = ulog_base_nbytes; + + /* + * Copy at least 8 bytes more than needed. If the user always + * properly uses entry creation functions, this will zero-out the + * potential leftovers of the previous log. Since all we really need + * to zero is the offset, sizeof(struct redo_log_entry_base) is enough. + * If the nbytes is aligned, an entire cacheline needs to be + * additionally zeroed. + * But the checksum must be calculated based solely on actual data. + * If the ulog total capacity is equal to the size of the + * ulog being stored (nbytes == ulog_total_capacity), then there's + * nothing to invalidate because the entire log data will + * be overwritten. + */ + size_t checksum_nbytes = MIN(ulog_base_nbytes, nbytes); + if (nbytes != ulog_total_capacity) + nbytes = CACHELINE_ALIGN(nbytes + + sizeof(struct ulog_entry_base)); + ASSERT(nbytes <= ulog_total_capacity); + + size_t base_nbytes = MIN(ulog_base_nbytes, nbytes); + size_t next_nbytes = nbytes - base_nbytes; + + size_t nlog = 0; + + while (next_nbytes > 0) { + ulog = ulog_by_offset(VEC_ARR(next)[nlog++], p_ops); + ASSERTne(ulog, NULL); + + size_t copy_nbytes = MIN(next_nbytes, ulog->capacity); + next_nbytes -= copy_nbytes; + + ASSERT(IS_CACHELINE_ALIGNED(ulog->data)); + + VALGRIND_ADD_TO_TX(ulog->data, copy_nbytes); + pmemops_memcpy(p_ops, + ulog->data, + src->data + offset, + copy_nbytes, + PMEMOBJ_F_MEM_WC | + PMEMOBJ_F_MEM_NODRAIN | + PMEMOBJ_F_RELAXED); + VALGRIND_REMOVE_FROM_TX(ulog->data, copy_nbytes); + offset += copy_nbytes; + } + + if (nlog != 0) + pmemops_drain(p_ops); + + /* + * Then, calculate the checksum and store the first part of the + * ulog. + */ + size_t old_capacity = src->capacity; + src->capacity = base_nbytes; + src->next = VEC_SIZE(next) == 0 ? 0 : VEC_FRONT(next); + ulog_checksum(src, checksum_nbytes, 1); + + pmemops_memcpy(p_ops, dest, src, + SIZEOF_ULOG(base_nbytes), + PMEMOBJ_F_MEM_WC); + + src->capacity = old_capacity; +} + +/* + * ulog_entry_val_create -- creates a new log value entry in the ulog + * + * This function requires at least a cacheline of space to be available in the + * ulog. 
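+ * The new entry and a zeroed header for the following slot are written
+ * together in a single memcpy, so a stale follow-up entry left over
+ * from a previous log can never appear valid.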
+ */
+struct ulog_entry_val *
+ulog_entry_val_create(struct ulog *ulog, size_t offset, uint64_t *dest,
+ uint64_t value, ulog_operation_type type,
+ const struct pmem_ops *p_ops)
+{
+ struct ulog_entry_val *e =
+ (struct ulog_entry_val *)(ulog->data + offset);
+
+ struct {
+ struct ulog_entry_val v;
+ struct ulog_entry_base zeroes;
+ } data;
+ COMPILE_ERROR_ON(sizeof(data) != sizeof(data.v) + sizeof(data.zeroes));
+
+ /*
+ * Write a little bit more to the buffer so that the next entry that
+ * resides in the log is erased. This will prevent leftovers from
+ * a previous, clobbered, log from being incorrectly applied.
+ */
+ data.zeroes.offset = 0;
+ data.v.base.offset = (uint64_t)(dest) - (uint64_t)p_ops->base;
+ data.v.base.offset |= ULOG_OPERATION(type);
+ data.v.value = value;
+
+ pmemops_memcpy(p_ops, e, &data, sizeof(data),
+ PMEMOBJ_F_MEM_NOFLUSH | PMEMOBJ_F_RELAXED);
+
+ return e;
+}
+
+/*
+ * ulog_clobber_entry -- zeroes out a single log entry header
+ */
+void
+ulog_clobber_entry(const struct ulog_entry_base *e,
+ const struct pmem_ops *p_ops)
+{
+ static const size_t aligned_entry_size =
+ CACHELINE_ALIGN(sizeof(struct ulog_entry_base));
+
+ VALGRIND_ADD_TO_TX(e, aligned_entry_size);
+ pmemops_memset(p_ops, (char *)e, 0, aligned_entry_size,
+ PMEMOBJ_F_MEM_NONTEMPORAL);
+ VALGRIND_REMOVE_FROM_TX(e, aligned_entry_size);
+}
+
+/*
+ * ulog_entry_buf_create -- atomically creates a buffer entry in the log
+ */
+struct ulog_entry_buf *
+ulog_entry_buf_create(struct ulog *ulog, size_t offset, uint64_t gen_num,
+ uint64_t *dest, const void *src, uint64_t size,
+ ulog_operation_type type, const struct pmem_ops *p_ops)
+{
+ struct ulog_entry_buf *e =
+ (struct ulog_entry_buf *)(ulog->data + offset);
+
+ /*
+ * Depending on the size of the source buffer, we might need to perform
+ * up to three separate copies:
+ * 1. The first cacheline, 24b of metadata and 40b of data
+ * If there's still data to be logged:
+ * 2. The entire remainder of the data aligned down to cacheline,
+ * for example, if there's 150b left, this step will copy only
+ * 128b.
+ * Now, we are left with between 0 and 63 bytes. If nonzero:
+ * 3. Create a stack allocated cacheline-sized buffer, fill in the
+ * remainder of the data, and copy the entire cacheline.
+ *
+ * This is done so that we avoid a cache-miss on misaligned writes.
+ */
+
+ struct ulog_entry_buf *b = alloca(CACHELINE_SIZE);
+ b->base.offset = (uint64_t)(dest) - (uint64_t)p_ops->base;
+ b->base.offset |= ULOG_OPERATION(type);
+ b->size = size;
+ b->checksum = 0;
+
+ size_t bdatasize = CACHELINE_SIZE - sizeof(struct ulog_entry_buf);
+ size_t ncopy = MIN(size, bdatasize);
+ memcpy(b->data, src, ncopy);
+ memset(b->data + ncopy, 0, bdatasize - ncopy);
+
+ size_t remaining_size = ncopy > size ?
0 : size - ncopy; + + char *srcof = (char *)src + ncopy; + size_t rcopy = ALIGN_DOWN(remaining_size, CACHELINE_SIZE); + size_t lcopy = remaining_size - rcopy; + + uint8_t last_cacheline[CACHELINE_SIZE]; + if (lcopy != 0) { + memcpy(last_cacheline, srcof + rcopy, lcopy); + memset(last_cacheline + lcopy, 0, CACHELINE_SIZE - lcopy); + } + + if (rcopy != 0) { + void *dest = e->data + ncopy; + ASSERT(IS_CACHELINE_ALIGNED(dest)); + + VALGRIND_ADD_TO_TX(dest, rcopy); + pmemops_memcpy(p_ops, dest, srcof, rcopy, + PMEMOBJ_F_MEM_NODRAIN | PMEMOBJ_F_MEM_NONTEMPORAL); + VALGRIND_REMOVE_FROM_TX(dest, rcopy); + } + + if (lcopy != 0) { + void *dest = e->data + ncopy + rcopy; + ASSERT(IS_CACHELINE_ALIGNED(dest)); + + VALGRIND_ADD_TO_TX(dest, CACHELINE_SIZE); + pmemops_memcpy(p_ops, dest, last_cacheline, CACHELINE_SIZE, + PMEMOBJ_F_MEM_NODRAIN | PMEMOBJ_F_MEM_NONTEMPORAL); + VALGRIND_REMOVE_FROM_TX(dest, CACHELINE_SIZE); + } + + b->checksum = util_checksum_seq(b, CACHELINE_SIZE, 0); + if (rcopy != 0) + b->checksum = util_checksum_seq(srcof, rcopy, b->checksum); + if (lcopy != 0) + b->checksum = util_checksum_seq(last_cacheline, + CACHELINE_SIZE, b->checksum); + + b->checksum = util_checksum_seq(&gen_num, sizeof(gen_num), + b->checksum); + + ASSERT(IS_CACHELINE_ALIGNED(e)); + + VALGRIND_ADD_TO_TX(e, CACHELINE_SIZE); + pmemops_memcpy(p_ops, e, b, CACHELINE_SIZE, + PMEMOBJ_F_MEM_NODRAIN | PMEMOBJ_F_MEM_NONTEMPORAL); + VALGRIND_REMOVE_FROM_TX(e, CACHELINE_SIZE); + + pmemops_drain(p_ops); + + /* + * Allow having uninitialized data in the buffer - this requires marking + * data as defined so that comparing checksums is not reported as an + * error by memcheck. + */ +#if VG_MEMCHECK_ENABLED + if (On_memcheck) { + VALGRIND_MAKE_MEM_DEFINED(e->data, ncopy + rcopy + lcopy); + VALGRIND_MAKE_MEM_DEFINED(&e->checksum, sizeof(e->checksum)); + } +#endif + + ASSERT(ulog_entry_valid(ulog, &e->base)); + + return e; +} + +/* + * ulog_entry_apply -- applies modifications of a single ulog entry + */ +void +ulog_entry_apply(const struct ulog_entry_base *e, int persist, + const struct pmem_ops *p_ops) +{ + ulog_operation_type t = ulog_entry_type(e); + uint64_t offset = ulog_entry_offset(e); + + size_t dst_size = sizeof(uint64_t); + uint64_t *dst = (uint64_t *)((uintptr_t)p_ops->base + offset); + + struct ulog_entry_val *ev; + struct ulog_entry_buf *eb; + + flush_fn f = persist ? 
p_ops->persist : p_ops->flush;
+
+ switch (t) {
+ case ULOG_OPERATION_AND:
+ ev = (struct ulog_entry_val *)e;
+
+ VALGRIND_ADD_TO_TX(dst, dst_size);
+ *dst &= ev->value;
+ f(p_ops->base, dst, sizeof(uint64_t),
+ PMEMOBJ_F_RELAXED);
+ break;
+ case ULOG_OPERATION_OR:
+ ev = (struct ulog_entry_val *)e;
+
+ VALGRIND_ADD_TO_TX(dst, dst_size);
+ *dst |= ev->value;
+ f(p_ops->base, dst, sizeof(uint64_t),
+ PMEMOBJ_F_RELAXED);
+ break;
+ case ULOG_OPERATION_SET:
+ ev = (struct ulog_entry_val *)e;
+
+ VALGRIND_ADD_TO_TX(dst, dst_size);
+ *dst = ev->value;
+ f(p_ops->base, dst, sizeof(uint64_t),
+ PMEMOBJ_F_RELAXED);
+ break;
+ case ULOG_OPERATION_BUF_SET:
+ eb = (struct ulog_entry_buf *)e;
+
+ dst_size = eb->size;
+ VALGRIND_ADD_TO_TX(dst, dst_size);
+ pmemops_memset(p_ops, dst, *eb->data, eb->size,
+ PMEMOBJ_F_RELAXED | PMEMOBJ_F_MEM_NODRAIN);
+ break;
+ case ULOG_OPERATION_BUF_CPY:
+ eb = (struct ulog_entry_buf *)e;
+
+ dst_size = eb->size;
+ VALGRIND_ADD_TO_TX(dst, dst_size);
+ pmemops_memcpy(p_ops, dst, eb->data, eb->size,
+ PMEMOBJ_F_RELAXED | PMEMOBJ_F_MEM_NODRAIN);
+ break;
+ default:
+ ASSERT(0);
+ }
+ VALGRIND_REMOVE_FROM_TX(dst, dst_size);
+}
+
+/*
+ * ulog_process_entry -- (internal) processes a single ulog entry
+ */
+static int
+ulog_process_entry(struct ulog_entry_base *e, void *arg,
+ const struct pmem_ops *p_ops)
+{
+ ulog_entry_apply(e, 0, p_ops);
+
+ return 0;
+}
+
+/*
+ * ulog_inc_gen_num -- (internal) increments gen num in the ulog
+ */
+static void
+ulog_inc_gen_num(struct ulog *ulog, const struct pmem_ops *p_ops)
+{
+ size_t gns = sizeof(ulog->gen_num);
+
+ VALGRIND_ADD_TO_TX(&ulog->gen_num, gns);
+ ulog->gen_num++;
+
+ if (p_ops)
+ pmemops_persist(p_ops, &ulog->gen_num, gns);
+ else
+ VALGRIND_SET_CLEAN(&ulog->gen_num, gns);
+
+ VALGRIND_REMOVE_FROM_TX(&ulog->gen_num, gns);
+}
+
+/*
+ * ulog_free_next -- frees all ulogs starting from the indicated one.
+ * Returns 1 if any ulog has been freed or unpinned, 0 otherwise.
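+ * User-owned buffers are only unpinned (unlinked from the log list);
+ * memory is actually released only for internally allocated ulogs.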
+ */ +int +ulog_free_next(struct ulog *u, const struct pmem_ops *p_ops, + ulog_free_fn ulog_free, ulog_rm_user_buffer_fn user_buff_remove, + uint64_t flags) +{ + int ret = 0; + + if (u == NULL) + return ret; + + VEC(, uint64_t *) ulogs_internal_except_first; + VEC_INIT(&ulogs_internal_except_first); + + /* + * last_internal - pointer to a last found ulog allocated + * internally by the libpmemobj + */ + struct ulog *last_internal = u; + struct ulog *current; + + /* iterate all linked logs and unpin user defined */ + while ((flags & ULOG_ANY_USER_BUFFER) && + last_internal != NULL && last_internal->next != 0) { + current = ulog_by_offset(last_internal->next, p_ops); + /* + * handle case with user logs one after the other + * or mixed user and internal logs + */ + while (current != NULL && + (current->flags & ULOG_USER_OWNED)) { + + last_internal->next = current->next; + pmemops_persist(p_ops, &last_internal->next, + sizeof(last_internal->next)); + + user_buff_remove(p_ops->base, current); + + current = ulog_by_offset(last_internal->next, p_ops); + /* any ulog has been unpinned - set return value to 1 */ + ret = 1; + } + last_internal = ulog_by_offset(last_internal->next, p_ops); + } + + while (u->next != 0) { + if (VEC_PUSH_BACK(&ulogs_internal_except_first, + &u->next) != 0) { + /* this is fine, it will just use more pmem */ + LOG(1, "unable to free transaction logs memory"); + goto out; + } + u = ulog_by_offset(u->next, p_ops); + } + + /* free non-user defined logs */ + uint64_t *ulog_ptr; + VEC_FOREACH_REVERSE(ulog_ptr, &ulogs_internal_except_first) { + ulog_free(p_ops->base, ulog_ptr); + ret = 1; + } + +out: + VEC_DELETE(&ulogs_internal_except_first); + return ret; +} + +/* + * ulog_clobber -- zeroes the metadata of the ulog + */ +void +ulog_clobber(struct ulog *dest, struct ulog_next *next, + const struct pmem_ops *p_ops) +{ + struct ulog empty; + memset(&empty, 0, sizeof(empty)); + + if (next != NULL) + empty.next = VEC_SIZE(next) == 0 ? 0 : VEC_FRONT(next); + else + empty.next = dest->next; + + pmemops_memcpy(p_ops, dest, &empty, sizeof(empty), + PMEMOBJ_F_MEM_WC); +} + +/* + * ulog_clobber_data -- zeroes out 'nbytes' of data in the logs + */ +int +ulog_clobber_data(struct ulog *ulog_first, + size_t nbytes, size_t ulog_base_nbytes, + struct ulog_next *next, ulog_free_fn ulog_free, + ulog_rm_user_buffer_fn user_buff_remove, + const struct pmem_ops *p_ops, unsigned flags) +{ + ASSERTne(ulog_first, NULL); + + /* In case of abort we need to increment counter in the first ulog. */ + if (flags & ULOG_INC_FIRST_GEN_NUM) + ulog_inc_gen_num(ulog_first, p_ops); + + /* + * In the case of abort or commit, we are not going to free all ulogs, + * but rather increment the generation number to be consistent in the + * first two ulogs. + */ + size_t second_offset = VEC_SIZE(next) == 0 ? 0 : *VEC_GET(next, 0); + struct ulog *ulog_second = ulog_by_offset(second_offset, p_ops); + if (ulog_second && !(flags & ULOG_FREE_AFTER_FIRST)) + /* + * We want to keep gen_nums consistent between ulogs. + * If the transaction will commit successfully we'll reuse the + * second buffer (third and next ones will be freed anyway). + * If the application will crash we'll free 2nd ulog on + * recovery, which means we'll never read gen_num of the + * second ulog in case of an ungraceful shutdown. 
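+ * (That is also why p_ops is NULL in the call below: the bump of the
+ * second ulog's gen_num is intentionally not persisted.)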
+ */
+ ulog_inc_gen_num(ulog_second, NULL);
+
+ /* The ULOG_ANY_USER_BUFFER flag indicates that more than one ulog exists */
+ if (flags & ULOG_ANY_USER_BUFFER)
+ ASSERTne(ulog_second, NULL);
+
+ struct ulog *u;
+ /*
+ * only if there was any user buffer does it make sense to check
+ * if the second ulog is allocated by the user
+ */
+ if ((flags & ULOG_ANY_USER_BUFFER) &&
+ (ulog_second->flags & ULOG_USER_OWNED)) {
+ /*
+ * function ulog_free_next() starts from 'next' ulog,
+ * so to start from the second ulog we need to
+ * pass the first one
+ */
+ u = ulog_first;
+ } else {
+ /*
+ * To make sure that transaction logs do not occupy too
+ * much space, all of them, except for the first one,
+ * are freed at the end of the operation. The reasoning for
+ * this is that pmalloc() is a relatively cheap operation for
+ * transactions where many hundreds of kilobytes are being
+ * snapshotted, and so, allocating and freeing the buffer for
+ * each transaction is an acceptable overhead for the average
+ * case.
+ */
+ if (flags & ULOG_FREE_AFTER_FIRST)
+ u = ulog_first;
+ else
+ u = ulog_second;
+ }
+
+ if (u == NULL)
+ return 0;
+
+ return ulog_free_next(u, p_ops, ulog_free, user_buff_remove, flags);
+}
+
+/*
+ * ulog_process -- process ulog entries
+ */
+void
+ulog_process(struct ulog *ulog, ulog_check_offset_fn check,
+ const struct pmem_ops *p_ops)
+{
+ LOG(15, "ulog %p", ulog);
+
+#ifdef DEBUG
+ if (check)
+ ulog_check(ulog, check, p_ops);
+#endif
+
+ ulog_foreach_entry(ulog, ulog_process_entry, NULL, p_ops);
+ pmemops_drain(p_ops);
+}
+
+/*
+ * ulog_base_nbytes -- (internal) counts the actual number of bytes
+ * occupied by the ulog
+ */
+size_t
+ulog_base_nbytes(struct ulog *ulog)
+{
+ size_t offset = 0;
+ struct ulog_entry_base *e;
+
+ for (offset = 0; offset < ulog->capacity; ) {
+ e = (struct ulog_entry_base *)(ulog->data + offset);
+ if (!ulog_entry_valid(ulog, e))
+ break;
+
+ offset += ulog_entry_size(e);
+ }
+
+ return offset;
+}
+
+/*
+ * ulog_recovery_needed -- checks if the log needs recovery
+ */
+int
+ulog_recovery_needed(struct ulog *ulog, int verify_checksum)
+{
+ size_t nbytes = MIN(ulog_base_nbytes(ulog), ulog->capacity);
+ if (nbytes == 0)
+ return 0;
+
+ if (verify_checksum && !ulog_checksum(ulog, nbytes, 0))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * ulog_recover -- recovery of ulog
+ *
+ * ulog_recover shall be preceded by a ulog_check call.
+ */
+void
+ulog_recover(struct ulog *ulog, ulog_check_offset_fn check,
+ const struct pmem_ops *p_ops)
+{
+ LOG(15, "ulog %p", ulog);
+
+ if (ulog_recovery_needed(ulog, 1)) {
+ ulog_process(ulog, check, p_ops);
+ ulog_clobber(ulog, NULL, p_ops);
+ }
+}
+
+/*
+ * ulog_check_entry --
+ * (internal) checks consistency of a single ulog entry
+ */
+static int
+ulog_check_entry(struct ulog_entry_base *e,
+ void *arg, const struct pmem_ops *p_ops)
+{
+ uint64_t offset = ulog_entry_offset(e);
+ ulog_check_offset_fn check = arg;
+
+ if (!check(p_ops->base, offset)) {
+ LOG(15, "ulog %p invalid offset %" PRIu64,
+ e, e->offset);
+ return -1;
+ }
+
+ return offset == 0 ?
-1 : 0;
+}
+
+/*
+ * ulog_check -- (internal) check consistency of ulog entries
+ */
+int
+ulog_check(struct ulog *ulog, ulog_check_offset_fn check,
+ const struct pmem_ops *p_ops)
+{
+ LOG(15, "ulog %p", ulog);
+
+ return ulog_foreach_entry(ulog,
+ ulog_check_entry, check, p_ops);
+}
diff --git a/src/pmdk/src/libpmemobj/ulog.h b/src/pmdk/src/libpmemobj/ulog.h
new file mode 100644
index 000000000..1dc8d308c
--- /dev/null
+++ b/src/pmdk/src/libpmemobj/ulog.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2015-2020, Intel Corporation */
+
+/*
+ * ulog.h -- unified log public interface
+ */
+
+#ifndef LIBPMEMOBJ_ULOG_H
+#define LIBPMEMOBJ_ULOG_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "vec.h"
+#include "pmemops.h"
+
+struct ulog_entry_base {
+ uint64_t offset; /* offset with operation type flag */
+};
+
+/*
+ * ulog_entry_val -- log entry
+ */
+struct ulog_entry_val {
+ struct ulog_entry_base base;
+ uint64_t value; /* value to be applied */
+};
+
+/*
+ * ulog_entry_buf - ulog buffer entry
+ */
+struct ulog_entry_buf {
+ struct ulog_entry_base base; /* offset with operation type flag */
+ uint64_t checksum; /* checksum of the entire log entry */
+ uint64_t size; /* size of the buffer to be modified */
+ uint8_t data[]; /* content to fill in */
+};
+
+#define ULOG_UNUSED ((CACHELINE_SIZE - 40) / 8)
+/*
+ * This structure *must* be located at a cacheline boundary. To achieve this,
+ * the next field is always allocated with extra padding, and then the offset
+ * is additionally aligned.
+ */
+#define ULOG(capacity_bytes) {\
+ /* 64 bytes of metadata */\
+ uint64_t checksum; /* checksum of ulog header and its entries */\
+ uint64_t next; /* offset of ulog extension */\
+ uint64_t capacity; /* capacity of this ulog in bytes */\
+ uint64_t gen_num; /* generation counter */\
+ uint64_t flags; /* ulog flags */\
+ uint64_t unused[ULOG_UNUSED]; /* must be 0 */\
+ uint8_t data[capacity_bytes]; /* N bytes of data */\
+}\
+
+#define SIZEOF_ULOG(base_capacity)\
+(sizeof(struct ulog) + base_capacity)
+
+/*
+ * A ulog buffer allocated by the user must be marked by this flag.
+ * It is important not to free it internally: what the user has
+ * allocated, the user must free.
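+ * See ulog_free_next(): buffers carrying this flag are unlinked from
+ * the log list but never returned to the heap by the library.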
+ */ +#define ULOG_USER_OWNED (1U << 0) + +/* use this for allocations of aligned ulog extensions */ +#define SIZEOF_ALIGNED_ULOG(base_capacity)\ +ALIGN_UP(SIZEOF_ULOG(base_capacity + (2 * CACHELINE_SIZE)), CACHELINE_SIZE) + +struct ulog ULOG(0); + +VEC(ulog_next, uint64_t); + +typedef uint64_t ulog_operation_type; + +#define ULOG_OPERATION_SET (0b000ULL << 61ULL) +#define ULOG_OPERATION_AND (0b001ULL << 61ULL) +#define ULOG_OPERATION_OR (0b010ULL << 61ULL) +#define ULOG_OPERATION_BUF_SET (0b101ULL << 61ULL) +#define ULOG_OPERATION_BUF_CPY (0b110ULL << 61ULL) + +#define ULOG_BIT_OPERATIONS (ULOG_OPERATION_AND | ULOG_OPERATION_OR) + +/* immediately frees all associated ulog structures */ +#define ULOG_FREE_AFTER_FIRST (1U << 0) +/* increments gen_num of the first, preallocated, ulog */ +#define ULOG_INC_FIRST_GEN_NUM (1U << 1) +/* informs if there was any buffer allocated by user in the tx */ +#define ULOG_ANY_USER_BUFFER (1U << 2) + +typedef int (*ulog_check_offset_fn)(void *ctx, uint64_t offset); +typedef int (*ulog_extend_fn)(void *, uint64_t *, uint64_t); +typedef int (*ulog_entry_cb)(struct ulog_entry_base *e, void *arg, + const struct pmem_ops *p_ops); +typedef void (*ulog_free_fn)(void *base, uint64_t *next); +typedef int (*ulog_rm_user_buffer_fn)(void *, void *addr); + +struct ulog *ulog_next(struct ulog *ulog, const struct pmem_ops *p_ops); + +void ulog_construct(uint64_t offset, size_t capacity, uint64_t gen_num, + int flush, uint64_t flags, const struct pmem_ops *p_ops); + +size_t ulog_capacity(struct ulog *ulog, size_t ulog_base_bytes, + const struct pmem_ops *p_ops); +void ulog_rebuild_next_vec(struct ulog *ulog, struct ulog_next *next, + const struct pmem_ops *p_ops); + +int ulog_foreach_entry(struct ulog *ulog, + ulog_entry_cb cb, void *arg, const struct pmem_ops *ops); + +int ulog_reserve(struct ulog *ulog, + size_t ulog_base_nbytes, size_t gen_num, + int auto_reserve, size_t *new_capacity_bytes, + ulog_extend_fn extend, struct ulog_next *next, + const struct pmem_ops *p_ops); + +void ulog_store(struct ulog *dest, + struct ulog *src, size_t nbytes, size_t ulog_base_nbytes, + size_t ulog_total_capacity, + struct ulog_next *next, const struct pmem_ops *p_ops); + +int ulog_free_next(struct ulog *u, const struct pmem_ops *p_ops, + ulog_free_fn ulog_free, ulog_rm_user_buffer_fn user_buff_remove, + uint64_t flags); +void ulog_clobber(struct ulog *dest, struct ulog_next *next, + const struct pmem_ops *p_ops); +int ulog_clobber_data(struct ulog *dest, + size_t nbytes, size_t ulog_base_nbytes, + struct ulog_next *next, ulog_free_fn ulog_free, + ulog_rm_user_buffer_fn user_buff_remove, + const struct pmem_ops *p_ops, unsigned flags); +void ulog_clobber_entry(const struct ulog_entry_base *e, + const struct pmem_ops *p_ops); + +void ulog_process(struct ulog *ulog, ulog_check_offset_fn check, + const struct pmem_ops *p_ops); + +size_t ulog_base_nbytes(struct ulog *ulog); +int ulog_recovery_needed(struct ulog *ulog, int verify_checksum); +struct ulog *ulog_by_offset(size_t offset, const struct pmem_ops *p_ops); + +uint64_t ulog_entry_offset(const struct ulog_entry_base *entry); +ulog_operation_type ulog_entry_type( + const struct ulog_entry_base *entry); + +struct ulog_entry_val *ulog_entry_val_create(struct ulog *ulog, + size_t offset, uint64_t *dest, uint64_t value, + ulog_operation_type type, + const struct pmem_ops *p_ops); + +struct ulog_entry_buf * +ulog_entry_buf_create(struct ulog *ulog, size_t offset, + uint64_t gen_num, uint64_t *dest, const void *src, uint64_t size, + 
ulog_operation_type type, const struct pmem_ops *p_ops); + +void ulog_entry_apply(const struct ulog_entry_base *e, int persist, + const struct pmem_ops *p_ops); + +size_t ulog_entry_size(const struct ulog_entry_base *entry); + +void ulog_recover(struct ulog *ulog, ulog_check_offset_fn check, + const struct pmem_ops *p_ops); +int ulog_check(struct ulog *ulog, ulog_check_offset_fn check, + const struct pmem_ops *p_ops); + +#endif diff --git a/src/pmdk/src/libpmempool/Makefile b/src/pmdk/src/libpmempool/Makefile new file mode 100644 index 000000000..0ae33de4c --- /dev/null +++ b/src/pmdk/src/libpmempool/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# src/libpmempool/Makefile -- Makefile for libpmempool +# + +LIBRARY_NAME = pmempool +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 + +INCS += -I$(TOP)/src/libpmemlog +INCS += -I$(TOP)/src/libpmemblk +INCS += -I$(TOP)/src/libpmemobj +INCS += -I$(TOP)/src/rpmem_common +INCS += -I$(TOP)/src/librpmem +INCS += -I$(TOP)/src/libpmem2 + +vpath %.c ../librpmem + +include ../core/pmemcore.inc +include ../common/pmemcommon.inc + +SOURCE +=\ + libpmempool.c\ + check.c\ + check_bad_blocks.c\ + check_backup.c\ + check_btt_info.c\ + check_btt_map_flog.c\ + check_log.c\ + check_blk.c\ + check_pool_hdr.c\ + check_sds.c\ + check_util.c\ + check_write.c\ + pool.c\ + replica.c\ + feature.c\ + $(RPMEM_COMMON)/rpmem_common.c\ + rpmem_ssh.c\ + rpmem_cmd.c\ + rpmem_util.c\ + sync.c\ + transform.c\ + rm.c + +LIBPMEMBLK_PRIV_FUNCS=btt_info_set btt_arena_datasize btt_flog_size\ + btt_map_size btt_flog_get_valid map_entry_is_initial btt_info_convert2h\ + btt_info_convert2le btt_flog_convert2h btt_flog_convert2le + +include ../Makefile.inc + +CFLAGS += $(LIBNDCTL_CFLAGS) +LIBS += -pthread -lpmem $(LIBDL) $(LIBNDCTL_LIBS) +CFLAGS += -DUSE_LIBDL +CFLAGS += -DUSE_RPMEM + +pmemblk_priv_funcs.o: $(PMEMBLK_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMBLK_PRIV_FUNCS)) \ + $< $@ diff --git a/src/pmdk/src/libpmempool/check.c b/src/pmdk/src/libpmempool/check.c new file mode 100644 index 000000000..62a594bd7 --- /dev/null +++ b/src/pmdk/src/libpmempool/check.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * check.c -- functions performing checks in proper order + */ + +#include + +#include "out.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check.h" +#include "check_util.h" + +#define CHECK_RESULT_IS_STOP(result)\ + ((result) == CHECK_RESULT_ERROR ||\ + (result) == CHECK_RESULT_INTERNAL_ERROR ||\ + ((result) == CHECK_RESULT_CANNOT_REPAIR) ||\ + ((result) == CHECK_RESULT_NOT_CONSISTENT)) + +struct step { + void (*func)(PMEMpoolcheck *); + enum pool_type type; + bool part; +}; + +static const struct step steps[] = { + { + .type = POOL_TYPE_ANY, + .func = check_bad_blocks, + .part = true, + }, + { + .type = POOL_TYPE_ANY, + .func = check_backup, + .part = true, + }, + { + .type = POOL_TYPE_BLK | POOL_TYPE_LOG | + POOL_TYPE_OBJ, + .func = check_sds, + .part = true, + }, + { + .type = POOL_TYPE_BLK | POOL_TYPE_LOG | + POOL_TYPE_OBJ | + POOL_TYPE_UNKNOWN, + .func = check_pool_hdr, + .part = true, + }, + { + .type = POOL_TYPE_BLK | POOL_TYPE_LOG | + POOL_TYPE_OBJ | + POOL_TYPE_UNKNOWN, + .func = check_pool_hdr_uuids, + .part = true, + }, + { + .type = POOL_TYPE_LOG, + .func = check_log, + .part = false, + }, + { + .type = POOL_TYPE_BLK, + .func = check_blk, + .part = false, + }, + { + .type = 
POOL_TYPE_BLK | POOL_TYPE_BTT, + .func = check_btt_info, + .part = false, + }, + { + .type = POOL_TYPE_BLK | POOL_TYPE_BTT, + .func = check_btt_map_flog, + .part = false, + }, + { + .type = POOL_TYPE_BLK | POOL_TYPE_LOG | + POOL_TYPE_BTT, + .func = check_write, + .part = false, + }, + { + .func = NULL, + }, +}; + +/* + * check_init -- initialize check process + */ +int +check_init(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + if (!(ppc->data = check_data_alloc())) + goto error_data_malloc; + if (!(ppc->pool = pool_data_alloc(ppc))) + goto error_pool_malloc; + + return 0; + +error_pool_malloc: + check_data_free(ppc->data); +error_data_malloc: + return -1; +} + +#ifdef _WIN32 +void +convert_status_cache(PMEMpoolcheck *ppc, char *buf, size_t size) +{ + cache_to_utf8(ppc->data, buf, size); +} +#endif + +/* + * status_get -- (internal) get next check_status + * + * The assumed order of check_statuses is: all info messages, error or question. + */ +static struct check_status * +status_get(PMEMpoolcheck *ppc) +{ + struct check_status *status = NULL; + + /* clear cache if exists */ + check_clear_status_cache(ppc->data); + + /* return next info if exists */ + if ((status = check_pop_info(ppc->data))) + return status; + + /* return error if exists */ + if ((status = check_pop_error(ppc->data))) + return status; + + if (ppc->result == CHECK_RESULT_ASK_QUESTIONS) { + /* + * push answer for previous question and return info if answer + * is not valid + */ + if (check_push_answer(ppc)) + if ((status = check_pop_info(ppc->data))) + return status; + + /* if has next question ask it */ + if ((status = check_pop_question(ppc->data))) + return status; + + /* process answers otherwise */ + ppc->result = CHECK_RESULT_PROCESS_ANSWERS; + } else if (CHECK_RESULT_IS_STOP(ppc->result)) + check_end(ppc->data); + + return NULL; +} + +/* + * check_step -- perform single check step + */ +struct check_status * +check_step(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + struct check_status *status = NULL; + /* return if we have information or questions to ask or check ended */ + if ((status = status_get(ppc)) || check_is_end(ppc->data)) + return status; + + /* get next step and check if exists */ + const struct step *step = &steps[check_step_get(ppc->data)]; + if (step->func == NULL) { + check_end(ppc->data); + return status; + } + + /* + * step would be performed if pool type is one of the required pool type + * and it is not part if parts are excluded from current step + */ + if (!(step->type & ppc->pool->params.type) || + (ppc->pool->params.is_part && !step->part)) { + /* skip test */ + check_step_inc(ppc->data); + return NULL; + } + + /* perform step */ + step->func(ppc); + + /* move on to next step if no questions were generated */ + if (ppc->result != CHECK_RESULT_ASK_QUESTIONS) + check_step_inc(ppc->data); + + /* get current status and return */ + return status_get(ppc); +} + +/* + * check_fini -- stop check process + */ +void +check_fini(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + pool_data_free(ppc->pool); + check_data_free(ppc->data); +} + +/* + * check_is_end -- return if check has ended + */ +int +check_is_end(struct check_data *data) +{ + return check_is_end_util(data); +} + +/* + * check_status_get -- extract pmempool_check_status from check_status + */ +struct pmempool_check_status * +check_status_get(struct check_status *status) +{ + return check_status_get_util(status); +} diff --git a/src/pmdk/src/libpmempool/check.h b/src/pmdk/src/libpmempool/check.h new file mode 100644 index 000000000..86b228405 --- /dev/null 
+++ b/src/pmdk/src/libpmempool/check.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * check.h -- internal definitions for logic performing check + */ + +#ifndef CHECK_H +#define CHECK_H + +#ifdef __cplusplus +extern "C" { +#endif + +int check_init(PMEMpoolcheck *ppc); +struct check_status *check_step(PMEMpoolcheck *ppc); +void check_fini(PMEMpoolcheck *ppc); + +int check_is_end(struct check_data *data); +struct pmempool_check_status *check_status_get(struct check_status *status); + +#ifdef _WIN32 +void convert_status_cache(PMEMpoolcheck *ppc, char *buf, size_t size); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmempool/check_backup.c b/src/pmdk/src/libpmempool/check_backup.c new file mode 100644 index 000000000..f339bf04b --- /dev/null +++ b/src/pmdk/src/libpmempool/check_backup.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * check_backup.c -- pre-check backup + */ + +#include +#include +#include + +#include "out.h" +#include "file.h" +#include "os.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum question { + Q_OVERWRITE_EXISTING_FILE, + Q_OVERWRITE_EXISTING_PARTS +}; + +/* + * location_release -- (internal) release poolset structure + */ +static void +location_release(location *loc) +{ + if (loc->set) { + util_poolset_free(loc->set); + loc->set = NULL; + } +} + +/* + * backup_nonpoolset_requirements -- (internal) check backup requirements + */ +static int +backup_nonpoolset_requirements(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, "backup_path %s", ppc->backup_path); + + int exists = util_file_exists(ppc->backup_path); + if (exists < 0) { + return CHECK_ERR(ppc, + "unable to access the backup destination: %s", + ppc->backup_path); + } + + if (!exists) { + errno = 0; + return 0; + } + + if ((size_t)util_file_get_size(ppc->backup_path) != + ppc->pool->set_file->size) { + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, + "destination of the backup does not match the size of the source pool file: %s", + ppc->backup_path); + } + + if (CHECK_WITHOUT_FIXING(ppc)) { + location_release(loc); + loc->step = CHECK_STEP_COMPLETE; + return 0; + } + + CHECK_ASK(ppc, Q_OVERWRITE_EXISTING_FILE, + "destination of the backup already exists.|Do you want to overwrite it?"); + + return check_questions_sequence_validate(ppc); +} + +/* + * backup_nonpoolset_overwrite -- (internal) overwrite pool + */ +static int +backup_nonpoolset_overwrite(PMEMpoolcheck *ppc, location *loc, + uint32_t question, void *context) +{ + LOG(3, NULL); + + ASSERTne(loc, NULL); + + switch (question) { + case Q_OVERWRITE_EXISTING_FILE: + if (pool_copy(ppc->pool, ppc->backup_path, 1 /* overwrite */)) { + location_release(loc); + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "cannot perform backup"); + } + + location_release(loc); + loc->step = CHECK_STEP_COMPLETE; + return 0; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +/* + * backup_nonpoolset_create -- (internal) create backup + */ +static int +backup_nonpoolset_create(PMEMpoolcheck *ppc, location *loc) +{ + CHECK_INFO(ppc, "creating backup file: %s", ppc->backup_path); + + if (pool_copy(ppc->pool, ppc->backup_path, 0)) { + location_release(loc); + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "cannot perform backup"); + } + + location_release(loc); + loc->step = 
CHECK_STEP_COMPLETE; + return 0; +} + +/* + * backup_poolset_requirements -- (internal) check backup requirements + */ +static int +backup_poolset_requirements(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, "backup_path %s", ppc->backup_path); + + if (ppc->pool->set_file->poolset->nreplicas > 1) { + CHECK_INFO(ppc, + "backup of a poolset with multiple replicas is not supported"); + goto err; + } + + if (pool_set_parse(&loc->set, ppc->backup_path)) { + CHECK_INFO_ERRNO(ppc, "invalid poolset backup file: %s", + ppc->backup_path); + goto err; + } + + if (loc->set->nreplicas > 1) { + CHECK_INFO(ppc, + "backup to a poolset with multiple replicas is not supported"); + goto err_poolset; + } + + ASSERTeq(loc->set->nreplicas, 1); + struct pool_replica *srep = ppc->pool->set_file->poolset->replica[0]; + struct pool_replica *drep = loc->set->replica[0]; + if (srep->nparts != drep->nparts) { + CHECK_INFO(ppc, + "number of part files in the backup poolset must match number of part files in the source poolset"); + goto err_poolset; + } + + int overwrite_required = 0; + for (unsigned p = 0; p < srep->nparts; p++) { + int exists = util_file_exists(drep->part[p].path); + if (exists < 0) { + CHECK_INFO(ppc, + "unable to access the part of the destination poolset: %s", + ppc->backup_path); + goto err_poolset; + } + + if (srep->part[p].filesize != drep->part[p].filesize) { + CHECK_INFO(ppc, + "size of the part %u of the backup poolset does not match source poolset", + p); + goto err_poolset; + } + + if (!exists) { + errno = 0; + continue; + } + + overwrite_required = true; + + if ((size_t)util_file_get_size(drep->part[p].path) != + srep->part[p].filesize) { + CHECK_INFO(ppc, + "destination of the backup part does not match size of the source part file: %s", + drep->part[p].path); + goto err_poolset; + } + } + + if (CHECK_WITHOUT_FIXING(ppc)) { + location_release(loc); + loc->step = CHECK_STEP_COMPLETE; + return 0; + } + + if (overwrite_required) { + CHECK_ASK(ppc, Q_OVERWRITE_EXISTING_PARTS, + "part files of the destination poolset of the backup already exist.|" + "Do you want to overwrite them?"); + } + + return check_questions_sequence_validate(ppc); + +err_poolset: + location_release(loc); +err: + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "unable to backup poolset"); +} + +/* + * backup_poolset -- (internal) backup the poolset + */ +static int +backup_poolset(PMEMpoolcheck *ppc, location *loc, int overwrite) +{ + struct pool_replica *srep = ppc->pool->set_file->poolset->replica[0]; + struct pool_replica *drep = loc->set->replica[0]; + for (unsigned p = 0; p < srep->nparts; p++) { + if (overwrite == 0) { + CHECK_INFO(ppc, "creating backup file: %s", + drep->part[p].path); + } + if (pool_set_part_copy(&drep->part[p], &srep->part[p], + overwrite)) { + location_release(loc); + ppc->result = CHECK_RESULT_ERROR; + CHECK_INFO(ppc, "unable to create backup file"); + return CHECK_ERR(ppc, "unable to backup poolset"); + } + } + + return 0; +} + +/* + * backup_poolset_overwrite -- (internal) backup poolset with overwrite + */ +static int +backup_poolset_overwrite(PMEMpoolcheck *ppc, location *loc, + uint32_t question, void *context) +{ + LOG(3, NULL); + + ASSERTne(loc, NULL); + + switch (question) { + case Q_OVERWRITE_EXISTING_PARTS: + if (backup_poolset(ppc, loc, 1 /* overwrite */)) { + location_release(loc); + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "cannot perform backup"); + } + + location_release(loc); + loc->step = CHECK_STEP_COMPLETE; + return 0; + default: + ERR("not 
implemented question id: %u", question); + } + + return 0; +} + +/* + * backup_poolset_create -- (internal) backup poolset + */ +static int +backup_poolset_create(PMEMpoolcheck *ppc, location *loc) +{ + if (backup_poolset(ppc, loc, 0)) { + location_release(loc); + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "cannot perform backup"); + } + + location_release(loc); + loc->step = CHECK_STEP_COMPLETE; + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); + int poolset; +}; + +static const struct step steps[] = { + { + .check = backup_nonpoolset_requirements, + .poolset = false, + }, + { + .fix = backup_nonpoolset_overwrite, + .poolset = false, + }, + { + .check = backup_nonpoolset_create, + .poolset = false + }, + { + .check = backup_poolset_requirements, + .poolset = true, + }, + { + .fix = backup_poolset_overwrite, + .poolset = true, + }, + { + .check = backup_poolset_create, + .poolset = true + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < ARRAY_SIZE(steps)); + + const struct step *step = &steps[loc->step++]; + + if (step->poolset == 0 && ppc->pool->params.is_poolset == 1) + return 0; + + if (!step->fix) + return step->check(ppc, loc); + + if (!check_has_answer(ppc->data)) + return 0; + + if (check_answer_loop(ppc, loc, NULL, 1, step->fix)) + return -1; + + ppc->result = CHECK_RESULT_CONSISTENT; + + return 0; +} + +/* + * check_backup -- perform backup if requested and needed + */ +void +check_backup(PMEMpoolcheck *ppc) +{ + LOG(3, "backup_path %s", ppc->backup_path); + + if (ppc->backup_path == NULL) + return; + + location *loc = check_get_step_data(ppc->data); + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps)) { + if (step_exe(ppc, loc)) + break; + } +} diff --git a/src/pmdk/src/libpmempool/check_bad_blocks.c b/src/pmdk/src/libpmempool/check_bad_blocks.c new file mode 100644 index 000000000..d9d58fbba --- /dev/null +++ b/src/pmdk/src/libpmempool/check_bad_blocks.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * check_bad_blocks.c -- pre-check bad_blocks + */ + +#include +#include +#include + +#include "out.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" +#include "set_badblocks.h" +#include "badblocks.h" + +/* + * check_bad_blocks -- check poolset for bad_blocks + */ +void +check_bad_blocks(PMEMpoolcheck *ppc) +{ + LOG(3, "ppc %p", ppc); + + int ret; + + if (!(ppc->pool->params.features.compat & POOL_FEAT_CHECK_BAD_BLOCKS)) { + /* skipping checking poolset for bad blocks */ + ppc->result = CHECK_RESULT_CONSISTENT; + return; + } + + if (ppc->pool->set_file->poolset) { + ret = badblocks_check_poolset(ppc->pool->set_file->poolset, 0); + } else { + ret = badblocks_check_file(ppc->pool->set_file->fname); + } + + if (ret < 0) { + if (errno == ENOTSUP) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_ERR(ppc, BB_NOT_SUPP); + return; + } + + ppc->result = CHECK_RESULT_ERROR; + CHECK_ERR(ppc, "checking poolset for bad blocks failed -- '%s'", + ppc->path); + return; + } + + if (ret > 0) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_ERR(ppc, + "poolset contains bad blocks, use 'pmempool info --bad-blocks=yes' to print or 'pmempool sync --bad-blocks' to clear them"); + } +} diff --git 
a/src/pmdk/src/libpmempool/check_blk.c b/src/pmdk/src/libpmempool/check_blk.c new file mode 100644 index 000000000..05e38c350 --- /dev/null +++ b/src/pmdk/src/libpmempool/check_blk.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * check_blk.c -- check pmemblk + */ + +#include +#include +#include + +#include "out.h" +#include "btt.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum question { + Q_BLK_BSIZE, +}; + +/* + * blk_get_max_bsize -- (internal) return maximum size of block for given file + * size + */ +static inline uint32_t +blk_get_max_bsize(uint64_t fsize) +{ + LOG(3, NULL); + + if (fsize == 0) + return 0; + + /* default nfree */ + uint32_t nfree = BTT_DEFAULT_NFREE; + + /* number of blocks must be at least 2 * nfree */ + uint32_t internal_nlba = 2 * nfree; + + /* compute arena size from file size without pmemblk structure */ + uint64_t arena_size = fsize - sizeof(struct pmemblk); + if (arena_size > BTT_MAX_ARENA) + arena_size = BTT_MAX_ARENA; + arena_size = btt_arena_datasize(arena_size, nfree); + + /* compute maximum internal LBA size */ + uint64_t internal_lbasize = (arena_size - BTT_ALIGNMENT) / + internal_nlba - BTT_MAP_ENTRY_SIZE; + ASSERT(internal_lbasize <= UINT32_MAX); + + if (internal_lbasize < BTT_MIN_LBA_SIZE) + internal_lbasize = BTT_MIN_LBA_SIZE; + + internal_lbasize = roundup(internal_lbasize, BTT_INTERNAL_LBA_ALIGNMENT) + - BTT_INTERNAL_LBA_ALIGNMENT; + + return (uint32_t)internal_lbasize; +} + +/* + * blk_read -- (internal) read pmemblk header + */ +static int +blk_read(PMEMpoolcheck *ppc) +{ + /* + * Here we want to read the pmemblk header without the pool_hdr as we've + * already done it before. + * + * Take the pointer to fields right after pool_hdr, compute the size and + * offset of remaining fields. 
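+	 *
+	 * Sketch of the resulting read (abbreviating ppc->pool->hdr.blk as
+	 * hdr.blk):
+	 *
+	 *   ptr    = (uint8_t *)&hdr.blk + sizeof(hdr.blk.hdr)
+	 *   size   = sizeof(hdr.blk) - sizeof(hdr.blk.hdr)
+	 *   offset = sizeof(hdr.blk.hdr)
+	 *
+	 * so the in-memory destination and the on-media offset both skip
+	 * exactly the already-verified pool_hdr bytes.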
+ */ + uint8_t *ptr = (uint8_t *)&ppc->pool->hdr.blk; + ptr += sizeof(ppc->pool->hdr.blk.hdr); + + size_t size = sizeof(ppc->pool->hdr.blk) - + sizeof(ppc->pool->hdr.blk.hdr); + uint64_t offset = sizeof(ppc->pool->hdr.blk.hdr); + + if (pool_read(ppc->pool, ptr, size, offset)) { + return CHECK_ERR(ppc, "cannot read pmemblk structure"); + } + + /* endianness conversion */ + ppc->pool->hdr.blk.bsize = le32toh(ppc->pool->hdr.blk.bsize); + + return 0; +} + +/* + * blk_bsize_valid -- (internal) check if block size is valid for given file + * size + */ +static int +blk_bsize_valid(uint32_t bsize, uint64_t fsize) +{ + uint32_t max_bsize = blk_get_max_bsize(fsize); + return (bsize >= max_bsize); +} + +/* + * blk_hdr_check -- (internal) check pmemblk header + */ +static int +blk_hdr_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + CHECK_INFO(ppc, "checking pmemblk header"); + + if (blk_read(ppc)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + + /* check for valid BTT Info arena as we can take bsize from it */ + if (!ppc->pool->bttc.valid) + pool_blk_get_first_valid_arena(ppc->pool, &ppc->pool->bttc); + + if (ppc->pool->bttc.valid) { + const uint32_t btt_bsize = + ppc->pool->bttc.btt_info.external_lbasize; + + if (ppc->pool->hdr.blk.bsize != btt_bsize) { + CHECK_ASK(ppc, Q_BLK_BSIZE, + "invalid pmemblk.bsize.|Do you want to set " + "pmemblk.bsize to %u from BTT Info?", + btt_bsize); + } + } else if (!ppc->pool->bttc.zeroed) { + if (ppc->pool->hdr.blk.bsize < BTT_MIN_LBA_SIZE || + blk_bsize_valid(ppc->pool->hdr.blk.bsize, + ppc->pool->set_file->size)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "invalid pmemblk.bsize"); + } + } + + if (ppc->result == CHECK_RESULT_CONSISTENT || + ppc->result == CHECK_RESULT_REPAIRED) + CHECK_INFO(ppc, "pmemblk header correct"); + + return check_questions_sequence_validate(ppc); +} + +/* + * blk_hdr_fix -- (internal) fix pmemblk header + */ +static int +blk_hdr_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, void *ctx) +{ + LOG(3, NULL); + + uint32_t btt_bsize; + + switch (question) { + case Q_BLK_BSIZE: + /* + * check for valid BTT Info arena as we can take bsize from it + */ + if (!ppc->pool->bttc.valid) + pool_blk_get_first_valid_arena(ppc->pool, + &ppc->pool->bttc); + btt_bsize = ppc->pool->bttc.btt_info.external_lbasize; + CHECK_INFO(ppc, "setting pmemblk.b_size to 0x%x", btt_bsize); + ppc->pool->hdr.blk.bsize = btt_bsize; + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); + enum pool_type type; +}; + +static const struct step steps[] = { + { + .check = blk_hdr_check, + .type = POOL_TYPE_BLK + }, + { + .fix = blk_hdr_fix, + .type = POOL_TYPE_BLK + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static inline int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < ARRAY_SIZE(steps)); + ASSERTeq(ppc->pool->params.type, POOL_TYPE_BLK); + + const struct step *step = &steps[loc->step++]; + + if (!(step->type & ppc->pool->params.type)) + return 0; + + if (!step->fix) + return step->check(ppc, loc); + + if (blk_read(ppc)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + + return check_answer_loop(ppc, loc, NULL, 1, step->fix); +} + +/* + * check_blk -- entry point for pmemblk checks + */ +void +check_blk(PMEMpoolcheck *ppc) +{ + LOG(3, 
NULL); + + location *loc = check_get_step_data(ppc->data); + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps)) { + if (step_exe(ppc, loc)) + break; + } +} diff --git a/src/pmdk/src/libpmempool/check_btt_info.c b/src/pmdk/src/libpmempool/check_btt_info.c new file mode 100644 index 000000000..2ef14e48f --- /dev/null +++ b/src/pmdk/src/libpmempool/check_btt_info.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * check_btt_info.c -- check BTT Info + */ + +#include +#include +#include + +#include "out.h" +#include "util.h" +#include "btt.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum question { + Q_RESTORE_FROM_BACKUP, + Q_REGENERATE, + Q_REGENERATE_CHECKSUM, + Q_RESTORE_FROM_HEADER +}; + +/* + * location_release -- (internal) release check_btt_info_loc allocations + */ +static void +location_release(location *loc) +{ + free(loc->arenap); + loc->arenap = NULL; +} + +/* + * btt_info_checksum -- (internal) check BTT Info checksum + */ +static int +btt_info_checksum(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + loc->arenap = calloc(1, sizeof(struct arena)); + if (!loc->arenap) { + ERR("!calloc"); + ppc->result = CHECK_RESULT_INTERNAL_ERROR; + CHECK_ERR(ppc, "cannot allocate memory for arena"); + goto error_cleanup; + } + + /* read the BTT Info header at well known offset */ + if (pool_read(ppc->pool, &loc->arenap->btt_info, + sizeof(loc->arenap->btt_info), loc->offset)) { + CHECK_ERR(ppc, "arena %u: cannot read BTT Info header", + loc->arenap->id); + ppc->result = CHECK_RESULT_ERROR; + goto error_cleanup; + } + + loc->arenap->id = ppc->pool->narenas; + + /* BLK is consistent even without BTT Layout */ + if (ppc->pool->params.type == POOL_TYPE_BLK) { + int is_zeroed = util_is_zeroed((const void *) + &loc->arenap->btt_info, sizeof(loc->arenap->btt_info)); + if (is_zeroed) { + CHECK_INFO(ppc, "BTT Layout not written"); + loc->step = CHECK_STEP_COMPLETE; + ppc->pool->blk_no_layout = 1; + location_release(loc); + check_end(ppc->data); + return 0; + } + } + + /* check consistency of BTT Info */ + if (pool_btt_info_valid(&loc->arenap->btt_info)) { + CHECK_INFO(ppc, "arena %u: BTT Info header checksum correct", + loc->arenap->id); + loc->valid.btti_header = 1; + } else if (CHECK_IS_NOT(ppc, REPAIR)) { + CHECK_ERR(ppc, "arena %u: BTT Info header checksum incorrect", + loc->arenap->id); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + goto error_cleanup; + } + + return 0; + +error_cleanup: + location_release(loc); + return -1; +} + +/* + * btt_info_backup -- (internal) check BTT Info backup + */ +static int +btt_info_backup(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + /* check BTT Info backup consistency */ + const size_t btt_info_size = sizeof(ppc->pool->bttc.btt_info); + uint64_t btt_info_off = pool_next_arena_offset(ppc->pool, loc->offset) - + btt_info_size; + + if (pool_read(ppc->pool, &ppc->pool->bttc.btt_info, btt_info_size, + btt_info_off)) { + CHECK_ERR(ppc, "arena %u: cannot read BTT Info backup", + loc->arenap->id); + goto error; + } + + /* check whether this BTT Info backup is valid */ + if (pool_btt_info_valid(&ppc->pool->bttc.btt_info)) { + loc->valid.btti_backup = 1; + + /* restore BTT Info from backup */ + if (!loc->valid.btti_header && CHECK_IS(ppc, REPAIR)) + CHECK_ASK(ppc, Q_RESTORE_FROM_BACKUP, "arena %u: BTT " + "Info header checksum incorrect.|Restore BTT " + "Info from backup?", loc->arenap->id); + } + + 
/* + * if the BTT Info backup requires repairs, it will be fixed in further + * steps + */ + + return check_questions_sequence_validate(ppc); + +error: + ppc->result = CHECK_RESULT_ERROR; + location_release(loc); + return -1; +} + +/* + * btt_info_from_backup_fix -- (internal) fix BTT Info using its backup + */ +static int +btt_info_from_backup_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *ctx) +{ + LOG(3, NULL); + + ASSERTeq(ctx, NULL); + ASSERTne(loc, NULL); + + switch (question) { + case Q_RESTORE_FROM_BACKUP: + CHECK_INFO(ppc, + "arena %u: restoring BTT Info header from backup", + loc->arenap->id); + + memcpy(&loc->arenap->btt_info, &ppc->pool->bttc.btt_info, + sizeof(loc->arenap->btt_info)); + loc->valid.btti_header = 1; + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +/* + * btt_info_gen -- (internal) ask whether to try to regenerate BTT Info + */ +static int +btt_info_gen(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (loc->valid.btti_header) + return 0; + + ASSERT(CHECK_IS(ppc, REPAIR)); + + if (!loc->pool_valid.btti_offset) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + return CHECK_ERR(ppc, "cannot find any valid BTT Info"); + } + + CHECK_ASK(ppc, Q_REGENERATE, + "arena %u: BTT Info header checksum incorrect.|Do you want to " + "regenerate BTT Info?", loc->arenap->id); + + return check_questions_sequence_validate(ppc); +} + +/* + * btt_info_gen_fix -- (internal) fix by regenerating BTT Info + */ +static int +btt_info_gen_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *ctx) +{ + LOG(3, NULL); + + ASSERTeq(ctx, NULL); + ASSERTne(loc, NULL); + + switch (question) { + case Q_REGENERATE: + CHECK_INFO(ppc, "arena %u: regenerating BTT Info header", + loc->arenap->id); + + /* + * We do not have a valid BTT Info backup, so we take the first + * valid BTT Info and try to calculate the BTT Info for the + * current arena + */ + uint64_t arena_size = ppc->pool->set_file->size - loc->offset; + if (arena_size > BTT_MAX_ARENA) + arena_size = BTT_MAX_ARENA; + + uint64_t space_left = ppc->pool->set_file->size - loc->offset - + arena_size; + + struct btt_info *bttd = &loc->arenap->btt_info; + struct btt_info *btts = &loc->pool_valid.btti; + + btt_info_convert2h(bttd); + + /* + * all valid BTT Info structures have the same signature, UUID, + * parent UUID, flags, major, minor, external LBA size, internal + * LBA size, nfree, info size and data offset + */ + memcpy(bttd->sig, btts->sig, BTTINFO_SIG_LEN); + memcpy(bttd->uuid, btts->uuid, BTTINFO_UUID_LEN); + memcpy(bttd->parent_uuid, btts->parent_uuid, BTTINFO_UUID_LEN); + memset(bttd->unused, 0, BTTINFO_UNUSED_LEN); + bttd->flags = btts->flags; + bttd->major = btts->major; + bttd->minor = btts->minor; + + /* other parameters can be calculated */ + if (btt_info_set(bttd, btts->external_lbasize, btts->nfree, + arena_size, space_left)) { + CHECK_ERR(ppc, "cannot restore BTT Info"); + return -1; + } + + ASSERTeq(bttd->external_lbasize, btts->external_lbasize); + ASSERTeq(bttd->internal_lbasize, btts->internal_lbasize); + ASSERTeq(bttd->nfree, btts->nfree); + ASSERTeq(bttd->infosize, btts->infosize); + ASSERTeq(bttd->dataoff, btts->dataoff); + return 0; + + default: + ERR("not implemented question id: %u", question); + return -1; + } +} + +/* + * btt_info_checksum_retry -- (internal) check BTT Info checksum + */ +static int +btt_info_checksum_retry(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (loc->valid.btti_header) + return 0; +
btt_info_convert2le(&loc->arenap->btt_info); + + /* check consistency of BTT Info */ + if (pool_btt_info_valid(&loc->arenap->btt_info)) { + CHECK_INFO(ppc, "arena %u: BTT Info header checksum correct", + loc->arenap->id); + loc->valid.btti_header = 1; + return 0; + } + + if (CHECK_IS_NOT(ppc, ADVANCED)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_INFO(ppc, REQUIRE_ADVANCED); + CHECK_ERR(ppc, "arena %u: BTT Info header checksum incorrect", + loc->arenap->id); + check_end(ppc->data); + goto error_cleanup; + } + + CHECK_ASK(ppc, Q_REGENERATE_CHECKSUM, + "arena %u: BTT Info header checksum incorrect.|Do you want to " + "regenerate BTT Info checksum?", loc->arenap->id); + + return check_questions_sequence_validate(ppc); + +error_cleanup: + location_release(loc); + return -1; +} + +/* + * btt_info_checksum_fix -- (internal) fix by regenerating BTT Info checksum + */ +static int +btt_info_checksum_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *ctx) +{ + LOG(3, NULL); + + ASSERTeq(ctx, NULL); + ASSERTne(loc, NULL); + + switch (question) { + case Q_REGENERATE_CHECKSUM: + util_checksum(&loc->arenap->btt_info, sizeof(struct btt_info), + &loc->arenap->btt_info.checksum, 1, 0); + loc->valid.btti_header = 1; + break; + + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +/* + * btt_info_backup_checksum -- (internal) check BTT Info backup checksum + */ +static int +btt_info_backup_checksum(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + ASSERT(loc->valid.btti_header); + + if (loc->valid.btti_backup) + return 0; + + /* BTT Info backup is not valid so it must be fixed */ + if (CHECK_IS_NOT(ppc, REPAIR)) { + CHECK_ERR(ppc, + "arena %u: BTT Info backup checksum incorrect", + loc->arenap->id); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + goto error_cleanup; + } + + CHECK_ASK(ppc, Q_RESTORE_FROM_HEADER, + "arena %u: BTT Info backup checksum incorrect.|Do you want to " + "restore it from BTT Info header?", loc->arenap->id); + + return check_questions_sequence_validate(ppc); + +error_cleanup: + location_release(loc); + return -1; +} + +/* + * btt_info_backup_fix -- (internal) prepare restore BTT Info backup from header + */ +static int +btt_info_backup_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *ctx) +{ + LOG(3, NULL); + + ASSERTeq(ctx, NULL); + ASSERTne(loc, NULL); + + switch (question) { + case Q_RESTORE_FROM_HEADER: + /* BTT Info backup would be restored in check_write step */ + CHECK_INFO(ppc, + "arena %u: restoring BTT Info backup from header", + loc->arenap->id); + break; + + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); +}; + +static const struct step steps[] = { + { + .check = btt_info_checksum, + }, + { + .check = btt_info_backup, + }, + { + .fix = btt_info_from_backup_fix, + }, + { + .check = btt_info_gen, + }, + { + .fix = btt_info_gen_fix, + }, + { + .check = btt_info_checksum_retry, + }, + { + .fix = btt_info_checksum_fix, + }, + { + .check = btt_info_backup_checksum, + }, + { + .fix = btt_info_backup_fix, + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static inline int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < ARRAY_SIZE(steps)); + + const struct step *step = &steps[loc->step++]; + + if (!step->fix) + return 
step->check(ppc, loc); + + if (!check_answer_loop(ppc, loc, NULL, 1, step->fix)) + return 0; + + if (check_has_error(ppc->data)) + location_release(loc); + + return -1; +} + +/* + * check_btt_info -- entry point for btt info check + */ +void +check_btt_info(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + location *loc = check_get_step_data(ppc->data); + uint64_t nextoff = 0; + + /* initialize check */ + if (!loc->offset) { + CHECK_INFO(ppc, "checking BTT Info headers"); + loc->offset = sizeof(struct pool_hdr); + if (ppc->pool->params.type == POOL_TYPE_BLK) + loc->offset += ALIGN_UP(sizeof(struct pmemblk) - + sizeof(struct pool_hdr), + BLK_FORMAT_DATA_ALIGN); + + loc->pool_valid.btti_offset = pool_get_first_valid_btt( + ppc->pool, &loc->pool_valid.btti, loc->offset, NULL); + + /* Without valid BTT Info we can not proceed */ + if (!loc->pool_valid.btti_offset) { + if (ppc->pool->params.type == POOL_TYPE_BTT) { + CHECK_ERR(ppc, + "can not find any valid BTT Info"); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + return; + } + } else + btt_info_convert2h(&loc->pool_valid.btti); + } + + do { + /* jump to next offset */ + if (ppc->result != CHECK_RESULT_PROCESS_ANSWERS) { + loc->offset += nextoff; + loc->step = 0; + loc->valid.btti_header = 0; + loc->valid.btti_backup = 0; + } + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps)) { + if (step_exe(ppc, loc) || ppc->pool->blk_no_layout == 1) + return; + } + + /* save offset and insert BTT to cache for next steps */ + loc->arenap->offset = loc->offset; + loc->arenap->valid = true; + check_insert_arena(ppc, loc->arenap); + nextoff = le64toh(loc->arenap->btt_info.nextoff); + + } while (nextoff > 0); +} diff --git a/src/pmdk/src/libpmempool/check_btt_map_flog.c b/src/pmdk/src/libpmempool/check_btt_map_flog.c new file mode 100644 index 000000000..4d782eec2 --- /dev/null +++ b/src/pmdk/src/libpmempool/check_btt_map_flog.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * check_btt_map_flog.c -- check BTT Map and Flog + */ + +#include +#include +#include + +#include "out.h" +#include "btt.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum questions { + Q_REPAIR_MAP, + Q_REPAIR_FLOG, +}; + +/* + * flog_read -- (internal) read and convert flog from file + */ +static int +flog_read(PMEMpoolcheck *ppc, struct arena *arenap) +{ + uint64_t flogoff = arenap->offset + arenap->btt_info.flogoff; + arenap->flogsize = btt_flog_size(arenap->btt_info.nfree); + + arenap->flog = malloc(arenap->flogsize); + if (!arenap->flog) { + ERR("!malloc"); + goto error_malloc; + } + + if (pool_read(ppc->pool, arenap->flog, arenap->flogsize, flogoff)) + goto error_read; + + uint8_t *ptr = arenap->flog; + uint32_t i; + for (i = 0; i < arenap->btt_info.nfree; i++) { + struct btt_flog *flog = (struct btt_flog *)ptr; + btt_flog_convert2h(&flog[0]); + btt_flog_convert2h(&flog[1]); + + ptr += BTT_FLOG_PAIR_ALIGN; + } + + return 0; + +error_read: + free(arenap->flog); + arenap->flog = NULL; + +error_malloc: + return -1; +} + +/* + * map_read -- (internal) read and convert map from file + */ +static int +map_read(PMEMpoolcheck *ppc, struct arena *arenap) +{ + uint64_t mapoff = arenap->offset + arenap->btt_info.mapoff; + arenap->mapsize = btt_map_size(arenap->btt_info.external_nlba); + + ASSERT(arenap->mapsize != 0); + arenap->map = malloc(arenap->mapsize); + if (!arenap->map) { + ERR("!malloc"); + goto error_malloc; + } + + if 
(pool_read(ppc->pool, arenap->map, arenap->mapsize, mapoff)) { + goto error_read; + } + + uint32_t i; + for (i = 0; i < arenap->btt_info.external_nlba; i++) + arenap->map[i] = le32toh(arenap->map[i]); + + return 0; + +error_read: + free(arenap->map); + arenap->map = NULL; +error_malloc: + return -1; +} + +/* + * list_item -- item for simple list + */ +struct list_item { + PMDK_LIST_ENTRY(list_item) next; + uint32_t val; +}; + +/* + * list -- simple list for storing numbers + */ +struct list { + PMDK_LIST_HEAD(listhead, list_item) head; + uint32_t count; +}; + +/* + * list_alloc -- (internal) allocate an empty list + */ +static struct list * +list_alloc(void) +{ + struct list *list = malloc(sizeof(struct list)); + if (!list) { + ERR("!malloc"); + return NULL; + } + PMDK_LIST_INIT(&list->head); + list->count = 0; + return list; +} + +/* + * list_push -- (internal) insert a new element into the list + */ +static struct list_item * +list_push(struct list *list, uint32_t val) +{ + struct list_item *item = malloc(sizeof(*item)); + if (!item) { + ERR("!malloc"); + return NULL; + } + item->val = val; + list->count++; + PMDK_LIST_INSERT_HEAD(&list->head, item, next); + return item; +} + +/* + * list_pop -- (internal) pop an element from the list head + */ +static int +list_pop(struct list *list, uint32_t *valp) +{ + if (!PMDK_LIST_EMPTY(&list->head)) { + struct list_item *i = PMDK_LIST_FIRST(&list->head); + PMDK_LIST_REMOVE(i, next); + if (valp) + *valp = i->val; + free(i); + + list->count--; + + return 1; + } + return 0; +} + +/* + * list_free -- (internal) free the list + */ +static void +list_free(struct list *list) +{ + while (list_pop(list, NULL)) + ; + free(list); +} + +/* + * cleanup -- (internal) release resources used by the map and flog check + */ +static int +cleanup(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (loc->list_unmap) + list_free(loc->list_unmap); + if (loc->list_flog_inval) + list_free(loc->list_flog_inval); + if (loc->list_inval) + list_free(loc->list_inval); + if (loc->fbitmap) + free(loc->fbitmap); + if (loc->bitmap) + free(loc->bitmap); + if (loc->dup_bitmap) + free(loc->dup_bitmap); + + return 0; +} + +/* + * init -- (internal) initialize map and flog check + */ +static int +init(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + struct arena *arenap = loc->arenap; + + /* read flog and map entries */ + if (flog_read(ppc, arenap)) { + CHECK_ERR(ppc, "arena %u: cannot read BTT Flog", arenap->id); + goto error; + } + + if (map_read(ppc, arenap)) { + CHECK_ERR(ppc, "arena %u: cannot read BTT Map", arenap->id); + goto error; + } + + /* create bitmaps for checking duplicated blocks */ + uint32_t bitmapsize = howmany(arenap->btt_info.internal_nlba, 8); + loc->bitmap = calloc(bitmapsize, 1); + if (!loc->bitmap) { + ERR("!calloc"); + CHECK_ERR(ppc, "arena %u: cannot allocate memory for blocks " + "bitmap", arenap->id); + goto error; + } + + loc->dup_bitmap = calloc(bitmapsize, 1); + if (!loc->dup_bitmap) { + ERR("!calloc"); + CHECK_ERR(ppc, "arena %u: cannot allocate memory for " + "duplicated blocks bitmap", arenap->id); + goto error; + } + + loc->fbitmap = calloc(bitmapsize, 1); + if (!loc->fbitmap) { + ERR("!calloc"); + CHECK_ERR(ppc, "arena %u: cannot allocate memory for BTT Flog " + "bitmap", arenap->id); + goto error; + } + + /* list of invalid map entries */ + loc->list_inval = list_alloc(); + if (!loc->list_inval) { + CHECK_ERR(ppc, + "arena %u: cannot allocate memory for invalid BTT Map " + "entries list", arenap->id); + goto error; + } + + /* list of invalid flog
entries */ + loc->list_flog_inval = list_alloc(); + if (!loc->list_flog_inval) { + CHECK_ERR(ppc, + "arena %u: cannot allocate memory for invalid BTT Flog " + "entries list", arenap->id); + goto error; + } + + /* list of unmapped blocks */ + loc->list_unmap = list_alloc(); + if (!loc->list_unmap) { + CHECK_ERR(ppc, + "arena %u: cannot allocate memory for unmapped blocks " + "list", arenap->id); + goto error; + } + + return 0; + +error: + ppc->result = CHECK_RESULT_ERROR; + cleanup(ppc, loc); + return -1; +} + +/* + * map_get_postmap_lba -- extract postmap LBA from map entry + */ +static inline uint32_t +map_get_postmap_lba(struct arena *arenap, uint32_t i) +{ + uint32_t entry = arenap->map[i]; + + /* if map record is in initial state (flags == 0b00) */ + if (map_entry_is_initial(entry)) + return i; + + /* read postmap LBA otherwise */ + return entry & BTT_MAP_ENTRY_LBA_MASK; +} + +/* + * map_entry_check -- (internal) check single map entry + */ +static int +map_entry_check(PMEMpoolcheck *ppc, location *loc, uint32_t i) +{ + struct arena *arenap = loc->arenap; + uint32_t lba = map_get_postmap_lba(arenap, i); + + /* add duplicated and invalid entries to the list */ + if (lba < arenap->btt_info.internal_nlba) { + if (util_isset(loc->bitmap, lba)) { + CHECK_INFO(ppc, "arena %u: BTT Map entry %u duplicated " + "at %u", arenap->id, lba, i); + util_setbit(loc->dup_bitmap, lba); + if (!list_push(loc->list_inval, i)) + return -1; + } else + util_setbit(loc->bitmap, lba); + } else { + CHECK_INFO(ppc, "arena %u: invalid BTT Map entry at %u", + arenap->id, i); + if (!list_push(loc->list_inval, i)) + return -1; + } + + return 0; +} + +/* + * flog_entry_check -- (internal) check single flog entry + */ +static int +flog_entry_check(PMEMpoolcheck *ppc, location *loc, uint32_t i, + uint8_t **ptr) +{ + struct arena *arenap = loc->arenap; + + /* flog entry consists of two btt_flog structures */ + struct btt_flog *flog = (struct btt_flog *)*ptr; + + int next; + struct btt_flog *flog_cur = btt_flog_get_valid(flog, &next); + + /* insert invalid and duplicated indices into the list */ + if (!flog_cur) { + CHECK_INFO(ppc, "arena %u: invalid BTT Flog entry at %u", + arenap->id, i); + if (!list_push(loc->list_flog_inval, i)) + return -1; + + goto next; + } + + uint32_t entry = flog_cur->old_map & BTT_MAP_ENTRY_LBA_MASK; + uint32_t new_entry = flog_cur->new_map & BTT_MAP_ENTRY_LBA_MASK; + + /* + * Check if lba is in the external_nlba range, and check if both old_map + * and new_map are in the internal_nlba range. + */ + if (flog_cur->lba >= arenap->btt_info.external_nlba || + entry >= arenap->btt_info.internal_nlba || + new_entry >= arenap->btt_info.internal_nlba) { + CHECK_INFO(ppc, "arena %u: invalid BTT Flog entry at %u", + arenap->id, i); + if (!list_push(loc->list_flog_inval, i)) + return -1; + + goto next; + } + + if (util_isset(loc->fbitmap, entry)) { + /* + * here we have two flog entries which hold the same free block + */ + CHECK_INFO(ppc, "arena %u: duplicated BTT Flog entry at %u", + arenap->id, i); + if (!list_push(loc->list_flog_inval, i)) + return -1; + } else if (util_isset(loc->bitmap, entry)) { + /* here we probably have an unfinished write */ + if (util_isset(loc->bitmap, new_entry)) { + /* Both old_map and new_map are already used in map. */ + CHECK_INFO(ppc, "arena %u: duplicated BTT Flog entry " + "at %u", arenap->id, i); + util_setbit(loc->dup_bitmap, new_entry); + if (!list_push(loc->list_flog_inval, i)) + return -1; + } else { + /* + * Unfinished write.
Next time pool is opened, the map + * will be updated to new_map. + */ + util_setbit(loc->bitmap, new_entry); + util_setbit(loc->fbitmap, entry); + } + } else { + int flog_valid = 1; + /* + * Either flog entry is in its initial state: + * - current_btt_flog entry is first one in pair and + * - current_btt_flog.old_map == current_btt_flog.new_map and + * - current_btt_flog.seq == 0b01 and + * - second flog entry in pair is zeroed + * or + * current_btt_flog.old_map != current_btt_flog.new_map + */ + if (entry == new_entry) + flog_valid = (next == 1) && (flog_cur->seq == 1) && + util_is_zeroed((const void *)&flog[1], + sizeof(flog[1])); + + if (flog_valid) { + /* totally fine case */ + util_setbit(loc->bitmap, entry); + util_setbit(loc->fbitmap, entry); + } else { + CHECK_INFO(ppc, "arena %u: invalid BTT Flog entry at " + "%u", arenap->id, i); + if (!list_push(loc->list_flog_inval, i)) + return -1; + } + } + +next: + *ptr += BTT_FLOG_PAIR_ALIGN; + return 0; +} + +/* + * arena_map_flog_check -- (internal) check map and flog + */ +static int +arena_map_flog_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + struct arena *arenap = loc->arenap; + + /* check map entries */ + uint32_t i; + for (i = 0; i < arenap->btt_info.external_nlba; i++) { + if (map_entry_check(ppc, loc, i)) + goto error_push; + } + + /* check flog entries */ + uint8_t *ptr = arenap->flog; + for (i = 0; i < arenap->btt_info.nfree; i++) { + if (flog_entry_check(ppc, loc, i, &ptr)) + goto error_push; + } + + /* check unmapped blocks and insert to list */ + for (i = 0; i < arenap->btt_info.internal_nlba; i++) { + if (!util_isset(loc->bitmap, i)) { + CHECK_INFO(ppc, "arena %u: unmapped block %u", + arenap->id, i); + if (!list_push(loc->list_unmap, i)) + goto error_push; + } + } + + if (loc->list_unmap->count) + CHECK_INFO(ppc, "arena %u: number of unmapped blocks: %u", + arenap->id, loc->list_unmap->count); + if (loc->list_inval->count) + CHECK_INFO(ppc, "arena %u: number of invalid BTT Map entries: " + "%u", arenap->id, loc->list_inval->count); + if (loc->list_flog_inval->count) + CHECK_INFO(ppc, "arena %u: number of invalid BTT Flog entries: " + "%u", arenap->id, loc->list_flog_inval->count); + + if (CHECK_IS_NOT(ppc, REPAIR) && loc->list_unmap->count > 0) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + goto cleanup; + } + + /* + * We are able to repair if and only if number of unmapped blocks is + * equal to sum of invalid map and flog entries. 
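+	 *
+	 * For example (illustrative counts only): 3 unmapped blocks can
+	 * repair exactly 2 invalid map entries plus 1 invalid flog entry,
+	 * because each repaired entry consumes exactly one unmapped block;
+	 * any other totals mean the damage cannot be explained by this
+	 * model and the repair is refused.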
+ */ + if (loc->list_unmap->count != (loc->list_inval->count + + loc->list_flog_inval->count)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_ERR(ppc, "arena %u: cannot repair BTT Map and Flog", + arenap->id); + goto cleanup; + } + + if (CHECK_IS_NOT(ppc, ADVANCED) && loc->list_inval->count + + loc->list_flog_inval->count > 0) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_INFO(ppc, REQUIRE_ADVANCED); + CHECK_ERR(ppc, "BTT Map and/or BTT Flog contain invalid " + "entries"); + check_end(ppc->data); + goto cleanup; + } + + if (loc->list_inval->count > 0) { + CHECK_ASK(ppc, Q_REPAIR_MAP, "Do you want to repair invalid " + "BTT Map entries?"); + } + + if (loc->list_flog_inval->count > 0) { + CHECK_ASK(ppc, Q_REPAIR_FLOG, "Do you want to repair invalid " + "BTT Flog entries?"); + } + + return check_questions_sequence_validate(ppc); + +error_push: + CHECK_ERR(ppc, "arena %u: cannot allocate memory for list item", + arenap->id); + ppc->result = CHECK_RESULT_ERROR; +cleanup: + cleanup(ppc, loc); + return -1; +} + +/* + * arena_map_flog_fix -- (internal) fix map and flog + */ +static int +arena_map_flog_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *ctx) +{ + LOG(3, NULL); + + ASSERTeq(ctx, NULL); + ASSERTne(loc, NULL); + + struct arena *arenap = loc->arenap; + uint32_t inval; + uint32_t unmap; + switch (question) { + case Q_REPAIR_MAP: + /* + * Because the first of two duplicated map entries looks valid + * until the second one is found, we must locate all map entries + * pointing to postmap LBAs known to be duplicated and mark them + * with the error flag. + */ + for (uint32_t i = 0; i < arenap->btt_info.external_nlba; i++) { + uint32_t lba = map_get_postmap_lba(arenap, i); + if (lba >= arenap->btt_info.internal_nlba) + continue; + + if (!util_isset(loc->dup_bitmap, lba)) + continue; + + arenap->map[i] = BTT_MAP_ENTRY_ERROR | lba; + util_clrbit(loc->dup_bitmap, lba); + CHECK_INFO(ppc, + "arena %u: storing 0x%x at %u BTT Map entry", + arenap->id, arenap->map[i], i); + } + + /* + * repair invalid or duplicated map entries by using unmapped + * blocks + */ + while (list_pop(loc->list_inval, &inval)) { + if (!list_pop(loc->list_unmap, &unmap)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + arenap->map[inval] = unmap | BTT_MAP_ENTRY_ERROR; + CHECK_INFO(ppc, "arena %u: storing 0x%x at %u BTT Map " + "entry", arenap->id, arenap->map[inval], inval); + } + break; + case Q_REPAIR_FLOG: + /* repair invalid flog entries using unmapped blocks */ + while (list_pop(loc->list_flog_inval, &inval)) { + if (!list_pop(loc->list_unmap, &unmap)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + + struct btt_flog *flog = (struct btt_flog *) + (arenap->flog + inval * BTT_FLOG_PAIR_ALIGN); + memset(&flog[1], 0, sizeof(flog[1])); + uint32_t entry = unmap | BTT_MAP_ENTRY_ERROR; + flog[0].lba = inval; + flog[0].new_map = entry; + flog[0].old_map = entry; + flog[0].seq = 1; + + CHECK_INFO(ppc, "arena %u: repairing BTT Flog at %u " + "with free block entry 0x%x", loc->arenap->id, + inval, entry); + } + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); +}; + +static const struct step steps[] = { + { + .check = init, + }, + { + .check = arena_map_flog_check, + }, + { + .fix = arena_map_flog_fix, + }, + { + .check = cleanup, + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step
according to its parameters + */ +static inline int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < ARRAY_SIZE(steps)); + + const struct step *step = &steps[loc->step++]; + + if (!step->fix) + return step->check(ppc, loc); + + if (!check_answer_loop(ppc, loc, NULL, 1, step->fix)) + return 0; + + cleanup(ppc, loc); + return -1; +} + +/* + * check_btt_map_flog -- perform check and fixing of map and flog + */ +void +check_btt_map_flog(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + location *loc = check_get_step_data(ppc->data); + + if (ppc->pool->blk_no_layout) + return; + + /* initialize check */ + if (!loc->arenap && loc->narena == 0 && + ppc->result != CHECK_RESULT_PROCESS_ANSWERS) { + CHECK_INFO(ppc, "checking BTT Map and Flog"); + loc->arenap = PMDK_TAILQ_FIRST(&ppc->pool->arenas); + loc->narena = 0; + } + + while (loc->arenap != NULL) { + /* add info about checking next arena */ + if (ppc->result != CHECK_RESULT_PROCESS_ANSWERS && + loc->step == 0) { + CHECK_INFO(ppc, "arena %u: checking BTT Map and Flog", + loc->narena); + } + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps)) { + if (step_exe(ppc, loc)) + return; + } + + /* jump to next arena */ + loc->arenap = PMDK_TAILQ_NEXT(loc->arenap, next); + loc->narena++; + loc->step = 0; + } +} diff --git a/src/pmdk/src/libpmempool/check_log.c b/src/pmdk/src/libpmempool/check_log.c new file mode 100644 index 000000000..b751859f6 --- /dev/null +++ b/src/pmdk/src/libpmempool/check_log.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * check_log.c -- check pmemlog + */ + +#include +#include +#include + +#include "out.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum question { + Q_LOG_START_OFFSET, + Q_LOG_END_OFFSET, + Q_LOG_WRITE_OFFSET, +}; + +/* + * log_read -- (internal) read pmemlog header + */ +static int +log_read(PMEMpoolcheck *ppc) +{ + /* + * Here we want to read the pmemlog header without the pool_hdr as we've + * already done it before. + * + * Take the pointer to fields right after pool_hdr, compute the size and + * offset of remaining fields. 
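+	 *
+	 * Illustrative layout (field order as used by the checks below;
+	 * all sizes are implementation-defined):
+	 *
+	 *   struct pmemlog: [ pool_hdr | start_offset | end_offset |
+	 *                     write_offset | ... ]
+	 *
+	 * The read below resumes right after pool_hdr, i.e. at offset
+	 * sizeof(ppc->pool->hdr.log.hdr).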
+ */ + uint8_t *ptr = (uint8_t *)&ppc->pool->hdr.log; + ptr += sizeof(ppc->pool->hdr.log.hdr); + + size_t size = sizeof(ppc->pool->hdr.log) - + sizeof(ppc->pool->hdr.log.hdr); + uint64_t offset = sizeof(ppc->pool->hdr.log.hdr); + + if (pool_read(ppc->pool, ptr, size, offset)) + return CHECK_ERR(ppc, "cannot read pmemlog structure"); + + /* endianness conversion */ + log_convert2h(&ppc->pool->hdr.log); + return 0; +} + +/* + * log_hdr_check -- (internal) check pmemlog header + */ +static int +log_hdr_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + CHECK_INFO(ppc, "checking pmemlog header"); + + if (log_read(ppc)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + + /* determine constant values for pmemlog */ + const uint64_t d_start_offset = + roundup(sizeof(ppc->pool->hdr.log), LOG_FORMAT_DATA_ALIGN); + + if (ppc->pool->hdr.log.start_offset != d_start_offset) { + if (CHECK_ASK(ppc, Q_LOG_START_OFFSET, + "invalid pmemlog.start_offset: 0x%jx.|Do you " + "want to set pmemlog.start_offset to default " + "0x%jx?", + ppc->pool->hdr.log.start_offset, + d_start_offset)) + goto error; + } + + if (ppc->pool->hdr.log.end_offset != ppc->pool->set_file->size) { + if (CHECK_ASK(ppc, Q_LOG_END_OFFSET, + "invalid pmemlog.end_offset: 0x%jx.|Do you " + "want to set pmemlog.end_offset to 0x%jx?", + ppc->pool->hdr.log.end_offset, + ppc->pool->set_file->size)) + goto error; + } + + if (ppc->pool->hdr.log.write_offset < d_start_offset || + ppc->pool->hdr.log.write_offset > ppc->pool->set_file->size) { + if (CHECK_ASK(ppc, Q_LOG_WRITE_OFFSET, + "invalid pmemlog.write_offset: 0x%jx.|Do you " + "want to set pmemlog.write_offset to " + "pmemlog.end_offset?", + ppc->pool->hdr.log.write_offset)) + goto error; + } + + if (ppc->result == CHECK_RESULT_CONSISTENT || + ppc->result == CHECK_RESULT_REPAIRED) + CHECK_INFO(ppc, "pmemlog header correct"); + + return check_questions_sequence_validate(ppc); + +error: + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + check_end(ppc->data); + return -1; +} + +/* + * log_hdr_fix -- (internal) fix pmemlog header + */ +static int +log_hdr_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, void *ctx) +{ + LOG(3, NULL); + + uint64_t d_start_offset; + + switch (question) { + case Q_LOG_START_OFFSET: + /* determine constant values for pmemlog */ + d_start_offset = roundup(sizeof(ppc->pool->hdr.log), + LOG_FORMAT_DATA_ALIGN); + CHECK_INFO(ppc, "setting pmemlog.start_offset to 0x%jx", + d_start_offset); + ppc->pool->hdr.log.start_offset = d_start_offset; + break; + case Q_LOG_END_OFFSET: + CHECK_INFO(ppc, "setting pmemlog.end_offset to 0x%jx", + ppc->pool->set_file->size); + ppc->pool->hdr.log.end_offset = ppc->pool->set_file->size; + break; + case Q_LOG_WRITE_OFFSET: + CHECK_INFO(ppc, "setting pmemlog.write_offset to " + "pmemlog.end_offset"); + ppc->pool->hdr.log.write_offset = ppc->pool->set_file->size; + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); + enum pool_type type; +}; + +static const struct step steps[] = { + { + .check = log_hdr_check, + .type = POOL_TYPE_LOG + }, + { + .fix = log_hdr_fix, + .type = POOL_TYPE_LOG + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static inline int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < ARRAY_SIZE(steps)); + ASSERTeq(ppc->pool->params.type, 
POOL_TYPE_LOG); + + const struct step *step = &steps[loc->step++]; + + if (!(step->type & ppc->pool->params.type)) + return 0; + + if (!step->fix) + return step->check(ppc, loc); + + if (log_read(ppc)) { + ppc->result = CHECK_RESULT_ERROR; + return -1; + } + + return check_answer_loop(ppc, loc, NULL, 1, step->fix); +} + +/* + * check_log -- entry point for pmemlog checks + */ +void +check_log(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + location *loc = check_get_step_data(ppc->data); + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps)) { + if (step_exe(ppc, loc)) + break; + } +} diff --git a/src/pmdk/src/libpmempool/check_pool_hdr.c b/src/pmdk/src/libpmempool/check_pool_hdr.c new file mode 100644 index 000000000..9d24855b0 --- /dev/null +++ b/src/pmdk/src/libpmempool/check_pool_hdr.c @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * check_pool_hdr.c -- pool header check + */ + +#include +#include +#include +#include + +#include "out.h" +#include "util_pmem.h" +#include "libpmempool.h" +#include "libpmem.h" +#include "pmempool.h" +#include "pool.h" +#include "set.h" +#include "check_util.h" + +#define NO_COMMON_POOLSET_UUID "%sno common pool_hdr.poolset_uuid" +#define INVALID_UUID "%sinvalid pool_hdr.uuid" +#define INVALID_CHECKSUM "%sinvalid pool_hdr.checksum" + +enum question { + Q_DEFAULT_SIGNATURE, + Q_DEFAULT_MAJOR, + Q_DEFAULT_COMPAT_FEATURES, + Q_DEFAULT_INCOMPAT_FEATURES, + Q_DEFAULT_RO_COMPAT_FEATURES, + Q_ZERO_UNUSED_AREA, + Q_ARCH_FLAGS, + Q_CRTIME, + Q_CHECKSUM, + Q_POOLSET_UUID_SET, + Q_POOLSET_UUID_FROM_BTT_INFO, + Q_POOLSET_UUID_REGENERATE, + Q_UUID_SET, + Q_UUID_REGENERATE, + Q_NEXT_PART_UUID_SET, + Q_PREV_PART_UUID_SET, + Q_NEXT_REPL_UUID_SET, + Q_PREV_REPL_UUID_SET +}; + +/* + * pool_hdr_possible_type -- (internal) return possible type of pool + */ +static enum pool_type +pool_hdr_possible_type(PMEMpoolcheck *ppc) +{ + if (pool_blk_get_first_valid_arena(ppc->pool, &ppc->pool->bttc)) + return POOL_TYPE_BLK; + + return POOL_TYPE_UNKNOWN; +} + +/* + * pool_hdr_valid -- (internal) return true if pool header is valid + */ +static int +pool_hdr_valid(struct pool_hdr *hdrp) +{ + return !util_is_zeroed((void *)hdrp, sizeof(*hdrp)) && + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 0, + POOL_HDR_CSUM_END_OFF(hdrp)); +} + +/* + * pool_supported -- (internal) check if pool type is supported + */ +static int +pool_supported(enum pool_type type) +{ + switch (type) { + case POOL_TYPE_LOG: + return 1; + case POOL_TYPE_BLK: + return 1; + case POOL_TYPE_OBJ: + default: + return 0; + } +} + +/* + * pool_hdr_preliminary_check -- (internal) check pool header checksum and pool + * parameters + */ +static int +pool_hdr_preliminary_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + CHECK_INFO(ppc, "%schecking pool header", loc->prefix); + + if (util_is_zeroed((void *)&loc->hdr, sizeof(loc->hdr))) { + if (CHECK_IS_NOT(ppc, REPAIR)) { + check_end(ppc->data); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, "%sempty pool hdr", loc->prefix); + } + } else if (loc->hdr_valid) { + enum pool_type type = pool_hdr_get_type(&loc->hdr); + if (type == POOL_TYPE_UNKNOWN) { + if (CHECK_IS_NOT(ppc, REPAIR)) { + check_end(ppc->data); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, "%sinvalid signature", + loc->prefix); + } + + CHECK_INFO(ppc, "%sinvalid signature", loc->prefix); + } else { + /* valid check sum */ + CHECK_INFO(ppc, "%spool header correct", + loc->prefix); + 
loc->step = CHECK_STEP_COMPLETE; + return 0; + } + } else if (CHECK_IS_NOT(ppc, REPAIR)) { + check_end(ppc->data); + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, "%sincorrect pool header", loc->prefix); + } else { + CHECK_INFO(ppc, "%sincorrect pool header", loc->prefix); + } + + ASSERT(CHECK_IS(ppc, REPAIR)); + + if (ppc->pool->params.type == POOL_TYPE_UNKNOWN) { + ppc->pool->params.type = pool_hdr_possible_type(ppc); + if (ppc->pool->params.type == POOL_TYPE_UNKNOWN) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "cannot determine pool type"); + } + } + + if (!pool_supported(ppc->pool->params.type)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "the repair of %s pools is not supported", + pool_get_pool_type_str(ppc->pool->params.type)); + } + + return 0; +} + +/* + * pool_hdr_default_check -- (internal) check some default values in pool header + */ +static int +pool_hdr_default_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + ASSERT(CHECK_IS(ppc, REPAIR)); + + struct pool_hdr def_hdr; + pool_hdr_default(ppc->pool->params.type, &def_hdr); + + if (memcmp(loc->hdr.signature, def_hdr.signature, POOL_HDR_SIG_LEN)) { + CHECK_ASK(ppc, Q_DEFAULT_SIGNATURE, + "%spool_hdr.signature is not valid.|Do you want to set " + "it to %.8s?", loc->prefix, def_hdr.signature); + } + + if (loc->hdr.major != def_hdr.major) { + CHECK_ASK(ppc, Q_DEFAULT_MAJOR, + "%spool_hdr.major is not valid.|Do you want to set it " + "to default value 0x%x?", loc->prefix, def_hdr.major); + } + + features_t unknown = util_get_unknown_features( + loc->hdr.features, def_hdr.features); + if (unknown.compat) { + CHECK_ASK(ppc, Q_DEFAULT_COMPAT_FEATURES, + "%spool_hdr.features.compat is not valid.|Do you want " + "to set it to default value 0x%x?", loc->prefix, + def_hdr.features.compat); + } + + if (unknown.incompat) { + CHECK_ASK(ppc, Q_DEFAULT_INCOMPAT_FEATURES, + "%spool_hdr.features.incompat is not valid.|Do you " + "want to set it to default value 0x%x?", loc->prefix, + def_hdr.features.incompat); + } + + if (unknown.ro_compat) { + CHECK_ASK(ppc, Q_DEFAULT_RO_COMPAT_FEATURES, + "%spool_hdr.features.ro_compat is not valid.|Do you " + "want to set it to default value 0x%x?", loc->prefix, + def_hdr.features.ro_compat); + } + + if (!util_is_zeroed(loc->hdr.unused, sizeof(loc->hdr.unused))) { + CHECK_ASK(ppc, Q_ZERO_UNUSED_AREA, + "%sunused area is not filled by zeros.|Do you want to " + "fill it up?", loc->prefix); + } + + return check_questions_sequence_validate(ppc); +} + +/* + * pool_hdr_default_fix -- (internal) fix some default values in pool header + */ +static int +pool_hdr_default_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *context) +{ + LOG(3, NULL); + + ASSERTne(loc, NULL); + struct pool_hdr def_hdr; + pool_hdr_default(ppc->pool->params.type, &def_hdr); + + switch (question) { + case Q_DEFAULT_SIGNATURE: + CHECK_INFO(ppc, "%ssetting pool_hdr.signature to %.8s", + loc->prefix, def_hdr.signature); + memcpy(&loc->hdr.signature, &def_hdr.signature, + POOL_HDR_SIG_LEN); + break; + case Q_DEFAULT_MAJOR: + CHECK_INFO(ppc, "%ssetting pool_hdr.major to 0x%x", loc->prefix, + def_hdr.major); + loc->hdr.major = def_hdr.major; + break; + case Q_DEFAULT_COMPAT_FEATURES: + CHECK_INFO(ppc, "%ssetting pool_hdr.features.compat to 0x%x", + loc->prefix, def_hdr.features.compat); + loc->hdr.features.compat = def_hdr.features.compat; + break; + case Q_DEFAULT_INCOMPAT_FEATURES: + CHECK_INFO(ppc, "%ssetting pool_hdr.features.incompat to 
0x%x", + loc->prefix, def_hdr.features.incompat); + loc->hdr.features.incompat = def_hdr.features.incompat; + break; + case Q_DEFAULT_RO_COMPAT_FEATURES: + CHECK_INFO(ppc, "%ssetting pool_hdr.features.ro_compat to 0x%x", + loc->prefix, def_hdr.features.ro_compat); + loc->hdr.features.ro_compat = def_hdr.features.ro_compat; + break; + case Q_ZERO_UNUSED_AREA: + CHECK_INFO(ppc, "%ssetting pool_hdr.unused to zeros", + loc->prefix); + memset(loc->hdr.unused, 0, sizeof(loc->hdr.unused)); + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +/* + * pool_hdr_quick_check -- (internal) end check if pool header is valid + */ +static int +pool_hdr_quick_check(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + if (pool_hdr_valid(loc->hdrp)) + loc->step = CHECK_STEP_COMPLETE; + + return 0; +} + +/* + * pool_hdr_nondefault -- (internal) validate custom value fields + */ +static int +pool_hdr_nondefault(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (loc->hdr.crtime > (uint64_t)ppc->pool->set_file->mtime) { + const char * const error = "%spool_hdr.crtime is not valid"; + if (CHECK_IS_NOT(ppc, REPAIR)) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, error, loc->prefix); + } else if (CHECK_IS_NOT(ppc, ADVANCED)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_INFO(ppc, "%s" REQUIRE_ADVANCED, loc->prefix); + return CHECK_ERR(ppc, error, loc->prefix); + } + + CHECK_ASK(ppc, Q_CRTIME, + "%spool_hdr.crtime is not valid.|Do you want to set it " + "to file's modtime [%s]?", loc->prefix, + check_get_time_str(ppc->pool->set_file->mtime)); + } + + if (loc->valid_part_hdrp && + memcmp(&loc->valid_part_hdrp->arch_flags, + &loc->hdr.arch_flags, + sizeof(struct arch_flags)) != 0) { + const char * const error = "%spool_hdr.arch_flags is not valid"; + if (CHECK_IS_NOT(ppc, REPAIR)) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, error, loc->prefix); + } + + CHECK_ASK(ppc, Q_ARCH_FLAGS, + "%spool_hdr.arch_flags is not valid.|Do you want to " + "copy it from a valid part?", loc->prefix); + } + + return check_questions_sequence_validate(ppc); +} + +/* + * pool_hdr_nondefault_fix -- (internal) fix custom value fields + */ +static int +pool_hdr_nondefault_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *context) +{ + LOG(3, NULL); + + ASSERTne(loc, NULL); + uint64_t *flags = NULL; + + switch (question) { + case Q_CRTIME: + CHECK_INFO(ppc, "%ssetting pool_hdr.crtime to file's modtime: " + "%s", loc->prefix, + check_get_time_str(ppc->pool->set_file->mtime)); + util_convert2h_hdr_nocheck(&loc->hdr); + loc->hdr.crtime = (uint64_t)ppc->pool->set_file->mtime; + util_convert2le_hdr(&loc->hdr); + break; + case Q_ARCH_FLAGS: + flags = (uint64_t *)&loc->valid_part_hdrp->arch_flags; + CHECK_INFO(ppc, "%ssetting pool_hdr.arch_flags to 0x%08" PRIx64 + "%08" PRIx64, loc->prefix, flags[0], flags[1]); + util_convert2h_hdr_nocheck(&loc->hdr); + memcpy(&loc->hdr.arch_flags, &loc->valid_part_hdrp->arch_flags, + sizeof(struct arch_flags)); + util_convert2le_hdr(&loc->hdr); + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +/* + * pool_hdr_poolset_uuid -- (internal) check poolset_uuid field + */ +static int +pool_hdr_poolset_uuid_find(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + /* + * If the pool header is valid and there is not other parts or replicas + * in the poolset its poolset_uuid is also valid. 
+	 */
+	if (loc->hdr_valid && loc->single_repl && loc->single_part)
+		return 0;
+
+	if (loc->replica != 0 || loc->part != 0)
+		goto after_lookup;
+
+	/* for a blk pool we can take the UUID from the BTT Info header */
+	if (ppc->pool->params.type == POOL_TYPE_BLK && ppc->pool->bttc.valid) {
+		loc->valid_puuid = &ppc->pool->bttc.btt_info.parent_uuid;
+		if (uuidcmp(loc->hdr.poolset_uuid, *loc->valid_puuid) != 0) {
+			CHECK_ASK(ppc, Q_POOLSET_UUID_FROM_BTT_INFO,
+				"%sinvalid pool_hdr.poolset_uuid.|Do you want "
+				"to set it to %s from BTT Info?", loc->prefix,
+				check_get_uuid_str(*loc->valid_puuid));
+			goto exit_question;
+		}
+	}
+
+	if (loc->single_part && loc->single_repl) {
+		/*
+		 * If the pool is not a blk pool, or the BTT Info header is
+		 * invalid, there is no other way to validate the poolset uuid.
+		 */
+		return 0;
+	}
+
+	/*
+	 * If all valid poolset part files have the same poolset uuid, it is
+	 * the valid poolset uuid; if all part files (valid or not) share one
+	 * poolset uuid, that value is used as a fallback.
+	 */
+	struct pool_set *poolset = ppc->pool->set_file->poolset;
+	unsigned nreplicas = poolset->nreplicas;
+	uuid_t *common_puuid = loc->valid_puuid;
+	for (unsigned r = 0; r < nreplicas; r++) {
+		struct pool_replica *rep = REP(poolset, r);
+		for (unsigned p = 0; p < rep->nhdrs; p++) {
+			struct pool_hdr *hdr = HDR(rep, p);
+
+			/*
+			 * find the poolset uuid if it is the same for all part
+			 * files
+			 */
+			if (common_puuid == NULL) {
+				if (r == 0 && p == 0) {
+					common_puuid = &hdr->poolset_uuid;
+				}
+			} else if (uuidcmp(*common_puuid, hdr->poolset_uuid)
+					!= 0) {
+				common_puuid = NULL;
+			}
+
+			if (!pool_hdr_valid(hdr))
+				continue;
+
+			/*
+			 * find the poolset uuid if it is the same for all
+			 * valid part files
+			 */
+			if (loc->valid_puuid == NULL) {
+				loc->valid_puuid = &hdr->poolset_uuid;
+			} else if (uuidcmp(*loc->valid_puuid, hdr->poolset_uuid)
+					!= 0) {
+				ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+				return CHECK_ERR(ppc, "the poolset contains "
+					"part files from various poolsets");
+			}
+		}
+	}
+
+	if (!loc->valid_puuid && common_puuid)
+		loc->valid_puuid = common_puuid;
+
+	if (loc->valid_puuid)
+		goto after_lookup;
+
+	if (CHECK_IS_NOT(ppc, REPAIR)) {
+		ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+		return CHECK_ERR(ppc, NO_COMMON_POOLSET_UUID, loc->prefix);
+	} else if (CHECK_IS_NOT(ppc, ADVANCED)) {
+		ppc->result = CHECK_RESULT_CANNOT_REPAIR;
+		CHECK_INFO(ppc, "%s" REQUIRE_ADVANCED, loc->prefix);
+		return CHECK_ERR(ppc, NO_COMMON_POOLSET_UUID, loc->prefix);
+	} else {
+		CHECK_ASK(ppc, Q_POOLSET_UUID_REGENERATE, NO_COMMON_POOLSET_UUID
+			".|Do you want to regenerate pool_hdr.poolset_uuid?",
+			loc->prefix);
+		goto exit_question;
+	}
+
+after_lookup:
+	if (loc->valid_puuid) {
+		if (uuidcmp(*loc->valid_puuid, loc->hdr.poolset_uuid) != 0) {
+			if (CHECK_IS_NOT(ppc, REPAIR)) {
+				ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+				return CHECK_ERR(ppc, "%sinvalid "
+					"pool_hdr.poolset_uuid", loc->prefix);
+			}
+
+			CHECK_ASK(ppc, Q_POOLSET_UUID_SET, "%sinvalid "
+				"pool_hdr.poolset_uuid.|Do you want to set "
+				"it to %s from a valid part file?", loc->prefix,
+				check_get_uuid_str(*loc->valid_puuid));
+		}
+	}
+
+exit_question:
+	return check_questions_sequence_validate(ppc);
+}
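+
+/*
+ * The chosen answer also records, in pool->uuid_op, whether the valid UUID
+ * originated in the BTT Info header; check_write.c uses this to decide
+ * whether the poolset UUID has to be written back into the BTT Info
+ * parent_uuid fields.
+ */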
+
+/*
+ * pool_hdr_poolset_uuid_fix -- (internal) fix poolset_uuid field
+ */
+static int
+pool_hdr_poolset_uuid_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question,
+	void *context)
+{
+	LOG(3, NULL);
+
+	ASSERTne(loc, NULL);
+
+	switch (question) {
+	case Q_POOLSET_UUID_SET:
+	case Q_POOLSET_UUID_FROM_BTT_INFO:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.poolset_uuid to %s",
+			loc->prefix, check_get_uuid_str(*loc->valid_puuid));
+		memcpy(loc->hdr.poolset_uuid, loc->valid_puuid,
+			POOL_HDR_UUID_LEN);
+		if (question == Q_POOLSET_UUID_SET)
+			ppc->pool->uuid_op = UUID_NOT_FROM_BTT;
+		else
+			ppc->pool->uuid_op = UUID_FROM_BTT;
+		break;
+	case Q_POOLSET_UUID_REGENERATE:
+		if (util_uuid_generate(loc->hdr.poolset_uuid) != 0) {
+			ppc->result = CHECK_RESULT_INTERNAL_ERROR;
+			return CHECK_ERR(ppc, "%suuid generation failed",
+				loc->prefix);
+		}
+		CHECK_INFO(ppc, "%ssetting pool_hdr.poolset_uuid to %s",
+			loc->prefix,
+			check_get_uuid_str(loc->hdr.poolset_uuid));
+		ppc->pool->uuid_op = UUID_NOT_FROM_BTT;
+		break;
+	default:
+		ERR("not implemented question id: %u", question);
+	}
+
+	return 0;
+}
+
+#define COMPARE_TO_FIRST_PART_ONLY 2
+
+/*
+ * pool_hdr_uuid_find -- (internal) check UUID value
+ */
+static int
+pool_hdr_uuid_find(PMEMpoolcheck *ppc, location *loc)
+{
+	LOG(3, NULL);
+
+	/*
+	 * If the pool header is valid and there are no other parts or replicas
+	 * in the poolset, its uuid is also valid.
+	 */
+	if (loc->hdr_valid && loc->single_repl && loc->single_part)
+		return 0;
+
+	int hdrs_valid[] = {
+		loc->next_part_hdr_valid, loc->prev_part_hdr_valid,
+		loc->next_repl_hdr_valid, loc->prev_repl_hdr_valid};
+	uuid_t *uuids[] = {
+		&loc->next_part_hdrp->prev_part_uuid,
+		&loc->prev_part_hdrp->next_part_uuid,
+		&loc->next_repl_hdrp->prev_repl_uuid,
+		&loc->prev_repl_hdrp->next_repl_uuid
+	};
+
+	/*
+	 * If all valid poolset part files link to this part file with the
+	 * same uuid, that uuid is valid; if all links agree on one uuid and
+	 * the pool is a single-file pool, that uuid is also valid.
+	 */
+	loc->valid_uuid = NULL;
+	if (loc->hdr_valid)
+		loc->valid_uuid = &loc->hdr.uuid;
+	uuid_t *common_uuid = uuids[0];
+
+	COMPILE_ERROR_ON(ARRAY_SIZE(uuids) != ARRAY_SIZE(hdrs_valid));
+	COMPILE_ERROR_ON(COMPARE_TO_FIRST_PART_ONLY >= ARRAY_SIZE(uuids));
+	for (unsigned i = 0; i < ARRAY_SIZE(uuids); ++i) {
+		if (i > 0 && common_uuid != NULL) {
+			if (uuidcmp(*common_uuid, *uuids[i]) != 0) {
+				common_uuid = NULL;
+			}
+		}
+
+		if (i >= COMPARE_TO_FIRST_PART_ONLY && loc->part != 0)
+			continue;
+
+		if (!hdrs_valid[i])
+			continue;
+
+		if (!loc->valid_uuid) {
+			loc->valid_uuid = uuids[i];
+		} else if (uuidcmp(*loc->valid_uuid, *uuids[i]) != 0) {
+			ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+			return CHECK_ERR(ppc, "%sambiguous pool_hdr.uuid",
+				loc->prefix);
+		}
+	}
+
+	if (!loc->valid_uuid && common_uuid)
+		loc->valid_uuid = common_uuid;
+
+	if (loc->valid_uuid != NULL) {
+		if (uuidcmp(*loc->valid_uuid, loc->hdr.uuid) != 0) {
+			CHECK_ASK(ppc, Q_UUID_SET, INVALID_UUID ".|Do you want "
+				"to set it to %s from a valid part file?",
+				loc->prefix,
+				check_get_uuid_str(*loc->valid_uuid));
+		}
+	} else if (CHECK_IS(ppc, ADVANCED)) {
+		CHECK_ASK(ppc, Q_UUID_REGENERATE, INVALID_UUID ".|Do you want "
+			"to regenerate it?", loc->prefix);
+	} else if (CHECK_IS(ppc, REPAIR)) {
+		ppc->result = CHECK_RESULT_CANNOT_REPAIR;
+		CHECK_INFO(ppc, "%s" REQUIRE_ADVANCED, loc->prefix);
+		return CHECK_ERR(ppc, INVALID_UUID, loc->prefix);
+	} else {
+		ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+		return CHECK_ERR(ppc, INVALID_UUID, loc->prefix);
+	}
+
+	return check_questions_sequence_validate(ppc);
+}
+
+/*
+ * pool_hdr_uuid_fix -- (internal) fix UUID value
+ */
+static int
+pool_hdr_uuid_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question,
+	void *context)
+{
+	LOG(3, NULL);
+
+	ASSERTne(loc, NULL);
+
+	switch (question) {
+	case Q_UUID_SET:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.uuid to %s", loc->prefix,
+			
check_get_uuid_str(*loc->valid_uuid));
+		memcpy(loc->hdr.uuid, loc->valid_uuid, POOL_HDR_UUID_LEN);
+		break;
+	case Q_UUID_REGENERATE:
+		if (util_uuid_generate(loc->hdr.uuid) != 0) {
+			ppc->result = CHECK_RESULT_INTERNAL_ERROR;
+			return CHECK_ERR(ppc, "%suuid generation failed",
+				loc->prefix);
+		}
+		CHECK_INFO(ppc, "%ssetting pool_hdr.uuid to %s", loc->prefix,
+			check_get_uuid_str(loc->hdr.uuid));
+		break;
+	default:
+		ERR("not implemented question id: %u", question);
+	}
+
+	return 0;
+}
+
+/*
+ * pool_hdr_uuid_links -- (internal) check UUID links values
+ */
+static int
+pool_hdr_uuid_links(PMEMpoolcheck *ppc, location *loc)
+{
+	LOG(3, NULL);
+
+	/*
+	 * If the pool header is valid and there are no other parts or replicas
+	 * in the poolset, its uuid links are also valid.
+	 */
+	if (loc->hdr_valid && loc->single_repl && loc->single_part)
+		return 0;
+
+	uuid_t *links[] = {
+		&loc->hdr.next_part_uuid, &loc->hdr.prev_part_uuid,
+		&loc->hdr.next_repl_uuid, &loc->hdr.prev_repl_uuid};
+	uuid_t *uuids[] = {
+		&loc->next_part_hdrp->uuid, &loc->prev_part_hdrp->uuid,
+		&loc->next_repl_hdrp->uuid, &loc->prev_repl_hdrp->uuid
+	};
+	uint32_t questions[] = {
+		Q_NEXT_PART_UUID_SET, Q_PREV_PART_UUID_SET,
+		Q_NEXT_REPL_UUID_SET, Q_PREV_REPL_UUID_SET
+	};
+	const char *fields[] = {
+		"pool_hdr.next_part_uuid", "pool_hdr.prev_part_uuid",
+		"pool_hdr.next_repl_uuid", "pool_hdr.prev_repl_uuid"
+	};
+
+	COMPILE_ERROR_ON(ARRAY_SIZE(links) != ARRAY_SIZE(uuids));
+	COMPILE_ERROR_ON(ARRAY_SIZE(links) != ARRAY_SIZE(questions));
+	COMPILE_ERROR_ON(ARRAY_SIZE(links) != ARRAY_SIZE(fields));
+	for (uint64_t i = 0; i < ARRAY_SIZE(links); ++i) {
+		if (uuidcmp(*links[i], *uuids[i]) == 0)
+			continue;
+
+		if (CHECK_IS(ppc, REPAIR)) {
+			CHECK_ASK(ppc, questions[i],
+				"%sinvalid %s.|Do you want to set it to a "
+				"valid value?", loc->prefix, fields[i]);
+		} else {
+			ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+			return CHECK_ERR(ppc, "%sinvalid %s", loc->prefix,
+				fields[i]);
+		}
+	}
+
+	return check_questions_sequence_validate(ppc);
+}
+
+/*
+ * pool_hdr_uuid_links_fix -- (internal) fix UUID links values
+ */
+static int
+pool_hdr_uuid_links_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question,
+	void *context)
+{
+	LOG(3, NULL);
+
+	ASSERTne(loc, NULL);
+
+	switch (question) {
+	case Q_NEXT_PART_UUID_SET:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.next_part_uuid to %s",
+			loc->prefix,
+			check_get_uuid_str(loc->next_part_hdrp->uuid));
+		memcpy(loc->hdr.next_part_uuid, loc->next_part_hdrp->uuid,
+			POOL_HDR_UUID_LEN);
+		break;
+	case Q_PREV_PART_UUID_SET:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.prev_part_uuid to %s",
+			loc->prefix,
+			check_get_uuid_str(loc->prev_part_hdrp->uuid));
+		memcpy(loc->hdr.prev_part_uuid, loc->prev_part_hdrp->uuid,
+			POOL_HDR_UUID_LEN);
+		break;
+	case Q_NEXT_REPL_UUID_SET:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.next_repl_uuid to %s",
+			loc->prefix,
+			check_get_uuid_str(loc->next_repl_hdrp->uuid));
+		memcpy(loc->hdr.next_repl_uuid, loc->next_repl_hdrp->uuid,
+			POOL_HDR_UUID_LEN);
+		break;
+	case Q_PREV_REPL_UUID_SET:
+		CHECK_INFO(ppc, "%ssetting pool_hdr.prev_repl_uuid to %s",
+			loc->prefix,
+			check_get_uuid_str(loc->prev_repl_hdrp->uuid));
+		memcpy(loc->hdr.prev_repl_uuid, loc->prev_repl_hdrp->uuid,
+			POOL_HDR_UUID_LEN);
+		break;
+	default:
+		ERR("not implemented question id: %u", question);
+	}
+
+	return 0;
+}
+
+/*
+ * pool_hdr_checksum -- (internal) validate checksum
+ */
+static int
+pool_hdr_checksum(PMEMpoolcheck *ppc, location *loc)
+{
+	LOG(3, NULL);
+
+	if (loc->hdr_valid)
+		return 0;
+
+	if 
(CHECK_IS_NOT(ppc, REPAIR)) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + return CHECK_ERR(ppc, INVALID_CHECKSUM, loc->prefix); + } else if (CHECK_IS_NOT(ppc, ADVANCED)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + CHECK_INFO(ppc, "%s" REQUIRE_ADVANCED, loc->prefix); + return CHECK_ERR(ppc, INVALID_CHECKSUM, loc->prefix); + } + + CHECK_ASK(ppc, Q_CHECKSUM, INVALID_CHECKSUM ".|Do you want to " + "regenerate checksum?", loc->prefix); + return check_questions_sequence_validate(ppc); +} + +/* + * pool_hdr_checksum_fix -- (internal) fix checksum + */ +static int +pool_hdr_checksum_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question, + void *context) +{ + LOG(3, NULL); + + ASSERTne(loc, NULL); + + switch (question) { + case Q_CHECKSUM: + util_checksum(&loc->hdr, sizeof(loc->hdr), &loc->hdr.checksum, + 1, POOL_HDR_CSUM_END_OFF(&loc->hdr)); + CHECK_INFO(ppc, "%ssetting pool_hdr.checksum to 0x%jx", + loc->prefix, le64toh(loc->hdr.checksum)); + break; + default: + ERR("not implemented question id: %u", question); + } + + return 0; +} + +struct step { + int (*check)(PMEMpoolcheck *, location *); + int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *); +}; + +static const struct step steps_initial[] = { + { + .check = pool_hdr_preliminary_check, + }, + { + .check = pool_hdr_default_check, + }, + { + .fix = pool_hdr_default_fix, + .check = pool_hdr_quick_check, + }, + { + .check = pool_hdr_nondefault, + }, + { + .fix = pool_hdr_nondefault_fix, + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +static const struct step steps_uuids[] = { + { + .check = pool_hdr_poolset_uuid_find, + }, + { + .fix = pool_hdr_poolset_uuid_fix, + }, + { + .check = pool_hdr_uuid_find, + }, + { + .fix = pool_hdr_uuid_fix, + }, + { + .check = pool_hdr_uuid_links, + }, + { + .fix = pool_hdr_uuid_links_fix, + }, + { + .check = pool_hdr_checksum, + }, + { + .fix = pool_hdr_checksum_fix, + }, + { + .check = NULL, + .fix = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static int +step_exe(PMEMpoolcheck *ppc, const struct step *steps, location *loc, + struct pool_replica *rep, unsigned nreplicas) +{ + const struct step *step = &steps[loc->step++]; + + if (!step->fix) + return step->check(ppc, loc); + + if (!check_has_answer(ppc->data)) + return 0; + + if (check_answer_loop(ppc, loc, NULL, 1, step->fix)) + return -1; + + util_convert2le_hdr(&loc->hdr); + memcpy(loc->hdrp, &loc->hdr, sizeof(loc->hdr)); + loc->hdr_valid = pool_hdr_valid(loc->hdrp); + util_persist_auto(rep->part[0].is_dev_dax, loc->hdrp, + sizeof(*loc->hdrp)); + + util_convert2h_hdr_nocheck(&loc->hdr); + loc->pool_hdr_modified = 1; + + /* execute check after fix if available */ + if (step->check) + return step->check(ppc, loc); + + return 0; +} + +/* + * init_location_data -- (internal) prepare location information + */ +static void +init_location_data(PMEMpoolcheck *ppc, location *loc) +{ + /* prepare prefix for messages */ + unsigned nfiles = pool_set_files_count(ppc->pool->set_file); + if (ppc->result != CHECK_RESULT_PROCESS_ANSWERS) { + if (nfiles > 1) { + int ret = util_snprintf(loc->prefix, PREFIX_MAX_SIZE, + "replica %u part %u: ", + loc->replica, loc->part); + if (ret < 0) + FATAL("!snprintf"); + } else + loc->prefix[0] = '\0'; + loc->step = 0; + } + + /* get neighboring parts and replicas and briefly validate them */ + const struct pool_set *poolset = ppc->pool->set_file->poolset; + loc->single_repl = poolset->nreplicas == 1; + loc->single_part = poolset->replica[loc->replica]->nparts == 1; 
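+
+	/*
+	 * Cache the neighbouring part and replica headers up front: the
+	 * pool_hdr_uuid_find() and pool_hdr_uuid_links() steps cross-check
+	 * this header's prev/next UUID links against these neighbours.
+	 */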
+ + struct pool_replica *rep = REP(poolset, loc->replica); + struct pool_replica *next_rep = REPN(poolset, loc->replica); + struct pool_replica *prev_rep = REPP(poolset, loc->replica); + + loc->hdrp = HDR(rep, loc->part); + memcpy(&loc->hdr, loc->hdrp, sizeof(loc->hdr)); + util_convert2h_hdr_nocheck(&loc->hdr); + loc->hdr_valid = pool_hdr_valid(loc->hdrp); + + loc->next_part_hdrp = HDRN(rep, loc->part); + loc->prev_part_hdrp = HDRP(rep, loc->part); + loc->next_repl_hdrp = HDR(next_rep, 0); + loc->prev_repl_hdrp = HDR(prev_rep, 0); + + loc->next_part_hdr_valid = pool_hdr_valid(loc->next_part_hdrp); + loc->prev_part_hdr_valid = pool_hdr_valid(loc->prev_part_hdrp); + loc->next_repl_hdr_valid = pool_hdr_valid(loc->next_repl_hdrp); + loc->prev_repl_hdr_valid = pool_hdr_valid(loc->prev_repl_hdrp); + + if (!loc->valid_part_done || loc->valid_part_replica != loc->replica) { + loc->valid_part_hdrp = NULL; + for (unsigned p = 0; p < rep->nhdrs; ++p) { + if (pool_hdr_valid(HDR(rep, p))) { + loc->valid_part_hdrp = HDR(rep, p); + break; + } + } + loc->valid_part_done = true; + } +} + +/* + * check_pool_hdr -- entry point for pool header checks + */ +void +check_pool_hdr(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + location *loc = check_get_step_data(ppc->data); + unsigned nreplicas = ppc->pool->set_file->poolset->nreplicas; + struct pool_set *poolset = ppc->pool->set_file->poolset; + + for (; loc->replica < nreplicas; loc->replica++) { + struct pool_replica *rep = poolset->replica[loc->replica]; + for (; loc->part < rep->nhdrs; loc->part++) { + init_location_data(ppc, loc); + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps_initial)) { + ASSERT(loc->step < ARRAY_SIZE(steps_initial)); + if (step_exe(ppc, steps_initial, loc, rep, + nreplicas)) + return; + } + } + + loc->part = 0; + } + + memcpy(&ppc->pool->hdr.pool, poolset->replica[0]->part[0].hdr, + sizeof(struct pool_hdr)); + + if (loc->pool_hdr_modified) { + struct pool_hdr hdr; + memcpy(&hdr, &ppc->pool->hdr.pool, sizeof(struct pool_hdr)); + util_convert2h_hdr_nocheck(&hdr); + pool_params_from_header(&ppc->pool->params, &hdr); + } +} + +/* + * check_pool_hdr_uuids -- entry point for pool header links checks + */ +void +check_pool_hdr_uuids(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + location *loc = check_get_step_data(ppc->data); + unsigned nreplicas = ppc->pool->set_file->poolset->nreplicas; + struct pool_set *poolset = ppc->pool->set_file->poolset; + + for (; loc->replica < nreplicas; loc->replica++) { + struct pool_replica *rep = poolset->replica[loc->replica]; + for (; loc->part < rep->nparts; loc->part++) { + init_location_data(ppc, loc); + + /* do all checks */ + while (CHECK_NOT_COMPLETE(loc, steps_uuids)) { + ASSERT(loc->step < ARRAY_SIZE(steps_uuids)); + if (step_exe(ppc, steps_uuids, loc, rep, + nreplicas)) + return; + } + } + + loc->part = 0; + } + + memcpy(&ppc->pool->hdr.pool, poolset->replica[0]->part[0].hdr, + sizeof(struct pool_hdr)); + + if (loc->pool_hdr_modified) { + struct pool_hdr hdr; + memcpy(&hdr, &ppc->pool->hdr.pool, sizeof(struct pool_hdr)); + util_convert2h_hdr_nocheck(&hdr); + pool_params_from_header(&ppc->pool->params, &hdr); + } +} diff --git a/src/pmdk/src/libpmempool/check_sds.c b/src/pmdk/src/libpmempool/check_sds.c new file mode 100644 index 000000000..520421ea8 --- /dev/null +++ b/src/pmdk/src/libpmempool/check_sds.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2020, Intel Corporation */ + +/* + * check_shutdown_state.c -- shutdown state check + */ + +#include +#include 
+#include
+#include
+
+#include "out.h"
+#include "util_pmem.h"
+#include "libpmempool.h"
+#include "libpmem.h"
+#include "pmempool.h"
+#include "pool.h"
+#include "set.h"
+#include "check_util.h"
+
+enum question {
+	Q_RESET_SDS,
+};
+
+#define SDS_CHECK_STR "checking shutdown state"
+#define SDS_OK_STR "shutdown state correct"
+#define SDS_DIRTY_STR "shutdown state is dirty"
+
+#define ADR_FAILURE_STR \
+	"an ADR failure was detected - your pool might be corrupted"
+
+#define ZERO_SDS_STR \
+	"Do you want to zero shutdown state?"
+
+#define RESET_SDS_STR \
+	"Do you want to reset shutdown state at your own risk? " \
+	"If you have more than one replica, you will have to " \
+	"synchronize your pool after this operation."
+
+#define SDS_FAIL_MSG(hdrp) \
+	IGNORE_SDS(hdrp) ? SDS_DIRTY_STR : ADR_FAILURE_STR
+
+#define SDS_REPAIR_MSG(hdrp) \
+	IGNORE_SDS(hdrp) \
+		? SDS_DIRTY_STR ".|" ZERO_SDS_STR \
+		: ADR_FAILURE_STR ".|" RESET_SDS_STR
+
+/*
+ * sds_check_replica -- (internal) check if replica is healthy
+ */
+static int
+sds_check_replica(location *loc)
+{
+	LOG(3, NULL);
+
+	struct pool_replica *rep = REP(loc->set, loc->replica);
+
+	if (rep->remote)
+		return 0;
+
+	/* make a copy of the sds as we shouldn't modify the pool */
+	struct shutdown_state old_sds = loc->hdr.sds;
+	struct shutdown_state curr_sds;
+
+	if (IGNORE_SDS(&loc->hdr))
+		return util_is_zeroed(&old_sds, sizeof(old_sds)) ? 0 : -1;
+
+	shutdown_state_init(&curr_sds, NULL);
+
+	/* get current shutdown state */
+	for (unsigned p = 0; p < rep->nparts; ++p) {
+		if (shutdown_state_add_part(&curr_sds,
+				PART(rep, p)->fd, NULL))
+			return -1;
+	}
+
+	/* compare current and old shutdown state */
+	return shutdown_state_check(&curr_sds, &old_sds, NULL);
+}
+
+/*
+ * sds_check -- (internal) check shutdown_state
+ */
+static int
+sds_check(PMEMpoolcheck *ppc, location *loc)
+{
+	LOG(3, NULL);
+
+	CHECK_INFO(ppc, "%s" SDS_CHECK_STR, loc->prefix);
+
+	/* shutdown state is valid */
+	if (!sds_check_replica(loc)) {
+		CHECK_INFO(ppc, "%s" SDS_OK_STR, loc->prefix);
+		loc->step = CHECK_STEP_COMPLETE;
+		return 0;
+	}
+
+	/* shutdown state is NOT valid and can NOT be repaired */
+	if (CHECK_IS_NOT(ppc, REPAIR)) {
+		check_end(ppc->data);
+		ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+		return CHECK_ERR(ppc, "%s%s", loc->prefix,
+			SDS_FAIL_MSG(&loc->hdr));
+	}
+
+	/* shutdown state is NOT valid but can be repaired */
+	CHECK_ASK(ppc, Q_RESET_SDS, "%s%s", loc->prefix,
+		SDS_REPAIR_MSG(&loc->hdr));
+	return check_questions_sequence_validate(ppc);
+}
+
+/*
+ * sds_fix -- (internal) fix shutdown state
+ */
+static int
+sds_fix(PMEMpoolcheck *ppc, location *loc, uint32_t question,
+	void *context)
+{
+	LOG(3, NULL);
+
+	switch (question) {
+	case Q_RESET_SDS:
+		CHECK_INFO(ppc, "%sresetting pool_hdr.sds", loc->prefix);
+		memset(&loc->hdr.sds, 0, sizeof(loc->hdr.sds));
+		++loc->healthy_replicas;
+		break;
+	default:
+		ERR("not implemented question id: %u", question);
+	}
+	return 0;
+}
+
+struct step {
+	int (*check)(PMEMpoolcheck *, location *);
+	int (*fix)(PMEMpoolcheck *, location *, uint32_t, void *);
+};
+
+static const struct step steps[] = {
+	{
+		.check = sds_check,
+	},
+	{
+		.fix = sds_fix,
+	},
+	{
+		.check = NULL,
+		.fix = NULL,
+	},
+};
+
+/*
+ * step_exe -- (internal) perform single step according to its parameters
+ */
+static int
+step_exe(PMEMpoolcheck *ppc, const struct step *steps, location *loc)
+{
+	const struct step *step = &steps[loc->step++];
+
+	if (!step->fix)
+		return step->check(ppc, loc);
+
+	if (!check_has_answer(ppc->data))
+		
return 0; + + if (check_answer_loop(ppc, loc, NULL, 0 /* fail on no */, step->fix)) + return -1; + + util_convert2le_hdr(&loc->hdr); + memcpy(loc->hdrp, &loc->hdr, sizeof(loc->hdr)); + util_persist_auto(loc->is_dev_dax, loc->hdrp, sizeof(*loc->hdrp)); + + util_convert2h_hdr_nocheck(&loc->hdr); + loc->pool_hdr_modified = 1; + + return 0; +} + +/* + * init_prefix -- prepare prefix for messages + */ +static void +init_prefix(location *loc) +{ + if (loc->set->nreplicas > 1) { + int ret = util_snprintf(loc->prefix, PREFIX_MAX_SIZE, + "replica %u: ", + loc->replica); + if (ret < 0) + FATAL("!snprintf"); + } else + loc->prefix[0] = '\0'; + loc->step = 0; +} + +/* + * init_location_data -- (internal) prepare location information + */ +static void +init_location_data(PMEMpoolcheck *ppc, location *loc) +{ + ASSERTeq(loc->part, 0); + + loc->set = ppc->pool->set_file->poolset; + + if (ppc->result != CHECK_RESULT_PROCESS_ANSWERS) + init_prefix(loc); + + struct pool_replica *rep = REP(loc->set, loc->replica); + loc->hdrp = HDR(rep, loc->part); + memcpy(&loc->hdr, loc->hdrp, sizeof(loc->hdr)); + util_convert2h_hdr_nocheck(&loc->hdr); + loc->is_dev_dax = PART(rep, 0)->is_dev_dax; +} + +/* + * sds_get_healthy_replicas_num -- (internal) get number of healthy replicas + */ +static void +sds_get_healthy_replicas_num(PMEMpoolcheck *ppc, location *loc) +{ + const unsigned nreplicas = ppc->pool->set_file->poolset->nreplicas; + loc->healthy_replicas = 0; + loc->part = 0; + + for (; loc->replica < nreplicas; loc->replica++) { + init_location_data(ppc, loc); + + if (!sds_check_replica(loc)) { + ++loc->healthy_replicas; /* healthy replica found */ + } + } + + loc->replica = 0; /* reset replica index */ +} + +/* + * check_sds -- entry point for shutdown state checks + */ +void +check_sds(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + const unsigned nreplicas = ppc->pool->set_file->poolset->nreplicas; + location *loc = check_get_step_data(ppc->data); + + if (!loc->init_done) { + sds_get_healthy_replicas_num(ppc, loc); + + if (loc->healthy_replicas == nreplicas) { + /* all replicas have healthy shutdown state */ + /* print summary */ + for (; loc->replica < nreplicas; loc->replica++) { + init_prefix(loc); + CHECK_INFO(ppc, "%s" SDS_CHECK_STR, + loc->prefix); + CHECK_INFO(ppc, "%s" SDS_OK_STR, loc->prefix); + } + return; + } else if (loc->healthy_replicas > 0) { + ppc->sync_required = true; + return; + } + loc->init_done = true; + } + + /* produce single healthy replica */ + loc->part = 0; + for (; loc->replica < nreplicas; loc->replica++) { + init_location_data(ppc, loc); + + while (CHECK_NOT_COMPLETE(loc, steps)) { + ASSERT(loc->step < ARRAY_SIZE(steps)); + if (step_exe(ppc, steps, loc)) + return; + } + + if (loc->healthy_replicas) + break; + } + + if (loc->healthy_replicas == 0) { + ppc->result = CHECK_RESULT_NOT_CONSISTENT; + CHECK_ERR(ppc, "cannot complete repair, reverting changes"); + } else if (loc->healthy_replicas < nreplicas) { + ppc->sync_required = true; + } +} diff --git a/src/pmdk/src/libpmempool/check_util.c b/src/pmdk/src/libpmempool/check_util.c new file mode 100644 index 000000000..a95967ccc --- /dev/null +++ b/src/pmdk/src/libpmempool/check_util.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * check_util.c -- check utility functions + */ + +#include +#include + +#include "out.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +#define CHECK_END UINT_MAX + +/* separate info part of message 
from question part of message */ +#define MSG_SEPARATOR '|' + +/* error part of message must have '.' at the end */ +#define MSG_PLACE_OF_SEPARATION '.' +#define MAX_MSG_STR_SIZE 8192 + +#define CHECK_ANSWER_YES "yes" +#define CHECK_ANSWER_NO "no" + +#define STR_MAX 256 +#define TIME_STR_FMT "%a %b %d %Y %H:%M:%S" + +#define UUID_STR_MAX 37 + +enum check_answer { + PMEMPOOL_CHECK_ANSWER_EMPTY, + PMEMPOOL_CHECK_ANSWER_YES, + PMEMPOOL_CHECK_ANSWER_NO, + PMEMPOOL_CHECK_ANSWER_DEFAULT, +}; + +/* queue of check statuses */ +struct check_status { + PMDK_TAILQ_ENTRY(check_status) next; + struct pmempool_check_status status; + unsigned question; + enum check_answer answer; + char *msg; +}; + +PMDK_TAILQ_HEAD(check_status_head, check_status); + +/* check control context */ +struct check_data { + unsigned step; + location step_data; + + struct check_status *error; + struct check_status_head infos; + struct check_status_head questions; + struct check_status_head answers; + + struct check_status *check_status_cache; +}; + +/* + * check_data_alloc -- allocate and initialize check_data structure + */ +struct check_data * +check_data_alloc(void) +{ + LOG(3, NULL); + + struct check_data *data = calloc(1, sizeof(*data)); + if (data == NULL) { + ERR("!calloc"); + return NULL; + } + + PMDK_TAILQ_INIT(&data->infos); + PMDK_TAILQ_INIT(&data->questions); + PMDK_TAILQ_INIT(&data->answers); + + return data; +} + +/* + * check_data_free -- clean and deallocate check_data + */ +void +check_data_free(struct check_data *data) +{ + LOG(3, NULL); + + if (data->error != NULL) { + free(data->error); + data->error = NULL; + } + + if (data->check_status_cache != NULL) { + free(data->check_status_cache); + data->check_status_cache = NULL; + } + + while (!PMDK_TAILQ_EMPTY(&data->infos)) { + struct check_status *statp = PMDK_TAILQ_FIRST(&data->infos); + PMDK_TAILQ_REMOVE(&data->infos, statp, next); + free(statp); + } + + while (!PMDK_TAILQ_EMPTY(&data->questions)) { + struct check_status *statp = PMDK_TAILQ_FIRST(&data->questions); + PMDK_TAILQ_REMOVE(&data->questions, statp, next); + free(statp); + } + + while (!PMDK_TAILQ_EMPTY(&data->answers)) { + struct check_status *statp = PMDK_TAILQ_FIRST(&data->answers); + PMDK_TAILQ_REMOVE(&data->answers, statp, next); + free(statp); + } + + free(data); +} + +/* + * check_step_get - return current check step number + */ +uint32_t +check_step_get(struct check_data *data) +{ + return data->step; +} + +/* + * check_step_inc -- move to next step number + */ +void +check_step_inc(struct check_data *data) +{ + if (check_is_end_util(data)) + return; + + ++data->step; + memset(&data->step_data, 0, sizeof(location)); +} + +/* + * check_get_step_data -- return pointer to check step data + */ +location * +check_get_step_data(struct check_data *data) +{ + return &data->step_data; +} + +/* + * check_end -- mark check as ended + */ +void +check_end(struct check_data *data) +{ + LOG(3, NULL); + + data->step = CHECK_END; +} + +/* + * check_is_end_util -- return if check has ended + */ +int +check_is_end_util(struct check_data *data) +{ + return data->step == CHECK_END; +} + +/* + * status_alloc -- (internal) allocate and initialize check_status + */ +static inline struct check_status * +status_alloc(void) +{ + struct check_status *status = malloc(sizeof(*status)); + if (!status) + FATAL("!malloc"); + status->msg = malloc(sizeof(char) * MAX_MSG_STR_SIZE); + if (!status->msg) { + free(status); + FATAL("!malloc"); + } + status->status.str.msg = status->msg; + status->answer = 
PMEMPOOL_CHECK_ANSWER_EMPTY;
+	status->question = CHECK_INVALID_QUESTION;
+	return status;
+}
+
+/*
+ * status_release -- (internal) release check_status
+ */
+static void
+status_release(struct check_status *status)
+{
+#ifdef _WIN32
+	/* dealloc duplicate string after conversion */
+	if (status->status.str.msg != status->msg)
+		free((void *)status->status.str.msg);
+#endif
+	free(status->msg);
+	free(status);
+}
+
+/*
+ * status_msg_info_only -- (internal) separate info part of the message
+ *
+ * If the message has the form "info.|question", it is modified to
+ * "info\0|question" so that only the info part remains visible.
+ */
+static inline int
+status_msg_info_only(const char *msg)
+{
+	char *sep = strchr(msg, MSG_SEPARATOR);
+	if (sep) {
+		ASSERTne(sep, msg);
+		--sep;
+		ASSERTeq(*sep, MSG_PLACE_OF_SEPARATION);
+		*sep = '\0';
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * status_msg_info_and_question -- (internal) join info and question
+ *
+ * If the message has the form "info.|question", the MSG_SEPARATOR '|' is
+ * replaced with a space to get "info. question".
+ */
+static inline int
+status_msg_info_and_question(const char *msg)
+{
+	char *sep = strchr(msg, MSG_SEPARATOR);
+	if (sep) {
+		*sep = ' ';
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * status_push -- (internal) push single status object
+ */
+static int
+status_push(PMEMpoolcheck *ppc, struct check_status *st, uint32_t question)
+{
+	if (st->status.type == PMEMPOOL_CHECK_MSG_TYPE_ERROR) {
+		ASSERTeq(ppc->data->error, NULL);
+		ppc->data->error = st;
+		return -1;
+	} else if (st->status.type == PMEMPOOL_CHECK_MSG_TYPE_INFO) {
+		if (CHECK_IS(ppc, VERBOSE))
+			PMDK_TAILQ_INSERT_TAIL(&ppc->data->infos, st, next);
+		else
+			check_status_release(ppc, st);
+		return 0;
+	}
+
+	/* st->status.type == PMEMPOOL_CHECK_MSG_TYPE_QUESTION */
+	if (CHECK_IS_NOT(ppc, REPAIR)) {
+		/* error status */
+		if (status_msg_info_only(st->msg)) {
+			ERR("no error message for the user");
+			st->msg[0] = '\0';
+		}
+		st->status.type = PMEMPOOL_CHECK_MSG_TYPE_ERROR;
+		return status_push(ppc, st, question);
+	}
+
+	if (CHECK_IS(ppc, ALWAYS_YES)) {
+		if (!status_msg_info_only(st->msg)) {
+			/* information status */
+			st->status.type = PMEMPOOL_CHECK_MSG_TYPE_INFO;
+			status_push(ppc, st, question);
+			st = status_alloc();
+		}
+
+		/* answer status */
+		ppc->result = CHECK_RESULT_PROCESS_ANSWERS;
+		st->question = question;
+		st->answer = PMEMPOOL_CHECK_ANSWER_YES;
+		st->status.type = PMEMPOOL_CHECK_MSG_TYPE_QUESTION;
+		PMDK_TAILQ_INSERT_TAIL(&ppc->data->answers, st, next);
+	} else {
+		/* question message */
+		status_msg_info_and_question(st->msg);
+		st->question = question;
+		ppc->result = CHECK_RESULT_ASK_QUESTIONS;
+		st->answer = PMEMPOOL_CHECK_ANSWER_EMPTY;
+		PMDK_TAILQ_INSERT_TAIL(&ppc->data->questions, st, next);
+	}
+
+	return 0;
+}
+
+/*
+ * check_status_create -- create single status, push it to proper queue
+ *
+ * A MSG_SEPARATOR character in fmt is treated as the message separator, e.g.
+ * "pool_hdr.crtime is not valid.|Do you want to fix it?". When a question is
+ * created but the check arguments do not allow making any changes (so asking
+ * a question is pointless), the part of the message before the MSG_SEPARATOR
+ * character is used to create an error message instead. The character just
+ * before the separator must be a MSG_PLACE_OF_SEPARATION character. Returns
+ * a non-zero value if an error status was created.
+ *
+ * The arg is an additional argument for the specified type of status.
+ */
+int
+check_status_create(PMEMpoolcheck *ppc, enum pmempool_check_msg_type type,
+	uint32_t arg, const char *fmt, ...)
+{
+	if (CHECK_IS_NOT(ppc, VERBOSE) && type == PMEMPOOL_CHECK_MSG_TYPE_INFO)
+		return 0;
+
+	struct check_status *st = status_alloc();
+	ASSERT(CHECK_IS(ppc, FORMAT_STR));
+
+	va_list ap;
+	va_start(ap, fmt);
+	int p = vsnprintf(st->msg, MAX_MSG_STR_SIZE, fmt, ap);
+	va_end(ap);
+
+	/* append possible strerror at the end of the message */
+	if (type != PMEMPOOL_CHECK_MSG_TYPE_QUESTION && arg && p > 0) {
+		char buff[UTIL_MAX_ERR_MSG];
+		util_strerror((int)arg, buff, UTIL_MAX_ERR_MSG);
+		int ret = util_snprintf(st->msg + p,
+			MAX_MSG_STR_SIZE - (size_t)p, ": %s", buff);
+		if (ret < 0) {
+			ERR("!snprintf");
+			status_release(st);
+			return -1;
+		}
+	}
+
+	st->status.type = type;
+
+	return status_push(ppc, st, arg);
+}
+
+/*
+ * check_status_release -- release single status object
+ */
+void
+check_status_release(PMEMpoolcheck *ppc, struct check_status *status)
+{
+	if (status->status.type == PMEMPOOL_CHECK_MSG_TYPE_ERROR)
+		ppc->data->error = NULL;
+
+	status_release(status);
+}
+
+/*
+ * pop_status -- (internal) pop single message from check_status queue
+ */
+static struct check_status *
+pop_status(struct check_data *data, struct check_status_head *queue)
+{
+	if (!PMDK_TAILQ_EMPTY(queue)) {
+		ASSERTeq(data->check_status_cache, NULL);
+		data->check_status_cache = PMDK_TAILQ_FIRST(queue);
+		PMDK_TAILQ_REMOVE(queue, data->check_status_cache, next);
+		return data->check_status_cache;
+	}
+
+	return NULL;
+}
+
+/*
+ * check_pop_question -- pop single question from questions queue
+ */
+struct check_status *
+check_pop_question(struct check_data *data)
+{
+	return pop_status(data, &data->questions);
+}
+
+/*
+ * check_pop_info -- pop single info from information queue
+ */
+struct check_status *
+check_pop_info(struct check_data *data)
+{
+	return pop_status(data, &data->infos);
+}
+
+/*
+ * check_pop_error -- pop error from state
+ */
+struct check_status *
+check_pop_error(struct check_data *data)
+{
+	if (data->error) {
+		ASSERTeq(data->check_status_cache, NULL);
+
+		data->check_status_cache = data->error;
+		data->error = NULL;
+		return data->check_status_cache;
+	}
+
+	return NULL;
+}
+
+#ifdef _WIN32
+void
+cache_to_utf8(struct check_data *data, char *buf, size_t size)
+{
+	if (data->check_status_cache == NULL)
+		return;
+
+	struct check_status *status = data->check_status_cache;
+
+	/* if it was a question, convert it and the answer to utf8 */
+	if (status->status.type == PMEMPOOL_CHECK_MSG_TYPE_QUESTION) {
+		struct pmempool_check_statusW *wstatus =
+			(struct pmempool_check_statusW *)&status->status;
+		wchar_t *wstring = (wchar_t *)wstatus->str.msg;
+		status->status.str.msg = util_toUTF8(wstring);
+		if (status->status.str.msg == NULL)
+			FATAL("!malloc");
+		util_free_UTF16(wstring);
+
+		if (util_toUTF8_buff(wstatus->str.answer, buf, size) != 0)
+			FATAL("Invalid answer conversion %s",
+				out_get_errormsg());
+		status->status.str.answer = buf;
+	}
+}
+#endif
+
+/*
+ * check_clear_status_cache -- release check_status from cache
+ */
+void
+check_clear_status_cache(struct check_data *data)
+{
+	if (data->check_status_cache) {
+		switch (data->check_status_cache->status.type) {
+		case PMEMPOOL_CHECK_MSG_TYPE_INFO:
+		case PMEMPOOL_CHECK_MSG_TYPE_ERROR:
+			/*
+			 * Info and error statuses are disposable. After
+			 * showing them to the user we have to release them.
+			 */
+			status_release(data->check_status_cache);
+			data->check_status_cache = NULL;
+			break;
+		case PMEMPOOL_CHECK_MSG_TYPE_QUESTION:
+			/*
+			 * A question status, after being shown to the user,
+			 * carries 
the user's answer. It must be kept until the
+			 * answer is processed, so it cannot be released from
+			 * the cache. It has to be pushed to the answers
+			 * queue, processed and released after that.
+			 */
+			break;
+		default:
+			ASSERT(0);
+		}
+	}
+}
+
+/*
+ * status_answer_push -- (internal) push single answer to answers queue
+ */
+static void
+status_answer_push(struct check_data *data, struct check_status *st)
+{
+	ASSERTeq(st->status.type, PMEMPOOL_CHECK_MSG_TYPE_QUESTION);
+	PMDK_TAILQ_INSERT_TAIL(&data->answers, st, next);
+}
+
+/*
+ * check_push_answer -- process answer and push it to answers queue
+ */
+int
+check_push_answer(PMEMpoolcheck *ppc)
+{
+	if (ppc->data->check_status_cache == NULL)
+		return 0;
+
+	/* check if answer is "yes" or "no" */
+	struct check_status *status = ppc->data->check_status_cache;
+	if (status->status.str.answer != NULL) {
+		if (strcmp(status->status.str.answer, CHECK_ANSWER_YES) == 0)
+			status->answer = PMEMPOOL_CHECK_ANSWER_YES;
+		else if (strcmp(status->status.str.answer, CHECK_ANSWER_NO)
+				== 0)
+			status->answer = PMEMPOOL_CHECK_ANSWER_NO;
+	}
+
+	if (status->answer == PMEMPOOL_CHECK_ANSWER_EMPTY) {
+		/* invalid answer provided */
+		status_answer_push(ppc->data, ppc->data->check_status_cache);
+		ppc->data->check_status_cache = NULL;
+		CHECK_INFO(ppc, "Answer must be either %s or %s",
+			CHECK_ANSWER_YES, CHECK_ANSWER_NO);
+		return -1;
+	}
+
+	/* push answer */
+	PMDK_TAILQ_INSERT_TAIL(&ppc->data->answers,
+		ppc->data->check_status_cache, next);
+	ppc->data->check_status_cache = NULL;
+
+	return 0;
+}
+
+/*
+ * check_has_error -- check if error exists
+ */
+bool
+check_has_error(struct check_data *data)
+{
+	return data->error != NULL;
+}
+
+/*
+ * check_has_answer -- check if any answer exists
+ */
+bool
+check_has_answer(struct check_data *data)
+{
+	return !PMDK_TAILQ_EMPTY(&data->answers);
+}
+
+/*
+ * pop_answer -- (internal) pop single answer from answers queue
+ */
+static struct check_status *
+pop_answer(struct check_data *data)
+{
+	struct check_status *ret = NULL;
+	if (!PMDK_TAILQ_EMPTY(&data->answers)) {
+		ret = PMDK_TAILQ_FIRST(&data->answers);
+		PMDK_TAILQ_REMOVE(&data->answers, ret, next);
+	}
+	return ret;
+}
+
+/*
+ * check_status_get_util -- extract pmempool_check_status from check_status
+ */
+struct pmempool_check_status *
+check_status_get_util(struct check_status *status)
+{
+	return &status->status;
+}
+
+/*
+ * check_answer_loop -- loop through all available answers and process them
+ */
+int
+check_answer_loop(PMEMpoolcheck *ppc, location *data, void *ctx, int fail_on_no,
+	int (*callback)(PMEMpoolcheck *, location *, uint32_t, void *ctx))
+{
+	struct check_status *answer;
+
+	while ((answer = pop_answer(ppc->data)) != NULL) {
+		/* if answer is "no" we cannot fix an issue */
+		if (answer->answer != PMEMPOOL_CHECK_ANSWER_YES) {
+			if (fail_on_no ||
+				answer->answer != PMEMPOOL_CHECK_ANSWER_NO) {
+				CHECK_ERR(ppc,
+					"cannot complete repair, reverting changes");
+				ppc->result = CHECK_RESULT_NOT_CONSISTENT;
+				goto error;
+			}
+
+			ppc->result = CHECK_RESULT_REPAIRED;
+			check_status_release(ppc, answer);
+			continue;
+		}
+
+		/* perform fix */
+		if (callback(ppc, data, answer->question, ctx)) {
+			ppc->result = CHECK_RESULT_CANNOT_REPAIR;
+			goto error;
+		}
+
+		if (ppc->result == CHECK_RESULT_ERROR)
+			goto error;
+
+		/* fix succeeded */
+		ppc->result = CHECK_RESULT_REPAIRED;
+		check_status_release(ppc, answer);
+	}
+
+	return 0;
+
+error:
+	check_status_release(ppc, answer);
+	return -1;
+}
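+
+/*
+ * A typical round trip through the queues (illustrative): a check step asks
+ *
+ *	CHECK_ASK(ppc, Q_CHECKSUM,
+ *		"%sinvalid checksum.|Do you want to regenerate checksum?",
+ *		loc->prefix);
+ *
+ * the caller presents the question, check_push_answer() validates the
+ * "yes"/"no" reply, and check_answer_loop() invokes the step's fix callback
+ * for every affirmative answer.
+ */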
+
+/*
+ * check_questions_sequence_validate -- generate return value from result
+ *
+ * A sequence of questions can result in one of the following results:
+ * CONSISTENT, REPAIRED, ASK_QUESTIONS or PROCESS_ANSWERS. If result ==
+ * ASK_QUESTIONS, it returns -1 to indicate the existence of unanswered
+ * questions.
+ */
+int
+check_questions_sequence_validate(PMEMpoolcheck *ppc)
+{
+	ASSERT(ppc->result == CHECK_RESULT_CONSISTENT ||
+		ppc->result == CHECK_RESULT_ASK_QUESTIONS ||
+		ppc->result == CHECK_RESULT_PROCESS_ANSWERS ||
+		ppc->result == CHECK_RESULT_REPAIRED);
+	if (ppc->result == CHECK_RESULT_ASK_QUESTIONS) {
+		ASSERT(!PMDK_TAILQ_EMPTY(&ppc->data->questions));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * check_get_time_str -- returns time in human-readable format
+ */
+const char *
+check_get_time_str(time_t time)
+{
+	static char str_buff[STR_MAX] = {0, };
+	struct tm *tm = util_localtime(&time);
+
+	if (tm)
+		strftime(str_buff, STR_MAX, TIME_STR_FMT, tm);
+	else {
+		int ret = util_snprintf(str_buff, STR_MAX, "unknown");
+		if (ret < 0) {
+			ERR("!snprintf");
+			return "";
+		}
+	}
+	return str_buff;
+}
+
+/*
+ * check_get_uuid_str -- returns uuid in human-readable format
+ */
+const char *
+check_get_uuid_str(uuid_t uuid)
+{
+	static char uuid_str[UUID_STR_MAX] = {0, };
+
+	int ret = util_uuid_to_string(uuid, uuid_str);
+	if (ret != 0) {
+		ERR("failed to convert uuid to string");
+		return "";
+	}
+	return uuid_str;
+}
+
+/*
+ * check_insert_arena -- insert arena to list
+ */
+void
+check_insert_arena(PMEMpoolcheck *ppc, struct arena *arenap)
+{
+	PMDK_TAILQ_INSERT_TAIL(&ppc->pool->arenas, arenap, next);
+	ppc->pool->narenas++;
+}
diff --git a/src/pmdk/src/libpmempool/check_util.h b/src/pmdk/src/libpmempool/check_util.h
new file mode 100644
index 000000000..f94dceeaf
--- /dev/null
+++ b/src/pmdk/src/libpmempool/check_util.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * check_util.h -- internal definitions for check util
+ */
+#ifndef CHECK_UTIL_H
+#define CHECK_UTIL_H
+
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CHECK_STEP_COMPLETE	UINT_MAX
+#define CHECK_INVALID_QUESTION	UINT_MAX
+
+#define REQUIRE_ADVANCED "the following error can be fixed using " \
+	"PMEMPOOL_CHECK_ADVANCED flag"
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* check control context */
+struct check_data;
+struct arena;
+
+/* queue of check statuses */
+struct check_status;
+
+/* container storing state of all check steps */
+#define PREFIX_MAX_SIZE 30
+typedef struct {
+	unsigned init_done;
+	unsigned step;
+
+	unsigned replica;
+	unsigned part;
+
+	int single_repl;
+	int single_part;
+
+	struct pool_set *set;
+	int is_dev_dax;
+
+	struct pool_hdr *hdrp;
+	/* copy of the pool header in host byte order */
+	struct pool_hdr hdr;
+	int hdr_valid;
+	/*
+	 * If the pool header has been modified, this field indicates that
+	 * the pool parameters structure requires a refresh.
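+	 * check_pool_hdr() and check_pool_hdr_uuids() perform the refresh:
+	 * when the flag is set they re-read the fixed header and rebuild
+	 * the parameters via pool_params_from_header().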
+ */ + int pool_hdr_modified; + + unsigned healthy_replicas; + + struct pool_hdr *next_part_hdrp; + struct pool_hdr *prev_part_hdrp; + struct pool_hdr *next_repl_hdrp; + struct pool_hdr *prev_repl_hdrp; + + int next_part_hdr_valid; + int prev_part_hdr_valid; + int next_repl_hdr_valid; + int prev_repl_hdr_valid; + + /* valid poolset uuid */ + uuid_t *valid_puuid; + /* valid part uuid */ + uuid_t *valid_uuid; + + /* valid part pool header */ + struct pool_hdr *valid_part_hdrp; + int valid_part_done; + unsigned valid_part_replica; + + char prefix[PREFIX_MAX_SIZE]; + + struct arena *arenap; + uint64_t offset; + uint32_t narena; + + uint8_t *bitmap; + uint8_t *dup_bitmap; + uint8_t *fbitmap; + + struct list *list_inval; + struct list *list_flog_inval; + struct list *list_unmap; + + struct { + int btti_header; + int btti_backup; + } valid; + + struct { + struct btt_info btti; + uint64_t btti_offset; + } pool_valid; +} location; + +/* check steps */ +void check_bad_blocks(PMEMpoolcheck *ppc); +void check_backup(PMEMpoolcheck *ppc); +void check_pool_hdr(PMEMpoolcheck *ppc); +void check_pool_hdr_uuids(PMEMpoolcheck *ppc); +void check_sds(PMEMpoolcheck *ppc); +void check_log(PMEMpoolcheck *ppc); +void check_blk(PMEMpoolcheck *ppc); +void check_btt_info(PMEMpoolcheck *ppc); +void check_btt_map_flog(PMEMpoolcheck *ppc); +void check_write(PMEMpoolcheck *ppc); + +struct check_data *check_data_alloc(void); +void check_data_free(struct check_data *data); + +uint32_t check_step_get(struct check_data *data); +void check_step_inc(struct check_data *data); +location *check_get_step_data(struct check_data *data); + +void check_end(struct check_data *data); +int check_is_end_util(struct check_data *data); + +int check_status_create(PMEMpoolcheck *ppc, enum pmempool_check_msg_type type, + uint32_t arg, const char *fmt, ...) 
FORMAT_PRINTF(4, 5); +void check_status_release(PMEMpoolcheck *ppc, struct check_status *status); +void check_clear_status_cache(struct check_data *data); +struct check_status *check_pop_question(struct check_data *data); +struct check_status *check_pop_error(struct check_data *data); +struct check_status *check_pop_info(struct check_data *data); +bool check_has_error(struct check_data *data); +bool check_has_answer(struct check_data *data); +int check_push_answer(PMEMpoolcheck *ppc); + +struct pmempool_check_status *check_status_get_util( + struct check_status *status); +int check_status_is(struct check_status *status, + enum pmempool_check_msg_type type); + +/* create info status */ +#define CHECK_INFO(ppc, ...)\ + check_status_create(ppc, PMEMPOOL_CHECK_MSG_TYPE_INFO, 0, __VA_ARGS__) + +/* create info status and append error message based on errno */ +#define CHECK_INFO_ERRNO(ppc, ...)\ + check_status_create(ppc, PMEMPOOL_CHECK_MSG_TYPE_INFO,\ + (uint32_t)errno, __VA_ARGS__) + +/* create error status */ +#define CHECK_ERR(ppc, ...)\ + check_status_create(ppc, PMEMPOOL_CHECK_MSG_TYPE_ERROR, 0, __VA_ARGS__) + +/* create question status */ +#define CHECK_ASK(ppc, question, ...)\ + check_status_create(ppc, PMEMPOOL_CHECK_MSG_TYPE_QUESTION, question,\ + __VA_ARGS__) + +#define CHECK_NOT_COMPLETE(loc, steps)\ + ((loc)->step != CHECK_STEP_COMPLETE &&\ + ((steps)[(loc)->step].check != NULL ||\ + (steps)[(loc)->step].fix != NULL)) + +int check_answer_loop(PMEMpoolcheck *ppc, location *data, + void *ctx, int fail_on_no, + int (*callback)(PMEMpoolcheck *, location *, uint32_t, void *ctx)); +int check_questions_sequence_validate(PMEMpoolcheck *ppc); + +const char *check_get_time_str(time_t time); +const char *check_get_uuid_str(uuid_t uuid); +const char *check_get_pool_type_str(enum pool_type type); + +void check_insert_arena(PMEMpoolcheck *ppc, struct arena *arenap); + +#ifdef _WIN32 +void cache_to_utf8(struct check_data *data, char *buf, size_t size); +#endif + +#define CHECK_IS(ppc, flag)\ + util_flag_isset((ppc)->args.flags, PMEMPOOL_CHECK_ ## flag) + +#define CHECK_IS_NOT(ppc, flag)\ + util_flag_isclr((ppc)->args.flags, PMEMPOOL_CHECK_ ## flag) + +#define CHECK_WITHOUT_FIXING(ppc)\ + CHECK_IS_NOT(ppc, REPAIR) || CHECK_IS(ppc, DRY_RUN) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmempool/check_write.c b/src/pmdk/src/libpmempool/check_write.c new file mode 100644 index 000000000..8321bd17e --- /dev/null +++ b/src/pmdk/src/libpmempool/check_write.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * check_write.c -- write fixed data back + */ + +#include +#include + +#include "out.h" +#include "btt.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check_util.h" + +enum questions { + Q_REPAIR_MAP, + Q_REPAIR_FLOG, +}; + +/* + * log_write -- (internal) write all structures for log pool + */ +static int +log_write(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (CHECK_WITHOUT_FIXING(ppc)) + return 0; + + /* endianness conversion */ + struct pmemlog *log = &ppc->pool->hdr.log; + log_convert2le(log); + + if (pool_write(ppc->pool, log, sizeof(*log), 0)) { + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "writing pmemlog structure failed"); + } + + return 0; +} + +/* + * blk_write_flog -- (internal) convert and write flog to file + */ +static int +blk_write_flog(PMEMpoolcheck *ppc, struct arena *arenap) +{ + if (!arenap->flog) { + ppc->result = 
CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "flog is missing"); + } + + uint64_t flogoff = arenap->offset + arenap->btt_info.flogoff; + + uint8_t *ptr = arenap->flog; + uint32_t i; + for (i = 0; i < arenap->btt_info.nfree; i++) { + struct btt_flog *flog = (struct btt_flog *)ptr; + btt_flog_convert2le(&flog[0]); + btt_flog_convert2le(&flog[1]); + ptr += BTT_FLOG_PAIR_ALIGN; + } + + if (pool_write(ppc->pool, arenap->flog, arenap->flogsize, flogoff)) { + CHECK_INFO(ppc, "%s", ppc->path); + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "arena %u: writing BTT FLOG failed\n", + arenap->id); + } + + return 0; +} + +/* + * blk_write_map -- (internal) convert and write map to file + */ +static int +blk_write_map(PMEMpoolcheck *ppc, struct arena *arenap) +{ + if (!arenap->map) { + ppc->result = CHECK_RESULT_ERROR; + return CHECK_ERR(ppc, "map is missing"); + } + + uint64_t mapoff = arenap->offset + arenap->btt_info.mapoff; + + uint32_t i; + for (i = 0; i < arenap->btt_info.external_nlba; i++) + arenap->map[i] = htole32(arenap->map[i]); + + if (pool_write(ppc->pool, arenap->map, arenap->mapsize, mapoff)) { + CHECK_INFO(ppc, "%s", ppc->path); + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "arena %u: writing BTT map failed\n", + arenap->id); + } + + return 0; +} + +/* + * blk_write -- (internal) write all structures for blk pool + */ +static int +blk_write(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + if (CHECK_WITHOUT_FIXING(ppc)) + return 0; + + /* endianness conversion */ + ppc->pool->hdr.blk.bsize = htole32(ppc->pool->hdr.blk.bsize); + + if (pool_write(ppc->pool, &ppc->pool->hdr.blk, + sizeof(ppc->pool->hdr.blk), 0)) { + CHECK_INFO(ppc, "%s", ppc->path); + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return CHECK_ERR(ppc, "writing pmemblk structure failed"); + } + + return 0; +} + +/* + * btt_data_write -- (internal) write BTT data + */ +static int +btt_data_write(PMEMpoolcheck *ppc, location *loc) +{ + LOG(3, NULL); + + struct arena *arenap; + + PMDK_TAILQ_FOREACH(arenap, &ppc->pool->arenas, next) { + + if (ppc->pool->uuid_op == UUID_NOT_FROM_BTT) { + memcpy(arenap->btt_info.parent_uuid, + ppc->pool->hdr.pool.poolset_uuid, + sizeof(arenap->btt_info.parent_uuid)); + + util_checksum(&arenap->btt_info, + sizeof(arenap->btt_info), + &arenap->btt_info.checksum, 1, 0); + } + + if (pool_write(ppc->pool, &arenap->btt_info, + sizeof(arenap->btt_info), arenap->offset)) { + CHECK_INFO(ppc, "%s", ppc->path); + CHECK_ERR(ppc, "arena %u: writing BTT Info failed", + arenap->id); + goto error; + } + + if (pool_write(ppc->pool, &arenap->btt_info, + sizeof(arenap->btt_info), arenap->offset + + le64toh(arenap->btt_info.infooff))) { + CHECK_INFO(ppc, "%s", ppc->path); + CHECK_ERR(ppc, + "arena %u: writing BTT Info backup failed", + arenap->id); + goto error; + } + + if (blk_write_flog(ppc, arenap)) + goto error; + + if (blk_write_map(ppc, arenap)) + goto error; + } + + return 0; + +error: + ppc->result = CHECK_RESULT_CANNOT_REPAIR; + return -1; +} + +struct step { + int (*func)(PMEMpoolcheck *, location *loc); + enum pool_type type; +}; + +static const struct step steps[] = { + { + .func = log_write, + .type = POOL_TYPE_LOG, + }, + { + .func = blk_write, + .type = POOL_TYPE_BLK, + }, + { + .func = btt_data_write, + .type = POOL_TYPE_BLK | POOL_TYPE_BTT, + }, + { + .func = NULL, + }, +}; + +/* + * step_exe -- (internal) perform single step according to its parameters + */ +static inline int +step_exe(PMEMpoolcheck *ppc, location *loc) +{ + ASSERT(loc->step < 
ARRAY_SIZE(steps)); + + const struct step *step = &steps[loc->step++]; + + /* check step conditions */ + if (!(step->type & ppc->pool->params.type)) + return 0; + + return step->func(ppc, loc); +} + +/* + * check_write -- write fixed data back + */ +void +check_write(PMEMpoolcheck *ppc) +{ + /* + * XXX: Disabling individual checks based on type should be done in the + * step structure. This however requires refactor of the step + * processing code. + */ + if (CHECK_IS_NOT(ppc, REPAIR)) + return; + + location *loc = (location *)check_get_step_data(ppc->data); + + /* do all steps */ + while (loc->step != CHECK_STEP_COMPLETE && + steps[loc->step].func != NULL) { + + if (step_exe(ppc, loc)) + return; + } +} diff --git a/src/pmdk/src/libpmempool/feature.c b/src/pmdk/src/libpmempool/feature.c new file mode 100644 index 000000000..eafeab603 --- /dev/null +++ b/src/pmdk/src/libpmempool/feature.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018, Intel Corporation */ + +/* + * feature.c -- implementation of pmempool_feature_(enable|disable|query)() + */ + +#include +#include +#include +#include +#include + +#include "libpmempool.h" +#include "util_pmem.h" +#include "pool_hdr.h" +#include "pool.h" + +#define RW 0 +#define RDONLY 1 + +#define FEATURE_INCOMPAT(X) \ + (features_t)FEAT_INCOMPAT(X) + +static const features_t f_singlehdr = FEAT_INCOMPAT(SINGLEHDR); +static const features_t f_cksum_2k = FEAT_INCOMPAT(CKSUM_2K); +static const features_t f_sds = FEAT_INCOMPAT(SDS); +static const features_t f_chkbb = FEAT_COMPAT(CHECK_BAD_BLOCKS); + +#define FEAT_INVALID \ + {UINT32_MAX, UINT32_MAX, UINT32_MAX}; + +static const features_t f_invalid = FEAT_INVALID; + +#define FEATURE_MAXPRINT ((size_t)1024) + +/* + * buff_concat -- (internal) concat formatted string to string buffer + */ +static int +buff_concat(char *buff, size_t *pos, const char *fmt, ...) 
+{
+	va_list ap;
+	va_start(ap, fmt);
+	const size_t size = FEATURE_MAXPRINT - *pos - 1;
+	int ret = vsnprintf(buff + *pos, size, fmt, ap);
+	va_end(ap);
+
+	if (ret < 0) {
+		ERR("vsnprintf");
+		return ret;
+	}
+
+	if ((size_t)ret >= size) {
+		ERR("buffer truncated %d >= %zu", ret, size);
+		return -1;
+	}
+
+	*pos += (size_t)ret;
+	return 0;
+}
+
+/*
+ * buff_concat_features -- (internal) concat features string to string buffer
+ */
+static int
+buff_concat_features(char *buff, size_t *pos, features_t f)
+{
+	return buff_concat(buff, pos,
+		"{compat 0x%x, incompat 0x%x, ro_compat 0x%x}",
+		f.compat, f.incompat, f.ro_compat);
+}
+
+/*
+ * poolset_close -- (internal) close pool set
+ */
+static void
+poolset_close(struct pool_set *set)
+{
+	for (unsigned r = 0; r < set->nreplicas; ++r) {
+		struct pool_replica *rep = REP(set, r);
+		ASSERT(!rep->remote);
+		for (unsigned p = 0; p < rep->nparts; ++p) {
+			util_unmap_hdr(PART(rep, p));
+		}
+	}
+
+	util_poolset_close(set, DO_NOT_DELETE_PARTS);
+}
+
+/*
+ * features_check -- (internal) check if features are correct
+ */
+static int
+features_check(features_t *features, struct pool_hdr *hdrp)
+{
+	static char msg[FEATURE_MAXPRINT];
+
+	struct pool_hdr hdr;
+	memcpy(&hdr, hdrp, sizeof(hdr));
+	util_convert2h_hdr_nocheck(&hdr);
+
+	/* (features != f_invalid) <=> features is set */
+	if (!util_feature_cmp(*features, f_invalid)) {
+		/* features from current and previous headers have to match */
+		if (!util_feature_cmp(*features, hdr.features)) {
+			size_t pos = 0;
+			if (buff_concat_features(msg, &pos, hdr.features))
+				goto err;
+			if (buff_concat(msg, &pos, "%s", " != "))
+				goto err;
+			if (buff_concat_features(msg, &pos, *features))
+				goto err;
+			ERR("features mismatch detected: %s", msg);
+			return -1;
+		} else {
+			return 0;
+		}
+	}
+
+	features_t unknown = util_get_unknown_features(
+		hdr.features, (features_t)POOL_FEAT_VALID);
+
+	/* all features are known */
+	if (util_feature_is_zero(unknown)) {
+		memcpy(features, &hdr.features, sizeof(*features));
+		return 0;
+	}
+
+	/* unknown features detected - print error message */
+	size_t pos = 0;
+	if (buff_concat_features(msg, &pos, unknown))
+		goto err;
+	ERR("invalid features detected: %s", msg);
+err:
+	return -1;
+}
+
+/*
+ * get_pool_open_flags -- (internal) generate pool open flags
+ */
+static inline unsigned
+get_pool_open_flags(struct pool_set *set, int rdonly)
+{
+	unsigned flags = 0;
+	if (rdonly == RDONLY && !util_pool_has_device_dax(set))
+		flags = POOL_OPEN_COW;
+	flags |= POOL_OPEN_IGNORE_BAD_BLOCKS;
+	return flags;
+}
+
+/*
+ * get_mmap_flags -- (internal) generate mmap flags
+ */
+static inline int
+get_mmap_flags(struct pool_set_part *part, int rdonly)
+{
+	if (part->is_dev_dax)
+		return MAP_SHARED;
+	else
+		return rdonly ? 
MAP_PRIVATE : MAP_SHARED; +} + +/* + * poolset_open -- (internal) open pool set + */ +static struct pool_set * +poolset_open(const char *path, int rdonly) +{ + struct pool_set *set; + features_t features = FEAT_INVALID; + + /* read poolset */ + int ret = util_poolset_create_set(&set, path, 0, 0, true); + if (ret < 0) { + ERR("cannot open pool set -- '%s'", path); + goto err_poolset; + } + if (set->remote) { + ERR("poolsets with remote replicas are not supported"); + errno = EINVAL; + goto err_open; + } + + /* open a memory pool */ + unsigned flags = get_pool_open_flags(set, rdonly); + if (util_pool_open_nocheck(set, flags)) + goto err_open; + + /* map all headers and check features */ + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + ASSERT(!rep->remote); + + for (unsigned p = 0; p < rep->nparts; ++p) { + struct pool_set_part *part = PART(rep, p); + int mmap_flags = get_mmap_flags(part, rdonly); + if (util_map_hdr(part, mmap_flags, rdonly)) { + part->hdr = NULL; + goto err_map_hdr; + } + + if (features_check(&features, HDR(rep, p))) { + ERR( + "invalid features - replica #%d part #%d", + r, p); + goto err_open; + } + } + } + return set; + +err_map_hdr: + /* unmap all headers */ + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + ASSERT(!rep->remote); + for (unsigned p = 0; p < rep->nparts; ++p) { + util_unmap_hdr(PART(rep, p)); + } + } +err_open: + /* close the memory pool and release pool set structure */ + util_poolset_close(set, DO_NOT_DELETE_PARTS); +err_poolset: + return NULL; +} + +/* + * get_hdr -- (internal) read header in host byte order + */ +static struct pool_hdr * +get_hdr(struct pool_set *set, unsigned rep, unsigned part) +{ + static struct pool_hdr hdr; + + /* copy header */ + struct pool_hdr *hdrp = HDR(REP(set, rep), part); + memcpy(&hdr, hdrp, sizeof(hdr)); + + /* convert to host byte order and return */ + util_convert2h_hdr_nocheck(&hdr); + return &hdr; +} + +/* + * set_hdr -- (internal) convert header to little-endian, checksum and write + */ +static void +set_hdr(struct pool_set *set, unsigned rep, unsigned part, struct pool_hdr *src) +{ + /* convert to little-endian and set new checksum */ + const size_t skip_off = POOL_HDR_CSUM_END_OFF(src); + util_convert2le_hdr(src); + util_checksum(src, sizeof(*src), &src->checksum, 1, skip_off); + + /* write header */ + struct pool_replica *replica = REP(set, rep); + struct pool_hdr *dst = HDR(replica, part); + memcpy(dst, src, sizeof(*src)); + util_persist_auto(PART(replica, part)->is_dev_dax, dst, sizeof(*src)); +} + +typedef enum { + DISABLED, + ENABLED +} fstate_t; + +#define FEATURE_IS_ENABLED_STR "feature already enabled: %s" +#define FEATURE_IS_DISABLED_STR "feature already disabled: %s" + +/* + * require_feature_is -- (internal) check if required feature is enabled + * (or disabled) + */ +static int +require_feature_is(struct pool_set *set, features_t feature, fstate_t req_state) +{ + struct pool_hdr *hdrp = get_hdr((set), 0, 0); + fstate_t state = util_feature_is_set(hdrp->features, feature) + ? ENABLED : DISABLED; + if (state == req_state) + return 1; + + const char *msg = (state == ENABLED) + ? 
FEATURE_IS_ENABLED_STR : FEATURE_IS_DISABLED_STR; + LOG(3, msg, util_feature2str(feature, NULL)); + return 0; +} + +#define FEATURE_IS_NOT_ENABLED_PRIOR_STR "enable %s prior to %s %s" +#define FEATURE_IS_NOT_DISABLED_PRIOR_STR "disable %s prior to %s %s" + +/* + * require_other_feature_is -- (internal) check if other feature is enabled + * (or disabled) in case the other feature has to be enabled (or disabled) + * prior to the main one + */ +static int +require_other_feature_is(struct pool_set *set, features_t other, + fstate_t req_state, features_t feature, const char *cause) +{ + struct pool_hdr *hdrp = get_hdr((set), 0, 0); + fstate_t state = util_feature_is_set(hdrp->features, other) + ? ENABLED : DISABLED; + if (state == req_state) + return 1; + + const char *msg = (req_state == ENABLED) + ? FEATURE_IS_NOT_ENABLED_PRIOR_STR + : FEATURE_IS_NOT_DISABLED_PRIOR_STR; + ERR(msg, util_feature2str(other, NULL), + cause, util_feature2str(feature, NULL)); + return 0; +} + +/* + * feature_set -- (internal) enable (or disable) feature + */ +static void +feature_set(struct pool_set *set, features_t feature, int value) +{ + for (unsigned r = 0; r < set->nreplicas; ++r) { + for (unsigned p = 0; p < REP(set, r)->nparts; ++p) { + struct pool_hdr *hdrp = get_hdr(set, r, p); + if (value == ENABLED) + util_feature_enable(&hdrp->features, feature); + else + util_feature_disable(&hdrp->features, feature); + set_hdr(set, r, p, hdrp); + } + } +} + +/* + * query_feature -- (internal) query feature value + */ +static int +query_feature(const char *path, features_t feature) +{ + struct pool_set *set = poolset_open(path, RDONLY); + if (!set) + goto err_open; + + struct pool_hdr *hdrp = get_hdr(set, 0, 0); + const int query = util_feature_is_set(hdrp->features, feature); + + poolset_close(set); + + return query; + +err_open: + return -1; +} + +/* + * unsupported_feature -- (internal) report unsupported feature + */ +static inline int +unsupported_feature(features_t feature) +{ + ERR("unsupported feature: %s", util_feature2str(feature, NULL)); + errno = EINVAL; + return -1; +} + +/* + * enable_singlehdr -- (internal) enable POOL_FEAT_SINGLEHDR + */ +static int +enable_singlehdr(const char *path) +{ + return unsupported_feature(f_singlehdr); +} + +/* + * disable_singlehdr -- (internal) disable POOL_FEAT_SINGLEHDR + */ +static int +disable_singlehdr(const char *path) +{ + return unsupported_feature(f_singlehdr); +} + +/* + * query_singlehdr -- (internal) query POOL_FEAT_SINGLEHDR + */ +static int +query_singlehdr(const char *path) +{ + return query_feature(path, f_singlehdr); +} + +/* + * enable_checksum_2k -- (internal) enable POOL_FEAT_CKSUM_2K + */ +static int +enable_checksum_2k(const char *path) +{ + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + if (require_feature_is(set, f_cksum_2k, DISABLED)) + feature_set(set, f_cksum_2k, ENABLED); + + poolset_close(set); + return 0; +} + +/* + * disable_checksum_2k -- (internal) disable POOL_FEAT_CKSUM_2K + */ +static int +disable_checksum_2k(const char *path) +{ + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + + int ret = 0; + if (!require_feature_is(set, f_cksum_2k, ENABLED)) + goto exit; + + /* check if POOL_FEAT_SDS is disabled */ + if (!require_other_feature_is(set, f_sds, DISABLED, + f_cksum_2k, "disabling")) { + ret = -1; + goto exit; + } + + feature_set(set, f_cksum_2k, DISABLED); +exit: + poolset_close(set); + return ret; +} + +/* + * query_checksum_2k -- (internal) query POOL_FEAT_CKSUM_2K + */ +static int 
+query_checksum_2k(const char *path) +{ + return query_feature(path, f_cksum_2k); +} + +/* + * enable_shutdown_state -- (internal) enable POOL_FEAT_SDS + */ +static int +enable_shutdown_state(const char *path) +{ + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + + int ret = 0; + if (!require_feature_is(set, f_sds, DISABLED)) + goto exit; + + /* check if POOL_FEAT_CKSUM_2K is enabled */ + if (!require_other_feature_is(set, f_cksum_2k, ENABLED, + f_sds, "enabling")) { + ret = -1; + goto exit; + } + + feature_set(set, f_sds, ENABLED); + +exit: + poolset_close(set); + return ret; +} + +/* + * reset_shutdown_state -- zero all shutdown structures + */ +static void +reset_shutdown_state(struct pool_set *set) +{ + for (unsigned rep = 0; rep < set->nreplicas; ++rep) { + for (unsigned part = 0; part < REP(set, rep)->nparts; ++part) { + struct pool_hdr *hdrp = HDR(REP(set, rep), part); + shutdown_state_init(&hdrp->sds, REP(set, rep)); + } + } +} + +/* + * disable_shutdown_state -- (internal) disable POOL_FEAT_SDS + */ +static int +disable_shutdown_state(const char *path) +{ + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + + if (require_feature_is(set, f_sds, ENABLED)) { + feature_set(set, f_sds, DISABLED); + reset_shutdown_state(set); + } + + poolset_close(set); + return 0; +} + +/* + * query_shutdown_state -- (internal) query POOL_FEAT_SDS + */ +static int +query_shutdown_state(const char *path) +{ + return query_feature(path, f_sds); +} + +/* + * enable_badblocks_checking -- (internal) enable POOL_FEAT_CHECK_BAD_BLOCKS + */ +static int +enable_badblocks_checking(const char *path) +{ +#ifdef _WIN32 + ERR("bad blocks checking is not supported on Windows"); + return -1; +#else + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + + if (require_feature_is(set, f_chkbb, DISABLED)) + feature_set(set, f_chkbb, ENABLED); + + poolset_close(set); + + return 0; +#endif +} + +/* + * disable_badblocks_checking -- (internal) disable POOL_FEAT_CHECK_BAD_BLOCKS + */ +static int +disable_badblocks_checking(const char *path) +{ + struct pool_set *set = poolset_open(path, RW); + if (!set) + return -1; + + int ret = 0; + if (!require_feature_is(set, f_chkbb, ENABLED)) + goto exit; + + feature_set(set, f_chkbb, DISABLED); +exit: + poolset_close(set); + + return ret; +} + +/* + * query_badblocks_checking -- (internal) query POOL_FEAT_CHECK_BAD_BLOCKS + */ +static int +query_badblocks_checking(const char *path) +{ + return query_feature(path, f_chkbb); +} + +struct feature_funcs { + int (*enable)(const char *); + int (*disable)(const char *); + int (*query)(const char *); +}; + +static struct feature_funcs features[] = { + { + .enable = enable_singlehdr, + .disable = disable_singlehdr, + .query = query_singlehdr + }, + { + .enable = enable_checksum_2k, + .disable = disable_checksum_2k, + .query = query_checksum_2k + }, + { + .enable = enable_shutdown_state, + .disable = disable_shutdown_state, + .query = query_shutdown_state + }, + { + .enable = enable_badblocks_checking, + .disable = disable_badblocks_checking, + .query = query_badblocks_checking + }, +}; + +#define FEATURE_FUNCS_MAX ARRAY_SIZE(features) + +/* + * are_flags_valid -- (internal) check if flags are valid + */ +static inline int +are_flags_valid(unsigned flags) +{ + if (flags != 0) { + ERR("invalid flags: 0x%x", flags); + errno = EINVAL; + return 0; + } + return 1; +} + +/* + * is_feature_valid -- (internal) check if feature is valid + */ +static inline int +is_feature_valid(uint32_t 
feature) +{ + if (feature >= FEATURE_FUNCS_MAX) { + ERR("invalid feature: 0x%x", feature); + errno = EINVAL; + return 0; + } + return 1; +} + +/* + * pmempool_feature_enableU -- enable pool set feature + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_feature_enableU(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + LOG(3, "path %s feature %x flags %x", path, feature, flags); + if (!is_feature_valid(feature)) + return -1; + if (!are_flags_valid(flags)) + return -1; + return features[feature].enable(path); +} + +/* + * pmempool_feature_disableU -- disable pool set feature + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_feature_disableU(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + LOG(3, "path %s feature %x flags %x", path, feature, flags); + if (!is_feature_valid(feature)) + return -1; + if (!are_flags_valid(flags)) + return -1; + return features[feature].disable(path); +} + +/* + * pmempool_feature_queryU -- query pool set feature + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_feature_queryU(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + LOG(3, "path %s feature %x flags %x", path, feature, flags); + + /* + * XXX: Windows does not allow function calls in constant expressions + */ +#ifndef _WIN32 +#define CHECK_INCOMPAT_MAPPING(FEAT, ENUM) \ + COMPILE_ERROR_ON( \ + util_feature2pmempool_feature(FEATURE_INCOMPAT(FEAT)) != ENUM) + + CHECK_INCOMPAT_MAPPING(SINGLEHDR, PMEMPOOL_FEAT_SINGLEHDR); + CHECK_INCOMPAT_MAPPING(CKSUM_2K, PMEMPOOL_FEAT_CKSUM_2K); + CHECK_INCOMPAT_MAPPING(SDS, PMEMPOOL_FEAT_SHUTDOWN_STATE); + +#undef CHECK_INCOMPAT_MAPPING +#endif + + if (!is_feature_valid(feature)) + return -1; + if (!are_flags_valid(flags)) + return -1; + return features[feature].query(path); +} + +#ifndef _WIN32 +/* + * pmempool_feature_enable -- enable pool set feature + */ +int +pmempool_feature_enable(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + return pmempool_feature_enableU(path, feature, flags); +} +#else +/* + * pmempool_feature_enableW -- enable pool set feature as widechar + */ +int +pmempool_feature_enableW(const wchar_t *path, enum pmempool_feature feature, + unsigned flags) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) { + ERR("Invalid poolset/pool file path."); + return -1; + } + + int ret = pmempool_feature_enableU(upath, feature, flags); + + util_free_UTF8(upath); + return ret; +} +#endif + +#ifndef _WIN32 +/* + * pmempool_feature_disable -- disable pool set feature + */ +int +pmempool_feature_disable(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + return pmempool_feature_disableU(path, feature, flags); +} +#else +/* + * pmempool_feature_disableW -- disable pool set feature as widechar + */ +int +pmempool_feature_disableW(const wchar_t *path, enum pmempool_feature feature, + unsigned flags) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) { + ERR("Invalid poolset/pool file path."); + return -1; + } + + int ret = pmempool_feature_disableU(upath, feature, flags); + + util_free_UTF8(upath); + return ret; +} +#endif + +#ifndef _WIN32 +/* + * pmempool_feature_query -- query pool set feature + */ +int +pmempool_feature_query(const char *path, enum pmempool_feature feature, + unsigned flags) +{ + return pmempool_feature_queryU(path, feature, flags); +} +#else +/* + * pmempool_feature_queryW -- query pool set feature as widechar + */ +int +pmempool_feature_queryW(const wchar_t *path, enum pmempool_feature feature, +
unsigned flags) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) { + ERR("Invalid poolset/pool file path."); + return -1; + } + + int ret = pmempool_feature_queryU(upath, feature, flags); + + util_free_UTF8(upath); + return ret; +} +#endif diff --git a/src/pmdk/src/libpmempool/libpmempool.c b/src/pmdk/src/libpmempool/libpmempool.c new file mode 100644 index 000000000..054eb8bd0 --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * libpmempool.c -- entry points for libpmempool + */ + +#include +#include +#include +#include + +#include "pmemcommon.h" +#include "libpmempool.h" +#include "pmempool.h" +#include "pool.h" +#include "check.h" + +#ifdef USE_RPMEM +#include "rpmem_common.h" +#include "rpmem_util.h" +#endif + +#ifdef _WIN32 +#define ANSWER_BUFFSIZE 256 +#endif + +/* + * libpmempool_init -- load-time initialization for libpmempool + * + * Called automatically by the run-time loader. + */ +ATTR_CONSTRUCTOR +void +libpmempool_init(void) +{ + common_init(PMEMPOOL_LOG_PREFIX, PMEMPOOL_LOG_LEVEL_VAR, + PMEMPOOL_LOG_FILE_VAR, PMEMPOOL_MAJOR_VERSION, + PMEMPOOL_MINOR_VERSION); + LOG(3, NULL); +#ifdef USE_RPMEM + util_remote_init(); + rpmem_util_cmds_init(); +#endif +} + +/* + * libpmempool_fini -- libpmempool cleanup routine + * + * Called automatically when the process terminates. + */ +ATTR_DESTRUCTOR +void +libpmempool_fini(void) +{ + LOG(3, NULL); +#ifdef USE_RPMEM + util_remote_unload(); + util_remote_fini(); + rpmem_util_cmds_fini(); +#endif + common_fini(); +} + +/* + * pmempool_check_versionU -- see if library meets application version + * requirements + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmempool_check_versionU(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != PMEMPOOL_MAJOR_VERSION) { + ERR("libpmempool major version mismatch (need %u, found %u)", + major_required, PMEMPOOL_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > PMEMPOOL_MINOR_VERSION) { + ERR("libpmempool minor version mismatch (need %u, found %u)", + minor_required, PMEMPOOL_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +#ifndef _WIN32 +/* + * pmempool_check_version -- see if lib meets application version requirements + */ +const char * +pmempool_check_version(unsigned major_required, unsigned minor_required) +{ + return pmempool_check_versionU(major_required, minor_required); +} +#else +/* + * pmempool_check_versionW -- see if library meets application version + * requirements as widechar + */ +const wchar_t * +pmempool_check_versionW(unsigned major_required, unsigned minor_required) +{ + if (pmempool_check_versionU(major_required, minor_required) != NULL) + return out_get_errormsgW(); + else + return NULL; +} +#endif + +/* + * pmempool_errormsgU -- return last error message + */ +#ifndef _WIN32 +static inline +#endif +const char * +pmempool_errormsgU(void) +{ + return out_get_errormsg(); +} + +#ifndef _WIN32 +/* + * pmempool_errormsg -- return last error message + */ +const char * +pmempool_errormsg(void) +{ + return pmempool_errormsgU(); +} +#else +/* + * pmempool_errormsgW -- return last error message as widechar + */ +const wchar_t * +pmempool_errormsgW(void) +{ + return out_get_errormsgW(); +} +#endif + +/* + * pmempool_ppc_set_default -- (internal) set default values of check context + */ +static void
+pmempool_ppc_set_default(PMEMpoolcheck *ppc) +{ + /* all other fields should be zeroed */ + const PMEMpoolcheck ppc_default = { + .args = { + .pool_type = PMEMPOOL_POOL_TYPE_DETECT, + }, + .result = CHECK_RESULT_CONSISTENT, + }; + *ppc = ppc_default; +} + +/* + * pmempool_check_initU -- initialize check context + */ +#ifndef _WIN32 +static inline +#endif +PMEMpoolcheck * +pmempool_check_initU(struct pmempool_check_argsU *args, size_t args_size) +{ + LOG(3, "path %s backup_path %s pool_type %u flags %x", args->path, + args->backup_path, args->pool_type, args->flags); + + /* + * Currently only one size of the args structure is supported. The + * version of the pmempool_check_args structure can be distinguished + * based on the provided args_size. + */ + if (args_size < sizeof(struct pmempool_check_args)) { + ERR("provided args_size is not supported"); + errno = EINVAL; + return NULL; + } + + /* + * A dry run does not allow making the changes a repair might perform. + * Advanced allows performing more complex repairs. Questions are asked + * only if repairs are made. So dry_run, advanced and always_yes can be + * set only if repair is set. + */ + if (util_flag_isclr(args->flags, PMEMPOOL_CHECK_REPAIR) && + util_flag_isset(args->flags, PMEMPOOL_CHECK_DRY_RUN | + PMEMPOOL_CHECK_ADVANCED | PMEMPOOL_CHECK_ALWAYS_YES)) { + ERR("dry_run, advanced and always_yes are applicable only if " + "repair is set"); + errno = EINVAL; + return NULL; + } + + /* + * a dry run does not modify anything, so performing a backup is + * redundant + */ + if (util_flag_isset(args->flags, PMEMPOOL_CHECK_DRY_RUN) && + args->backup_path != NULL) { + ERR("dry run does not allow performing a backup"); + errno = EINVAL; + return NULL; + } + + /* + * libpmempool uses the string format of communication, so it must be + * set + */ + if (util_flag_isclr(args->flags, PMEMPOOL_CHECK_FORMAT_STR)) { + ERR("PMEMPOOL_CHECK_FORMAT_STR flag must be set"); + errno = EINVAL; + return NULL; + } + + PMEMpoolcheck *ppc = calloc(1, sizeof(*ppc)); + if (ppc == NULL) { + ERR("!calloc"); + return NULL; + } + + pmempool_ppc_set_default(ppc); + memcpy(&ppc->args, args, sizeof(ppc->args)); + ppc->path = strdup(args->path); + if (!ppc->path) { + ERR("!strdup"); + goto error_path_malloc; + } + ppc->args.path = ppc->path; + + if (args->backup_path != NULL) { + ppc->backup_path = strdup(args->backup_path); + if (!ppc->backup_path) { + ERR("!strdup"); + goto error_backup_path_malloc; + } + ppc->args.backup_path = ppc->backup_path; + } + + if (check_init(ppc) != 0) + goto error_check_init; + + return ppc; + +error_check_init: + /* in case errno was not set by any of the functions used, set it */ + if (errno == 0) + errno = EINVAL; + + free(ppc->backup_path); +error_backup_path_malloc: + free(ppc->path); +error_path_malloc: + free(ppc); + return NULL; +} + +#ifndef _WIN32 +/* + * pmempool_check_init -- initialize check context + */ +PMEMpoolcheck * +pmempool_check_init(struct pmempool_check_args *args, size_t args_size) +{ + return pmempool_check_initU(args, args_size); +} +#else +/* + * pmempool_check_initW -- initialize check context as widechar + */ +PMEMpoolcheck * +pmempool_check_initW(struct pmempool_check_argsW *args, size_t args_size) +{ + char *upath = util_toUTF8(args->path); + if (upath == NULL) + return NULL; + char *ubackup_path = NULL; + if (args->backup_path != NULL) { + ubackup_path = util_toUTF8(args->backup_path); + if (ubackup_path == NULL) { + util_free_UTF8(upath); + return NULL; + } + } + + struct pmempool_check_argsU uargs = { + .path = upath, + .backup_path =
ubackup_path, + .pool_type = args->pool_type, + .flags = args->flags + }; + + PMEMpoolcheck *ret = pmempool_check_initU(&uargs, args_size); + + util_free_UTF8(ubackup_path); + util_free_UTF8(upath); + return ret; +} +#endif + +/* + * pmempool_checkU -- continue the check until a status is produced for the + * caller to consume + */ +#ifndef _WIN32 +static inline +#endif +struct pmempool_check_statusU * +pmempool_checkU(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + ASSERTne(ppc, NULL); + + struct check_status *result; + do { + result = check_step(ppc); + + if (check_is_end(ppc->data) && result == NULL) + return NULL; + } while (result == NULL); + + return check_status_get(result); +} + +#ifndef _WIN32 +/* + * pmempool_check -- continue the check until a status is produced for the + * caller to consume + */ +struct pmempool_check_status * +pmempool_check(PMEMpoolcheck *ppc) +{ + return pmempool_checkU(ppc); +} +#else +/* + * pmempool_checkW -- continue the check until a status is produced for the + * caller to consume + */ +struct pmempool_check_statusW * +pmempool_checkW(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + ASSERTne(ppc, NULL); + + /* check the cache and convert msg and answer */ + char buf[ANSWER_BUFFSIZE]; + memset(buf, 0, ANSWER_BUFFSIZE); + convert_status_cache(ppc, buf, ANSWER_BUFFSIZE); + + struct check_status *uresult; + do { + uresult = check_step(ppc); + + if (check_is_end(ppc->data) && uresult == NULL) + return NULL; + } while (uresult == NULL); + + struct pmempool_check_statusU *uret_res = check_status_get(uresult); + const wchar_t *wmsg = util_toUTF16(uret_res->str.msg); + if (wmsg == NULL) + FATAL("!malloc"); + + struct pmempool_check_statusW *wret_res = + (struct pmempool_check_statusW *)uret_res; + /* pointer to old message is freed in next check step */ + wret_res->str.msg = wmsg; + return wret_res; +} +#endif + +/* + * pmempool_check_end -- end check and release check context + */ +enum pmempool_check_result +pmempool_check_end(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + const enum check_result result = ppc->result; + const unsigned sync_required = ppc->sync_required; + + check_fini(ppc); + free(ppc->path); + free(ppc->backup_path); + free(ppc); + + if (sync_required) { + switch (result) { + case CHECK_RESULT_CONSISTENT: + case CHECK_RESULT_REPAIRED: + return PMEMPOOL_CHECK_RESULT_SYNC_REQ; + default: + /* other results require fixing prior to sync */ + ; + } + } + + switch (result) { + case CHECK_RESULT_CONSISTENT: + return PMEMPOOL_CHECK_RESULT_CONSISTENT; + + case CHECK_RESULT_NOT_CONSISTENT: + return PMEMPOOL_CHECK_RESULT_NOT_CONSISTENT; + + case CHECK_RESULT_REPAIRED: + return PMEMPOOL_CHECK_RESULT_REPAIRED; + + case CHECK_RESULT_CANNOT_REPAIR: + return PMEMPOOL_CHECK_RESULT_CANNOT_REPAIR; + + default: + return PMEMPOOL_CHECK_RESULT_ERROR; + } +} diff --git a/src/pmdk/src/libpmempool/libpmempool.def b/src/pmdk/src/libpmempool/libpmempool.def new file mode 100644 index 000000000..a0edb3d6c --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.def @@ -0,0 +1,32 @@ +;;;; Begin Copyright Notice +; SPDX-License-Identifier: BSD-3-Clause +; Copyright 2016, Intel Corporation +;;;; End Copyright Notice + +LIBRARY libpmempool + +VERSION 1.0 + +EXPORTS + pmempool_check_versionU + pmempool_check_versionW + pmempool_errormsgU + pmempool_errormsgW + pmempool_check_initU + pmempool_check_initW + pmempool_checkU + pmempool_checkW + pmempool_check_end + pmempool_syncU + pmempool_syncW + pmempool_transformU + pmempool_transformW + pmempool_rmU + pmempool_rmW + pmempool_feature_enableU + pmempool_feature_enableW + pmempool_feature_disableU +
pmempool_feature_disableW + pmempool_feature_queryU + pmempool_feature_queryW + DllMain diff --git a/src/pmdk/src/libpmempool/libpmempool.link.in b/src/pmdk/src/libpmempool/libpmempool.link.in new file mode 100644 index 000000000..c3c0e864d --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.link.in @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2019, Intel Corporation +# +# +# src/libpmempool.link -- linker link file for libpmempool +# +LIBPMEMPOOL_1.0 { + global: + pmempool_errormsg; + pmempool_check_version; + pmempool_check_init; + pmempool_check; + pmempool_check_end; + pmempool_transform; + pmempool_sync; + pmempool_rm; + pmempool_feature_enable; + pmempool_feature_disable; + pmempool_feature_query; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/libpmempool/libpmempool.rc b/src/pmdk/src/libpmempool/libpmempool.rc new file mode 100644 index 000000000..85462b359 --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.rc @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016, Intel Corporation */ + +/* + * libpmempool.rc -- libpmempool resource file + */ + +#include +#define FILE_NAME "libpmempool.dll" +#define DESCRIPTION "libpmempool - pool management library" +#define TYPE VFT_DLL +#include \ No newline at end of file diff --git a/src/pmdk/src/libpmempool/libpmempool.vcxproj b/src/pmdk/src/libpmempool/libpmempool.vcxproj new file mode 100644 index 000000000..44359a992 --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.vcxproj @@ -0,0 +1,162 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {f7c6c6b6-4142-4c82-8699-4a9d8183181b} + + + {0b1818eb-bdc8-4865-964f-db8bf05cfd86} + + + {1baa1617-93ae-4196-8a1a-bd492fb18aef} + + + {9e9e3d25-2139-4a5d-9200-18148ddead45} + + + {901f04db-e1a5-4a41-8b81-9d31c19acd59} + + + + + + + {CF9A0883-6334-44C7-AC29-349468C78E27} + DynamicLibrary + libpmempool + libpmempool + en-US + 14.0 + 10.0.17134.0 + 10.0.10240.0 + + + + DynamicLibrary + true + v140 + + + DynamicLibrary + false + false + v140 + + + + + + + + + + + + + + + $(SolutionDir)\libpmemobj;$(SolutionDir)\libpmemblk;$(SolutionDir)\libpmemlog;$(SolutionDir)\libpmem2;%(AdditionalIncludeDirectories) + + + + + $(SolutionDir)\libpmemobj;$(SolutionDir)\libpmemblk;$(SolutionDir)\libpmemlog;$(SolutionDir)\libpmem2;%(AdditionalIncludeDirectories) + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmempool/libpmempool.vcxproj.filters b/src/pmdk/src/libpmempool/libpmempool.vcxproj.filters new file mode 100644 index 000000000..ffd1ce246 --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool.vcxproj.filters @@ -0,0 +1,253 @@ + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {cd079c79-5441-413e-b2ba-99fed2b0b779} + + + {9ad93d4f-a9d1-4e38-94a1-77e36acc268f} + + + + + Source Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/libpmempool/libpmempool_main.c b/src/pmdk/src/libpmempool/libpmempool_main.c new file mode 100644 index 000000000..61419cdec --- /dev/null +++ b/src/pmdk/src/libpmempool/libpmempool_main.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2017, Intel Corporation */ + +/* + * libpmempool_main.c -- entry point for libpmempool.dll + * + * XXX - This is a placeholder. All the library initialization/cleanup + * that is done in library ctors/dtors, as well as TLS initialization + * should be moved here. + */ + +#include + +void libpmempool_init(void); +void libpmempool_fini(void); + +int APIENTRY +DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved) +{ + switch (dwReason) { + case DLL_PROCESS_ATTACH: + libpmempool_init(); + break; + + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + break; + + case DLL_PROCESS_DETACH: + libpmempool_fini(); + break; + } + return TRUE; +} diff --git a/src/pmdk/src/libpmempool/pmempool.h b/src/pmdk/src/libpmempool/pmempool.h new file mode 100644 index 000000000..710f2353c --- /dev/null +++ b/src/pmdk/src/libpmempool/pmempool.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * pmempool.h -- internal definitions for libpmempool + */ + +#ifndef PMEMPOOL_H +#define PMEMPOOL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define PMEMPOOL_LOG_PREFIX "libpmempool" +#define PMEMPOOL_LOG_LEVEL_VAR "PMEMPOOL_LOG_LEVEL" +#define PMEMPOOL_LOG_FILE_VAR "PMEMPOOL_LOG_FILE" + +enum check_result { + CHECK_RESULT_CONSISTENT, + CHECK_RESULT_NOT_CONSISTENT, + CHECK_RESULT_ASK_QUESTIONS, + CHECK_RESULT_PROCESS_ANSWERS, + CHECK_RESULT_REPAIRED, + CHECK_RESULT_CANNOT_REPAIR, + CHECK_RESULT_ERROR, + CHECK_RESULT_INTERNAL_ERROR +}; + +/* + * pmempool_check_ctx -- context and arguments for check command + */ +struct pmempool_check_ctx { + struct pmempool_check_args args; + char *path; + char *backup_path; + + struct check_data *data; + struct pool_data *pool; + enum check_result result; + unsigned sync_required; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmempool/pool.c b/src/pmdk/src/libpmempool/pool.c new file mode 100644 index 000000000..ad54330b4 --- /dev/null +++ b/src/pmdk/src/libpmempool/pool.c @@ -0,0 +1,1123 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * pool.c -- pool processing functions + */ + +#include +#include +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#ifdef __FreeBSD__ +#include +#define BLKGETSIZE64 DIOCGMEDIASIZE +#else +#include +#endif +#endif + 
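+/*
+ * Note: FreeBSD has no BLKGETSIZE64; DIOCGMEDIASIZE likewise reports the
+ * media size in bytes, hence the alias above. A minimal sketch of the query
+ * done later in pool_params_parse() ('fd' stands for an illustrative
+ * descriptor of an open block device, not a variable defined here):
+ *
+ *	size_t size;
+ *	if (ioctl(fd, BLKGETSIZE64, &size))
+ *		ERR("!ioctl");
+ */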
+#include "libpmem.h" +#include "libpmemlog.h" +#include "libpmemblk.h" +#include "libpmempool.h" + +#include "out.h" +#include "pmempool.h" +#include "pool.h" +#include "lane.h" +#include "obj.h" +#include "btt.h" +#include "file.h" +#include "os.h" +#include "set.h" +#include "check_util.h" +#include "util_pmem.h" +#include "mmap.h" + +/* arbitrary size of a maximum file part being read / write at once */ +#define RW_BUFFERING_SIZE (128 * 1024 * 1024) + +/* + * pool_btt_lseek -- (internal) perform lseek in BTT file mode + */ +static inline os_off_t +pool_btt_lseek(struct pool_data *pool, os_off_t offset, int whence) +{ + os_off_t result; + if ((result = os_lseek(pool->set_file->fd, offset, whence)) == -1) + ERR("!lseek"); + + return result; +} + +/* + * pool_btt_read -- (internal) perform read in BTT file mode + */ +static inline ssize_t +pool_btt_read(struct pool_data *pool, void *dst, size_t count) +{ + size_t total = 0; + ssize_t nread; + while (count > total && + (nread = util_read(pool->set_file->fd, dst, count - total))) { + if (nread == -1) { + ERR("!read"); + return total ? (ssize_t)total : -1; + } + + dst = (void *)((ssize_t)dst + nread); + total += (size_t)nread; + } + + return (ssize_t)total; +} + +/* + * pool_btt_write -- (internal) perform write in BTT file mode + */ +static inline ssize_t +pool_btt_write(struct pool_data *pool, const void *src, size_t count) +{ + ssize_t nwrite = 0; + size_t total = 0; + while (count > total && + (nwrite = util_write(pool->set_file->fd, src, + count - total))) { + if (nwrite == -1) { + ERR("!write"); + return total ? (ssize_t)total : -1; + } + + src = (void *)((ssize_t)src + nwrite); + total += (size_t)nwrite; + } + + return (ssize_t)total; +} + +/* + * pool_set_read_header -- (internal) read a header of a pool set + */ +static int +pool_set_read_header(const char *fname, struct pool_hdr *hdr) +{ + struct pool_set *set; + int ret = 0; + + if (util_poolset_read(&set, fname)) { + return -1; + } + /* open the first part set file to read the pool header values */ + const struct pool_set_part *part = PART(REP(set, 0), 0); + int fdp = util_file_open(part->path, NULL, 0, O_RDONLY); + if (fdp < 0) { + ERR("cannot open poolset part file"); + ret = -1; + goto err_pool_set; + } + + /* read the pool header from first pool set file */ + if (pread(fdp, hdr, sizeof(*hdr), 0) != sizeof(*hdr)) { + ERR("cannot read pool header from poolset"); + ret = -1; + goto err_close_part; + } + +err_close_part: + os_close(fdp); + +err_pool_set: + util_poolset_free(set); + return ret; +} + +/* + * pool_set_map -- (internal) map poolset + */ +static int +pool_set_map(const char *fname, struct pool_set **poolset, unsigned flags) +{ + ASSERTeq(util_is_poolset_file(fname), 1); + + struct pool_hdr hdr; + if (pool_set_read_header(fname, &hdr)) + return -1; + + util_convert2h_hdr_nocheck(&hdr); + + /* parse pool type from first pool set file */ + enum pool_type type = pool_hdr_get_type(&hdr); + if (type == POOL_TYPE_UNKNOWN) { + ERR("cannot determine pool type from poolset"); + return -1; + } + + /* + * Open the poolset, the values passed to util_pool_open are read + * from the first poolset file, these values are then compared with + * the values from all headers of poolset files. 
+ */ + struct pool_attr attr; + util_pool_hdr2attr(&attr, &hdr); + if (util_pool_open(poolset, fname, 0 /* minpartsize */, &attr, + NULL, NULL, flags | POOL_OPEN_IGNORE_SDS | + POOL_OPEN_IGNORE_BAD_BLOCKS)) { + ERR("opening poolset failed"); + return -1; + } + + return 0; +} + +/* + * pool_params_from_header -- parse pool params from pool header + */ +void +pool_params_from_header(struct pool_params *params, const struct pool_hdr *hdr) +{ + memcpy(params->signature, hdr->signature, sizeof(params->signature)); + memcpy(&params->features, &hdr->features, sizeof(params->features)); + + /* + * Check whether the file is part of a pool set by comparing its UUID + * with the next-part and previous-part UUIDs. If they are the same, + * the pool consists of a single file. + */ + int uuid_eq_next = uuidcmp(hdr->uuid, hdr->next_part_uuid); + int uuid_eq_prev = uuidcmp(hdr->uuid, hdr->prev_part_uuid); + params->is_part = !params->is_poolset && (uuid_eq_next || uuid_eq_prev); + + params->type = pool_hdr_get_type(hdr); +} + +/* + * pool_check_type_to_pool_type -- (internal) convert check pool type to + * internal pool type value + */ +static enum pool_type +pool_check_type_to_pool_type(enum pmempool_pool_type check_pool_type) +{ + switch (check_pool_type) { + case PMEMPOOL_POOL_TYPE_LOG: + return POOL_TYPE_LOG; + case PMEMPOOL_POOL_TYPE_BLK: + return POOL_TYPE_BLK; + case PMEMPOOL_POOL_TYPE_OBJ: + return POOL_TYPE_OBJ; + default: + ERR("cannot convert pmempool_pool_type %u to pool_type", + check_pool_type); + return POOL_TYPE_UNKNOWN; + } +} + +/* + * pool_params_parse -- parse pool type, file size and block size + */ +static int +pool_params_parse(const PMEMpoolcheck *ppc, struct pool_params *params, + int check) +{ + LOG(3, NULL); + int is_btt = ppc->args.pool_type == PMEMPOOL_POOL_TYPE_BTT; + + params->type = POOL_TYPE_UNKNOWN; + params->is_poolset = util_is_poolset_file(ppc->path) == 1; + + int fd = util_file_open(ppc->path, NULL, 0, O_RDONLY); + if (fd < 0) + return -1; + + int ret = 0; + + os_stat_t stat_buf; + ret = os_fstat(fd, &stat_buf); + if (ret) + goto out_close; + + ASSERT(stat_buf.st_size >= 0); + + params->mode = stat_buf.st_mode; + + struct pool_set *set; + void *addr; + if (params->is_poolset) { + /* + * Need to close the poolset because it will be opened with + * flock in the following instructions. + */ + os_close(fd); + fd = -1; + + if (check) { + if (pool_set_map(ppc->path, &set, 0)) + return -1; + } else { + ret = util_poolset_create_set(&set, ppc->path, + 0, 0, true); + if (ret < 0) { + LOG(2, "cannot open pool set -- '%s'", + ppc->path); + return -1; + } + if (set->remote) { + ERR("poolsets with remote replicas are not " + "supported"); + return -1; + } + if (util_pool_open_nocheck(set, + POOL_OPEN_IGNORE_BAD_BLOCKS)) + return -1; + } + + params->size = set->poolsize; + addr = set->replica[0]->part[0].addr; + + /* + * XXX mprotect for device dax with length not aligned to its + * page granularity causes SIGBUS on the next page fault. + * The length argument of this call should be changed to + * set->poolsize once the kernel issue is solved.
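+		 *
+		 * That is, once the issue is resolved, the call below is
+		 * expected to become simply (sketch):
+		 *
+		 *	mprotect(addr, set->poolsize, PROT_READ);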
+ */ + if (mprotect(addr, set->replica[0]->repsize, + PROT_READ) < 0) { + ERR("!mprotect"); + goto out_unmap; + } + params->is_dev_dax = set->replica[0]->part[0].is_dev_dax; + params->is_pmem = set->replica[0]->is_pmem; + } else if (is_btt) { + params->size = (size_t)stat_buf.st_size; +#ifndef _WIN32 + if (params->mode & S_IFBLK) + if (ioctl(fd, BLKGETSIZE64, &params->size)) { + ERR("!ioctl"); + goto out_close; + } +#endif + addr = NULL; + } else { + enum file_type type = util_file_get_type(ppc->path); + if (type < 0) { + ret = -1; + goto out_close; + } + + ssize_t s = util_file_get_size(ppc->path); + if (s < 0) { + ret = -1; + goto out_close; + } + params->size = (size_t)s; + int map_sync; + addr = util_map(fd, 0, params->size, MAP_SHARED, 1, 0, + &map_sync); + if (addr == NULL) { + ret = -1; + goto out_close; + } + params->is_dev_dax = type == TYPE_DEVDAX; + params->is_pmem = params->is_dev_dax || map_sync || + pmem_is_pmem(addr, params->size); + } + + /* stop processing for BTT device */ + if (is_btt) { + params->type = POOL_TYPE_BTT; + params->is_part = false; + goto out_close; + } + + struct pool_hdr hdr; + memcpy(&hdr, addr, sizeof(hdr)); + util_convert2h_hdr_nocheck(&hdr); + pool_params_from_header(params, &hdr); + + if (ppc->args.pool_type != PMEMPOOL_POOL_TYPE_DETECT) { + enum pool_type declared_type = + pool_check_type_to_pool_type(ppc->args.pool_type); + if ((params->type & ~declared_type) != 0) { + ERR("declared pool type does not match"); + errno = EINVAL; + ret = 1; + goto out_unmap; + } + } + + if (params->type == POOL_TYPE_BLK) { + struct pmemblk pbp; + memcpy(&pbp, addr, sizeof(pbp)); + params->blk.bsize = le32toh(pbp.bsize); + } else if (params->type == POOL_TYPE_OBJ) { + struct pmemobjpool *pop = addr; + memcpy(params->obj.layout, pop->layout, + PMEMOBJ_MAX_LAYOUT); + } + +out_unmap: + if (params->is_poolset) { + ASSERTeq(fd, -1); + ASSERTne(addr, NULL); + util_poolset_close(set, DO_NOT_DELETE_PARTS); + } else if (!is_btt) { + ASSERTne(fd, -1); + ASSERTne(addr, NULL); + munmap(addr, params->size); + } +out_close: + if (fd != -1) + os_close(fd); + return ret; +} + +/* + * pool_set_file_open -- (internal) opens pool set file or regular file + */ +static struct pool_set_file * +pool_set_file_open(const char *fname, struct pool_params *params, int rdonly) +{ + LOG(3, NULL); + + struct pool_set_file *file = calloc(1, sizeof(*file)); + if (!file) + return NULL; + + file->fname = strdup(fname); + if (!file->fname) + goto err; + + const char *path = file->fname; + + if (params->type != POOL_TYPE_BTT) { + int ret = util_poolset_create_set(&file->poolset, path, + 0, 0, true); + if (ret < 0) { + LOG(2, "cannot open pool set -- '%s'", path); + goto err_free_fname; + } + unsigned flags = (rdonly ? POOL_OPEN_COW : 0) | + POOL_OPEN_IGNORE_BAD_BLOCKS; + if (util_pool_open_nocheck(file->poolset, flags)) + goto err_free_fname; + + file->size = file->poolset->poolsize; + + /* get modification time from the first part of the first replica */ + path = file->poolset->replica[0]->part[0].path; + file->addr = file->poolset->replica[0]->part[0].addr; + } else { + int oflag = rdonly ?
O_RDONLY : O_RDWR; + file->fd = util_file_open(fname, NULL, 0, oflag); + file->size = params->size; + } + + os_stat_t buf; + if (os_stat(path, &buf)) { + ERR("%s", path); + goto err_close_poolset; + } + + file->mtime = buf.st_mtime; + file->mode = buf.st_mode; + return file; + +err_close_poolset: + if (params->type != POOL_TYPE_BTT) + util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS); + else if (file->fd != -1) + os_close(file->fd); +err_free_fname: + free(file->fname); +err: + free(file); + return NULL; +} + +/* + * pool_set_parse -- parse poolset file + */ +int +pool_set_parse(struct pool_set **setp, const char *path) +{ + LOG(3, "setp %p path %s", setp, path); + + int fd = os_open(path, O_RDONLY); + int ret = 0; + + if (fd < 0) + return 1; + + if (util_poolset_parse(setp, path, fd)) { + ret = 1; + goto err_close; + } + +err_close: + os_close(fd); + return ret; +} + +/* + * pool_data_alloc -- allocate pool data and open set_file + */ +struct pool_data * +pool_data_alloc(PMEMpoolcheck *ppc) +{ + LOG(3, NULL); + + struct pool_data *pool = calloc(1, sizeof(*pool)); + if (!pool) { + ERR("!calloc"); + return NULL; + } + + PMDK_TAILQ_INIT(&pool->arenas); + pool->uuid_op = UUID_NOP; + + if (pool_params_parse(ppc, &pool->params, 0)) + goto error; + + int rdonly = CHECK_IS_NOT(ppc, REPAIR); + int prv = CHECK_IS(ppc, DRY_RUN); + + if (prv && pool->params.is_dev_dax) { + errno = ENOTSUP; + ERR("!cannot perform a dry run on dax device"); + goto error; + } + + pool->set_file = pool_set_file_open(ppc->path, &pool->params, prv); + if (pool->set_file == NULL) + goto error; + + /* + * XXX mprotect for device dax with length not aligned to its + * page granularity causes SIGBUS on the next page fault. + * The length argument of this call should be changed to + * pool->set_file->poolsize once the kernel issue is solved. 
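+	 *
+	 * (Same workaround as in pool_params_parse() above: the whole first
+	 * replica mapping is protected rather than just the pool size.)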
+ */ + if (rdonly && mprotect(pool->set_file->addr, + pool->set_file->poolset->replica[0]->repsize, + PROT_READ) < 0) + goto error; + + if (pool->params.type != POOL_TYPE_BTT) { + if (pool_set_file_map_headers(pool->set_file, rdonly, prv)) + goto error; + } + + return pool; + +error: + pool_data_free(pool); + return NULL; +} + +/* + * pool_set_file_close -- (internal) closes pool set file or regular file + */ +static void +pool_set_file_close(struct pool_set_file *file) +{ + LOG(3, NULL); + + if (file->poolset) + util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS); + else if (file->addr) { + munmap(file->addr, file->size); + os_close(file->fd); + } else if (file->fd) + os_close(file->fd); + + free(file->fname); + free(file); +} + +/* + * pool_data_free -- close set_file and release pool data + */ +void +pool_data_free(struct pool_data *pool) +{ + LOG(3, NULL); + + if (pool->set_file) { + if (pool->params.type != POOL_TYPE_BTT) + pool_set_file_unmap_headers(pool->set_file); + pool_set_file_close(pool->set_file); + } + + while (!PMDK_TAILQ_EMPTY(&pool->arenas)) { + struct arena *arenap = PMDK_TAILQ_FIRST(&pool->arenas); + if (arenap->map) + free(arenap->map); + if (arenap->flog) + free(arenap->flog); + + PMDK_TAILQ_REMOVE(&pool->arenas, arenap, next); + free(arenap); + } + + free(pool); +} + +/* + * pool_set_file_map -- return mapped address at given offset + */ +void * +pool_set_file_map(struct pool_set_file *file, uint64_t offset) +{ + if (file->addr == MAP_FAILED) + return NULL; + + return (char *)file->addr + offset; +} + +/* + * pool_read -- read from pool set file or regular file + * + * 'buff' has to be a buffer at least 'nbytes' long + * 'off' is an offset from the beginning of the pool + */ +int +pool_read(struct pool_data *pool, void *buff, size_t nbytes, uint64_t off) +{ + if (off + nbytes > pool->set_file->size) + return -1; + + if (pool->params.type != POOL_TYPE_BTT) + memcpy(buff, (char *)pool->set_file->addr + off, nbytes); + else { + if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1) + return -1; + if ((size_t)pool_btt_read(pool, buff, nbytes) != nbytes) + return -1; + } + + return 0; +} + +/* + * pool_write -- write to pool set file or regular file + * + * 'buff' has to be a buffer at least 'nbytes' long + * 'off' is an offset from the beginning of the pool + */ +int +pool_write(struct pool_data *pool, const void *buff, size_t nbytes, + uint64_t off) +{ + if (off + nbytes > pool->set_file->size) + return -1; + + if (pool->params.type != POOL_TYPE_BTT) { + memcpy((char *)pool->set_file->addr + off, buff, nbytes); + util_persist_auto(pool->params.is_pmem, + (char *)pool->set_file->addr + off, nbytes); + } else { + if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1) + return -1; + if ((size_t)pool_btt_write(pool, buff, nbytes) != nbytes) + return -1; + } + + return 0; +} + +/* + * pool_copy -- make a copy of the pool + */ +int +pool_copy(struct pool_data *pool, const char *dst_path, int overwrite) +{ + struct pool_set_file *file = pool->set_file; + int dfd; + int exists = util_file_exists(dst_path); + if (exists < 0) + return -1; + + if (exists) { + if (!overwrite) { + errno = EEXIST; + return -1; + } + dfd = util_file_open(dst_path, NULL, 0, O_RDWR); + } else { + errno = 0; + dfd = util_file_create(dst_path, file->size, 0); + } + if (dfd < 0) + return -1; + + int result = 0; + os_stat_t stat_buf; + if (os_stat(file->fname, &stat_buf)) { + result = -1; + goto out_close; + } + + if (fchmod(dfd, stat_buf.st_mode)) { + result = -1; + goto out_close; + } + + void 
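+	/*
+	 * Map the whole destination file up front: a non-BTT pool is then
+	 * copied from the source mapping with a single memcpy(), while a
+	 * BTT-mode pool is streamed in through an RW_BUFFERING_SIZE bounce
+	 * buffer below.
+	 */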
*daddr = mmap(NULL, file->size, PROT_READ | PROT_WRITE, + MAP_SHARED, dfd, 0); + if (daddr == MAP_FAILED) { + result = -1; + goto out_close; + } + + if (pool->params.type != POOL_TYPE_BTT) { + void *saddr = pool_set_file_map(file, 0); + memcpy(daddr, saddr, file->size); + goto out_unmap; + } + + void *buf = malloc(RW_BUFFERING_SIZE); + if (buf == NULL) { + ERR("!malloc"); + result = -1; + goto out_unmap; + } + + if (pool_btt_lseek(pool, 0, SEEK_SET) == -1) { + result = -1; + goto out_free; + } + ssize_t buf_read = 0; + void *dst = daddr; + while ((buf_read = pool_btt_read(pool, buf, RW_BUFFERING_SIZE))) { + if (buf_read == -1) + break; + + memcpy(dst, buf, (size_t)buf_read); + dst = (void *)((ssize_t)dst + buf_read); + } + +out_free: + free(buf); +out_unmap: + munmap(daddr, file->size); +out_close: + (void) os_close(dfd); + return result; +} + +/* + * pool_set_part_copy -- make a copy of the poolset part + */ +int +pool_set_part_copy(struct pool_set_part *dpart, struct pool_set_part *spart, + int overwrite) +{ + LOG(3, "dpart %p spart %p", dpart, spart); + + int result = 0; + + os_stat_t stat_buf; + if (os_fstat(spart->fd, &stat_buf)) { + ERR("!util_stat"); + return -1; + } + + size_t smapped = 0; + void *saddr = pmem_map_file(spart->path, 0, 0, S_IREAD, &smapped, NULL); + if (!saddr) + return -1; + + size_t dmapped = 0; + int is_pmem; + void *daddr; + + int exists = util_file_exists(dpart->path); + if (exists < 0) { + result = -1; + goto out_sunmap; + } + + if (exists) { + if (!overwrite) { + errno = EEXIST; + result = -1; + goto out_sunmap; + } + + daddr = pmem_map_file(dpart->path, 0, 0, S_IWRITE, &dmapped, + &is_pmem); + } else { + errno = 0; + daddr = pmem_map_file(dpart->path, dpart->filesize, + PMEM_FILE_CREATE | PMEM_FILE_EXCL, + stat_buf.st_mode, &dmapped, &is_pmem); + } + if (!daddr) { + result = -1; + goto out_sunmap; + } + +#ifdef DEBUG + /* provide extra logging in case of wrong dmapped/smapped value */ + if (dmapped < smapped) { + LOG(1, "dmapped < smapped: dmapped = %lu, smapped = %lu", + dmapped, smapped); + ASSERT(0); + } +#endif + + if (is_pmem) { + pmem_memcpy_persist(daddr, saddr, smapped); + } else { + memcpy(daddr, saddr, smapped); + pmem_msync(daddr, smapped); + } + + pmem_unmap(daddr, dmapped); +out_sunmap: + pmem_unmap(saddr, smapped); + return result; +} + +/* + * pool_memset -- memset pool part described by off and count + */ +int +pool_memset(struct pool_data *pool, uint64_t off, int c, size_t count) +{ + int result = 0; + + if (pool->params.type != POOL_TYPE_BTT) + memset((char *)off, 0, count); + else { + if (pool_btt_lseek(pool, (os_off_t)off, SEEK_SET) == -1) + return -1; + + size_t zero_size = min(count, RW_BUFFERING_SIZE); + void *buf = malloc(zero_size); + if (!buf) { + ERR("!malloc"); + return -1; + } + memset(buf, c, zero_size); + ssize_t nwrite = 0; + do { + zero_size = min(zero_size, count); + nwrite = pool_btt_write(pool, buf, zero_size); + if (nwrite < 0) { + result = -1; + break; + } + count -= (size_t)nwrite; + } while (count > 0); + + free(buf); + } + + return result; +} + +/* + * pool_set_files_count -- get total number of parts of all replicas + */ +unsigned +pool_set_files_count(struct pool_set_file *file) +{ + unsigned ret = 0; + unsigned nreplicas = file->poolset->nreplicas; + for (unsigned r = 0; r < nreplicas; r++) { + struct pool_replica *rep = file->poolset->replica[r]; + ret += rep->nparts; + } + + return ret; +} + +/* + * pool_set_file_map_headers -- map headers of each pool set part file + */ +int +pool_set_file_map_headers(struct 
pool_set_file *file, int rdonly, int prv) +{ + if (!file->poolset) + return -1; + + for (unsigned r = 0; r < file->poolset->nreplicas; r++) { + struct pool_replica *rep = file->poolset->replica[r]; + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + if (util_map_hdr(part, + prv ? MAP_PRIVATE : MAP_SHARED, rdonly)) { + part->hdr = NULL; + goto err; + } + } + } + + return 0; +err: + pool_set_file_unmap_headers(file); + return -1; +} + +/* + * pool_set_file_unmap_headers -- unmap headers of each pool set part file + */ +void +pool_set_file_unmap_headers(struct pool_set_file *file) +{ + if (!file->poolset) + return; + for (unsigned r = 0; r < file->poolset->nreplicas; r++) { + struct pool_replica *rep = file->poolset->replica[r]; + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + util_unmap_hdr(part); + } + } +} + +/* + * pool_get_signature -- (internal) return signature of specified pool type + */ +static const char * +pool_get_signature(enum pool_type type) +{ + switch (type) { + case POOL_TYPE_LOG: + return LOG_HDR_SIG; + case POOL_TYPE_BLK: + return BLK_HDR_SIG; + case POOL_TYPE_OBJ: + return OBJ_HDR_SIG; + default: + return NULL; + } +} + +/* + * pool_hdr_default -- return default pool header values + */ +void +pool_hdr_default(enum pool_type type, struct pool_hdr *hdrp) +{ + memset(hdrp, 0, sizeof(*hdrp)); + const char *sig = pool_get_signature(type); + ASSERTne(sig, NULL); + + memcpy(hdrp->signature, sig, POOL_HDR_SIG_LEN); + + switch (type) { + case POOL_TYPE_LOG: + hdrp->major = LOG_FORMAT_MAJOR; + hdrp->features = log_format_feat_default; + break; + case POOL_TYPE_BLK: + hdrp->major = BLK_FORMAT_MAJOR; + hdrp->features = blk_format_feat_default; + break; + case POOL_TYPE_OBJ: + hdrp->major = OBJ_FORMAT_MAJOR; + hdrp->features = obj_format_feat_default; + break; + default: + break; + } +} + +/* + * pool_hdr_get_type -- return pool type based on pool header data + */ +enum pool_type +pool_hdr_get_type(const struct pool_hdr *hdrp) +{ + if (memcmp(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return POOL_TYPE_LOG; + else if (memcmp(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return POOL_TYPE_BLK; + else if (memcmp(hdrp->signature, OBJ_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return POOL_TYPE_OBJ; + else + return POOL_TYPE_UNKNOWN; +} + +/* + * pool_get_pool_type_str -- return human-readable pool type string + */ +const char * +pool_get_pool_type_str(enum pool_type type) +{ + switch (type) { + case POOL_TYPE_BTT: + return "btt"; + case POOL_TYPE_LOG: + return "pmemlog"; + case POOL_TYPE_BLK: + return "pmemblk"; + case POOL_TYPE_OBJ: + return "pmemobj"; + default: + return "unknown"; + } +} + +/* + * pool_set_type -- get pool type of a poolset + */ +enum pool_type +pool_set_type(struct pool_set *set) +{ + struct pool_hdr hdr; + + /* open the first part file to read the pool header values */ + const struct pool_set_part *part = PART(REP(set, 0), 0); + + if (util_file_pread(part->path, &hdr, sizeof(hdr), 0) != + sizeof(hdr)) { + ERR("cannot read pool header from poolset"); + return POOL_TYPE_UNKNOWN; + } + + util_convert2h_hdr_nocheck(&hdr); + enum pool_type type = pool_hdr_get_type(&hdr); + return type; +} + +/* + * pool_btt_info_valid -- check consistency of BTT Info header + */ +int +pool_btt_info_valid(struct btt_info *infop) +{ + if (memcmp(infop->sig, BTTINFO_SIG, BTTINFO_SIG_LEN) != 0) + return 0; + + return util_checksum(infop, sizeof(*infop), &infop->checksum, 0, 0); +} + +/* + * 
pool_blk_get_first_valid_arena -- get first valid BTT Info in arena + */ +int +pool_blk_get_first_valid_arena(struct pool_data *pool, struct arena *arenap) +{ + arenap->zeroed = true; + uint64_t offset = pool_get_first_valid_btt(pool, &arenap->btt_info, + 2 * BTT_ALIGNMENT, &arenap->zeroed); + + if (offset != 0) { + arenap->offset = offset; + arenap->valid = true; + return 1; + } + + return 0; +} + +/* + * pool_next_arena_offset -- get offset of next arena + * + * The calculated offset is theoretical. The function does not check whether + * such an arena can actually exist. + */ +uint64_t +pool_next_arena_offset(struct pool_data *pool, uint64_t offset) +{ + uint64_t lastoff = (pool->set_file->size & ~(BTT_ALIGNMENT - 1)); + uint64_t nextoff = min(offset + BTT_MAX_ARENA, lastoff); + return nextoff; +} + +/* + * pool_get_first_valid_btt -- return offset to the first valid BTT Info + * + * - Return the offset to a valid BTT Info header in the pool file. + * - Start looking from the given offset. + * - Convert the BTT Info header to host endianness. + * - Return the BTT Info header by pointer. + * - If the zeroed pointer is provided, also check whether all visited BTT + * Info headers are zeroed, which is useful for BLK pools. + */ +uint64_t +pool_get_first_valid_btt(struct pool_data *pool, struct btt_info *infop, + uint64_t offset, bool *zeroed) +{ + /* if we have a valid arena, get the BTT Info header from it */ + if (pool->narenas != 0) { + struct arena *arenap = PMDK_TAILQ_FIRST(&pool->arenas); + memcpy(infop, &arenap->btt_info, sizeof(*infop)); + return arenap->offset; + } + + const size_t info_size = sizeof(*infop); + + /* theoretical offsets to BTT Info header and backup */ + uint64_t offsets[2] = {offset, 0}; + + while (offsets[0] < pool->set_file->size) { + /* calculate backup offset */ + offsets[1] = pool_next_arena_offset(pool, offsets[0]) - + info_size; + + /* check both offsets: header and backup */ + for (int i = 0; i < 2; ++i) { + if (pool_read(pool, infop, info_size, offsets[i])) + continue; + + /* check if all possible BTT Info are zeroed */ + if (zeroed) + *zeroed &= util_is_zeroed((const void *)infop, + info_size); + + /* check if the BTT Info read is valid */ + if (pool_btt_info_valid(infop)) { + btt_info_convert2h(infop); + return offsets[i]; + } + } + + /* jump to the next arena */ + offsets[0] += BTT_MAX_ARENA; + } + + return 0; +} + +/* + * pool_get_min_size -- return the minimum pool size of a pool of a given type + */ +size_t +pool_get_min_size(enum pool_type type) +{ + switch (type) { + case POOL_TYPE_LOG: + return PMEMLOG_MIN_POOL; + case POOL_TYPE_BLK: + return PMEMBLK_MIN_POOL; + case POOL_TYPE_OBJ: + return PMEMOBJ_MIN_POOL; + default: + ERR("unknown pool type"); + return SIZE_MAX; + } +} + +#if FAULT_INJECTION +void +pmempool_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + core_inject_fault_at(type, nth, at); +} + +int +pmempool_fault_injection_enabled(void) +{ + return core_fault_injection_enabled(); +} +#endif diff --git a/src/pmdk/src/libpmempool/pool.h b/src/pmdk/src/libpmempool/pool.h new file mode 100644 index 000000000..3b51e08a8 --- /dev/null +++ b/src/pmdk/src/libpmempool/pool.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * pool.h -- internal definitions for pool processing functions + */ + +#ifndef POOL_H +#define POOL_H + +#include +#include + +#include "libpmemobj.h" + +#include "queue.h" +#include "set.h" +#include "log.h" +#include "blk.h" +#include "btt_layout.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include
"alloc.h" +#include "fault_injection.h" + +enum pool_type { + POOL_TYPE_UNKNOWN = (1 << 0), + POOL_TYPE_LOG = (1 << 1), + POOL_TYPE_BLK = (1 << 2), + POOL_TYPE_OBJ = (1 << 3), + POOL_TYPE_BTT = (1 << 4), + + POOL_TYPE_ANY = POOL_TYPE_UNKNOWN | POOL_TYPE_LOG | + POOL_TYPE_BLK | POOL_TYPE_OBJ | POOL_TYPE_BTT, +}; + +struct pool_params { + enum pool_type type; + char signature[POOL_HDR_SIG_LEN]; + features_t features; + size_t size; + mode_t mode; + int is_poolset; + int is_part; + int is_dev_dax; + int is_pmem; + union { + struct { + uint64_t bsize; + } blk; + struct { + char layout[PMEMOBJ_MAX_LAYOUT]; + } obj; + }; +}; + +struct pool_set_file { + int fd; + char *fname; + void *addr; + size_t size; + struct pool_set *poolset; + time_t mtime; + mode_t mode; +}; + +struct arena { + PMDK_TAILQ_ENTRY(arena) next; + struct btt_info btt_info; + uint32_t id; + bool valid; + bool zeroed; + uint64_t offset; + uint8_t *flog; + size_t flogsize; + uint32_t *map; + size_t mapsize; +}; + +struct pool_data { + struct pool_params params; + struct pool_set_file *set_file; + int blk_no_layout; + union { + struct pool_hdr pool; + struct pmemlog log; + struct pmemblk blk; + } hdr; + enum { + UUID_NOP = 0, + UUID_FROM_BTT, + UUID_NOT_FROM_BTT, + } uuid_op; + struct arena bttc; + PMDK_TAILQ_HEAD(arenashead, arena) arenas; + uint32_t narenas; +}; + +struct pool_data *pool_data_alloc(PMEMpoolcheck *ppc); +void pool_data_free(struct pool_data *pool); +void pool_params_from_header(struct pool_params *params, + const struct pool_hdr *hdr); + +int pool_set_parse(struct pool_set **setp, const char *path); +void *pool_set_file_map(struct pool_set_file *file, uint64_t offset); +int pool_read(struct pool_data *pool, void *buff, size_t nbytes, + uint64_t off); +int pool_write(struct pool_data *pool, const void *buff, size_t nbytes, + uint64_t off); +int pool_copy(struct pool_data *pool, const char *dst_path, int overwrite); +int pool_set_part_copy(struct pool_set_part *dpart, + struct pool_set_part *spart, int overwrite); +int pool_memset(struct pool_data *pool, uint64_t off, int c, size_t count); + +unsigned pool_set_files_count(struct pool_set_file *file); +int pool_set_file_map_headers(struct pool_set_file *file, int rdonly, int prv); +void pool_set_file_unmap_headers(struct pool_set_file *file); + +void pool_hdr_default(enum pool_type type, struct pool_hdr *hdrp); +enum pool_type pool_hdr_get_type(const struct pool_hdr *hdrp); +enum pool_type pool_set_type(struct pool_set *set); +const char *pool_get_pool_type_str(enum pool_type type); + +int pool_btt_info_valid(struct btt_info *infop); + +int pool_blk_get_first_valid_arena(struct pool_data *pool, + struct arena *arenap); +int pool_blk_bsize_valid(uint32_t bsize, uint64_t fsize); +uint64_t pool_next_arena_offset(struct pool_data *pool, uint64_t header_offset); +uint64_t pool_get_first_valid_btt(struct pool_data *pool, + struct btt_info *infop, uint64_t offset, bool *zeroed); +size_t pool_get_min_size(enum pool_type); + +#if FAULT_INJECTION +void +pmempool_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +pmempool_fault_injection_enabled(void); +#else +static inline void +pmempool_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +pmempool_fault_injection_enabled(void) +{ + return 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmempool/replica.c b/src/pmdk/src/libpmempool/replica.c new file mode 100644 index 000000000..cf38e742d --- 
/dev/null +++ b/src/pmdk/src/libpmempool/replica.c @@ -0,0 +1,2503 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * replica.c -- groups all commands for replica manipulation + */ + +#include "replica.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obj.h" +#include "palloc.h" +#include "file.h" +#include "os.h" +#include "out.h" +#include "pool_hdr.h" +#include "set.h" +#include "util.h" +#include "uuid.h" +#include "shutdown_state.h" +#include "badblocks.h" +#include "set_badblocks.h" + +/* + * check_flags_sync -- (internal) check if flags are supported for sync + */ +static int +check_flags_sync(unsigned flags) +{ + flags &= ~(PMEMPOOL_SYNC_DRY_RUN | PMEMPOOL_SYNC_FIX_BAD_BLOCKS); + return flags > 0; +} + +/* + * check_flags_transform -- (internal) check if flags are supported for + * transform + */ +static int +check_flags_transform(unsigned flags) +{ + flags &= ~PMEMPOOL_TRANSFORM_DRY_RUN; + return flags > 0; +} + +/* + * replica_align_badblock_offset_length -- align offset and length + * of the bad block for the given part + */ +void +replica_align_badblock_offset_length(size_t *offset, size_t *length, + struct pool_set *set_in, unsigned repn, unsigned partn) +{ + LOG(3, "offset %zu, length %zu, pool_set %p, replica %u, part %u", + *offset, *length, set_in, repn, partn); + + size_t alignment = set_in->replica[repn]->part[partn].alignment; + + size_t off = ALIGN_DOWN(*offset, alignment); + size_t len = ALIGN_UP(*length + (*offset - off), alignment); + + *offset = off; + *length = len; +} + +/* + * replica_get_part_data_len -- get data length for given part + */ +size_t +replica_get_part_data_len(struct pool_set *set_in, unsigned repn, + unsigned partn) +{ + size_t alignment = set_in->replica[repn]->part[partn].alignment; + size_t hdrsize = (set_in->options & OPTION_SINGLEHDR) ? 0 : alignment; + return ALIGN_DOWN(set_in->replica[repn]->part[partn].filesize, + alignment) - ((partn == 0) ? 
POOL_HDR_SIZE : hdrsize); +} + +/* + * replica_get_part_offset -- get part's offset from the beginning of replica + */ +uint64_t +replica_get_part_offset(struct pool_set *set, unsigned repn, unsigned partn) +{ + return (uint64_t)set->replica[repn]->part[partn].addr - + (uint64_t)set->replica[repn]->part[0].addr; +} + +/* + * replica_get_part_data_offset -- get data length before given part + */ +uint64_t +replica_get_part_data_offset(struct pool_set *set, unsigned repn, + unsigned partn) +{ + if (partn == 0) + return POOL_HDR_SIZE; + + return (uint64_t)set->replica[repn]->part[partn].addr - + (uint64_t)set->replica[repn]->part[0].addr; +} + +/* + * replica_remove_part -- unlink part from replica + */ +int +replica_remove_part(struct pool_set *set, unsigned repn, unsigned partn, + int fix_bad_blocks) +{ + LOG(3, "set %p repn %u partn %u fix_bad_blocks %i", + set, repn, partn, fix_bad_blocks); + + struct pool_set_part *part = PART(REP(set, repn), partn); + if (part->fd != -1) { + os_close(part->fd); + part->fd = -1; + } + + int olderrno = errno; + enum file_type type = util_file_get_type(part->path); + if (type == OTHER_ERROR) + return -1; + + /* if the part is a device dax, clear its bad blocks */ + if (type == TYPE_DEVDAX && fix_bad_blocks && + badblocks_clear_all(part->path)) { + ERR("clearing bad blocks in device dax failed -- '%s'", + part->path); + errno = EIO; + return -1; + } + + if (type == TYPE_NORMAL && util_unlink(part->path)) { + ERR("!removing part %u from replica %u failed", + partn, repn); + return -1; + } + + errno = olderrno; + LOG(4, "Removed part %s number %u from replica %u", part->path, partn, + repn); + return 0; +} + +/* + * create_replica_health_status -- (internal) create helping structure for + * storing replica's health status + */ +static struct replica_health_status * +create_replica_health_status(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + + unsigned nparts = set->replica[repn]->nparts; + struct replica_health_status *replica_hs; + + replica_hs = Zalloc(sizeof(struct replica_health_status) + + nparts * sizeof(struct part_health_status)); + if (replica_hs == NULL) { + ERR("!Zalloc for replica health status"); + return NULL; + } + + replica_hs->nparts = nparts; + replica_hs->nhdrs = set->replica[repn]->nhdrs; + + return replica_hs; +} + +/* + * replica_part_remove_recovery_file -- remove bad blocks' recovery file + */ +static int +replica_part_remove_recovery_file(struct part_health_status *phs) +{ + LOG(3, "phs %p", phs); + + if (phs->recovery_file_name == NULL || phs->recovery_file_exists == 0) + return 0; + + if (os_unlink(phs->recovery_file_name) < 0) { + ERR("!removing the bad block recovery file failed -- '%s'", + phs->recovery_file_name); + return -1; + } + + LOG(3, "bad block recovery file removed -- '%s'", + phs->recovery_file_name); + + phs->recovery_file_exists = 0; + + return 0; +} + +/* + * replica_remove_all_recovery_files -- remove all recovery files + */ +int +replica_remove_all_recovery_files(struct poolset_health_status *set_hs) +{ + LOG(3, "set_hs %p", set_hs); + + int ret = 0; + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + struct replica_health_status *rhs = set_hs->replica[r]; + for (unsigned p = 0; p < rhs->nparts; ++p) + ret |= replica_part_remove_recovery_file(&rhs->part[p]); + } + + return ret; +} + +/* + * replica_free_poolset_health_status -- free memory allocated for helping + * structure + */ +void +replica_free_poolset_health_status(struct poolset_health_status *set_hs) +{ + LOG(3, 
"set_hs %p", set_hs); + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + struct replica_health_status *rep_hs = set_hs->replica[r]; + + for (unsigned p = 0; p < rep_hs->nparts; ++p) { + Free(rep_hs->part[p].recovery_file_name); + Free(rep_hs->part[p].bbs.bbv); + } + + Free(set_hs->replica[r]); + } + + Free(set_hs); +} + +/* + * replica_create_poolset_health_status -- create helping structure for storing + * poolset's health status + */ +int +replica_create_poolset_health_status(struct pool_set *set, + struct poolset_health_status **set_hsp) +{ + LOG(3, "set %p, set_hsp %p", set, set_hsp); + unsigned nreplicas = set->nreplicas; + struct poolset_health_status *set_hs; + set_hs = Zalloc(sizeof(struct poolset_health_status) + + nreplicas * sizeof(struct replica_health_status *)); + if (set_hs == NULL) { + ERR("!Zalloc for poolset health state"); + return -1; + } + set_hs->nreplicas = nreplicas; + for (unsigned i = 0; i < nreplicas; ++i) { + struct replica_health_status *replica_hs = + create_replica_health_status(set, i); + if (replica_hs == NULL) { + replica_free_poolset_health_status(set_hs); + return -1; + } + set_hs->replica[i] = replica_hs; + } + *set_hsp = set_hs; + return 0; +} + +/* + * replica_is_part_broken -- check if part is marked as broken in the helping + * structure + */ +int +replica_is_part_broken(unsigned repn, unsigned partn, + struct poolset_health_status *set_hs) +{ + struct replica_health_status *rhs = REP_HEALTH(set_hs, repn); + return (rhs->flags & IS_BROKEN) || + (PART_HEALTH(rhs, partn) & IS_BROKEN); +} + +/* + * is_replica_broken -- check if any part in the replica is marked as broken + */ +int +replica_is_replica_broken(unsigned repn, struct poolset_health_status *set_hs) +{ + LOG(3, "repn %u, set_hs %p", repn, set_hs); + struct replica_health_status *r_hs = REP_HEALTH(set_hs, repn); + if (r_hs->flags & IS_BROKEN) + return 1; + + for (unsigned p = 0; p < r_hs->nparts; ++p) { + if (replica_is_part_broken(repn, p, set_hs)) + return 1; + } + return 0; +} + +/* + * replica_is_replica_consistent -- check if replica is not marked as + * inconsistent + */ +int +replica_is_replica_consistent(unsigned repn, + struct poolset_health_status *set_hs) +{ + return !(REP_HEALTH(set_hs, repn)->flags & IS_INCONSISTENT); +} + +/* + * replica_has_bad_blocks -- check if replica has bad blocks + */ +int +replica_has_bad_blocks(unsigned repn, struct poolset_health_status *set_hs) +{ + return REP_HEALTH(set_hs, repn)->flags & HAS_BAD_BLOCKS; +} + +/* + * replica_part_has_bad_blocks -- check if replica's part has bad blocks + */ +int +replica_part_has_bad_blocks(struct part_health_status *phs) +{ + return phs->flags & HAS_BAD_BLOCKS; +} + +/* + * replica_part_has_corrupted_header -- (internal) check if replica's part + * has bad blocks in the header (corrupted header) + */ +int +replica_part_has_corrupted_header(unsigned repn, unsigned partn, + struct poolset_health_status *set_hs) +{ + struct replica_health_status *rhs = REP_HEALTH(set_hs, repn); + return PART_HEALTH(rhs, partn) & HAS_CORRUPTED_HEADER; +} + +/* + * replica_has_corrupted_header -- (internal) check if replica has bad blocks + * in the header (corrupted header) + */ +static int +replica_has_corrupted_header(unsigned repn, + struct poolset_health_status *set_hs) +{ + return REP_HEALTH(set_hs, repn)->flags & HAS_CORRUPTED_HEADER; +} + +/* + * replica_is_replica_healthy -- check if replica is unbroken and consistent + */ +int +replica_is_replica_healthy(unsigned repn, struct poolset_health_status *set_hs) +{ + LOG(3, 
"repn %u, set_hs %p", repn, set_hs); + + int ret = !replica_is_replica_broken(repn, set_hs) && + replica_is_replica_consistent(repn, set_hs) && + !replica_has_bad_blocks(repn, set_hs); + + LOG(4, "return %i", ret); + + return ret; +} + +/* + * replica_has_healthy_header -- (internal) check if replica has healthy headers + */ +static int +replica_has_healthy_header(unsigned repn, struct poolset_health_status *set_hs) +{ + LOG(3, "repn %u, set_hs %p", repn, set_hs); + + int ret = !replica_is_replica_broken(repn, set_hs) && + replica_is_replica_consistent(repn, set_hs) && + !replica_has_corrupted_header(repn, set_hs); + + LOG(4, "return %i", ret); + + return ret; +} + +/* + * replica_is_poolset_healthy -- check if all replicas in a poolset are not + * marked as broken nor inconsistent in the + * helping structure + */ +int +replica_is_poolset_healthy(struct poolset_health_status *set_hs) +{ + LOG(3, "set_hs %p", set_hs); + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + if (!replica_is_replica_healthy(r, set_hs)) + return 0; + } + return 1; +} + +/* + * replica_is_poolset_transformed -- check if the flag indicating a call from + * pmempool_transform is on + */ +int +replica_is_poolset_transformed(unsigned flags) +{ + return flags & IS_TRANSFORMED; +} + +/* + * replica_find_unbroken_part_with_header -- find a part number in a given + * replica, which is not marked as broken in the helping structure and contains + * a pool header + */ +unsigned +replica_find_unbroken_part(unsigned repn, struct poolset_health_status *set_hs) +{ + LOG(3, "repn %u, set_hs %p", repn, set_hs); + for (unsigned p = 0; p < REP_HEALTH(set_hs, repn)->nhdrs; ++p) { + if (!replica_is_part_broken(repn, p, set_hs)) + return p; + } + return UNDEF_PART; +} + +/* + * replica_find_healthy_replica -- find a replica which is a good source of data + */ +unsigned +replica_find_healthy_replica(struct poolset_health_status *set_hs) +{ + LOG(3, "set_hs %p", set_hs); + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + if (replica_is_replica_healthy(r, set_hs)) { + LOG(4, "return %i", r); + return r; + } + } + + LOG(4, "return %i", UNDEF_REPLICA); + return UNDEF_REPLICA; +} + +/* + * replica_find_replica_healthy_header -- find a replica with a healthy header + */ +unsigned +replica_find_replica_healthy_header(struct poolset_health_status *set_hs) +{ + LOG(3, "set_hs %p", set_hs); + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + if (replica_has_healthy_header(r, set_hs)) { + LOG(4, "return %i", r); + return r; + } + } + + LOG(4, "return %i", UNDEF_REPLICA); + return UNDEF_REPLICA; +} + +/* + * replica_check_store_size -- (internal) store size from pool descriptor for + * replica + */ +static int +replica_check_store_size(struct pool_set *set, + struct poolset_health_status *set_hs, unsigned repn) +{ + LOG(3, "set %p, set_hs %p, repn %u", set, set_hs, repn); + struct pool_replica *rep = set->replica[repn]; + struct pmemobjpool pop; + + if (rep->remote) { + memcpy(&pop.hdr, rep->part[0].hdr, sizeof(pop.hdr)); + void *descr = (void *)((uintptr_t)&pop + POOL_HDR_SIZE); + if (Rpmem_read(rep->remote->rpp, descr, POOL_HDR_SIZE, + sizeof(pop) - POOL_HDR_SIZE, 0)) { + return -1; + } + } else { + /* round up map size to Mmap align size */ + if (util_map_part(&rep->part[0], NULL, + ALIGN_UP(sizeof(pop), rep->part[0].alignment), + 0, MAP_SHARED, 1)) { + return -1; + } + + memcpy(&pop, rep->part[0].addr, sizeof(pop)); + + util_unmap_part(&rep->part[0]); + } + + void *dscp = (void *)((uintptr_t)&pop + sizeof(pop.hdr)); + + if 
(!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop.checksum, 0, + 0)) { + set_hs->replica[repn]->flags |= IS_BROKEN; + return 0; + } + + set_hs->replica[repn]->pool_size = pop.heap_offset + pop.heap_size; + + return 0; +} + +/* + * check_store_all_sizes -- (internal) store sizes from pool descriptor for all + * healthy replicas + */ +static int +check_store_all_sizes(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (!replica_has_healthy_header(r, set_hs)) + continue; + + if (replica_check_store_size(set, set_hs, r)) + return -1; + } + + return 0; +} + +/* + * check_and_open_poolset_part_files -- (internal) for each part in a poolset + * check if the part files are accessible, and if not, mark it as broken + * in a helping structure; then open the part file + */ +static int +check_and_open_poolset_part_files(struct pool_set *set, + struct poolset_health_status *set_hs, unsigned flags) +{ + LOG(3, "set %p, set_hs %p, flags %u", set, set_hs, flags); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + if (rep->remote) { + if (util_replica_open_remote(set, r, 0)) { + LOG(1, "cannot open remote replica no %u", r); + return -1; + } + + unsigned nlanes = REMOTE_NLANES; + int ret = util_poolset_remote_open(rep, r, + rep->repsize, 0, + rep->part[0].addr, + rep->resvsize, &nlanes); + if (ret) { + rep_hs->flags |= IS_BROKEN; + LOG(1, "remote replica #%u marked as BROKEN", + r); + } + + continue; + } + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = rep->part[p].path; + enum file_type type = util_file_get_type(path); + + if (type < 0 || os_access(path, R_OK|W_OK) != 0) { + LOG(1, "part file %s is not accessible", path); + errno = 0; + rep_hs->part[p].flags |= IS_BROKEN; + if (is_dry_run(flags)) + continue; + } + + if (util_part_open(&rep->part[p], 0, 0)) { + if (type == TYPE_DEVDAX) { + LOG(1, + "opening part on Device DAX %s failed", + path); + return -1; + } + LOG(1, "opening part %s failed", path); + errno = 0; + rep_hs->part[p].flags |= IS_BROKEN; + } + } + } + return 0; +} + +/* + * map_all_unbroken_headers -- (internal) map all headers in a poolset, + * skipping those marked as broken in a helping + * structure + */ +static int +map_all_unbroken_headers(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(r, p, set_hs)) + continue; + + LOG(4, "mapping header for part %u, replica %u", p, r); + if (util_map_hdr(&rep->part[p], MAP_SHARED, 0) != 0) { + LOG(1, "header mapping failed - part #%d", p); + rep_hs->part[p].flags |= IS_BROKEN; + } + } + } + return 0; +} + +/* + * unmap_all_headers -- (internal) unmap all headers in a poolset + */ +static int +unmap_all_headers(struct pool_set *set) +{ + LOG(3, "set %p", set); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + util_replica_close(set, r); + + if (rep->remote && rep->remote->rpp) { + Rpmem_close(rep->remote->rpp); + rep->remote->rpp = NULL; + } + } + + return 0; +} + +/* + * check_checksums_and_signatures -- (internal) check if 
checksums + * and signatures are correct for parts + * in a given replica + */ +static int +check_checksums_and_signatures(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + struct replica_health_status *rep_hs = REP_HEALTH(set_hs, r); + + /* + * Checksums and signatures of remote replicas are checked + * during opening them on the remote side by the rpmem daemon. + * The local version of remote headers does not contain + * such data. + */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nhdrs; ++p) { + + /* skip broken parts */ + if (replica_is_part_broken(r, p, set_hs)) + continue; + + /* check part's checksum */ + LOG(4, "checking checksum for part %u, replica %u", + p, r); + + struct pool_hdr *hdr = HDR(rep, p); + + if (!util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 0, + POOL_HDR_CSUM_END_OFF(hdr))) { + ERR("invalid checksum of pool header"); + rep_hs->part[p].flags |= IS_BROKEN; + } else if (util_is_zeroed(hdr, sizeof(*hdr))) { + rep_hs->part[p].flags |= IS_BROKEN; + } + + enum pool_type type = pool_hdr_get_type(hdr); + if (type == POOL_TYPE_UNKNOWN) { + ERR("invalid signature"); + rep_hs->part[p].flags |= IS_BROKEN; + } + } + } + return 0; +} + +/* + * replica_badblocks_recovery_file_save -- save bad blocks in the bad blocks + * recovery file before clearing them + */ +static int +replica_badblocks_recovery_file_save(struct part_health_status *part_hs) +{ + LOG(3, "part_health_status %p", part_hs); + + ASSERTeq(part_hs->recovery_file_exists, 1); + ASSERTne(part_hs->recovery_file_name, NULL); + + struct badblocks *bbs = &part_hs->bbs; + char *path = part_hs->recovery_file_name; + int ret = -1; + + int fd = os_open(path, O_WRONLY | O_TRUNC); + if (fd < 0) { + ERR("!opening bad block recovery file failed -- '%s'", path); + return -1; + } + + FILE *recovery_file_name = os_fdopen(fd, "w"); + if (recovery_file_name == NULL) { + ERR( + "!opening a file stream for bad block recovery file failed -- '%s'", + path); + os_close(fd); + return -1; + } + + /* save bad blocks */ + for (unsigned i = 0; i < bbs->bb_cnt; i++) { + ASSERT(bbs->bbv[i].length != 0); + fprintf(recovery_file_name, "%zu %zu\n", + bbs->bbv[i].offset, bbs->bbv[i].length); + } + + if (fflush(recovery_file_name) == EOF) { + ERR("!flushing bad block recovery file failed -- '%s'", path); + goto exit_error; + } + + if (os_fsync(fd) < 0) { + ERR("!syncing bad block recovery file failed -- '%s'", path); + goto exit_error; + } + + /* save the finish flag */ + fprintf(recovery_file_name, "0 0\n"); + + if (fflush(recovery_file_name) == EOF) { + ERR("!flushing bad block recovery file failed -- '%s'", path); + goto exit_error; + } + + if (os_fsync(fd) < 0) { + ERR("!syncing bad block recovery file failed -- '%s'", path); + goto exit_error; + } + + LOG(3, "bad blocks saved in the recovery file -- '%s'", path); + ret = 0; + +exit_error: + os_fclose(recovery_file_name); + + return ret; +} + +/* + * replica_part_badblocks_recovery_file_read -- read bad blocks + * from the bad block recovery file + * for the current part + */ +static int +replica_part_badblocks_recovery_file_read(struct part_health_status *part_hs) +{ + LOG(3, "part_health_status %p", part_hs); + + ASSERT(part_hs->recovery_file_exists); + ASSERTne(part_hs->recovery_file_name, NULL); + + VEC(bbsvec, struct bad_block) bbv = VEC_INITIALIZER; + char *path = part_hs->recovery_file_name; + struct bad_block bb; + int 
ret = -1; + + FILE *recovery_file = os_fopen(path, "r"); + if (!recovery_file) { + ERR("!opening the recovery file for reading failed -- '%s'", + path); + return -1; + } + + unsigned long long min_offset = 0; /* minimum possible offset */ + + do { + if (fscanf(recovery_file, "%zu %zu\n", + &bb.offset, &bb.length) < 2) { + LOG(1, "incomplete bad block recovery file -- '%s'", + path); + ret = 1; + goto error_exit; + } + + if (bb.offset == 0 && bb.length == 0) { + /* finish_flag */ + break; + } + + /* check if bad blocks build an increasing sequence */ + if (bb.offset < min_offset) { + ERR( + "wrong format of bad block recovery file (bad blocks are not sorted by the offset in ascending order) -- '%s'", + path); + errno = EINVAL; + ret = -1; + goto error_exit; + } + + /* update the minimum possible offset */ + min_offset = bb.offset + bb.length; + + bb.nhealthy = NO_HEALTHY_REPLICA; /* unknown healthy replica */ + + /* add the new bad block to the vector */ + if (VEC_PUSH_BACK(&bbv, bb)) + goto error_exit; + } while (1); + + part_hs->bbs.bbv = VEC_ARR(&bbv); + part_hs->bbs.bb_cnt = (unsigned)VEC_SIZE(&bbv); + + os_fclose(recovery_file); + + LOG(1, "bad blocks read from the recovery file -- '%s'", path); + + return 0; + +error_exit: + VEC_DELETE(&bbv); + os_fclose(recovery_file); + return ret; +} + +/* status returned by the replica_badblocks_recovery_files_check() function */ +enum badblocks_recovery_files_status { + RECOVERY_FILES_ERROR = -1, + RECOVERY_FILES_DO_NOT_EXIST = 0, + RECOVERY_FILES_EXIST_ALL = 1, + RECOVERY_FILES_NOT_ALL_EXIST = 2 +}; + +/* + * replica_badblocks_recovery_files_check -- (internal) check if bad blocks + * recovery files exist + */ +static enum badblocks_recovery_files_status +replica_badblocks_recovery_files_check(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + int recovery_file_exists = 0; + int recovery_file_does_not_exist = 0; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + if (rep->remote) { + /* + * Bad blocks in remote replicas currently are fixed + * during opening by removing and recreating + * the whole remote replica. 
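+ * Recovery files are therefore maintained for local part files
+ * only; this loop and the following bad-block loops skip remote
+ * replicas in the same way (marked "XXX: not supported yet").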
+ */ + continue; + } + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = PART(rep, p)->path; + struct part_health_status *part_hs = &rep_hs->part[p]; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + if (!exists) { + /* part file does not exist - skip it */ + continue; + } + + part_hs->recovery_file_name = + badblocks_recovery_file_alloc(set->path, + r, p); + if (part_hs->recovery_file_name == NULL) { + LOG(1, + "allocating name of bad block recovery file failed"); + return RECOVERY_FILES_ERROR; + } + + exists = util_file_exists(part_hs->recovery_file_name); + if (exists < 0) + return -1; + + part_hs->recovery_file_exists = exists; + + if (part_hs->recovery_file_exists) { + LOG(3, "bad block recovery file exists: %s", + part_hs->recovery_file_name); + + recovery_file_exists = 1; + + } else { + LOG(3, + "bad block recovery file does not exist: %s", + part_hs->recovery_file_name); + + recovery_file_does_not_exist = 1; + } + } + } + + if (recovery_file_exists) { + if (recovery_file_does_not_exist) { + LOG(4, "return RECOVERY_FILES_NOT_ALL_EXIST"); + return RECOVERY_FILES_NOT_ALL_EXIST; + } else { + LOG(4, "return RECOVERY_FILES_EXIST_ALL"); + return RECOVERY_FILES_EXIST_ALL; + } + } + + LOG(4, "return RECOVERY_FILES_DO_NOT_EXIST"); + return RECOVERY_FILES_DO_NOT_EXIST; +} + +/* + * replica_badblocks_recovery_files_read -- (internal) read bad blocks from all + * bad block recovery files for all parts + */ +static int +replica_badblocks_recovery_files_read(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + int ret; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = PART(rep, p)->path; + struct part_health_status *part_hs = &rep_hs->part[p]; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + if (!exists) { + /* the part does not exist */ + continue; + } + + LOG(1, + "reading bad blocks from the recovery file -- '%s'", + part_hs->recovery_file_name); + + ret = replica_part_badblocks_recovery_file_read( + part_hs); + if (ret < 0) { + LOG(1, + "reading bad blocks from the recovery file failed -- '%s'", + part_hs->recovery_file_name); + return -1; + } + + if (ret > 0) { + LOG(1, + "incomplete bad block recovery file detected -- '%s'", + part_hs->recovery_file_name); + return 1; + } + + if (part_hs->bbs.bb_cnt) { + LOG(3, "part %u contains %u bad blocks -- '%s'", + p, part_hs->bbs.bb_cnt, path); + } + } + } + + return 0; +} + +/* + * replica_badblocks_recovery_files_create_empty -- (internal) create one empty + * bad block recovery file + * for each part file + */ +static int +replica_badblocks_recovery_files_create_empty(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + struct part_health_status *part_hs; + const char *path; + int fd; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + part_hs = &rep_hs->part[p]; + path = PART(rep, p)->path; + + if (!part_hs->recovery_file_name) + continue; + + fd = os_open(part_hs->recovery_file_name, + O_RDWR | O_CREAT 
| O_EXCL, + 0600); + if (fd < 0) { + ERR( + "!creating an empty bad block recovery file failed -- '%s' (part file '%s')", + part_hs->recovery_file_name, path); + return -1; + } + + os_close(fd); + + char *file_name = Strdup(part_hs->recovery_file_name); + if (file_name == NULL) { + ERR("!Strdup"); + return -1; + } + + char *dir_name = dirname(file_name); + + /* fsync the file's directory */ + if (os_fsync_dir(dir_name) < 0) { + ERR( + "!syncing the directory of the bad block recovery file failed -- '%s' (part file '%s')", + dir_name, path); + Free(file_name); + return -1; + } + + Free(file_name); + + part_hs->recovery_file_exists = 1; + } + } + + return 0; +} + +/* + * replica_badblocks_recovery_files_save -- (internal) save bad blocks + * in the bad block recovery files + */ +static int +replica_badblocks_recovery_files_save(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + struct part_health_status *part_hs = &rep_hs->part[p]; + + if (!part_hs->recovery_file_name) + continue; + + int ret = replica_badblocks_recovery_file_save(part_hs); + if (ret < 0) { + LOG(1, + "opening bad block recovery file failed -- '%s'", + part_hs->recovery_file_name); + return -1; + } + } + } + + return 0; +} + +/* + * replica_badblocks_get -- (internal) get all bad blocks and save them + * in part_hs->bbs structures. + * Returns 1 if any bad block was found, 0 otherwise. + */ +static int +replica_badblocks_get(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + int bad_blocks_found = 0; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = PART(rep, p)->path; + struct part_health_status *part_hs = &rep_hs->part[p]; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + if (!exists) + continue; + + int ret = badblocks_get(path, &part_hs->bbs); + if (ret < 0) { + ERR( + "!checking the pool part for bad blocks failed -- '%s'", + path); + return -1; + } + + if (part_hs->bbs.bb_cnt) { + LOG(3, "part %u contains %u bad blocks -- '%s'", + p, part_hs->bbs.bb_cnt, path); + + bad_blocks_found = 1; + } + } + } + + return bad_blocks_found; +} + +/* + * check_badblocks_in_header -- (internal) check if bad blocks corrupted + * the header + */ +static int +check_badblocks_in_header(struct badblocks *bbs) +{ + for (unsigned b = 0; b < bbs->bb_cnt; b++) + if (bbs->bbv[b].offset < POOL_HDR_SIZE) + return 1; + + return 0; +} + +/* + * replica_badblocks_clear -- (internal) clear all bad blocks + */ +static int +replica_badblocks_clear(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + int ret; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + /* XXX: not supported yet */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + const char *path = PART(rep, p)->path; + struct part_health_status 
*part_hs = &rep_hs->part[p]; + + int exists = util_file_exists(path); + if (exists < 0) + return -1; + + if (!exists) { + /* the part does not exist */ + continue; + } + + if (part_hs->bbs.bb_cnt == 0) { + /* no bad blocks found */ + continue; + } + + /* bad blocks were found */ + part_hs->flags |= HAS_BAD_BLOCKS; + rep_hs->flags |= HAS_BAD_BLOCKS; + + if (check_badblocks_in_header(&part_hs->bbs)) { + part_hs->flags |= HAS_CORRUPTED_HEADER; + if (p == 0) + rep_hs->flags |= HAS_CORRUPTED_HEADER; + } + + ret = badblocks_clear(path, &part_hs->bbs); + if (ret < 0) { + LOG(1, + "clearing bad blocks in replica failed -- '%s'", + path); + return -1; + } + } + } + + return 0; +} + +/* + * replica_badblocks_check_or_clear -- (internal) check if replica contains + * bad blocks when in dry run + * or clear them otherwise + */ +static int +replica_badblocks_check_or_clear(struct pool_set *set, + struct poolset_health_status *set_hs, + int dry_run, int called_from_sync, + int check_bad_blocks, int fix_bad_blocks) +{ + LOG(3, + "set %p, set_hs %p, dry_run %i, called_from_sync %i, " + "check_bad_blocks %i, fix_bad_blocks %i", + set, set_hs, dry_run, called_from_sync, + check_bad_blocks, fix_bad_blocks); + +#define ERR_MSG_BB \ + " please read the manual first and use this option\n"\ + " ONLY IF you are sure that you know what you are doing" + + enum badblocks_recovery_files_status status; + int ret; + + /* check all bad block recovery files */ + status = replica_badblocks_recovery_files_check(set, set_hs); + + /* phase #1 - error handling */ + switch (status) { + case RECOVERY_FILES_ERROR: + LOG(1, "checking bad block recovery files failed"); + return -1; + + case RECOVERY_FILES_EXIST_ALL: + case RECOVERY_FILES_NOT_ALL_EXIST: + if (!called_from_sync) { + ERR( + "error: a bad block recovery file exists, run 'pmempool sync --bad-blocks' to fix bad blocks first"); + return -1; + } + + if (!fix_bad_blocks) { + ERR( + "error: a bad block recovery file exists, but the '--bad-blocks' option is not set\n" + ERR_MSG_BB); + return -1; + } + break; + + default: + break; + }; + + /* + * The pool is checked for bad blocks only if: + * 1) compat feature POOL_FEAT_CHECK_BAD_BLOCKS is set + * OR: + * 2) the '--bad-blocks' option is set + * + * Bad blocks are cleared and fixed only if: + * - the '--bad-blocks' option is set + */ + if (!fix_bad_blocks && !check_bad_blocks) { + LOG(3, "skipping bad blocks checking"); + return 0; + } + + /* phase #2 - reading recovery files */ + switch (status) { + case RECOVERY_FILES_EXIST_ALL: + /* read all bad block recovery files */ + ret = replica_badblocks_recovery_files_read(set, set_hs); + if (ret < 0) { + LOG(1, "checking bad block recovery files failed"); + return -1; + } + + if (ret > 0) { + /* incomplete bad block recovery file was detected */ + + LOG(1, + "warning: incomplete bad block recovery file detected\n" + " - all recovery files will be removed"); + + /* changing status to RECOVERY_FILES_NOT_ALL_EXIST */ + status = RECOVERY_FILES_NOT_ALL_EXIST; + } + break; + + case RECOVERY_FILES_NOT_ALL_EXIST: + LOG(1, + "warning: one of bad block recovery files does not exist\n" + " - all recovery files will be removed"); + break; + + default: + break; + }; + + if (status == RECOVERY_FILES_NOT_ALL_EXIST) { + /* + * At least one of bad block recovery files does not exist, + * or an incomplete bad block recovery file was detected, + * so all recovery files have to be removed. 
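+ *
+ * For illustration, a complete recovery file written by
+ * replica_badblocks_recovery_file_save() holds one "offset length"
+ * pair per bad block, sorted by offset in ascending order and
+ * terminated with the "0 0" finish flag, e.g. (hypothetical values):
+ *
+ *	4096 8192
+ *	1048576 4096
+ *	0 0
+ *
+ * A file without the finish flag is treated as incomplete -- the
+ * previous attempt was interrupted before the flag was written, so
+ * the recorded state cannot be trusted and the parts have to be
+ * scanned for bad blocks again.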
+ */ + + if (!dry_run) { + LOG(1, "removing all bad block recovery files..."); + ret = replica_remove_all_recovery_files(set_hs); + if (ret < 0) { + LOG(1, + "removing bad block recovery files failed"); + return -1; + } + } else { + LOG(1, "all bad block recovery files would be removed"); + } + + /* changing status to RECOVERY_FILES_DO_NOT_EXIST */ + status = RECOVERY_FILES_DO_NOT_EXIST; + } + + if (status == RECOVERY_FILES_DO_NOT_EXIST) { + /* + * There are no bad block recovery files, + * so let's check bad blocks. + */ + + int bad_blocks_found = replica_badblocks_get(set, set_hs); + if (bad_blocks_found < 0) { + if (errno == ENOTSUP) { + LOG(1, BB_NOT_SUPP); + return -1; + } + + LOG(1, "checking bad blocks failed"); + return -1; + } + + if (!bad_blocks_found) { + LOG(4, "no bad blocks found"); + return 0; + } + + /* bad blocks were found */ + + if (!called_from_sync) { + ERR( + "error: bad blocks found, run 'pmempool sync --bad-blocks' to fix bad blocks first"); + return -1; + } + + if (!fix_bad_blocks) { + ERR( + "error: bad blocks found, but the '--bad-blocks' option is not set\n" + ERR_MSG_BB); + return -1; + } + + if (dry_run) { + /* dry-run - do nothing */ + LOG(1, "warning: bad blocks were found"); + return 0; + } + + /* create one empty recovery file for each part file */ + ret = replica_badblocks_recovery_files_create_empty(set, + set_hs); + if (ret < 0) { + LOG(1, + "creating empty bad block recovery files failed"); + return -1; + } + + /* save bad blocks in recovery files */ + ret = replica_badblocks_recovery_files_save(set, set_hs); + if (ret < 0) { + LOG(1, "saving bad block recovery files failed"); + return -1; + } + } + + if (dry_run) { + /* dry-run - do nothing */ + LOG(1, "bad blocks would be cleared"); + return 0; + } + + ret = replica_badblocks_clear(set, set_hs); + if (ret < 0) { + ERR("clearing bad blocks failed"); + return -1; + } + + return 0; +} + +/* + * check_shutdown_state -- (internal) check if replica has + * healthy shutdown_state + */ +static int +check_shutdown_state(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) {\ + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + struct pool_hdr *hdrp = HDR(rep, 0); + + if (rep->remote) + continue; + + if (hdrp == NULL) { + /* cannot verify shutdown state */ + rep_hs->flags |= IS_BROKEN; + continue; + } + + struct shutdown_state curr_sds; + shutdown_state_init(&curr_sds, NULL); + for (unsigned p = 0; p < rep->nparts; ++p) { + if (PART(rep, p)->fd < 0) + continue; + + if (shutdown_state_add_part(&curr_sds, + PART(rep, p)->fd, NULL)) { + rep_hs->flags |= IS_BROKEN; + break; + } + } + + if (rep_hs->flags & IS_BROKEN) + continue; + + /* make a copy of sds as we shouldn't modify a pool */ + struct shutdown_state pool_sds = hdrp->sds; + + if (shutdown_state_check(&curr_sds, &pool_sds, NULL)) + rep_hs->flags |= IS_BROKEN; + + } + return 0; +} + +/* + * check_uuids_between_parts -- (internal) check if uuids between adjacent + * parts are consistent for a given replica + */ +static int +check_uuids_between_parts(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); + struct pool_replica *rep = REP(set, repn); + + /* check poolset_uuid consistency between replica's parts */ + LOG(4, "checking consistency of poolset uuid in replica %u", repn); + uuid_t poolset_uuid; + int 
uuid_stored = 0; + unsigned part_stored = UNDEF_PART; + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + if (!uuid_stored) { + memcpy(poolset_uuid, HDR(rep, p)->poolset_uuid, + POOL_HDR_UUID_LEN); + uuid_stored = 1; + part_stored = p; + continue; + } + + if (uuidcmp(HDR(rep, p)->poolset_uuid, poolset_uuid)) { + ERR( + "different poolset uuids in parts from the same replica (repn %u, parts %u and %u) - cannot synchronize", + repn, part_stored, p); + errno = EINVAL; + return -1; + } + } + + /* check if all uuids for adjacent replicas are the same across parts */ + LOG(4, "checking consistency of adjacent replicas' uuids in replica %u", + repn); + unsigned unbroken_p = UNDEF_PART; + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + if (unbroken_p == UNDEF_PART) { + unbroken_p = p; + continue; + } + + struct pool_hdr *hdrp = HDR(rep, p); + int prev_differ = uuidcmp(HDR(rep, unbroken_p)->prev_repl_uuid, + hdrp->prev_repl_uuid); + int next_differ = uuidcmp(HDR(rep, unbroken_p)->next_repl_uuid, + hdrp->next_repl_uuid); + + if (prev_differ || next_differ) { + ERR( + "different adjacent replica UUID between parts (repn %u, parts %u and %u) - cannot synchronize", + repn, unbroken_p, p); + errno = EINVAL; + return -1; + } + } + + /* check parts linkage */ + LOG(4, "checking parts linkage in replica %u", repn); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + struct pool_hdr *hdrp = HDR(rep, p); + struct pool_hdr *next_hdrp = HDRN(rep, p); + int next_is_broken = replica_is_part_broken(repn, p + 1, + set_hs); + + if (!next_is_broken) { + int next_decoupled = + uuidcmp(next_hdrp->prev_part_uuid, + hdrp->uuid) || + uuidcmp(hdrp->next_part_uuid, next_hdrp->uuid); + if (next_decoupled) { + ERR( + "two consecutive unbroken parts are not linked to each other (repn %u, parts %u and %u) - cannot synchronize", + repn, p, p + 1); + errno = EINVAL; + return -1; + } + } + } + return 0; +} + +/* + * check_replicas_consistency -- (internal) check if all uuids within each + * replica are consistent + */ +static int +check_replicas_consistency(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (check_uuids_between_parts(set, r, set_hs)) + return -1; + } + return 0; +} + +/* + * check_replica_options -- (internal) check if options are consistent in the + * replica + */ +static int +check_replica_options(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); + struct pool_replica *rep = REP(set, repn); + struct replica_health_status *rep_hs = REP_HEALTH(set_hs, repn); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + struct pool_hdr *hdr = HDR(rep, p); + if (((hdr->features.incompat & POOL_FEAT_SINGLEHDR) == 0) != + ((set->options & OPTION_SINGLEHDR) == 0)) { + LOG(1, + "improper options are set in part %u's header in replica %u", + p, repn); + rep_hs->part[p].flags |= IS_BROKEN; + } + } + return 0; +} + +/* + * check_options -- (internal) check if options are consistent in all replicas + */ +static int +check_options(struct pool_set *set, struct poolset_health_status *set_hs) +{ + LOG(3, "set 
%p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (check_replica_options(set, r, set_hs)) + return -1; + } + return 0; +} + +/* + * check_replica_poolset_uuids - (internal) check if poolset_uuid fields are + * consistent among all parts of a replica; + * the replica is initially considered as + * consistent + */ +static int +check_replica_poolset_uuids(struct pool_set *set, unsigned repn, + uuid_t poolset_uuid, struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, poolset_uuid %p, set_hs %p", set, repn, + poolset_uuid, set_hs); + struct pool_replica *rep = REP(set, repn); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + if (uuidcmp(HDR(rep, p)->poolset_uuid, poolset_uuid)) { + /* + * two internally consistent replicas have + * different poolset_uuid + */ + return -1; + } else { + /* + * it is sufficient to check only one part + * from internally consistent replica + */ + break; + } + } + return 0; +} + +/* + * check_poolset_uuids -- (internal) check if poolset_uuid fields are consistent + * among all internally consistent replicas + */ +static int +check_poolset_uuids(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + /* find a replica with healthy header */ + unsigned r_h = replica_find_replica_healthy_header(set_hs); + if (r_h == UNDEF_REPLICA) { + ERR("no healthy replica found"); + return -1; + } + + uuid_t poolset_uuid; + memcpy(poolset_uuid, HDR(REP(set, r_h), 0)->poolset_uuid, + POOL_HDR_UUID_LEN); + + for (unsigned r = 0; r < set->nreplicas; ++r) { + /* skip inconsistent replicas */ + if (!replica_is_replica_consistent(r, set_hs) || r == r_h) + continue; + + if (check_replica_poolset_uuids(set, r, poolset_uuid, set_hs)) { + ERR( + "inconsistent poolset uuids between replicas %u and %u - cannot synchronize", + r_h, r); + return -1; + } + } + return 0; +} + +/* + * get_replica_uuid -- (internal) get replica uuid + */ +static int +get_replica_uuid(struct pool_replica *rep, unsigned repn, + struct poolset_health_status *set_hs, uuid_t **uuidpp) +{ + unsigned nhdrs = rep->nhdrs; + if (!replica_is_part_broken(repn, 0, set_hs)) { + /* the first part is not broken */ + *uuidpp = &HDR(rep, 0)->uuid; + return 0; + } else if (nhdrs > 1 && !replica_is_part_broken(repn, 1, set_hs)) { + /* the second part is not broken */ + *uuidpp = &HDR(rep, 1)->prev_part_uuid; + return 0; + } else if (nhdrs > 1 && + !replica_is_part_broken(repn, nhdrs - 1, set_hs)) { + /* the last part is not broken */ + *uuidpp = &HDR(rep, nhdrs - 1)->next_part_uuid; + return 0; + } else { + /* cannot get replica uuid */ + return -1; + } +} + +/* + * check_uuids_between_replicas -- (internal) check if uuids between internally + * consistent adjacent replicas are consistent + */ +static int +check_uuids_between_replicas(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + /* skip comparing inconsistent pairs of replicas */ + if (!replica_is_replica_consistent(r, set_hs) || + !replica_is_replica_consistent(r + 1, set_hs)) + continue; + + struct pool_replica *rep = REP(set, r); + struct pool_replica *rep_n = REPN(set, r); + + /* get uuids of the two adjacent replicas */ + uuid_t *rep_uuidp = NULL; + uuid_t *rep_n_uuidp = NULL; + unsigned r_n = REPN_HEALTHidx(set_hs, r); + if (get_replica_uuid(rep, r, set_hs, &rep_uuidp)) + LOG(2, 
"cannot get replica uuid, replica %u", r); + if (get_replica_uuid(rep_n, r_n, set_hs, &rep_n_uuidp)) + LOG(2, "cannot get replica uuid, replica %u", r_n); + + /* + * check if replica uuids are consistent between two adjacent + * replicas + */ + unsigned p = replica_find_unbroken_part(r, set_hs); + unsigned p_n = replica_find_unbroken_part(r_n, set_hs); + if (p_n != UNDEF_PART && rep_uuidp != NULL && + uuidcmp(*rep_uuidp, + HDR(rep_n, p_n)->prev_repl_uuid)) { + ERR( + "inconsistent replica uuids between replicas %u and %u", + r, r_n); + return -1; + } + if (p != UNDEF_PART && rep_n_uuidp != NULL && + uuidcmp(*rep_n_uuidp, + HDR(rep, p)->next_repl_uuid)) { + ERR( + "inconsistent replica uuids between replicas %u and %u", + r, r_n); + return -1; + } + + /* + * check if replica uuids on borders of a broken replica are + * consistent + */ + unsigned r_nn = REPN_HEALTHidx(set_hs, r_n); + if (set->nreplicas > 1 && p != UNDEF_PART && + replica_is_replica_broken(r_n, set_hs) && + replica_is_replica_consistent(r_nn, set_hs)) { + unsigned p_nn = + replica_find_unbroken_part(r_nn, set_hs); + if (p_nn == UNDEF_PART) { + LOG(2, + "cannot compare uuids on borders of replica %u", + r); + continue; + } + struct pool_replica *rep_nn = REP(set, r_nn); + if (uuidcmp(HDR(rep, p)->next_repl_uuid, + HDR(rep_nn, p_nn)->prev_repl_uuid)) { + ERR( + "inconsistent replica uuids on borders of replica %u", + r); + return -1; + } + } + } + return 0; +} + +/* + * check_replica_cycles -- (internal) check if healthy replicas form cycles + * shorter than the number of all replicas + */ +static int +check_replica_cycles(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + unsigned first_healthy; + unsigned count_healthy = 0; + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (!replica_is_replica_healthy(r, set_hs)) { + count_healthy = 0; + continue; + } + + if (count_healthy == 0) + first_healthy = r; + + ++count_healthy; + struct pool_hdr *hdrh = + PART(REP(set, first_healthy), 0)->hdr; + struct pool_hdr *hdr = PART(REP(set, r), 0)->hdr; + if (uuidcmp(hdrh->uuid, hdr->next_repl_uuid) == 0 && + count_healthy < set->nreplicas) { + /* + * Healthy replicas form a cycle shorter than + * the number of all replicas; for the user it + * means that: + */ + ERR( + "alien replica found (probably coming from a different poolset)"); + return -1; + } + } + return 0; +} + +/* + * check_replica_sizes -- (internal) check if all replicas are large + * enough to hold data from a healthy replica + */ +static int +check_replica_sizes(struct pool_set *set, struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + ssize_t pool_size = -1; + for (unsigned r = 0; r < set->nreplicas; ++r) { + /* skip broken replicas */ + if (!replica_is_replica_healthy(r, set_hs)) + continue; + + /* get the size of a pool in the replica */ + ssize_t replica_pool_size; + if (REP(set, r)->remote) + /* XXX: no way to get the size of a remote pool yet */ + replica_pool_size = (ssize_t)set->poolsize; + else + replica_pool_size = replica_get_pool_size(set, r); + + if (replica_pool_size < 0) { + LOG(1, "getting pool size from replica %u failed", r); + set_hs->replica[r]->flags |= IS_BROKEN; + continue; + } + + /* check if the pool is bigger than minimum size */ + enum pool_type type = pool_hdr_get_type(HDR(REP(set, r), 0)); + if ((size_t)replica_pool_size < pool_get_min_size(type)) { + LOG(1, + "pool size from replica %u is smaller than the minimum size allowed for the pool", + r); + 
set_hs->replica[r]->flags |= IS_BROKEN; + continue; + } + + /* check if each replica is big enough to hold the pool data */ + if (set->poolsize < (size_t)replica_pool_size) { + ERR( + "some replicas are too small to hold synchronized data"); + return -1; + } + + if (pool_size < 0) { + pool_size = replica_pool_size; + continue; + } + + /* check if pools in all healthy replicas are of equal size */ + if (pool_size != replica_pool_size) { + ERR("pool sizes from different replicas differ"); + return -1; + } + } + return 0; +} + +/* + * replica_read_features -- (internal) read features from the header + */ +static int +replica_read_features(struct pool_set *set, + struct poolset_health_status *set_hs, + features_t *features) +{ + LOG(3, "set %p set_hs %p features %p", set, set_hs, features); + + ASSERTne(features, NULL); + + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + struct replica_health_status *rep_hs = set_hs->replica[r]; + + if (rep->remote) { + if (rep_hs->flags & IS_BROKEN) + continue; + + struct pool_hdr *hdrp = rep->part[0].hdr; + memcpy(features, &hdrp->features, sizeof(*features)); + + return 0; + } + + for (unsigned p = 0; p < rep->nparts; p++) { + struct pool_set_part *part = &rep->part[p]; + + if (part->fd == -1) + continue; + + if (util_map_hdr(part, MAP_SHARED, 0) != 0) { + LOG(1, "header mapping failed"); + return -1; + } + + struct pool_hdr *hdrp = part->hdr; + memcpy(features, &hdrp->features, sizeof(*features)); + + util_unmap_hdr(part); + + return 0; + } + } + + /* no healthy replica/part found */ + return -1; +} + +/* + * replica_check_poolset_health -- check if a given poolset can be considered as + * healthy, and store the status in a helping structure + */ +int +replica_check_poolset_health(struct pool_set *set, + struct poolset_health_status **set_hsp, + int called_from_sync, unsigned flags) +{ + LOG(3, "set %p, set_hsp %p, called_from_sync %i, flags %u", + set, set_hsp, called_from_sync, flags); + + if (replica_create_poolset_health_status(set, set_hsp)) { + LOG(1, "creating poolset health status failed"); + return -1; + } + + struct poolset_health_status *set_hs = *set_hsp; + + /* check if part files exist and are accessible */ + if (check_and_open_poolset_part_files(set, set_hs, flags)) { + LOG(1, "poolset part files check failed"); + goto err; + } + + features_t features; + int check_bad_blks; + int fix_bad_blks = called_from_sync && fix_bad_blocks(flags); + + if (fix_bad_blks) { + /* + * We will fix bad blocks, so we cannot read features here, + * because reading could fail, because of bad blocks. + * We will read features after having bad blocks fixed. + * + * Fixing bad blocks implies checking bad blocks. + */ + check_bad_blks = 1; + } else { + /* + * We will not fix bad blocks, so we have to read features here. 
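+ *
+ * The net effect of both branches can be sketched as
+ * (not the literal code):
+ *
+ *	check_bad_blks = fix_bad_blks
+ *		? 1
+ *		: (features.compat & POOL_FEAT_CHECK_BAD_BLOCKS);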
+ */ + if (replica_read_features(set, set_hs, &features)) { + LOG(1, "reading features failed"); + goto err; + } + check_bad_blks = features.compat & POOL_FEAT_CHECK_BAD_BLOCKS; + } + + /* check for bad blocks when in dry run or clear them otherwise */ + if (replica_badblocks_check_or_clear(set, set_hs, is_dry_run(flags), + called_from_sync, check_bad_blks, fix_bad_blks)) { + LOG(1, "replica bad_blocks check failed"); + goto err; + } + + /* read features after fixing bad blocks */ + if (fix_bad_blks && replica_read_features(set, set_hs, &features)) { + LOG(1, "reading features failed"); + goto err; + } + + /* set ignore_sds flag basing on features read from the header */ + set->ignore_sds = !(features.incompat & POOL_FEAT_SDS); + + /* map all headers */ + map_all_unbroken_headers(set, set_hs); + + /* + * Check if checksums and signatures are correct for all parts + * in all replicas. + */ + check_checksums_and_signatures(set, set_hs); + + /* check if option flags are consistent */ + if (check_options(set, set_hs)) { + LOG(1, "flags check failed"); + goto err; + } + + if (!set->ignore_sds && check_shutdown_state(set, set_hs)) { + LOG(1, "replica shutdown_state check failed"); + goto err; + } + + /* check if uuids in parts across each replica are consistent */ + if (check_replicas_consistency(set, set_hs)) { + LOG(1, "replica consistency check failed"); + goto err; + } + + /* check poolset_uuid values between replicas */ + if (check_poolset_uuids(set, set_hs)) { + LOG(1, "poolset uuids check failed"); + goto err; + } + + /* check if uuids for adjacent replicas are consistent */ + if (check_uuids_between_replicas(set, set_hs)) { + LOG(1, "replica uuids check failed"); + goto err; + } + + /* check if healthy replicas make up another poolset */ + if (check_replica_cycles(set, set_hs)) { + LOG(1, "replica cycles check failed"); + goto err; + } + + /* check if replicas are large enough */ + if (check_replica_sizes(set, set_hs)) { + LOG(1, "replica sizes check failed"); + goto err; + } + + if (check_store_all_sizes(set, set_hs)) { + LOG(1, "reading pool sizes failed"); + goto err; + } + + unmap_all_headers(set); + util_poolset_fdclose_always(set); + return 0; + +err: + errno = EINVAL; + unmap_all_headers(set); + util_poolset_fdclose_always(set); + replica_free_poolset_health_status(set_hs); + return -1; +} + +/* + * replica_get_pool_size -- find the effective size (mapped) of a pool based + * on metadata from given replica + */ +ssize_t +replica_get_pool_size(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_set_part *part = PART(REP(set, repn), 0); + int should_close_part = 0; + int should_unmap_part = 0; + if (part->fd == -1) { + if (util_part_open(part, 0, 0)) + return -1; + + should_close_part = 1; + } + + if (part->addr == NULL) { + if (util_map_part(part, NULL, + ALIGN_UP(sizeof(PMEMobjpool), part->alignment), 0, + MAP_SHARED, 1)) { + util_part_fdclose(part); + return -1; + } + should_unmap_part = 1; + } + + PMEMobjpool *pop = (PMEMobjpool *)part->addr; + ssize_t ret = (ssize_t)(pop->heap_offset + pop->heap_size); + + if (should_unmap_part) + util_unmap_part(part); + if (should_close_part) + util_part_fdclose(part); + + return ret; +} + +/* + * replica_check_part_sizes -- check if all parts are large enough + */ +int +replica_check_part_sizes(struct pool_set *set, size_t min_size) +{ + LOG(3, "set %p, min_size %zu", set, min_size); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + if (rep->remote != 
NULL) + /* skip remote replicas */ + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + if (PART(rep, p)->filesize < min_size) { + ERR("replica %u, part %u: file is too small", + r, p); + errno = EINVAL; + return -1; + } + } + } + return 0; +} + +/* + * replica_check_local_part_dir -- check if directory for the part file + * exists + */ +int +replica_check_local_part_dir(struct pool_set *set, unsigned repn, + unsigned partn) +{ + LOG(3, "set %p, repn %u, partn %u", set, repn, partn); + char *path = Strdup(PART(REP(set, repn), partn)->path); + const char *dir = dirname(path); + os_stat_t sb; + if (os_stat(dir, &sb) != 0 || !(sb.st_mode & S_IFDIR)) { + ERR( + "directory %s for part %u in replica %u does not exist or is not accessible", + path, partn, repn); + Free(path); + return -1; + } + Free(path); + return 0; +} + +/* + * replica_check_part_dirs -- (internal) check if directories for part files + * exist + */ +int +replica_check_part_dirs(struct pool_set *set) +{ + LOG(3, "set %p", set); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = set->replica[r]; + if (rep->remote != NULL) + /* skip remote replicas */ + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + if (replica_check_local_part_dir(set, r, p)) + return -1; + } + } + return 0; +} + +/* + * replica_open_replica_part_files -- open all part files for a replica + */ +int +replica_open_replica_part_files(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_replica *rep = set->replica[repn]; + for (unsigned p = 0; p < rep->nparts; ++p) { + /* skip already opened files */ + if (rep->part[p].fd != -1) + continue; + + if (util_part_open(&rep->part[p], 0, 0)) { + LOG(1, "part files open failed for replica %u, part %u", + repn, p); + errno = EINVAL; + goto err; + } + } + return 0; + +err: + util_replica_fdclose(set->replica[repn]); + return -1; +} + +/* + * replica_open_poolset_part_files -- open all part files for a poolset + */ +int +replica_open_poolset_part_files(struct pool_set *set) +{ + LOG(3, "set %p", set); + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (set->replica[r]->remote) + continue; + if (replica_open_replica_part_files(set, r)) { + LOG(1, "opening replica %u, part files failed", r); + goto err; + } + } + + return 0; + +err: + util_poolset_fdclose_always(set); + return -1; +} + +/* + * pmempool_syncU -- synchronize replicas within a poolset + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_syncU(const char *poolset, unsigned flags) +{ + LOG(3, "poolset %s, flags %u", poolset, flags); + ASSERTne(poolset, NULL); + + /* check if poolset has correct signature */ + if (util_is_poolset_file(poolset) != 1) { + ERR("file is not a poolset file"); + goto err; + } + + /* check if flags are supported */ + if (check_flags_sync(flags)) { + ERR("unsupported flags"); + errno = EINVAL; + goto err; + } + + /* open poolset file */ + int fd = util_file_open(poolset, NULL, 0, O_RDONLY); + if (fd < 0) { + ERR("cannot open a poolset file"); + goto err; + } + + /* fill up pool_set structure */ + struct pool_set *set = NULL; + if (util_poolset_parse(&set, poolset, fd)) { + ERR("parsing input poolset failed"); + goto err_close_file; + } + + if (set->nreplicas == 1) { + ERR("no replica(s) found in the pool set"); + errno = EINVAL; + goto err_close_file; + } + + if (set->remote && util_remote_load()) { + ERR("remote replication not available"); + errno = ENOTSUP; + goto err_close_file; + } + + /* sync all replicas */ + if (replica_sync(set, 
NULL, flags)) { + LOG(1, "synchronization failed"); + goto err_close_all; + } + + util_poolset_close(set, DO_NOT_DELETE_PARTS); + os_close(fd); + return 0; + +err_close_all: + util_poolset_close(set, DO_NOT_DELETE_PARTS); + +err_close_file: + os_close(fd); + +err: + if (errno == 0) + errno = EINVAL; + + return -1; +} + +#ifndef _WIN32 +/* + * pmempool_sync -- synchronize replicas within a poolset + */ +int +pmempool_sync(const char *poolset, unsigned flags) +{ + return pmempool_syncU(poolset, flags); +} +#else +/* + * pmempool_syncW -- synchronize replicas within a poolset in widechar + */ +int +pmempool_syncW(const wchar_t *poolset, unsigned flags) +{ + char *path = util_toUTF8(poolset); + if (path == NULL) { + ERR("Invalid poolest file path."); + return -1; + } + + int ret = pmempool_syncU(path, flags); + + util_free_UTF8(path); + return ret; +} +#endif + +/* + * pmempool_transformU -- alter poolset structure + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_transformU(const char *poolset_src, + const char *poolset_dst, unsigned flags) +{ + LOG(3, "poolset_src %s, poolset_dst %s, flags %u", poolset_src, + poolset_dst, flags); + ASSERTne(poolset_src, NULL); + ASSERTne(poolset_dst, NULL); + + /* check if the source poolset has correct signature */ + if (util_is_poolset_file(poolset_src) != 1) { + ERR("source file is not a poolset file"); + goto err; + } + + /* check if the destination poolset has correct signature */ + if (util_is_poolset_file(poolset_dst) != 1) { + ERR("destination file is not a poolset file"); + goto err; + } + + /* check if flags are supported */ + if (check_flags_transform(flags)) { + ERR("unsupported flags"); + errno = EINVAL; + goto err; + } + + /* open the source poolset file */ + int fd_in = util_file_open(poolset_src, NULL, 0, O_RDONLY); + if (fd_in < 0) { + ERR("cannot open source poolset file"); + goto err; + } + + /* parse the source poolset file */ + struct pool_set *set_in = NULL; + if (util_poolset_parse(&set_in, poolset_src, fd_in)) { + ERR("parsing source poolset failed"); + os_close(fd_in); + goto err; + } + os_close(fd_in); + + /* open the destination poolset file */ + int fd_out = util_file_open(poolset_dst, NULL, 0, O_RDONLY); + if (fd_out < 0) { + ERR("cannot open destination poolset file"); + goto err; + } + + enum del_parts_mode del = DO_NOT_DELETE_PARTS; + + /* parse the destination poolset file */ + struct pool_set *set_out = NULL; + if (util_poolset_parse(&set_out, poolset_dst, fd_out)) { + ERR("parsing destination poolset failed"); + os_close(fd_out); + goto err_free_poolin; + } + os_close(fd_out); + + /* check if the source poolset is of a correct type */ + enum pool_type ptype = pool_set_type(set_in); + if (ptype != POOL_TYPE_OBJ) { + errno = EINVAL; + ERR("transform is not supported for given pool type: %s", + pool_get_pool_type_str(ptype)); + goto err_free_poolout; + } + + /* load remote library if needed */ + if (set_in->remote && util_remote_load()) { + ERR("remote replication not available"); + goto err_free_poolout; + } + if (set_out->remote && util_remote_load()) { + ERR("remote replication not available"); + goto err_free_poolout; + } + + del = is_dry_run(flags) ? 
DO_NOT_DELETE_PARTS : DELETE_CREATED_PARTS; + + /* transform poolset */ + if (replica_transform(set_in, set_out, flags)) { + LOG(1, "transformation failed"); + goto err_free_poolout; + } + + util_poolset_close(set_in, DO_NOT_DELETE_PARTS); + util_poolset_close(set_out, DO_NOT_DELETE_PARTS); + return 0; + +err_free_poolout: + util_poolset_close(set_out, del); + +err_free_poolin: + util_poolset_close(set_in, DO_NOT_DELETE_PARTS); + +err: + if (errno == 0) + errno = EINVAL; + + return -1; +} + +#ifndef _WIN32 +/* + * pmempool_transform -- alter poolset structure + */ +int +pmempool_transform(const char *poolset_src, + const char *poolset_dst, unsigned flags) +{ + return pmempool_transformU(poolset_src, poolset_dst, flags); +} +#else +/* + * pmempool_transformW -- alter poolset structure in widechar + */ +int +pmempool_transformW(const wchar_t *poolset_src, + const wchar_t *poolset_dst, unsigned flags) +{ + char *path_src = util_toUTF8(poolset_src); + if (path_src == NULL) { + ERR("Invalid source poolset file path."); + return -1; + } + + char *path_dst = util_toUTF8(poolset_dst); + if (path_dst == NULL) { + ERR("Invalid destination poolset file path."); + Free(path_src); + return -1; + } + + int ret = pmempool_transformU(path_src, path_dst, flags); + + util_free_UTF8(path_src); + util_free_UTF8(path_dst); + return ret; +} +#endif diff --git a/src/pmdk/src/libpmempool/replica.h b/src/pmdk/src/libpmempool/replica.h new file mode 100644 index 000000000..06fa5b3f2 --- /dev/null +++ b/src/pmdk/src/libpmempool/replica.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * replica.h -- module for synchronizing and transforming poolset + */ +#ifndef REPLICA_H +#define REPLICA_H + +#include "libpmempool.h" +#include "pool.h" +#include "badblocks.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define UNDEF_REPLICA UINT_MAX +#define UNDEF_PART UINT_MAX + +/* + * A part marked as broken does not exist or is damaged so that + * it cannot be opened and has to be recreated. + */ +#define IS_BROKEN (1U << 0) + +/* + * A replica marked as inconsistent exists but has inconsistent metadata + * (e.g. inconsistent parts or replicas linkage) + */ +#define IS_INCONSISTENT (1U << 1) + +/* + * A part or replica marked in this way has bad blocks inside. 
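+ * + * The status bits above may be combined; e.g. a part whose file is missing + * and whose recovery file reports bad blocks could, hypothetically, carry + * flags == (IS_BROKEN | HAS_BAD_BLOCKS).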
+ */ +#define HAS_BAD_BLOCKS (1U << 2) + +/* + * A part marked in this way has bad blocks in the header + */ +#define HAS_CORRUPTED_HEADER (1U << 3) + +/* + * A flag which can be passed to replica_sync() to indicate that the function is + * called by pmempool_transform + */ +#define IS_TRANSFORMED (1U << 10) + +/* + * Number of lanes utilized when working with remote replicas + */ +#define REMOTE_NLANES 1 + +/* + * Helper structure for storing a part's health status + */ +struct part_health_status { + unsigned flags; + struct badblocks bbs; /* structure with bad blocks */ + char *recovery_file_name; /* name of bad block recovery file */ + int recovery_file_exists; /* bad block recovery file exists */ +}; + +/* + * Helper structures for storing replica and poolset health statuses + */ +struct replica_health_status { + unsigned nparts; + unsigned nhdrs; + /* a flag for the replica */ + unsigned flags; + /* effective size of a pool, valid only for healthy replica */ + size_t pool_size; + /* flags for each part */ + struct part_health_status part[]; +}; + +struct poolset_health_status { + unsigned nreplicas; + /* a flag for the poolset */ + unsigned flags; + /* health statuses for each replica */ + struct replica_health_status *replica[]; +}; + +/* get index of the (r)th replica health status */ +static inline unsigned +REP_HEALTHidx(struct poolset_health_status *set, unsigned r) +{ + ASSERTne(set->nreplicas, 0); + return (set->nreplicas + r) % set->nreplicas; +} + +/* get index of the (r + 1)th replica health status */ +static inline unsigned +REPN_HEALTHidx(struct poolset_health_status *set, unsigned r) +{ + ASSERTne(set->nreplicas, 0); + return (set->nreplicas + r + 1) % set->nreplicas; +} + +/* get index of the (p)th part health status */ +static inline unsigned +PART_HEALTHidx(struct replica_health_status *rep, unsigned p) +{ + ASSERTne(rep->nparts, 0); + return (rep->nparts + p) % rep->nparts; +} + +/* get (r)th replica health status */ +static inline struct replica_health_status * +REP_HEALTH(struct poolset_health_status *set, unsigned r) +{ + return set->replica[REP_HEALTHidx(set, r)]; +} + +/* get (p)th part's health status flags */ +static inline unsigned +PART_HEALTH(struct replica_health_status *rep, unsigned p) +{ + return rep->part[PART_HEALTHidx(rep, p)].flags; +} + +uint64_t replica_get_part_offset(struct pool_set *set, + unsigned repn, unsigned partn); + +void replica_align_badblock_offset_length(size_t *offset, size_t *length, + struct pool_set *set_in, unsigned repn, unsigned partn); + +size_t replica_get_part_data_len(struct pool_set *set_in, unsigned repn, + unsigned partn); +uint64_t replica_get_part_data_offset(struct pool_set *set_in, unsigned repn, + unsigned part); + +/* + * is_dry_run -- (internal) check whether only verification mode is enabled + */ +static inline bool +is_dry_run(unsigned flags) +{ + /* + * PMEMPOOL_SYNC_DRY_RUN and PMEMPOOL_TRANSFORM_DRY_RUN + * have to have the same value in order to use this common function. 
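+ * + * The ASSERT_COMPILE_ERROR_ON() below enforces that equality at build + * time, so a hypothetical change giving the two flags different values + * would stop compiling instead of silently breaking one entry point.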
+ */ + ASSERT_COMPILE_ERROR_ON(PMEMPOOL_SYNC_DRY_RUN != + PMEMPOOL_TRANSFORM_DRY_RUN); + + return flags & PMEMPOOL_SYNC_DRY_RUN; +} + +/* + * fix_bad_blocks -- (internal) check whether bad blocks should be fixed; + * fixing them causes bad block recovery files + * to be read or created + * (depending on whether they already exist) + */ +static inline bool +fix_bad_blocks(unsigned flags) +{ + return flags & PMEMPOOL_SYNC_FIX_BAD_BLOCKS; +} + +int replica_remove_all_recovery_files(struct poolset_health_status *set_hs); +int replica_remove_part(struct pool_set *set, unsigned repn, unsigned partn, + int fix_bad_blocks); +int replica_create_poolset_health_status(struct pool_set *set, + struct poolset_health_status **set_hsp); +void replica_free_poolset_health_status(struct poolset_health_status *set_s); +int replica_check_poolset_health(struct pool_set *set, + struct poolset_health_status **set_hs, + int called_from_sync, unsigned flags); +int replica_is_part_broken(unsigned repn, unsigned partn, + struct poolset_health_status *set_hs); +int replica_has_bad_blocks(unsigned repn, struct poolset_health_status *set_hs); +int replica_part_has_bad_blocks(struct part_health_status *phs); +int replica_part_has_corrupted_header(unsigned repn, unsigned partn, + struct poolset_health_status *set_hs); +unsigned replica_find_unbroken_part(unsigned repn, + struct poolset_health_status *set_hs); +int replica_is_replica_broken(unsigned repn, + struct poolset_health_status *set_hs); +int replica_is_replica_consistent(unsigned repn, + struct poolset_health_status *set_hs); +int replica_is_replica_healthy(unsigned repn, + struct poolset_health_status *set_hs); + +unsigned replica_find_healthy_replica( + struct poolset_health_status *set_hs); +unsigned replica_find_replica_healthy_header( + struct poolset_health_status *set_hs); + +int replica_is_poolset_healthy(struct poolset_health_status *set_hs); +int replica_is_poolset_transformed(unsigned flags); +ssize_t replica_get_pool_size(struct pool_set *set, unsigned repn); +int replica_check_part_sizes(struct pool_set *set, size_t min_size); +int replica_check_part_dirs(struct pool_set *set); +int replica_check_local_part_dir(struct pool_set *set, unsigned repn, + unsigned partn); + +int replica_open_replica_part_files(struct pool_set *set, unsigned repn); +int replica_open_poolset_part_files(struct pool_set *set); + +int replica_sync(struct pool_set *set_in, struct poolset_health_status *set_hs, + unsigned flags); +int replica_transform(struct pool_set *set_in, struct pool_set *set_out, + unsigned flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libpmempool/rm.c b/src/pmdk/src/libpmempool/rm.c new file mode 100644 index 000000000..9313e0ac0 --- /dev/null +++ b/src/pmdk/src/libpmempool/rm.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * rm.c -- implementation of pmempool_rm() function + */ +#include +#include + +#include "libpmempool.h" +#include "out.h" +#include "os.h" +#include "util.h" +#include "set.h" +#include "file.h" + +#define PMEMPOOL_RM_ALL_FLAGS (\ + PMEMPOOL_RM_FORCE |\ + PMEMPOOL_RM_POOLSET_LOCAL |\ + PMEMPOOL_RM_POOLSET_REMOTE) + +#define ERR_F(f, ...) 
do {\ + if (CHECK_FLAG((f), FORCE))\ + LOG(2, "!(ignored) " __VA_ARGS__);\ + else\ + ERR(__VA_ARGS__);\ +} while (0) + +#define CHECK_FLAG(f, i) ((f) & PMEMPOOL_RM_##i) + +struct cb_args { + unsigned flags; + int error; +}; + +/* + * rm_local -- (internal) remove single local file + */ +static int +rm_local(const char *path, unsigned flags, int is_part_file) +{ + int ret = util_unlink_flock(path); + if (!ret) { + LOG(3, "%s: removed", path); + return 0; + } + + int oerrno = errno; + os_stat_t buff; + ret = os_stat(path, &buff); + if (!ret) { + if (S_ISDIR(buff.st_mode)) { + errno = EISDIR; + if (is_part_file) + ERR("%s: removing file failed", path); + else + ERR("removing file failed"); + return -1; + } + } + + errno = oerrno; + + if (is_part_file) + ERR_F(flags, "%s: removing file failed", path); + else + ERR_F(flags, "removing file failed"); + + if (CHECK_FLAG(flags, FORCE)) + return 0; + + return -1; +} + +/* + * rm_remote -- (internal) remove remote replica + */ +static int +rm_remote(const char *node, const char *path, unsigned flags) +{ + if (!Rpmem_remove) { + ERR_F(flags, "cannot remove remote replica" + " -- missing librpmem"); + return -1; + } + + int rpmem_flags = 0; + if (CHECK_FLAG(flags, FORCE)) + rpmem_flags |= RPMEM_REMOVE_FORCE; + + if (CHECK_FLAG(flags, POOLSET_REMOTE)) + rpmem_flags |= RPMEM_REMOVE_POOL_SET; + + int ret = Rpmem_remove(node, path, rpmem_flags); + if (ret) { + ERR_F(flags, "%s/%s removing failed", node, path); + if (CHECK_FLAG(flags, FORCE)) + ret = 0; + } else { + LOG(3, "%s/%s: removed", node, path); + } + + return ret; +} + +/* + * rm_cb -- (internal) foreach part callback + */ +static int +rm_cb(struct part_file *pf, void *arg) +{ + struct cb_args *args = (struct cb_args *)arg; + int ret; + if (pf->is_remote) { + ret = rm_remote(pf->remote->node_addr, pf->remote->pool_desc, + args->flags); + } else { + ret = rm_local(pf->part->path, args->flags, 1); + } + + if (ret) + args->error = ret; + + return 0; +} + +/* + * pmempool_rmU -- remove pool files or poolsets + */ +#ifndef _WIN32 +static inline +#endif +int +pmempool_rmU(const char *path, unsigned flags) +{ + LOG(3, "path %s flags %x", path, flags); + int ret; + + if (flags & ~PMEMPOOL_RM_ALL_FLAGS) { + ERR("invalid flags specified"); + errno = EINVAL; + return -1; + } + + int is_poolset = util_is_poolset_file(path); + if (is_poolset < 0) { + os_stat_t buff; + ret = os_stat(path, &buff); + if (!ret) { + if (S_ISDIR(buff.st_mode)) { + errno = EISDIR; + ERR("removing file failed"); + return -1; + } + } + ERR_F(flags, "removing file failed"); + if (CHECK_FLAG(flags, FORCE)) + return 0; + + return -1; + } + + if (!is_poolset) { + LOG(2, "%s: not a poolset file", path); + return rm_local(path, flags, 0); + } + + LOG(2, "%s: poolset file", path); + + /* fill up pool_set structure */ + struct pool_set *set = NULL; + int fd = os_open(path, O_RDONLY); + if (fd == -1 || util_poolset_parse(&set, path, fd)) { + ERR_F(flags, "parsing poolset file failed"); + if (fd != -1) + os_close(fd); + if (CHECK_FLAG(flags, FORCE)) + return 0; + return -1; + } + os_close(fd); + + if (set->remote) { + /* ignore error - it will be handled in rm_remote() */ + (void) util_remote_load(); + } + + util_poolset_free(set); + + struct cb_args args; + args.flags = flags; + args.error = 0; + ret = util_poolset_foreach_part(path, rm_cb, &args); + if (ret == -1) { + ERR_F(flags, "parsing poolset file failed"); + if (CHECK_FLAG(flags, FORCE)) + return 0; + + return ret; + } + + ASSERTeq(ret, 0); + + if (args.error) + return args.error; + + if 
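+/* + * Example invocation that reaches this branch (hypothetical path): remove + * the part files, then the poolset file itself, ignoring non-fatal errors: + * + * pmempool_rm("pool.set", + * PMEMPOOL_RM_POOLSET_LOCAL | PMEMPOOL_RM_FORCE); + */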
(CHECK_FLAG(flags, POOLSET_LOCAL)) { + ret = rm_local(path, flags, 0); + if (ret) { + ERR_F(flags, "removing pool set file failed"); + } else { + LOG(3, "%s: removed", path); + } + + if (CHECK_FLAG(flags, FORCE)) + return 0; + + return ret; + } + + return 0; +} + +#ifndef _WIN32 +/* + * pmempool_rm -- remove pool files or poolsets + */ +int +pmempool_rm(const char *path, unsigned flags) +{ + return pmempool_rmU(path, flags); +} +#else +/* + * pmempool_rmW -- remove pool files or poolsets in widechar + */ +int +pmempool_rmW(const wchar_t *path, unsigned flags) +{ + char *upath = util_toUTF8(path); + if (upath == NULL) { + ERR("Invalid poolset/pool file path."); + return -1; + } + + int ret = pmempool_rmU(upath, flags); + + util_free_UTF8(upath); + return ret; +} +#endif diff --git a/src/pmdk/src/libpmempool/sync.c b/src/pmdk/src/libpmempool/sync.c new file mode 100644 index 000000000..b7c0cb4ff --- /dev/null +++ b/src/pmdk/src/libpmempool/sync.c @@ -0,0 +1,1646 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * sync.c -- a module for poolset synchronizing + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libpmem.h" +#include "replica.h" +#include "out.h" +#include "os.h" +#include "util_pmem.h" +#include "util.h" + +#ifdef USE_RPMEM +#include "rpmem_common.h" +#include "rpmem_ssh.h" +#endif + +#define BB_DATA_STR "offset 0x%zx, length 0x%zx, nhealthy %i" + +/* defines 'struct bb_vec' - the vector of the 'struct bad_block' structures */ +VEC(bb_vec, struct bad_block); + +/* + * validate_args -- (internal) check whether passed arguments are valid + */ +static int +validate_args(struct pool_set *set) +{ + LOG(3, "set %p", set); + ASSERTne(set, NULL); + + /* the checks below help detect use of incorrect poolset file */ + + /* + * check if all parts in the poolset are large enough + * (now replication works only for pmemobj pools) + */ + if (replica_check_part_sizes(set, PMEMOBJ_MIN_POOL)) { + LOG(2, "part sizes check failed"); + goto err; + } + + /* + * check if all directories for part files exist + */ + if (replica_check_part_dirs(set)) { + LOG(2, "part directories check failed"); + goto err; + } + + return 0; + +err: + if (errno == 0) + errno = EINVAL; + return -1; +} + +/* + * sync_copy_data -- (internal) copy data from the healthy replica + * to the broken one + */ +static int +sync_copy_data(void *src_addr, void *dst_addr, size_t off, size_t len, + struct pool_replica *rep_h, + struct pool_replica *rep, const struct pool_set_part *part) +{ + LOG(3, "src_addr %p dst_addr %p off %zu len %zu " + "rep_h %p rep %p part %p", + src_addr, dst_addr, off, len, rep_h, rep, part); + + int ret; + + if (rep->remote) { + LOG(10, + "copying data (offset 0x%zx length 0x%zx) to remote node -- '%s' on '%s'", + off, len, + rep->remote->pool_desc, + rep->remote->node_addr); + + ret = Rpmem_persist(rep->remote->rpp, off, len, 0, 0); + if (ret) { + LOG(1, + "copying data to remote node failed -- '%s' on '%s'", + rep->remote->pool_desc, + rep->remote->node_addr); + return -1; + } + } else if (rep_h->remote) { + LOG(10, + "reading data (offset 0x%zx length 0x%zx) from remote node -- '%s' on '%s'", + off, len, + rep_h->remote->pool_desc, + rep_h->remote->node_addr); + + ret = Rpmem_read(rep_h->remote->rpp, dst_addr, off, len, 0); + if (ret) { + LOG(1, + "reading data from remote node failed -- '%s' on '%s'", + rep_h->remote->pool_desc, + rep_h->remote->node_addr); + return -1; + } + } else { + LOG(10, + "copying data (offset 
0x%zx length 0x%zx) from local replica -- '%s'", + off, len, rep_h->part[0].path); + + /* copy all data */ + memcpy(dst_addr, src_addr, len); + util_persist(part->is_dev_dax, dst_addr, len); + } + + return 0; +} + +/* + * sync_recreate_header -- (internal) recreate the header + */ +static int +sync_recreate_header(struct pool_set *set, unsigned r, unsigned p, + struct pool_hdr *src_hdr) +{ + LOG(3, "set %p replica %u part %u src_hdr %p", set, r, p, src_hdr); + + struct pool_attr attr; + util_pool_hdr2attr(&attr, src_hdr); + + if (util_header_create(set, r, p, &attr, 1) != 0) { + LOG(1, "part headers create failed for replica %u part %u", + r, p); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * sync_mark_replica_no_badblocks -- (internal) mark replica as not having + * bad blocks + */ +static void +sync_mark_replica_no_badblocks(unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "repn %u set_hs %p", repn, set_hs); + + struct replica_health_status *rhs = REP_HEALTH(set_hs, repn); + + if (rhs->flags & HAS_BAD_BLOCKS) { + rhs->flags &= ~HAS_BAD_BLOCKS; + LOG(4, "replica %u has no bad blocks now", repn); + } +} + +/* + * sync_mark_part_no_badblocks -- (internal) mark part as not having bad blocks + */ +static void +sync_mark_part_no_badblocks(unsigned repn, unsigned partn, + struct poolset_health_status *set_hs) +{ + LOG(3, "repn %u partn %u set_hs %p", repn, partn, set_hs); + + struct replica_health_status *rhs = REP_HEALTH(set_hs, repn); + + if (rhs->part[PART_HEALTHidx(rhs, partn)].flags & HAS_BAD_BLOCKS) { + rhs->part[PART_HEALTHidx(rhs, partn)].flags &= ~HAS_BAD_BLOCKS; + LOG(4, "replica %u part %u has no bad blocks now", repn, partn); + } +} + +/* + * sync_recalc_badblocks -- (internal) recalculate offset and length + * of bad blocks to absolute ones + * (relative to the beginning of the pool) + */ +static int +sync_recalc_badblocks(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p set_hs %p", set, set_hs); + + /* header size for all headers but the first one */ + size_t hdrsize = (set->options & (OPTION_SINGLEHDR | OPTION_NOHDRS)) ? + 0 : Mmap_align; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + struct replica_health_status *rep_hs = set_hs->replica[r]; + + for (unsigned p = 0; p < rep->nparts; ++p) { + + struct part_health_status *phs = &rep_hs->part[p]; + + if (!replica_part_has_bad_blocks(phs)) { + /* skip parts with no bad blocks */ + continue; + } + + ASSERTne(phs->bbs.bb_cnt, 0); + ASSERTne(phs->bbs.bbv, NULL); + + LOG(10, "Replica %u part %u HAS %u bad blocks", + r, p, phs->bbs.bb_cnt); + + size_t part_off = replica_get_part_offset(set, r, p); + + for (unsigned i = 0; i < phs->bbs.bb_cnt; i++) { + LOG(10, + "relative bad block #%i: offset %zu, length %zu", + i, + phs->bbs.bbv[i].offset, + phs->bbs.bbv[i].length); + + size_t off = phs->bbs.bbv[i].offset; + size_t len = phs->bbs.bbv[i].length; + + if (len + off <= hdrsize) + continue; + + /* parts #>0 are mapped without the header */ + if (p > 0 && hdrsize > 0) { + if (off >= hdrsize) { + /* + * Bad block does not overlap + * with the header, so only + * adjust the offset. + */ + off -= hdrsize; + } else { + /* + * Bad block overlaps + * with the header, + * so adjust the length + * and zero the offset. 
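+ * + * E.g. with a hypothetical 4 KiB + * header (hdrsize == 0x1000), a bad + * block at off == 0x800 with + * len == 0x2000 becomes off == 0 and + * len == 0x2000 - (0x1000 - 0x800) + * == 0x1800, i.e. it then starts + * right where the mapped data begins.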
+ */ + len -= hdrsize - off; + off = 0; + } + } + + replica_align_badblock_offset_length(&off, &len, + set, r, p); + + phs->bbs.bbv[i].offset = part_off + off; + phs->bbs.bbv[i].length = (unsigned)len; + + LOG(10, + "absolute bad block #%i: offset 0x%zx, length 0x%zx", + i, + phs->bbs.bbv[i].offset, + phs->bbs.bbv[i].length); + } + } + } + + return 0; +} + +/* + * sync_badblocks_find_healthy_replica -- (internal) look for a healthy replica + * for each bad block + * + * This function looks for a healthy replica for each bad block. Bad blocks + * can overlap across replicas, so each bad block may have to be divided + * into smaller parts which can be fixed using different healthy replica. + * + * Key variables: + * - bbv_all[] - array containing all (possibly divided) bad blocks + * from all previous replicas. + * - bbv_aux[] - array containing all (possibly divided) bad blocks + * from all previous parts of the current replica merged with + * these bad blocks from bbv_all[] that have offsets less or equal + * the greatest bad block's offset in the previous part. + * + * This function merges bad blocks from bbv_all[] with bad blocks + * from the current part and writes the outcome bad blocks to bbv_aux[]. + * Only bad blocks with offsets less or equal the greatest bad block's offset + * in the current part will be moved from bbv_all[] to bbv_aux[]. + * The rest of them has to be moved at the end by sync_badblocks_move_vec(). + * + * bbv_aux[] becomes new bbv_all[] and bbv_aux[] is zeroed + * before checking the next replica (bbv_all = bbv_aux; bbv_aux = 0). + * + * For example (all replicas have only one part): + * - bbv_all with rep#0: |__----___________----__| + * - merged with rep#1: |____----_______----____| + * - gives such bbv_aux: |__11--00_______00--11__| + * - merged with rep#2: |__________---__________| + * - gives such bbv_aux: |__112200__000__002211__| (all bad blocks can be fixed) + * + * where: + * '_' stands for a healthy block (no bad block) + * '-' stands for a bad block with nhealthy == NO_HEALTHY_REPLICA + * 'N' stands for a bad block with nhealthy == N (can be fixed using rep#N) + */ +static int +sync_badblocks_find_healthy_replica(struct part_health_status *phs, + int rep, + struct bb_vec *pbbv_all, + struct bb_vec *pbbv_aux, + unsigned *i_all) +{ + LOG(3, "phs %p rep %i pbbv_all %p pbbv_aux %p i_all %i", + phs, rep, pbbv_all, pbbv_aux, *i_all); + + struct bad_block bb_add; /* the element which is being added */ + struct bad_block bb_new; /* a new element */ + struct bad_block *pbb_all; /* current element of bbv_all[] */ + + unsigned long long beg_prev; + unsigned long long end_prev; + unsigned long long beg_new; + unsigned long long end_new; + size_t len_prev; + size_t len_new; + + size_t size_all = VEC_SIZE(pbbv_all); + + if (size_all == 0) { + /* there were no bad blocks so far, so fill up bbv_aux[] */ + for (unsigned i = 0; i < phs->bbs.bb_cnt; i++) { + bb_add = phs->bbs.bbv[i]; + + if (rep > 0) + /* bad block can be fixed with replica #0 */ + bb_add.nhealthy = 0; + + if (VEC_PUSH_BACK(pbbv_aux, bb_add)) + return -1; + + LOG(10, + "added bad block (prev-empty): " BB_DATA_STR, + bb_add.offset, bb_add.length, bb_add.nhealthy); + } + } else { + if (*i_all < size_all) { + pbb_all = VEC_GET(pbbv_all, (*i_all)++); + } else { + pbb_all = NULL; + } + + for (unsigned i = 0; i < phs->bbs.bb_cnt; i++) { + bb_new = phs->bbs.bbv[i]; + + LOG(10, + " * (%u) inserting new bad block: " BB_DATA_STR, + i + 1, + bb_new.offset, bb_new.length, bb_new.nhealthy); + + if (pbb_all == NULL || 
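+/* + * Worked example with two single-part replicas (hypothetical offsets): + * rep#0 has a bad block [0x1000, 0x3000), rep#1 has [0x2000, 0x4000). + * The merge leaves bbv_aux with: + * [0x1000, 0x2000) nhealthy == 1 (fixable from rep#1) + * [0x2000, 0x3000) nhealthy == NO_HEALTHY_REPLICA (overlap) + * [0x3000, 0x4000) nhealthy == 0 (fixable from rep#0) + * so the middle range is recoverable only if a third replica is + * healthy there. + */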
pbb_all->length == 0) { + if (*i_all < size_all) + pbb_all = VEC_GET(pbbv_all, (*i_all)++); + else + pbb_all = NULL; + } + + /* all from bbv_all before the bb_new */ + while (pbb_all != NULL && pbb_all->offset + + pbb_all->length - 1 + < bb_new.offset) { + if (pbb_all->nhealthy == NO_HEALTHY_REPLICA) + /* can be fixed with this replica */ + pbb_all->nhealthy = rep; + + if (VEC_PUSH_BACK(pbbv_aux, *pbb_all)) + return -1; + + LOG(10, + "added bad block (prev-before): " + BB_DATA_STR, + pbb_all->offset, pbb_all->length, + pbb_all->nhealthy); + + if (*i_all < size_all) { + pbb_all = VEC_GET(pbbv_all, (*i_all)++); + } else { + pbb_all = NULL; + break; + } + } + + beg_new = bb_new.offset; + len_new = bb_new.length; + end_new = beg_new + len_new - 1; + + /* all pbb_all overlapping with the bb_new */ + while (len_new > 0 && pbb_all != NULL) { + + beg_prev = pbb_all->offset; + len_prev = pbb_all->length; + end_prev = beg_prev + len_prev - 1; + + /* check if new overlaps with prev */ + if (end_prev < beg_new || end_new < beg_prev) + break; + + /* + * 1st part: non-overlapping part + * of pbb_all or bb_new + */ + if (beg_prev < beg_new) { + /* non-overlapping part of pbb_all */ + bb_add.offset = beg_prev; + bb_add.length = (unsigned) + (beg_new - beg_prev); + + if (pbb_all->nhealthy != + NO_HEALTHY_REPLICA) { + bb_add.nhealthy = + pbb_all->nhealthy; + } else { + /* + * It can be fixed with + * this replica. + */ + bb_add.nhealthy = rep; + } + + if (VEC_PUSH_BACK(pbbv_aux, bb_add)) + return -1; + + LOG(10, + "added bad block (prev-only): " + BB_DATA_STR, + bb_add.offset, bb_add.length, + bb_add.nhealthy); + + beg_prev += bb_add.length; + len_prev -= bb_add.length; + + } else if (beg_new < beg_prev) { + /* non-overlapping part of bb_new */ + bb_add.offset = beg_new; + bb_add.length = (unsigned) + (beg_prev - beg_new); + + if (rep == 0) { + bb_add.nhealthy = + NO_HEALTHY_REPLICA; + } else { + /* + * It can be fixed with any + * previous replica, so let's + * choose replica #0. 
+ */ + bb_add.nhealthy = 0; + } + + if (VEC_PUSH_BACK(pbbv_aux, bb_add)) + return -1; + + LOG(10, + "added bad block (new-only): " + BB_DATA_STR, + bb_add.offset, bb_add.length, + bb_add.nhealthy); + + beg_new += bb_add.length; + len_new -= bb_add.length; + } + + /* + * 2nd part: overlapping part + * of pbb_all and bb_new + */ + if (len_prev <= len_new) { + bb_add.offset = beg_prev; + bb_add.length = len_prev; + + beg_new += len_prev; + len_new -= len_prev; + + /* whole pbb_all was added */ + len_prev = 0; + } else { + bb_add.offset = beg_new; + bb_add.length = len_new; + + beg_prev += len_new; + len_prev -= len_new; + + /* whole bb_new was added */ + len_new = 0; + } + + bb_add.nhealthy = pbb_all->nhealthy; + + if (VEC_PUSH_BACK(pbbv_aux, bb_add)) + return -1; + + LOG(10, + "added bad block (common): " + BB_DATA_STR, + bb_add.offset, bb_add.length, + bb_add.nhealthy); + + /* update pbb_all */ + pbb_all->offset = beg_prev; + pbb_all->length = len_prev; + + if (len_prev == 0) { + if (*i_all < size_all) + pbb_all = VEC_GET(pbbv_all, + (*i_all)++); + else + pbb_all = NULL; + } + } + + /* the rest of the bb_new */ + if (len_new > 0) { + bb_add.offset = beg_new; + bb_add.length = len_new; + + if (rep > 0) + /* it can be fixed with replica #0 */ + bb_add.nhealthy = 0; + else + bb_add.nhealthy = NO_HEALTHY_REPLICA; + + if (VEC_PUSH_BACK(pbbv_aux, bb_add)) + return -1; + + LOG(10, + "added bad block (new-rest): " + BB_DATA_STR, + bb_add.offset, bb_add.length, + bb_add.nhealthy); + } + } + + if (pbb_all != NULL && pbb_all->length > 0 && *i_all > 0) + /* this pbb_all will be used again in the next part */ + (*i_all)--; + } + + return 0; +} + +/* + * sync_badblocks_assign_healthy_replica -- (internal) assign healthy replica + * for each bad block + */ +static int +sync_badblocks_assign_healthy_replica(struct part_health_status *phs, + int rep, + struct bb_vec *pbbv_all, + unsigned *i_all) +{ + LOG(3, "phs %p rep %i pbbv_all %p i_all %i", + phs, rep, pbbv_all, *i_all); + + struct bad_block bb_new; /* a new element */ + struct bad_block bb_old; /* an old element */ + struct bad_block *pbb_all; /* current element of bbv_all[] */ + + size_t length_left; + + struct bb_vec bbv_new = VEC_INITIALIZER; + + size_t size_all = VEC_SIZE(pbbv_all); + pbb_all = VEC_GET(pbbv_all, *i_all); + + for (unsigned i = 0; i < phs->bbs.bb_cnt; i++) { + bb_old = phs->bbs.bbv[i]; + + LOG(10, + "assigning old bad block: " BB_DATA_STR, + bb_old.offset, bb_old.length, bb_old.nhealthy); + + /* + * Skip all bad blocks from bbv_all with offsets + * less than the offset of the current bb_old. 
+ */ + while (pbb_all->offset < bb_old.offset) { + /* (*i_all) has to be less than (size_all - 1) */ + ASSERT(*i_all < size_all - 1); + pbb_all = VEC_GET(pbbv_all, ++(*i_all)); + } + + bb_new.offset = bb_old.offset; + length_left = bb_old.length; + + while (length_left > 0) { + LOG(10, + "checking saved bad block: " BB_DATA_STR, + pbb_all->offset, pbb_all->length, + pbb_all->nhealthy); + + ASSERTeq(pbb_all->offset, bb_new.offset); + ASSERT(pbb_all->length <= length_left); + + bb_new.length = pbb_all->length; + bb_new.nhealthy = pbb_all->nhealthy; + + if (VEC_PUSH_BACK(&bbv_new, bb_new)) + goto error_exit; + + LOG(10, + "added new bad block: " BB_DATA_STR, + bb_new.offset, bb_new.length, bb_new.nhealthy); + + bb_new.offset += bb_new.length; + length_left -= bb_new.length; + + if (length_left == 0) + continue; + + /* (*i_all) has to be less than (size_all - 1) */ + ASSERT(*i_all < size_all - 1); + pbb_all = VEC_GET(pbbv_all, ++(*i_all)); + } + } + + Free(phs->bbs.bbv); + phs->bbs.bbv = VEC_ARR(&bbv_new); + phs->bbs.bb_cnt = (unsigned)VEC_SIZE(&bbv_new); + + LOG(10, "added %u new bad blocks", phs->bbs.bb_cnt); + + return 0; + +error_exit: + VEC_DELETE(&bbv_new); + return -1; +} + +/* + * sync_badblocks_move_vec -- (internal) move bad blocks from vector pbbv_all + * to vector pbbv_aux + */ +static int +sync_badblocks_move_vec(struct bb_vec *pbbv_all, + struct bb_vec *pbbv_aux, + unsigned i_all, + unsigned rep) +{ + LOG(3, "pbbv_all %p pbbv_aux %p i_all %u rep %u", + pbbv_all, pbbv_aux, i_all, rep); + + size_t size_all = VEC_SIZE(pbbv_all); + struct bad_block *pbb_all; + + while (i_all < size_all) { + pbb_all = VEC_GET(pbbv_all, i_all++); + + if (pbb_all->length == 0) + continue; + + if (pbb_all->nhealthy == NO_HEALTHY_REPLICA && rep > 0) + /* it can be fixed using the last replica */ + pbb_all->nhealthy = (int)rep; + + if (VEC_PUSH_BACK(pbbv_aux, *pbb_all)) + return -1; + + LOG(10, + "added bad block (prev-after): " BB_DATA_STR, + pbb_all->offset, pbb_all->length, + pbb_all->nhealthy); + } + + return 0; +} + +/* + * sync_check_bad_blocks_overlap -- (internal) check if there are uncorrectable + * bad blocks (bad blocks overlapping + * in all replicas) + */ +static int +sync_check_bad_blocks_overlap(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p set_hs %p", set, set_hs); + + struct bb_vec bbv_all = VEC_INITIALIZER; + struct bb_vec bbv_aux = VEC_INITIALIZER; + + int ret = -1; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + struct replica_health_status *rep_hs = set_hs->replica[r]; + + unsigned i_all = 0; /* index in bbv_all */ + + for (unsigned p = 0; p < rep->nparts; ++p) { + struct part_health_status *phs = &rep_hs->part[p]; + + if (!replica_part_has_bad_blocks(phs)) { + /* skip parts with no bad blocks */ + continue; + } + + ASSERTne(phs->bbs.bb_cnt, 0); + ASSERTne(phs->bbs.bbv, NULL); + + LOG(10, "Replica %u part %u HAS %u bad blocks", + r, p, phs->bbs.bb_cnt); + + /* + * This function merges bad blocks from bbv_all + * with bad blocks from the current part + * and writes the outcome bad blocks to bbv_aux. + * Only bad blocks with offsets less or equal + * the greatest bad block's offset in the current part + * will be moved from bbv_all to bbv_aux. + * The rest of them has to be moved at the end + * by sync_badblocks_move_vec() below. 
+ */ + if (sync_badblocks_find_healthy_replica(phs, (int)r, + &bbv_all, &bbv_aux, + &i_all)) + goto exit; + } + + /* + * Move the rest of bad blocks from bbv_all to bbv_aux + * (for more details see the comment above). + * All these bad blocks can be fixed using the last replica 'r'. + */ + if (sync_badblocks_move_vec(&bbv_all, &bbv_aux, i_all, r)) + return -1; + + /* bbv_aux becomes a new bbv_all */ + VEC_MOVE(&bbv_all, &bbv_aux); + i_all = 0; + } + + ret = 0; + + /* check if there is an uncorrectable bad block */ + size_t size_all = VEC_SIZE(&bbv_all); + for (unsigned i = 0; i < size_all; i++) { + struct bad_block *pbb_all = VEC_GET(&bbv_all, i); + if (pbb_all->nhealthy == NO_HEALTHY_REPLICA) { + ret = 1; /* this bad block cannot be fixed */ + + LOG(1, + "uncorrectable bad block found: offset 0x%zx, length 0x%zx", + pbb_all->offset, pbb_all->length); + + goto exit; + } + } + + /* + * All bad blocks can be fixed, + * so assign healthy replica for each of them. + */ + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + struct replica_health_status *rep_hs = set_hs->replica[r]; + + if (!replica_has_bad_blocks(r, set_hs)) { + /* skip replicas with no bad blocks */ + continue; + } + + unsigned i_all = 0; /* index in bbv_all */ + + for (unsigned p = 0; p < rep->nparts; ++p) { + struct part_health_status *phs = &rep_hs->part[p]; + + if (!replica_part_has_bad_blocks(phs)) { + /* skip parts with no bad blocks */ + continue; + } + + if (sync_badblocks_assign_healthy_replica(phs, (int)r, + &bbv_all, + &i_all)) + goto exit; + } + } + +exit: + VEC_DELETE(&bbv_aux); + VEC_DELETE(&bbv_all); + + return ret; +} + +/* + * sync_badblocks_data -- (internal) clear bad blocks in replica + */ +static int +sync_badblocks_data(struct pool_set *set, struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + + struct pool_replica *rep_h; + + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + struct replica_health_status *rep_hs = set_hs->replica[r]; + + for (unsigned p = 0; p < rep->nparts; ++p) { + + struct part_health_status *phs = &rep_hs->part[p]; + + if (!replica_part_has_bad_blocks(phs)) { + /* skip parts with no bad blocks */ + continue; + } + + ASSERTne(phs->bbs.bb_cnt, 0); + ASSERTne(phs->bbs.bbv, NULL); + + const struct pool_set_part *part = &rep->part[p]; + size_t part_off = replica_get_part_offset(set, r, p); + + for (unsigned i = 0; i < phs->bbs.bb_cnt; i++) { + size_t off = phs->bbs.bbv[i].offset - part_off; + size_t len = phs->bbs.bbv[i].length; + + ASSERT(phs->bbs.bbv[i].nhealthy >= 0); + + rep_h = REP(set, + (unsigned)phs->bbs.bbv[i].nhealthy); + + void *src_addr = ADDR_SUM(rep_h->part[0].addr, + part_off + off); + void *dst_addr = ADDR_SUM(part->addr, off); + + if (sync_copy_data(src_addr, dst_addr, + part_off + off, len, + rep_h, rep, part)) + return -1; + } + + /* free array of bad blocks */ + Free(phs->bbs.bbv); + phs->bbs.bbv = NULL; + + /* mark part as having no bad blocks */ + sync_mark_part_no_badblocks(r, p, set_hs); + } + + /* mark replica as having no bad blocks */ + sync_mark_replica_no_badblocks(r, set_hs); + } + + LOG(1, "all bad blocks have been fixed"); + + if (replica_remove_all_recovery_files(set_hs)) { + LOG(1, "removing bad block recovery files failed"); + return -1; + } + + return 0; +} + +/* + * recreate_broken_parts -- (internal) create parts in place of the broken ones + */ +static int +recreate_broken_parts(struct pool_set *set, + struct poolset_health_status 
*set_hs, + int fix_bad_blocks) +{ + LOG(3, "set %p set_hs %p fix_bad_blocks %i", + set, set_hs, fix_bad_blocks); + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + if (set->replica[r]->remote) + continue; + + struct pool_replica *broken_r = set->replica[r]; + + for (unsigned p = 0; p < set_hs->replica[r]->nparts; ++p) { + /* skip unbroken parts */ + if (!replica_is_part_broken(r, p, set_hs)) + continue; + + /* remove parts from broken replica */ + if (replica_remove_part(set, r, p, fix_bad_blocks)) { + LOG(2, "cannot remove part"); + return -1; + } + + /* create removed part and open it */ + if (util_part_open(&broken_r->part[p], 0, + 1 /* create */)) { + LOG(2, "cannot open/create parts"); + return -1; + } + + sync_mark_part_no_badblocks(r, p, set_hs); + } + } + + return 0; +} + +/* + * fill_struct_part_uuids -- (internal) set part uuids in pool_set structure + */ +static void +fill_struct_part_uuids(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); + struct pool_replica *rep = REP(set, repn); + struct pool_hdr *hdrp; + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip broken parts */ + if (replica_is_part_broken(repn, p, set_hs)) + continue; + + hdrp = HDR(rep, p); + memcpy(rep->part[p].uuid, hdrp->uuid, POOL_HDR_UUID_LEN); + } +} + +/* + * is_uuid_already_used -- (internal) check if given uuid is assigned to + * any of the earlier replicas + */ +static int +is_uuid_already_used(uuid_t uuid, struct pool_set *set, unsigned repn) +{ + for (unsigned r = 0; r < repn; ++r) { + if (uuidcmp(uuid, PART(REP(set, r), 0)->uuid) == 0) + return 1; + } + return 0; +} + +/* + * fill_struct_broken_part_uuids -- (internal) set part uuids in pool_set + * structure + */ +static int +fill_struct_broken_part_uuids(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs, unsigned flags) +{ + LOG(3, "set %p, repn %u, set_hs %p, flags %u", set, repn, set_hs, + flags); + struct pool_replica *rep = REP(set, repn); + struct pool_hdr *hdrp; + for (unsigned p = 0; p < rep->nhdrs; ++p) { + /* skip unbroken parts */ + if (!replica_is_part_broken(repn, p, set_hs)) + continue; + + /* check if part was damaged or was added by transform */ + if (replica_is_poolset_transformed(flags)) { + /* generate new uuid for this part */ + if (util_uuid_generate(rep->part[p].uuid) < 0) { + ERR("cannot generate pool set part UUID"); + errno = EINVAL; + return -1; + } + continue; + } + + if (!replica_is_part_broken(repn, p - 1, set_hs) && + !(set->options & OPTION_SINGLEHDR)) { + /* try to get part uuid from the previous part */ + hdrp = HDRP(rep, p); + memcpy(rep->part[p].uuid, hdrp->next_part_uuid, + POOL_HDR_UUID_LEN); + } else if (!replica_is_part_broken(repn, p + 1, set_hs) && + !(set->options & OPTION_SINGLEHDR)) { + /* try to get part uuid from the next part */ + hdrp = HDRN(rep, p); + memcpy(rep->part[p].uuid, hdrp->prev_part_uuid, + POOL_HDR_UUID_LEN); + } else if (p == 0 && + !replica_is_part_broken(repn - 1, 0, set_hs)) { + /* try to get part uuid from the previous replica */ + hdrp = HDR(REPP(set, repn), 0); + if (is_uuid_already_used(hdrp->next_repl_uuid, set, + repn)) { + ERR( + "repeated uuid - some replicas were created with a different poolset file"); + errno = EINVAL; + return -1; + } + memcpy(rep->part[p].uuid, hdrp->next_repl_uuid, + POOL_HDR_UUID_LEN); + } else if (p == 0 && + !replica_is_part_broken(repn + 1, 0, set_hs)) { + /* try to get part uuid from the next replica */ + hdrp = HDR(REPN(set, 
repn), 0); + if (is_uuid_already_used(hdrp->prev_repl_uuid, set, + repn)) { + ERR( + "repeated uuid - some replicas were created with a different poolset file"); + errno = EINVAL; + return -1; + } + memcpy(rep->part[p].uuid, hdrp->prev_repl_uuid, + POOL_HDR_UUID_LEN); + } else { + /* generate new uuid for this part */ + if (util_uuid_generate(rep->part[p].uuid) < 0) { + ERR("cannot generate pool set part UUID"); + errno = EINVAL; + return -1; + } + } + } + return 0; +} + +/* + * fill_struct_uuids -- (internal) fill fields in pool_set needed for further + * altering of uuids + */ +static int +fill_struct_uuids(struct pool_set *set, unsigned src_replica, + struct poolset_health_status *set_hs, unsigned flags) +{ + LOG(3, "set %p, src_replica %u, set_hs %p, flags %u", set, src_replica, + set_hs, flags); + + /* set poolset uuid */ + struct pool_hdr *src_hdr0 = HDR(REP(set, src_replica), 0); + memcpy(set->uuid, src_hdr0->poolset_uuid, POOL_HDR_UUID_LEN); + + /* set unbroken parts' uuids */ + for (unsigned r = 0; r < set->nreplicas; ++r) { + fill_struct_part_uuids(set, r, set_hs); + } + + /* set broken parts' uuids */ + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (fill_struct_broken_part_uuids(set, r, set_hs, flags)) + return -1; + } + return 0; +} + +/* + * create_headers_for_broken_parts -- (internal) create headers for all new + * parts created in place of the broken ones + */ +static int +create_headers_for_broken_parts(struct pool_set *set, unsigned src_replica, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, src_replica %u, set_hs %p", set, src_replica, set_hs); + + struct pool_hdr *src_hdr = HDR(REP(set, src_replica), 0); + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + /* skip unbroken replicas */ + if (!replica_is_replica_broken(r, set_hs) && + !replica_has_bad_blocks(r, set_hs)) + continue; + + for (unsigned p = 0; p < set_hs->replica[r]->nhdrs; p++) { + /* skip unbroken parts */ + if (!replica_is_part_broken(r, p, set_hs) && + !replica_part_has_corrupted_header(r, p, set_hs)) + continue; + + if (sync_recreate_header(set, r, p, src_hdr)) + return -1; + } + } + return 0; +} + +/* + * copy_data_to_broken_parts -- (internal) copy data to all parts created + * in place of the broken ones + */ +static int +copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica, + unsigned flags, struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, healthy_replica %u, flags %u, set_hs %p", set, + healthy_replica, flags, set_hs); + + /* get pool size from healthy replica */ + size_t poolsize = set->poolsize; + + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + /* skip unbroken and consistent replicas */ + if (replica_is_replica_healthy(r, set_hs)) + continue; + + struct pool_replica *rep = REP(set, r); + struct pool_replica *rep_h = REP(set, healthy_replica); + + for (unsigned p = 0; p < rep->nparts; ++p) { + /* skip unbroken parts from consistent replicas */ + if (!replica_is_part_broken(r, p, set_hs) && + replica_is_replica_consistent(r, set_hs)) + continue; + + const struct pool_set_part *part = &rep->part[p]; + + size_t off = replica_get_part_data_offset(set, r, p); + size_t len = replica_get_part_data_len(set, r, p); + + /* do not allow copying too much data */ + if (off >= poolsize) + continue; + + if (off + len > poolsize || rep->remote) + len = poolsize - off; + + /* + * First part of replica is mapped + * with header + */ + size_t fpoff = (p == 0) ? 
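+/* + * For part 0 the mapped file starts with the pool header, so the + * destination pointer below has to skip POOL_HDR_SIZE bytes; the + * remaining parts are mapped without a header, hence fpoff == 0. + */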
POOL_HDR_SIZE : 0; + void *src_addr = ADDR_SUM(rep_h->part[0].addr, off); + void *dst_addr = ADDR_SUM(part->addr, fpoff); + + if (sync_copy_data(src_addr, dst_addr, off, len, + rep_h, rep, part)) + return -1; + } + } + return 0; +} + +/* + * grant_created_parts_perm -- (internal) set RW permission rights to all + * the parts created in place of the broken ones + */ +static int +grant_created_parts_perm(struct pool_set *set, unsigned src_repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, src_repn %u, set_hs %p", set, src_repn, set_hs); + + /* choose the default permissions */ + mode_t def_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + + /* get permissions of the first part of the source replica */ + mode_t src_mode; + os_stat_t sb; + if (REP(set, src_repn)->remote) { + src_mode = def_mode; + } else if (os_stat(PART(REP(set, src_repn), 0)->path, &sb) != 0) { + ERR("cannot check file permissions of %s (replica %u, part %u)", + PART(REP(set, src_repn), 0)->path, src_repn, 0); + src_mode = def_mode; + } else { + src_mode = sb.st_mode; + } + + /* set permissions to all recreated parts */ + for (unsigned r = 0; r < set_hs->nreplicas; ++r) { + /* skip unbroken replicas */ + if (!replica_is_replica_broken(r, set_hs)) + continue; + + if (set->replica[r]->remote) + continue; + + for (unsigned p = 0; p < set_hs->replica[r]->nparts; p++) { + /* skip parts which were not created */ + if (!PART(REP(set, r), p)->created) + continue; + + LOG(4, "setting permissions for part %u, replica %u", + p, r); + + /* set rights to those of existing part files */ + if (os_chmod(PART(REP(set, r), p)->path, src_mode)) { + ERR( + "cannot set permission rights for created parts: replica %u, part %u", + r, p); + errno = EPERM; + return -1; + } + } + } + return 0; +} + +/* + * update_parts_linkage -- (internal) set uuids linking recreated parts within + * a replica + */ +static int +update_parts_linkage(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); + struct pool_replica *rep = REP(set, repn); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + struct pool_hdr *hdrp = HDR(rep, p); + struct pool_hdr *prev_hdrp = HDRP(rep, p); + struct pool_hdr *next_hdrp = HDRN(rep, p); + + /* set uuids in the current part */ + memcpy(hdrp->prev_part_uuid, PARTP(rep, p)->uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->next_part_uuid, PARTN(rep, p)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, + 1, POOL_HDR_CSUM_END_OFF(hdrp)); + + /* set uuids in the previous part */ + memcpy(prev_hdrp->next_part_uuid, PART(rep, p)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(prev_hdrp, sizeof(*prev_hdrp), + &prev_hdrp->checksum, 1, + POOL_HDR_CSUM_END_OFF(prev_hdrp)); + + /* set uuids in the next part */ + memcpy(next_hdrp->prev_part_uuid, PART(rep, p)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(next_hdrp, sizeof(*next_hdrp), + &next_hdrp->checksum, 1, + POOL_HDR_CSUM_END_OFF(next_hdrp)); + + /* store pool's header */ + util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); + util_persist(PARTP(rep, p)->is_dev_dax, prev_hdrp, + sizeof(*prev_hdrp)); + util_persist(PARTN(rep, p)->is_dev_dax, next_hdrp, + sizeof(*next_hdrp)); + + } + return 0; +} + +/* + * update_replicas_linkage -- (internal) update uuids linking replicas + */ +static int +update_replicas_linkage(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_replica *rep = REP(set, repn); + struct pool_replica *prev_r = 
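+/* + * Replicas form a ring; e.g. with three replicas r0, r1 and r2 the + * next_repl_uuid links run r0 -> r1 -> r2 -> r0 and the prev_repl_uuid + * links run the opposite way, so recreating one replica requires + * rewriting the link fields in both of its neighbours, as done below. + */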
REPP(set, repn); + struct pool_replica *next_r = REPN(set, repn); + + ASSERT(rep->nparts > 0); + ASSERT(prev_r->nparts > 0); + ASSERT(next_r->nparts > 0); + + /* set uuids in the current replica */ + for (unsigned p = 0; p < rep->nhdrs; ++p) { + struct pool_hdr *hdrp = HDR(rep, p); + memcpy(hdrp->prev_repl_uuid, PART(prev_r, 0)->uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->next_repl_uuid, PART(next_r, 0)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, + 1, POOL_HDR_CSUM_END_OFF(hdrp)); + + /* store pool's header */ + util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); + } + + /* set uuids in the previous replica */ + for (unsigned p = 0; p < prev_r->nhdrs; ++p) { + struct pool_hdr *prev_hdrp = HDR(prev_r, p); + memcpy(prev_hdrp->next_repl_uuid, PART(rep, 0)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(prev_hdrp, sizeof(*prev_hdrp), + &prev_hdrp->checksum, 1, + POOL_HDR_CSUM_END_OFF(prev_hdrp)); + + /* store pool's header */ + util_persist(PART(prev_r, p)->is_dev_dax, prev_hdrp, + sizeof(*prev_hdrp)); + } + + /* set uuids in the next replica */ + for (unsigned p = 0; p < next_r->nhdrs; ++p) { + struct pool_hdr *next_hdrp = HDR(next_r, p); + + memcpy(next_hdrp->prev_repl_uuid, PART(rep, 0)->uuid, + POOL_HDR_UUID_LEN); + util_checksum(next_hdrp, sizeof(*next_hdrp), + &next_hdrp->checksum, 1, + POOL_HDR_CSUM_END_OFF(next_hdrp)); + + /* store pool's header */ + util_persist(PART(next_r, p)->is_dev_dax, next_hdrp, + sizeof(*next_hdrp)); + } + + return 0; +} + +/* + * update_poolset_uuids -- (internal) update poolset uuid in recreated parts + */ +static int +update_poolset_uuids(struct pool_set *set, unsigned repn, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, repn %u, set_hs %p", set, repn, set_hs); + struct pool_replica *rep = REP(set, repn); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + struct pool_hdr *hdrp = HDR(rep, p); + memcpy(hdrp->poolset_uuid, set->uuid, POOL_HDR_UUID_LEN); + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, + 1, POOL_HDR_CSUM_END_OFF(hdrp)); + + /* store pool's header */ + util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); + } + return 0; +} + +/* + * update_remote_headers -- (internal) update headers of existing remote + * replicas + */ +static int +update_remote_headers(struct pool_set *set) +{ + LOG(3, "set %p", set); + for (unsigned r = 0; r < set->nreplicas; ++r) { + /* skip local or just created replicas */ + if (REP(set, r)->remote == NULL || + PART(REP(set, r), 0)->created == 1) + continue; + + if (util_update_remote_header(set, r)) { + LOG(1, + "updating header of a remote replica no. 
%u failed", + r); + return -1; + } + } + return 0; +} + +/* + * update_uuids -- (internal) set all uuids that might have changed or be unset + * after recreating parts + */ +static int +update_uuids(struct pool_set *set, struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; ++r) { + if (!replica_is_replica_healthy(r, set_hs)) + update_parts_linkage(set, r, set_hs); + + update_replicas_linkage(set, r); + update_poolset_uuids(set, r, set_hs); + } + + if (update_remote_headers(set)) + return -1; + + return 0; +} + +/* + * remove_remote -- (internal) remove remote pool + */ +static int +remove_remote(const char *target, const char *pool_set) +{ + LOG(3, "target %s, pool_set %s", target, pool_set); +#ifdef USE_RPMEM + struct rpmem_target_info *info = rpmem_target_parse(target); + if (!info) + goto err_parse; + + struct rpmem_ssh *ssh = rpmem_ssh_exec(info, "--remove", + pool_set, "--force", NULL); + if (!ssh) { + goto err_ssh_exec; + } + + if (rpmem_ssh_monitor(ssh, 0)) + goto err_ssh_monitor; + + int ret = rpmem_ssh_close(ssh); + rpmem_target_free(info); + + return ret; +err_ssh_monitor: + rpmem_ssh_close(ssh); +err_ssh_exec: + rpmem_target_free(info); +err_parse: + return -1; +#else + FATAL("remote replication not supported"); + return -1; +#endif +} + +/* + * open_remote_replicas -- (internal) open all unbroken remote replicas + */ +static int +open_remote_replicas(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + if (!rep->remote) + continue; + if (!replica_is_replica_healthy(r, set_hs)) + continue; + + unsigned nlanes = REMOTE_NLANES; + int ret = util_poolset_remote_replica_open(set, r, + set->poolsize, 0, &nlanes); + if (ret) { + LOG(1, "Opening '%s' on '%s' failed", + rep->remote->pool_desc, + rep->remote->node_addr); + return ret; + } + } + + return 0; +} + +/* + * create_remote_replicas -- (internal) recreate all broken replicas + */ +static int +create_remote_replicas(struct pool_set *set, + struct poolset_health_status *set_hs, unsigned flags) +{ + LOG(3, "set %p, set_hs %p", set, set_hs); + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + if (!rep->remote) + continue; + if (replica_is_replica_healthy(r, set_hs)) + continue; + + if (!replica_is_poolset_transformed(flags)) { + /* ignore errors from remove operation */ + remove_remote(rep->remote->node_addr, + rep->remote->pool_desc); + } + + unsigned nlanes = REMOTE_NLANES; + int ret = util_poolset_remote_replica_open(set, r, + set->poolsize, 1, &nlanes); + if (ret) { + LOG(1, "Creating '%s' on '%s' failed", + rep->remote->pool_desc, + rep->remote->node_addr); + return ret; + } + } + + return 0; +} + +/* + * sync_replica -- synchronize data across replicas within a poolset + */ +int +replica_sync(struct pool_set *set, struct poolset_health_status *s_hs, + unsigned flags) +{ + LOG(3, "set %p, flags %u", set, flags); + int ret = 0; + struct poolset_health_status *set_hs = NULL; + + /* check if we already know the poolset health status */ + if (s_hs == NULL) { + /* validate poolset before checking its health */ + if (validate_args(set)) + return -1; + + /* examine poolset's health */ + if (replica_check_poolset_health(set, &set_hs, + 1 /* called from sync */, + flags)) { + LOG(1, "poolset health check failed"); + return -1; + } + + /* check if poolset is broken; if 
not, nothing to do */ + if (replica_is_poolset_healthy(set_hs)) { + LOG(1, "poolset is healthy"); + goto out; + } + } else { + set_hs = s_hs; + } + + /* find a replica with healthy header; it will be the source of data */ + unsigned healthy_replica = replica_find_healthy_replica(set_hs); + unsigned healthy_header = healthy_replica; + if (healthy_header == UNDEF_REPLICA) { + healthy_header = replica_find_replica_healthy_header(set_hs); + if (healthy_header == UNDEF_REPLICA) { + ERR("no healthy replica found"); + errno = EINVAL; + ret = -1; + goto out; + } + } + + /* in dry-run mode we can stop here */ + if (is_dry_run(flags)) { + LOG(1, "Sync in dry-run mode finished successfully"); + goto out; + } + + /* recreate broken parts */ + if (recreate_broken_parts(set, set_hs, fix_bad_blocks(flags))) { + ERR("recreating broken parts failed"); + ret = -1; + goto out; + } + + /* open all part files */ + if (replica_open_poolset_part_files(set)) { + ERR("opening poolset part files failed"); + ret = -1; + goto out; + } + + /* map all replicas */ + if (util_poolset_open(set)) { + ERR("opening poolset failed"); + ret = -1; + goto out; + } + + /* this is required for opening remote pools */ + set->poolsize = set_hs->replica[healthy_header]->pool_size; + LOG(3, "setting the pool size (%zu) from replica #%u", + set->poolsize, healthy_header); + + /* open all remote replicas */ + if (open_remote_replicas(set, set_hs)) { + ERR("opening remote replicas failed"); + ret = -1; + goto out; + } + + /* recalculate offset and length of bad blocks */ + if (sync_recalc_badblocks(set, set_hs)) { + LOG(1, "syncing bad blocks data failed"); + ret = -1; + goto out; + } + + /* + * Check if there are uncorrectable bad blocks + * (bad blocks overlapping in all replicas). + */ + int status = sync_check_bad_blocks_overlap(set, set_hs); + if (status == -1) { + LOG(1, "checking bad blocks failed"); + ret = -1; + goto out; + } + + if (status == 1) { + ERR( + "a part of the pool has uncorrectable errors in all replicas"); + errno = EINVAL; + ret = -1; + goto out; + } + + LOG(3, "bad blocks do not overlap"); + + /* sync data in bad blocks */ + if (sync_badblocks_data(set, set_hs)) { + LOG(1, "syncing bad blocks data failed"); + ret = -1; + goto out; + } + + /* find one good replica; it will be the source of data */ + healthy_replica = replica_find_healthy_replica(set_hs); + if (healthy_replica == UNDEF_REPLICA) { + ERR("no healthy replica found"); + errno = EINVAL; + ret = -1; + goto out; + } + + /* update uuid fields in the set structure with part headers */ + if (fill_struct_uuids(set, healthy_replica, set_hs, flags)) { + ERR("gathering uuids failed"); + ret = -1; + goto out; + } + + /* create headers for broken parts */ + if (create_headers_for_broken_parts(set, healthy_replica, set_hs)) { + ERR("creating headers for broken parts failed"); + ret = -1; + goto out; + } + + /* create all remote replicas */ + if (create_remote_replicas(set, set_hs, flags)) { + ERR("creating remote replicas failed"); + ret = -1; + goto out; + } + + /* check and copy data if possible */ + if (copy_data_to_broken_parts(set, healthy_replica, + flags, set_hs)) { + ERR("copying data to broken parts failed"); + ret = -1; + goto out; + } + + /* update uuids of replicas and parts */ + if (update_uuids(set, set_hs)) { + ERR("updating uuids failed"); + ret = -1; + goto out; + } + + /* grant permissions to all created parts */ + if (grant_created_parts_perm(set, healthy_replica, set_hs)) { + ERR("granting permissions to created parts failed"); + ret = -1; + } + 
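+/* + * This is the last step, so on a permissions failure ret is set to -1 + * and control simply falls through to the common exit path below. + */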
+out: + if (s_hs == NULL) + replica_free_poolset_health_status(set_hs); + return ret; +} diff --git a/src/pmdk/src/libpmempool/transform.c b/src/pmdk/src/libpmempool/transform.c new file mode 100644 index 000000000..5d777de3f --- /dev/null +++ b/src/pmdk/src/libpmempool/transform.c @@ -0,0 +1,1017 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * transform.c -- a module for poolset transforming + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "replica.h" +#include "out.h" +#include "file.h" +#include "os.h" +#include "libpmem.h" +#include "util_pmem.h" + +/* + * poolset_compare_status - a helping structure for gathering corresponding + * replica numbers when comparing poolsets + */ +struct poolset_compare_status +{ + unsigned nreplicas; + unsigned flags; + unsigned replica[]; +}; + +/* + * type of transform operation to be done + */ +enum transform_op { + NOT_TRANSFORMABLE, + ADD_REPLICAS, + RM_REPLICAS, + ADD_HDRS, + RM_HDRS, +}; + +/* + * check_if_part_used_once -- (internal) check if the part is used only once in + * the rest of the poolset + */ +static int +check_if_part_used_once(struct pool_set *set, unsigned repn, unsigned partn) +{ + LOG(3, "set %p, repn %u, partn %u", set, repn, partn); + struct pool_replica *rep = REP(set, repn); + char *path = util_part_realpath(PART(rep, partn)->path); + if (path == NULL) { + LOG(1, "cannot get absolute path for %s, replica %u, part %u", + PART(rep, partn)->path, repn, partn); + errno = 0; + path = strdup(PART(rep, partn)->path); + if (path == NULL) { + ERR("!strdup"); + return -1; + } + } + int ret = 0; + for (unsigned r = repn; r < set->nreplicas; ++r) { + struct pool_replica *repr = set->replica[r]; + /* skip remote replicas */ + if (repr->remote != NULL) + continue; + + /* avoid superfluous comparisons */ + unsigned i = (r == repn) ? 
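+/* + * Within the part's own replica the scan starts just past the part; in + * later replicas it starts from part 0. Across the caller's loop every + * unordered pair of parts is therefore compared exactly once. + */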
partn + 1 : 0;
+		for (unsigned p = i; p < repr->nparts; ++p) {
+			char *pathp = util_part_realpath(PART(repr, p)->path);
+			if (pathp == NULL) {
+				if (errno != ENOENT) {
+					ERR("realpath failed for %s, errno %d",
+						PART(repr, p)->path, errno);
+					ret = -1;
+					goto out;
+				}
+				LOG(1, "cannot get absolute path for %s,"
+					" replica %u, part %u",
+					PART(repr, p)->path, r, p);
+				errno = 0;
+				pathp = strdup(PART(repr, p)->path);
+				if (pathp == NULL) {
+					ERR("!strdup");
+					ret = -1;
+					goto out;
+				}
+			}
+			int result = util_compare_file_inodes(path, pathp);
+			if (result == 0) {
+				/* same file used multiple times */
+				ERR("some part file's path is"
+					" used multiple times");
+				ret = -1;
+				errno = EINVAL;
+				free(pathp);
+				goto out;
+			} else if (result < 0) {
+				ERR("comparing file inodes failed for %s and"
+					" %s", path, pathp);
+				ret = -1;
+				free(pathp);
+				goto out;
+			}
+			free(pathp);
+		}
+	}
+out:
+	free(path);
+	return ret;
+}
+
+/*
+ * check_if_remote_replica_used_once -- (internal) check if a remote replica
+ *                                      is used only once in the rest of the
+ *                                      poolset
+ */
+static int
+check_if_remote_replica_used_once(struct pool_set *set, unsigned repn)
+{
+	LOG(3, "set %p, repn %u", set, repn);
+	struct remote_replica *rep = REP(set, repn)->remote;
+	ASSERTne(rep, NULL);
+	for (unsigned r = repn + 1; r < set->nreplicas; ++r) {
+		/* skip local replicas */
+		if (REP(set, r)->remote == NULL)
+			continue;
+
+		struct remote_replica *repr = REP(set, r)->remote;
+		/* XXX: add comparing resolved addresses of the nodes */
+		if (strcmp(rep->node_addr, repr->node_addr) == 0 &&
+				strcmp(rep->pool_desc, repr->pool_desc) == 0) {
+			ERR("remote replica %u is used multiple times", repn);
+			errno = EINVAL;
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * check_paths -- (internal) check if directories for part files exist
+ *                and if paths for part files do not repeat in the poolset
+ */
+static int
+check_paths(struct pool_set *set)
+{
+	LOG(3, "set %p", set);
+	for (unsigned r = 0; r < set->nreplicas; ++r) {
+		struct pool_replica *rep = set->replica[r];
+		if (rep->remote != NULL) {
+			if (check_if_remote_replica_used_once(set, r))
+				return -1;
+		} else {
+			for (unsigned p = 0; p < rep->nparts; ++p) {
+				if (replica_check_local_part_dir(set, r, p))
+					return -1;
+
+				if (check_if_part_used_once(set, r, p))
+					return -1;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * validate_args -- (internal) check whether the passed arguments are valid
+ */
+static int
+validate_args(struct pool_set *set_in, struct pool_set *set_out)
+{
+	LOG(3, "set_in %p, set_out %p", set_in, set_out);
+
+	if (set_in->directory_based) {
+		ERR("transform of directory poolsets is not supported");
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * check if all parts in the target poolset are large enough
+	 * (now replication works only for pmemobj pools)
+	 */
+	if (replica_check_part_sizes(set_out, PMEMOBJ_MIN_POOL)) {
+		ERR("part sizes check failed");
+		return -1;
+	}
+
+	/*
+	 * check if all directories for part files exist and if part files
+	 * do not reoccur in the poolset
+	 */
+	if (check_paths(set_out))
+		return -1;
+
+	/*
+	 * check if set_out has enough size, i.e. if the target poolset
+	 * structure has enough capacity to accommodate the effective size of
+	 * the source poolset
+	 */
+	ssize_t master_pool_size = replica_get_pool_size(set_in, 0);
+	if (master_pool_size < 0) {
+		ERR("getting pool size from master replica failed");
+		return -1;
+	}
+
+	if (set_out->poolsize < (size_t)master_pool_size) {
+		ERR("target poolset is too small");
+		errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * create_poolset_compare_status -- (internal) create a structure for
+ *                                  gathering the status of a poolset
+ *                                  comparison
+ */
+static int
+create_poolset_compare_status(struct pool_set *set,
+		struct poolset_compare_status **set_sp)
+{
+	LOG(3, "set %p, set_sp %p", set, set_sp);
+	struct poolset_compare_status *set_s;
+	set_s = Zalloc(sizeof(struct poolset_compare_status) +
+			set->nreplicas * sizeof(unsigned));
+	if (set_s == NULL) {
+		ERR("!Zalloc for poolset status");
+		return -1;
+	}
+	for (unsigned r = 0; r < set->nreplicas; ++r)
+		set_s->replica[r] = UNDEF_REPLICA;
+
+	set_s->nreplicas = set->nreplicas;
+	*set_sp = set_s;
+	return 0;
+}
+
+/*
+ * compare_parts -- (internal) check if two parts can be considered the same
+ */
+static int
+compare_parts(struct pool_set_part *p1, struct pool_set_part *p2)
+{
+	LOG(3, "p1 %p, p2 %p", p1, p2);
+	LOG(4, "p1->path: %s, p1->filesize: %lu", p1->path, p1->filesize);
+	LOG(4, "p2->path: %s, p2->filesize: %lu", p2->path, p2->filesize);
+	return strcmp(p1->path, p2->path) || (p1->filesize != p2->filesize);
+}
+
+/*
+ * compare_replicas -- (internal) check if two replicas are different
+ */
+static int
+compare_replicas(struct pool_replica *r1, struct pool_replica *r2)
+{
+	LOG(3, "r1 %p, r2 %p", r1, r2);
+	LOG(4, "r1->nparts: %u, r2->nparts: %u", r1->nparts, r2->nparts);
+	/* both replicas are local */
+	if (r1->remote == NULL && r2->remote == NULL) {
+		if (r1->nparts != r2->nparts)
+			return 1;
+
+		for (unsigned p = 0; p < r1->nparts; ++p) {
+			if (compare_parts(&r1->part[p], &r2->part[p]))
+				return 1;
+		}
+		return 0;
+	}
+	/* both replicas are remote */
+	if (r1->remote != NULL && r2->remote != NULL) {
+		return strcmp(r1->remote->node_addr, r2->remote->node_addr) ||
+			strcmp(r1->remote->pool_desc, r2->remote->pool_desc);
+	}
+	/* a remote and a local replica */
+	return 1;
+}
+
+/*
+ * check_compare_poolsets_status -- (internal) find different replicas between
+ *                                  two poolsets; for each replica which has
+ *                                  a counterpart in the other poolset store
+ *                                  the other replica's number in a helping
+ *                                  structure
+ */
+static int
+check_compare_poolsets_status(struct pool_set *set_in,
+		struct pool_set *set_out,
+		struct poolset_compare_status *set_in_s,
+		struct poolset_compare_status *set_out_s)
+{
+	LOG(3, "set_in %p, set_out %p, set_in_s %p, set_out_s %p", set_in,
+			set_out, set_in_s, set_out_s);
+	for (unsigned ri = 0; ri < set_in->nreplicas; ++ri) {
+		struct pool_replica *rep_in = REP(set_in, ri);
+		for (unsigned ro = 0; ro < set_out->nreplicas; ++ro) {
+			struct pool_replica *rep_out = REP(set_out, ro);
+			LOG(1, "comparing rep_in %u with rep_out %u", ri, ro);
+			/* skip different replicas */
+			if (compare_replicas(rep_in, rep_out))
+				continue;
+
+			if (set_in_s->replica[ri] != UNDEF_REPLICA ||
+					set_out_s->replica[ro]
+					!= UNDEF_REPLICA) {
+				/* there is more than one counterpart */
+				ERR("there is more than one corresponding"
+					" replica; cannot transform");
+				errno = EINVAL;
+				return -1;
+			}
+
+			set_in_s->replica[ri] = ro;
+			set_out_s->replica[ro] = ri;
+		}
+	}
+	return 0;
+}
+
+/*
+ * check_compare_poolsets_options -- (internal) check poolset options
+ */
+static int
+check_compare_poolsets_options(struct pool_set *set_in,
+		struct pool_set *set_out,
+		struct poolset_compare_status *set_in_s,
+		struct poolset_compare_status *set_out_s)
+{
+	if (set_in->options & OPTION_SINGLEHDR)
+		set_in_s->flags |= OPTION_SINGLEHDR;
+
+	if (set_out->options & OPTION_SINGLEHDR)
+		set_out_s->flags |= OPTION_SINGLEHDR;
+
+	if ((set_in->options & OPTION_NOHDRS) ||
+			(set_out->options & OPTION_NOHDRS)) {
+		errno = EINVAL;
+		ERR(
+		"the NOHDRS poolset option is not supported in local poolset files");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * compare_poolsets -- (internal) compare two poolsets; for each replica which
+ *                     has a counterpart in the other poolset store the other
+ *                     replica's number in a helping structure
+ */
+static int
+compare_poolsets(struct pool_set *set_in, struct pool_set *set_out,
+		struct poolset_compare_status **set_in_s,
+		struct poolset_compare_status **set_out_s)
+{
+	LOG(3, "set_in %p, set_out %p, set_in_s %p, set_out_s %p", set_in,
+			set_out, set_in_s, set_out_s);
+	if (create_poolset_compare_status(set_in, set_in_s))
+		return -1;
+
+	if (create_poolset_compare_status(set_out, set_out_s))
+		goto err_free_in;
+
+	if (check_compare_poolsets_status(set_in, set_out, *set_in_s,
+			*set_out_s))
+		goto err_free_out;
+
+	if (check_compare_poolsets_options(set_in, set_out, *set_in_s,
+			*set_out_s))
+		goto err_free_out;
+
+	return 0;
+
+err_free_out:
+	Free(*set_out_s);
+err_free_in:
+	Free(*set_in_s);
+	return -1;
+}
+
+/*
+ * replica_counterpart -- (internal) return the index of a counterpart replica
+ */
+static unsigned
+replica_counterpart(unsigned repn,
+		struct poolset_compare_status *set_s)
+{
+	return set_s->replica[repn];
+}
+
+/*
+ * identify_transform_operation -- (internal) check if poolsets can be
+ *                                 transformed one into the other; also gather
+ *                                 info about the replicas' health
+ */
+static enum transform_op
+identify_transform_operation(struct poolset_compare_status *set_in_s,
+		struct poolset_compare_status *set_out_s,
+		struct poolset_health_status *set_in_hs,
+		struct poolset_health_status *set_out_hs)
+{
+	LOG(3, "set_in_s %p, set_out_s %p", set_in_s, set_out_s);
+
+	int has_replica_to_keep = 0;
+	int is_removing_replicas = 0;
+	int is_adding_replicas = 0;
+
+	/* check if there are replicas to be removed */
+	for (unsigned r = 0; r < set_in_s->nreplicas; ++r) {
+		unsigned c = replica_counterpart(r, set_in_s);
+		if (c != UNDEF_REPLICA) {
+			LOG(2, "replica %u has a counterpart %u", r,
+					set_in_s->replica[r]);
+			has_replica_to_keep = 1;
+			REP_HEALTH(set_out_hs, c)->pool_size =
+					REP_HEALTH(set_in_hs, r)->pool_size;
+		} else {
+			LOG(2, "replica %u has no counterpart", r);
+			is_removing_replicas = 1;
+		}
+	}
+
+	/* make sure we have at least one replica to keep */
+	if (!has_replica_to_keep) {
+		ERR("there must be at least one replica left");
+		return NOT_TRANSFORMABLE;
+	}
+
+	/* check if there are replicas to be added */
+	for (unsigned r = 0; r < set_out_s->nreplicas; ++r) {
+		if (replica_counterpart(r, set_out_s) == UNDEF_REPLICA) {
+			LOG(2, "Replica %u from output set has no counterpart",
+					r);
+			if (is_removing_replicas) {
+				ERR(
+				"adding and removing replicas at the same time is not allowed");
+				return NOT_TRANSFORMABLE;
+			}
+
+			REP_HEALTH(set_out_hs, r)->flags |= IS_BROKEN;
+			is_adding_replicas = 1;
+		}
+	}
+
+	/* check if there is anything to do */
+	if (!is_removing_replicas && !is_adding_replicas &&
+			(set_in_s->flags & OPTION_SINGLEHDR) ==
+			(set_out_s->flags & OPTION_SINGLEHDR)) {
ERR("both poolsets are equal"); + return NOT_TRANSFORMABLE; + } + + /* allow changing the SINGLEHDR option only as the sole operation */ + if ((is_removing_replicas || is_adding_replicas) && + (set_in_s->flags & OPTION_SINGLEHDR) != + (set_out_s->flags & OPTION_SINGLEHDR)) { + ERR( + "cannot add/remove replicas and change the SINGLEHDR option at the same time"); + return NOT_TRANSFORMABLE; + } + + if (is_removing_replicas) + return RM_REPLICAS; + + if (is_adding_replicas) + return ADD_REPLICAS; + + if (set_out_s->flags & OPTION_SINGLEHDR) + return RM_HDRS; + + if (set_in_s->flags & OPTION_SINGLEHDR) + return ADD_HDRS; + + ASSERT(0); + return NOT_TRANSFORMABLE; +} + +/* + * do_added_parts_exist -- (internal) check if any part of the replicas that are + * to be added (marked as broken) already exists + */ +static int +do_added_parts_exist(struct pool_set *set, + struct poolset_health_status *set_hs) +{ + for (unsigned r = 0; r < set->nreplicas; ++r) { + /* skip unbroken (i.e. not being added) replicas */ + if (!replica_is_replica_broken(r, set_hs)) + continue; + + struct pool_replica *rep = REP(set, r); + + /* skip remote replicas */ + if (rep->remote) + continue; + + for (unsigned p = 0; p < rep->nparts; ++p) { + /* check if part file exists */ + int oerrno = errno; + int exists = util_file_exists(rep->part[p].path); + if (exists < 0) + return -1; + + if (exists && !rep->part[p].is_dev_dax) { + LOG(1, "part file %s exists", + rep->part[p].path); + return 1; + } + errno = oerrno; + } + } + return 0; +} + +/* + * delete_replicas -- (internal) delete replicas which do not have their + * counterpart set in the helping status structure + */ +static int +delete_replicas(struct pool_set *set, struct poolset_compare_status *set_s) +{ + LOG(3, "set %p, set_s %p", set, set_s); + for (unsigned r = 0; r < set->nreplicas; ++r) { + struct pool_replica *rep = REP(set, r); + if (replica_counterpart(r, set_s) == UNDEF_REPLICA) { + if (!rep->remote) { + if (util_replica_close_local(rep, r, + DELETE_ALL_PARTS)) + return -1; + } else { + if (util_replica_close_remote(rep, r, + DELETE_ALL_PARTS)) + return -1; + } + } + } + return 0; +} + +/* + * copy_replica_data_fw -- (internal) copy data between replicas of two + * poolsets, starting from the beginning of the + * second part + */ +static void +copy_replica_data_fw(struct pool_set *set_dst, struct pool_set *set_src, + unsigned repn) +{ + LOG(3, "set_in %p, set_out %p, repn %u", set_src, set_dst, repn); + ssize_t pool_size = replica_get_pool_size(set_src, repn); + if (pool_size < 0) { + LOG(1, "getting pool size from replica %u failed", repn); + pool_size = (ssize_t)set_src->poolsize; + } + + size_t len = (size_t)pool_size - POOL_HDR_SIZE - + replica_get_part_data_len(set_src, repn, 0); + void *src = PART(REP(set_src, repn), 1)->addr; + void *dst = PART(REP(set_dst, repn), 1)->addr; + size_t count = len / POOL_HDR_SIZE; + while (count-- > 0) { + pmem_memcpy_persist(dst, src, POOL_HDR_SIZE); + src = ADDR_SUM(src, POOL_HDR_SIZE); + dst = ADDR_SUM(dst, POOL_HDR_SIZE); + } +} + +/* + * copy_replica_data_bw -- (internal) copy data between replicas of two + * poolsets, starting from the end of the pool + */ +static void +copy_replica_data_bw(struct pool_set *set_dst, struct pool_set *set_src, + unsigned repn) +{ + LOG(3, "set_in %p, set_out %p, repn %u", set_src, set_dst, repn); + ssize_t pool_size = replica_get_pool_size(set_src, repn); + if (pool_size < 0) { + LOG(1, "getting pool size from replica %u failed", repn); + pool_size = (ssize_t)set_src->poolsize; + } + 
+ size_t len = (size_t)pool_size - POOL_HDR_SIZE - + replica_get_part_data_len(set_src, repn, 0); + size_t count = len / POOL_HDR_SIZE; + void *src = ADDR_SUM(PART(REP(set_src, repn), 1)->addr, len); + void *dst = ADDR_SUM(PART(REP(set_dst, repn), 1)->addr, len); + while (count-- > 0) { + src = ADDR_SUM(src, -(ssize_t)POOL_HDR_SIZE); + dst = ADDR_SUM(dst, -(ssize_t)POOL_HDR_SIZE); + pmem_memcpy_persist(dst, src, POOL_HDR_SIZE); + } +} + +/* + * create_missing_headers -- (internal) create headers for all parts but the + * first one + */ +static int +create_missing_headers(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_hdr *src_hdr = HDR(REP(set, repn), 0); + for (unsigned p = 1; p < set->replica[repn]->nhdrs; ++p) { + struct pool_attr attr; + util_pool_hdr2attr(&attr, src_hdr); + attr.features.incompat &= (uint32_t)(~POOL_FEAT_SINGLEHDR); + if (util_header_create(set, repn, p, &attr, 1) != 0) { + LOG(1, "part headers create failed for" + " replica %u part %u", repn, p); + errno = EINVAL; + return -1; + } + } + return 0; +} + +/* + * update_replica_header -- (internal) update field values in the first header + * in the replica + */ +static void +update_replica_header(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_replica *rep = REP(set, repn); + struct pool_set_part *part = PART(REP(set, repn), 0); + struct pool_hdr *hdr = (struct pool_hdr *)part->hdr; + if (set->options & OPTION_SINGLEHDR) { + hdr->features.incompat |= POOL_FEAT_SINGLEHDR; + memcpy(hdr->next_part_uuid, hdr->uuid, POOL_HDR_UUID_LEN); + memcpy(hdr->prev_part_uuid, hdr->uuid, POOL_HDR_UUID_LEN); + } else { + hdr->features.incompat &= (uint32_t)(~POOL_FEAT_SINGLEHDR); + + } + util_checksum(hdr, sizeof(*hdr), &hdr->checksum, 1, + POOL_HDR_CSUM_END_OFF(hdr)); + util_persist_auto(rep->is_pmem, hdr, sizeof(*hdr)); +} + +/* + * fill_replica_struct_uuids -- (internal) gather all uuids required for the + * replica in the helper structure + */ +static int +fill_replica_struct_uuids(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_replica *rep = REP(set, repn); + memcpy(PART(rep, 0)->uuid, HDR(rep, 0)->uuid, POOL_HDR_UUID_LEN); + for (unsigned p = 1; p < rep->nhdrs; ++p) { + if (util_uuid_generate(rep->part[p].uuid) < 0) { + ERR("cannot generate part UUID"); + errno = EINVAL; + return -1; + } + } + return 0; +} + +/* + * update_uuids -- (internal) update uuids in all headers in the replica + */ +static void +update_uuids(struct pool_set *set, unsigned repn) +{ + LOG(3, "set %p, repn %u", set, repn); + struct pool_replica *rep = REP(set, repn); + struct pool_hdr *hdr0 = HDR(rep, 0); + for (unsigned p = 0; p < rep->nhdrs; ++p) { + struct pool_hdr *hdrp = HDR(rep, p); + memcpy(hdrp->next_part_uuid, PARTN(rep, p)->uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->prev_part_uuid, PARTP(rep, p)->uuid, + POOL_HDR_UUID_LEN); + + /* Avoid calling memcpy() on identical regions */ + if (p != 0) { + memcpy(hdrp->next_repl_uuid, hdr0->next_repl_uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->prev_repl_uuid, hdr0->prev_repl_uuid, + POOL_HDR_UUID_LEN); + memcpy(hdrp->poolset_uuid, hdr0->poolset_uuid, + POOL_HDR_UUID_LEN); + } + + util_checksum(hdrp, sizeof(*hdrp), &hdrp->checksum, 1, + POOL_HDR_CSUM_END_OFF(hdrp)); + util_persist(PART(rep, p)->is_dev_dax, hdrp, sizeof(*hdrp)); + } +} + +/* + * copy_part_fds -- (internal) copy poolset part file descriptors between + * two poolsets + */ +static void +copy_part_fds(struct 
pool_set *set_dst, struct pool_set *set_src) +{ + ASSERTeq(set_src->nreplicas, set_dst->nreplicas); + for (unsigned r = 0; r < set_dst->nreplicas; ++r) { + ASSERTeq(REP(set_src, r)->nparts, REP(set_dst, r)->nparts); + for (unsigned p = 0; p < REP(set_dst, r)->nparts; ++p) { + PART(REP(set_dst, r), p)->fd = + PART(REP(set_src, r), p)->fd; + } + } + +} + +/* + * remove_hdrs_replica -- (internal) remove headers from the replica + */ +static int +remove_hdrs_replica(struct pool_set *set_in, struct pool_set *set_out, + unsigned repn) +{ + LOG(3, "set %p, repn %u", set_in, repn); + int ret = 0; + + /* open all part files of the input replica */ + if (replica_open_replica_part_files(set_in, repn)) { + LOG(1, "opening replica %u, part files failed", repn); + ret = -1; + goto out; + } + + /* share part file descriptors between poolset structures */ + copy_part_fds(set_out, set_in); + + /* map the whole input replica */ + if (util_replica_open(set_in, repn, MAP_SHARED)) { + LOG(1, "opening input replica failed: replica %u", repn); + ret = -1; + goto out_close; + } + + /* map the whole output replica */ + if (util_replica_open(set_out, repn, MAP_SHARED)) { + LOG(1, "opening output replica failed: replica %u", repn); + ret = -1; + goto out_unmap_in; + } + + /* move data between the two mappings of the replica */ + if (REP(set_in, repn)->nparts > 1) + copy_replica_data_fw(set_out, set_in, repn); + + /* make changes to the first part's header */ + update_replica_header(set_out, repn); + + util_replica_close(set_out, repn); +out_unmap_in: + util_replica_close(set_in, repn); +out_close: + util_replica_fdclose(REP(set_in, repn)); +out: + return ret; +} + +/* + * add_hdrs_replica -- (internal) add lacking headers to the replica + * + * when the operation fails and returns -1, the replica remains untouched + */ +static int +add_hdrs_replica(struct pool_set *set_in, struct pool_set *set_out, + unsigned repn) +{ + LOG(3, "set %p, repn %u", set_in, repn); + int ret = 0; + + /* open all part files of the input replica */ + if (replica_open_replica_part_files(set_in, repn)) { + LOG(1, "opening replica %u, part files failed", repn); + ret = -1; + goto out; + } + + /* share part file descriptors between poolset structures */ + copy_part_fds(set_out, set_in); + + /* map the whole input replica */ + if (util_replica_open(set_in, repn, MAP_SHARED)) { + LOG(1, "opening input replica failed: replica %u", repn); + ret = -1; + goto out_close; + } + + /* map the whole output replica */ + if (util_replica_open(set_out, repn, MAP_SHARED)) { + LOG(1, "opening output replica failed: replica %u", repn); + ret = -1; + goto out_unmap_in; + } + + /* generate new uuids for lacking headers */ + if (fill_replica_struct_uuids(set_out, repn)) { + LOG(1, "generating lacking uuids for parts failed: replica %u", + repn); + ret = -1; + goto out_unmap_out; + } + + /* copy data between the two mappings of the replica */ + if (REP(set_in, repn)->nparts > 1) + copy_replica_data_bw(set_out, set_in, repn); + + /* create the missing headers */ + if (create_missing_headers(set_out, repn)) { + LOG(1, "creating lacking headers failed: replica %u", repn); + /* + * copy the data back, so we could fall back to the original + * state + */ + if (REP(set_in, repn)->nparts > 1) + copy_replica_data_fw(set_in, set_out, repn); + ret = -1; + goto out_unmap_out; + } + + /* make changes to the first part's header */ + update_replica_header(set_out, repn); + + /* store new uuids in all headers and update linkage in the replica */ + update_uuids(set_out, repn); + 
+out_unmap_out:
+	util_replica_close(set_out, repn);
+out_unmap_in:
+	util_replica_close(set_in, repn);
+out_close:
+	util_replica_fdclose(REP(set_in, repn));
+out:
+	return ret;
+}
+
+/*
+ * remove_hdrs -- (internal) transform a poolset without the SINGLEHDR option
+ *                (with headers) into a poolset with the SINGLEHDR option
+ *                (without headers)
+ */
+static int
+remove_hdrs(struct pool_set *set_in, struct pool_set *set_out,
+		struct poolset_health_status *set_in_hs, unsigned flags)
+{
+	LOG(3, "set_in %p, set_out %p, set_in_hs %p, flags %u",
+			set_in, set_out, set_in_hs, flags);
+	for (unsigned r = 0; r < set_in->nreplicas; ++r) {
+		if (remove_hdrs_replica(set_in, set_out, r)) {
+			LOG(1, "removing headers from replica %u failed", r);
+			/* mark all previous replicas as damaged */
+			while (--r < set_in->nreplicas)
+				REP_HEALTH(set_in_hs, r)->flags |= IS_BROKEN;
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * add_hdrs -- (internal) transform a poolset with the SINGLEHDR option
+ *             (without headers) into a poolset without the SINGLEHDR option
+ *             (with headers)
+ */
+static int
+add_hdrs(struct pool_set *set_in, struct pool_set *set_out,
+		struct poolset_health_status *set_in_hs,
+		unsigned flags)
+{
+	LOG(3, "set_in %p, set_out %p, set_in_hs %p, flags %u",
+			set_in, set_out, set_in_hs, flags);
+	for (unsigned r = 0; r < set_in->nreplicas; ++r) {
+		if (add_hdrs_replica(set_in, set_out, r)) {
+			LOG(1, "adding headers to replica %u failed", r);
+			/* mark all previous replicas as damaged */
+			while (--r < set_in->nreplicas)
+				REP_HEALTH(set_in_hs, r)->flags |= IS_BROKEN;
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * replica_transform -- transform one poolset into another
+ */
+int
+replica_transform(struct pool_set *set_in, struct pool_set *set_out,
+		unsigned flags)
+{
+	LOG(3, "set_in %p, set_out %p", set_in, set_out);
+
+	int ret = 0;
+	/* validate user arguments */
+	if (validate_args(set_in, set_out))
+		return -1;
+
+	/* check if the source poolset is healthy */
+	struct poolset_health_status *set_in_hs = NULL;
+	if (replica_check_poolset_health(set_in, &set_in_hs,
+			0 /* called from transform */, flags)) {
+		ERR("source poolset health check failed");
+		return -1;
+	}
+
+	if (!replica_is_poolset_healthy(set_in_hs)) {
+		ERR("source poolset is broken");
+		ret = -1;
+		errno = EINVAL;
+		goto free_hs_in;
+	}
+
+	/* copy the value of the ignore_sds flag from the input poolset */
+	set_out->ignore_sds = set_in->ignore_sds;
+
+	struct poolset_health_status *set_out_hs = NULL;
+	if (replica_create_poolset_health_status(set_out, &set_out_hs)) {
+		ERR("creating poolset health status failed");
+		ret = -1;
+		goto free_hs_in;
+	}
+
+	/* check if the poolsets are transformable */
+	struct poolset_compare_status *set_in_cs = NULL;
+	struct poolset_compare_status *set_out_cs = NULL;
+	if (compare_poolsets(set_in, set_out, &set_in_cs, &set_out_cs)) {
+		ERR("comparing poolsets failed");
+		ret = -1;
+		goto free_hs_out;
+	}
+
+	enum transform_op operation = identify_transform_operation(set_in_cs,
+			set_out_cs, set_in_hs, set_out_hs);
+
+	if (operation == NOT_TRANSFORMABLE) {
+		LOG(1, "poolsets are not transformable");
+		ret = -1;
+		errno = EINVAL;
+		goto free_cs;
+	}
+
+	if (operation == RM_HDRS) {
+		if (!is_dry_run(flags) &&
+				remove_hdrs(set_in, set_out, set_in_hs,
+						flags)) {
+			ERR("removing headers failed; falling back to the "
+				"input poolset");
+			if (replica_sync(set_in, set_in_hs,
+					flags | IS_TRANSFORMED)) {
+				LOG(1, "falling back to the input poolset "
+					"failed");
+			} else {
+				LOG(1, "falling back to
the input poolset " + "succeeded"); + } + ret = -1; + } + goto free_cs; + } + + if (operation == ADD_HDRS) { + if (!is_dry_run(flags) && + add_hdrs(set_in, set_out, set_in_hs, flags)) { + ERR("adding headers failed; falling back to the " + "input poolset"); + if (replica_sync(set_in, set_in_hs, + flags | IS_TRANSFORMED)) { + LOG(1, "falling back to the input poolset " + "failed"); + } else { + LOG(1, "falling back to the input poolset " + "succeeded"); + } + ret = -1; + } + goto free_cs; + } + + if (operation == ADD_REPLICAS) { + /* + * check if any of the parts that are to be added already exists + */ + if (do_added_parts_exist(set_out, set_out_hs)) { + ERR("some parts being added already exist"); + ret = -1; + errno = EINVAL; + goto free_cs; + } + } + + /* signal that sync is called by transform */ + if (replica_sync(set_out, set_out_hs, flags | IS_TRANSFORMED)) { + ret = -1; + goto free_cs; + } + + if (operation == RM_REPLICAS) { + if (!is_dry_run(flags) && delete_replicas(set_in, set_in_cs)) + ret = -1; + } + +free_cs: + Free(set_in_cs); + Free(set_out_cs); +free_hs_out: + replica_free_poolset_health_status(set_out_hs); +free_hs_in: + replica_free_poolset_health_status(set_in_hs); + return ret; +} diff --git a/src/pmdk/src/librpmem/Makefile b/src/pmdk/src/librpmem/Makefile new file mode 100644 index 000000000..eeb1ee0cc --- /dev/null +++ b/src/pmdk/src/librpmem/Makefile @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# src/librpmem/Makefile -- Makefile for librpmem +# +include ../common.inc + +vpath %.c ../rpmem_common + +ifeq ($(BUILD_RPMEM),y) +LIBRARY_NAME = rpmem +LIBRARY_SO_VERSION = 1 +LIBRARY_VERSION = 0.0 +SOURCE = $(COMMON)/alloc.c\ + $(COMMON)/os_posix.c\ + $(COMMON)/os_thread_posix.c\ + $(COMMON)/out.c\ + $(COMMON)/util.c\ + $(COMMON)/util_posix.c\ + librpmem.c\ + rpmem.c\ + rpmem_obc.c\ + rpmem_cmd.c\ + rpmem_ssh.c\ + rpmem_common.c\ + rpmem_util.c\ + rpmem_fip_common.c\ + rpmem_fip.c + +else +$(info NOTE: Skipping librpmem because $(BUILD_RPMEM_INFO)) +endif + +include ../Makefile.inc + +ifeq ($(BUILD_RPMEM),y) +LIBS += -pthread +LIBS += $(LIBFABRIC_LIBS) +CFLAGS += $(LIBFABRIC_CFLAGS) +CFLAGS += -I. -I../rpmem_common +CFLAGS += -DRPMEMC_LOG_RPMEM +endif diff --git a/src/pmdk/src/librpmem/README b/src/pmdk/src/librpmem/README new file mode 100644 index 000000000..a68cb7232 --- /dev/null +++ b/src/pmdk/src/librpmem/README @@ -0,0 +1,7 @@ +This directory contains a librpmem library which provides +remote access to persistent memory over RDMA. + +** DEPENDENCIES: ** +The librpmem library depends on libfabric (version >= 1.4.2) library: + +https://github.com/ofiwg/libfabric diff --git a/src/pmdk/src/librpmem/librpmem.c b/src/pmdk/src/librpmem/librpmem.c new file mode 100644 index 000000000..0a0c5489e --- /dev/null +++ b/src/pmdk/src/librpmem/librpmem.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * librpmem.c -- entry points for librpmem + */ + +#include +#include + +#include "librpmem.h" + +#include "rpmem.h" +#include "rpmem_common.h" +#include "rpmem_util.h" +#include "rpmem_fip.h" +#include "util.h" +#include "out.h" + +/* + * librpmem_init -- load-time initialization for librpmem + * + * Called automatically by the run-time loader. 
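+ * It is registered with the ATTR_CONSTRUCTOR attribute below, so an
+ * application linked with librpmem never needs to call it directly.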
+ */ +ATTR_CONSTRUCTOR +void +librpmem_init(void) +{ + util_init(); + out_init(RPMEM_LOG_PREFIX, RPMEM_LOG_LEVEL_VAR, RPMEM_LOG_FILE_VAR, + RPMEM_MAJOR_VERSION, RPMEM_MINOR_VERSION); + LOG(3, NULL); + rpmem_util_cmds_init(); + + rpmem_util_get_env_max_nlanes(&Rpmem_max_nlanes); + rpmem_util_get_env_wq_size(&Rpmem_wq_size); +} + +/* + * librpmem_fini -- librpmem cleanup routine + * + * Called automatically when the process terminates. + */ +ATTR_DESTRUCTOR +void +librpmem_fini(void) +{ + LOG(3, NULL); + rpmem_util_cmds_fini(); + out_fini(); +} + +/* + * rpmem_check_version -- see if library meets application version requirements + */ +const char * +rpmem_check_version(unsigned major_required, unsigned minor_required) +{ + LOG(3, "major_required %u minor_required %u", + major_required, minor_required); + + if (major_required != RPMEM_MAJOR_VERSION) { + ERR("librpmem major version mismatch (need %u, found %u)", + major_required, RPMEM_MAJOR_VERSION); + return out_get_errormsg(); + } + + if (minor_required > RPMEM_MINOR_VERSION) { + ERR("librpmem minor version mismatch (need %u, found %u)", + minor_required, RPMEM_MINOR_VERSION); + return out_get_errormsg(); + } + + return NULL; +} + +/* + * rpmem_errormsg -- return the last error message + */ +const char * +rpmem_errormsg(void) +{ + return out_get_errormsg(); +} diff --git a/src/pmdk/src/librpmem/librpmem.link.in b/src/pmdk/src/librpmem/librpmem.link.in new file mode 100644 index 000000000..779f28898 --- /dev/null +++ b/src/pmdk/src/librpmem/librpmem.link.in @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2019, Intel Corporation +# +# +# src/librpmem/librpmem.link -- linker link file for librpmem +# +LIBRPMEM_1.0 { + global: + rpmem_create; + rpmem_open; + rpmem_set_attr; + rpmem_close; + rpmem_remove; + rpmem_flush; + rpmem_drain; + rpmem_persist; + rpmem_deep_persist; + rpmem_read; + rpmem_check_version; + rpmem_errormsg; + fault_injection; + local: + *; +}; diff --git a/src/pmdk/src/librpmem/rpmem.c b/src/pmdk/src/librpmem/rpmem.c new file mode 100644 index 000000000..f2181d3c1 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem.c @@ -0,0 +1,914 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem.c -- main source file for librpmem + */ +#include +#include +#include +#include +#include +#include + +#include "librpmem.h" +#include "out.h" +#include "os.h" +#include "os_thread.h" +#include "util.h" +#include "rpmem.h" +#include "rpmem_common.h" +#include "rpmem_util.h" +#include "rpmem_obc.h" +#include "rpmem_fip.h" +#include "rpmem_fip_common.h" +#include "rpmem_ssh.h" +#include "rpmem_proto.h" + +#define RPMEM_REMOVE_FLAGS_ALL (\ + RPMEM_REMOVE_FORCE | \ + RPMEM_REMOVE_POOL_SET \ +) + +#define RPMEM_CHECK_FORK() do {\ +if (Rpmem_fork_unsafe) {\ + ERR("libfabric is initialized without fork() support");\ + return NULL;\ +}\ +} while (0) + +static os_once_t Rpmem_fork_unsafe_key_once = OS_ONCE_INIT; + +/* + * rpmem_pool -- remote pool context + */ +struct rpmem_pool { + struct rpmem_obc *obc; /* out-of-band connection handle */ + struct rpmem_fip *fip; /* fabric provider handle */ + struct rpmem_target_info *info; + char fip_service[NI_MAXSERV]; + enum rpmem_provider provider; + size_t max_wq_size; /* max WQ size supported by provider */ + os_thread_t monitor; + int closing; + int no_headers; + /* + * Last error code, need to be volatile because it can + * be accessed by multiple threads. 
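+	 * It is written by the monitor thread and by failing lane
+	 * operations, and it is checked on every subsequent API call.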
+	 */
+	volatile int error;
+};
+
+/*
+ * env_get_bool -- parse the value of the specified environment variable as
+ *                 a bool
+ *
+ * Return values:
+ *  0 - defined, valp has the value
+ *  1 - not defined
+ * -1 - parsing error
+ */
+static int
+env_get_bool(const char *name, int *valp)
+{
+	LOG(3, "name %s, valp %p", name, valp);
+
+	const char *env = os_getenv(name);
+	if (!env)
+		return 1;
+
+	char *endptr;
+	errno = 0;
+	long val = strtol(env, &endptr, 10);
+	if (*endptr != '\0' || errno)
+		goto err;
+
+	if (val < INT_MIN || val > INT_MAX)
+		goto err;
+
+	*valp = (int)val;
+
+	return 0;
+err:
+	RPMEM_LOG(ERR, "!parsing '%s' environment variable failed", name);
+	return -1;
+}
+
+/*
+ * rpmem_set_provider -- set the provider based on the node address and
+ *                       environment
+ */
+static int
+rpmem_set_provider(RPMEMpool *rpp, const char *node)
+{
+	LOG(3, "rpp %p, node %s", rpp, node);
+
+	struct rpmem_fip_probe probe;
+	enum rpmem_provider prov = RPMEM_PROV_UNKNOWN;
+
+	int ret = rpmem_fip_probe_get(node, &probe);
+	if (ret)
+		return -1;
+
+	/*
+	 * The sockets provider can be used only if the specified environment
+	 * variable is set to 1.
+	 */
+	if (rpmem_fip_probe(probe, RPMEM_PROV_LIBFABRIC_SOCKETS)) {
+		int enable;
+		ret = env_get_bool(RPMEM_PROV_SOCKET_ENV, &enable);
+		if (!ret && enable) {
+			prov = RPMEM_PROV_LIBFABRIC_SOCKETS;
+		}
+	}
+
+	/*
+	 * The verbs provider is enabled by default. If the appropriate
+	 * environment variable is set to 0, the verbs provider is disabled.
+	 *
+	 * The verbs provider has a higher priority than the sockets provider.
+	 */
+	if (rpmem_fip_probe(probe, RPMEM_PROV_LIBFABRIC_VERBS)) {
+		int enable;
+		ret = env_get_bool(RPMEM_PROV_VERBS_ENV, &enable);
+		if (ret == 1 || (!ret && enable))
+			prov = RPMEM_PROV_LIBFABRIC_VERBS;
+	}
+
+	if (prov == RPMEM_PROV_UNKNOWN)
+		return -1;
+
+	RPMEM_ASSERT(prov < MAX_RPMEM_PROV);
+	rpp->max_wq_size = probe.max_wq_size[prov];
+	rpp->provider = prov;
+
+	return 0;
+}
+
+/*
+ * rpmem_monitor_thread -- connection monitor background thread
+ */
+static void *
+rpmem_monitor_thread(void *arg)
+{
+	LOG(3, "arg %p", arg);
+
+	RPMEMpool *rpp = arg;
+
+	int ret = rpmem_obc_monitor(rpp->obc, 0);
+	if (ret && !rpp->closing) {
+		RPMEM_LOG(ERR, "unexpected data received");
+		rpp->error = errno;
+	}
+
+	return NULL;
+}
+
+/*
+ * rpmem_common_init -- common routine for initialization
+ */
+static RPMEMpool *
+rpmem_common_init(const char *target)
+{
+	LOG(3, "target %s", target);
+
+	int ret;
+
+	RPMEMpool *rpp = calloc(1, sizeof(*rpp));
+	if (!rpp) {
+		ERR("!calloc");
+		goto err_malloc_rpmem;
+	}
+
+	rpp->info = rpmem_target_parse(target);
+	if (!rpp->info) {
+		ERR("!parsing target node address failed");
+		goto err_target_split;
+	}
+
+	ret = rpmem_set_provider(rpp, rpp->info->node);
+	if (ret) {
+		errno = ENOMEDIUM;
+		ERR("cannot find provider");
+		goto err_provider;
+	}
+
+	RPMEM_LOG(NOTICE, "provider: %s", rpmem_provider_to_str(rpp->provider));
+
+	if (rpp->provider == RPMEM_PROV_LIBFABRIC_SOCKETS) {
+		/* libfabric's sockets provider does not support IPv6 */
+		RPMEM_LOG(NOTICE, "forcing using IPv4");
+		rpp->info->flags |= RPMEM_FLAGS_USE_IPV4;
+	}
+
+	rpp->obc = rpmem_obc_init();
+	if (!rpp->obc) {
+		ERR("!out-of-band connection initialization failed");
+		goto err_obc_init;
+	}
+
+	RPMEM_LOG(INFO, "establishing out-of-band connection");
+
+	ret = rpmem_obc_connect(rpp->obc, rpp->info);
+	if (ret) {
+		ERR("!out-of-band connection failed");
+		goto err_obc_connect;
+	}
+
+	RPMEM_LOG(NOTICE, "out-of-band connection established");
+
+	return rpp;
+err_obc_connect:
+	rpmem_obc_fini(rpp->obc);
+err_obc_init:
+err_provider:
+	rpmem_target_free(rpp->info);
+err_target_split:
+	free(rpp);
+err_malloc_rpmem:
+	return NULL;
+}
+
+/*
+ * rpmem_common_fini -- common routine for deinitialization
+ */
+static void
+rpmem_common_fini(RPMEMpool *rpp, int join)
+{
+	LOG(3, "rpp %p, join %d", rpp, join);
+
+	rpmem_obc_disconnect(rpp->obc);
+
+	if (join) {
+		int ret = os_thread_join(&rpp->monitor, NULL);
+		if (ret) {
+			errno = ret;
+			ERR("joining monitor thread failed");
+		}
+	}
+
+	rpmem_obc_fini(rpp->obc);
+
+	rpmem_target_free(rpp->info);
+	free(rpp);
+}
+
+/*
+ * rpmem_common_fip_init -- common routine for initializing the fabric
+ *                          provider
+ */
+static int
+rpmem_common_fip_init(RPMEMpool *rpp, struct rpmem_req_attr *req,
+	struct rpmem_resp_attr *resp, void *pool_addr, size_t pool_size,
+	unsigned *nlanes, size_t buff_size)
+{
+	LOG(3, "rpp %p, req %p, resp %p, pool_addr %p, pool_size %zu, nlanes "
+			"%p", rpp, req, resp, pool_addr, pool_size, nlanes);
+
+	int ret;
+
+	struct rpmem_fip_attr fip_attr = {
+		.provider = req->provider,
+		.max_wq_size = rpp->max_wq_size,
+		.persist_method = resp->persist_method,
+		.laddr = pool_addr,
+		.size = pool_size,
+		.buff_size = buff_size,
+		.nlanes = min(*nlanes, resp->nlanes),
+		.raddr = (void *)resp->raddr,
+		.rkey = resp->rkey,
+	};
+
+	ret = util_snprintf(rpp->fip_service, sizeof(rpp->fip_service),
+			"%u", resp->port);
+	if (ret < 0) {
+		ERR("!snprintf");
+		goto err_port;
+	}
+
+	rpp->fip = rpmem_fip_init(rpp->info->node, rpp->fip_service,
+			&fip_attr, nlanes);
+	if (!rpp->fip) {
+		ERR("!in-band connection initialization failed");
+		ret = -1;
+		goto err_fip_init;
+	}
+
+	RPMEM_LOG(NOTICE, "final nlanes: %u", *nlanes);
+	RPMEM_LOG(INFO, "establishing in-band connection");
+
+	ret = rpmem_fip_connect(rpp->fip);
+	if (ret) {
+		ERR("!establishing in-band connection failed");
+		goto err_fip_connect;
+	}
+
+	RPMEM_LOG(NOTICE, "in-band connection established");
+
+	return 0;
+err_fip_connect:
+	rpmem_fip_fini(rpp->fip);
+err_fip_init:
+err_port:
+	return ret;
+}
+
+/*
+ * rpmem_common_fip_fini -- common routine for deinitializing the fabric
+ *                          provider
+ */
+static void
+rpmem_common_fip_fini(RPMEMpool *rpp)
+{
+	LOG(3, "rpp %p", rpp);
+
+	RPMEM_LOG(INFO, "closing in-band connection");
+
+	rpmem_fip_fini(rpp->fip);
+
+	RPMEM_LOG(NOTICE, "in-band connection closed");
+}
+
+/*
+ * rpmem_log_args -- log input arguments for rpmem_create and rpmem_open
+ */
+static void
+rpmem_log_args(const char *req, const char *target, const char *pool_set_name,
+	void *pool_addr, size_t pool_size, unsigned nlanes)
+{
+	LOG(3, "req %s, target %s, pool_set_name %s, pool_addr %p, pool_size "
+			"%zu, nlanes %d", req, target, pool_set_name,
+			pool_addr, pool_size, nlanes);
+
+	RPMEM_LOG(NOTICE, "%s request:", req);
+	RPMEM_LOG(NOTICE, "\ttarget: %s", target);
+	RPMEM_LOG(NOTICE, "\tpool set: %s", pool_set_name);
+	RPMEM_LOG(INFO, "\tpool addr: %p", pool_addr);
+	RPMEM_LOG(INFO, "\tpool size: %lu", pool_size);
+	RPMEM_LOG(NOTICE, "\tnlanes: %u", nlanes);
+}
+
+/*
+ * rpmem_log_resp -- log response attributes
+ */
+static void
+rpmem_log_resp(const char *req, const struct rpmem_resp_attr *resp)
+{
+	LOG(3, "req %s, resp %p", req, resp);
+
+	RPMEM_LOG(NOTICE, "%s request response:", req);
+	RPMEM_LOG(NOTICE, "\tnlanes: %u", resp->nlanes);
+	RPMEM_LOG(NOTICE, "\tport: %u", resp->port);
+	RPMEM_LOG(NOTICE, "\tpersist method: %s",
+			rpmem_persist_method_to_str(resp->persist_method));
+	RPMEM_LOG(NOTICE, "\tremote addr: 0x%" PRIx64, resp->raddr);
+}
+
+/*
+ * rpmem_check_args -- validate the user's arguments
+ */
+static int
+rpmem_check_args(void *pool_addr, size_t pool_size, unsigned *nlanes)
+{
+	LOG(3, "pool_addr %p, pool_size %zu, nlanes %p", pool_addr, pool_size,
+			nlanes);
+
+	if (!pool_addr) {
+		errno = EINVAL;
+		ERR("invalid pool address");
+		return -1;
+	}
+
+	if (!IS_PAGE_ALIGNED((uintptr_t)pool_addr)) {
+		errno = EINVAL;
+		ERR("Pool address must be aligned to page size (%llu)",
+				Pagesize);
+		return -1;
+	}
+
+	if (!IS_PAGE_ALIGNED(pool_size)) {
+		errno = EINVAL;
+		ERR("Pool size must be aligned to page size (%llu)",
+				Pagesize);
+		return -1;
+	}
+
+	if (!pool_size) {
+		errno = EINVAL;
+		ERR("invalid pool size");
+		return -1;
+	}
+
+	if (!nlanes) {
+		errno = EINVAL;
+		ERR("lanes pointer cannot be NULL");
+		return -1;
+	}
+
+	if (!(*nlanes)) {
+		errno = EINVAL;
+		ERR("number of lanes must be positive");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmem_create -- create a remote pool on the target node
+ *
+ * target -- target node in the format [<user>@]<hostname>[:<port>]
+ * pool_set_name -- remote pool set name
+ * pool_addr -- local pool memory address which will be replicated
+ * pool_size -- required pool size
+ * nlanes -- number of lanes
+ * create_attr -- pool attributes used for creating the pool on the remote
+ *                node
+ */
+RPMEMpool *
+rpmem_create(const char *target, const char *pool_set_name,
+	void *pool_addr, size_t pool_size, unsigned *nlanes,
+	const struct rpmem_pool_attr *create_attr)
+{
+	LOG(3, "target %s, pool_set_name %s, pool_addr %p, pool_size %zu, "
+			"nlanes %p, create_attr %p", target, pool_set_name,
+			pool_addr, pool_size, nlanes, create_attr);
+
+	os_once(&Rpmem_fork_unsafe_key_once, &rpmem_fip_probe_fork_safety);
+	RPMEM_CHECK_FORK();
+
+	rpmem_log_args("create", target, pool_set_name,
+			pool_addr, pool_size, *nlanes);
+
+	if (rpmem_check_args(pool_addr, pool_size, nlanes))
+		return NULL;
+
+	RPMEMpool *rpp = rpmem_common_init(target);
+	if (!rpp)
+		goto err_common_init;
+
+	size_t buff_size = RPMEM_DEF_BUFF_SIZE;
+	struct rpmem_req_attr req = {
+		.pool_size = pool_size,
+		.nlanes = min(*nlanes, Rpmem_max_nlanes),
+		.provider = rpp->provider,
+		.pool_desc = pool_set_name,
+		.buff_size = buff_size,
+	};
+
+	struct rpmem_resp_attr resp;
+	int ret = rpmem_obc_create(rpp->obc, &req, &resp, create_attr);
+	if (ret) {
+		RPMEM_LOG(ERR, "!create request failed");
+		goto err_obc_create;
+	}
+
+	if (create_attr == NULL ||
+			util_is_zeroed(create_attr, sizeof(*create_attr)))
+		rpp->no_headers = 1;
+
+	rpmem_log_resp("create", &resp);
+
+	ret = rpmem_common_fip_init(rpp, &req, &resp,
+			pool_addr, pool_size, nlanes, buff_size);
+	if (ret)
+		goto err_fip_init;
+
+	ret = os_thread_create(&rpp->monitor, NULL, rpmem_monitor_thread, rpp);
+	if (ret) {
+		errno = ret;
+		ERR("!starting monitor thread");
+		goto err_monitor;
+	}
+
+	return rpp;
+err_monitor:
+	rpmem_common_fip_fini(rpp);
+err_fip_init:
+	rpmem_obc_close(rpp->obc, RPMEM_CLOSE_FLAGS_REMOVE);
+err_obc_create:
+	rpmem_common_fini(rpp, 0);
+err_common_init:
+	return NULL;
+}
+
+/*
+ * rpmem_open -- open a remote pool on the target node
+ *
+ * target -- target node in the format [<user>@]<hostname>[:<port>]
+ * pool_set_name -- remote pool set name
+ * pool_addr -- local pool memory address which will be replicated
+ * pool_size -- required pool size
+ * nlanes -- number of lanes
+ * open_attr -- pool attributes, received from the remote host
+ */
+RPMEMpool *
+rpmem_open(const char *target, const char *pool_set_name,
+	void *pool_addr, size_t pool_size, unsigned *nlanes,
+	struct rpmem_pool_attr *open_attr)
+{
+	LOG(3, "target %s, pool_set_name %s, pool_addr %p, pool_size %zu, "
+			"nlanes %p, open_attr %p", target, pool_set_name,
+			pool_addr, pool_size, nlanes, open_attr);
+
+	os_once(&Rpmem_fork_unsafe_key_once, &rpmem_fip_probe_fork_safety);
+	RPMEM_CHECK_FORK();
+
+	rpmem_log_args("open", target, pool_set_name,
+			pool_addr, pool_size, *nlanes);
+
+	if (rpmem_check_args(pool_addr, pool_size, nlanes))
+		return NULL;
+
+	RPMEMpool *rpp = rpmem_common_init(target);
+	if (!rpp)
+		goto err_common_init;
+
+	size_t buff_size = RPMEM_DEF_BUFF_SIZE;
+	struct rpmem_req_attr req = {
+		.pool_size = pool_size,
+		.nlanes = min(*nlanes, Rpmem_max_nlanes),
+		.provider = rpp->provider,
+		.pool_desc = pool_set_name,
+		.buff_size = buff_size,
+	};
+
+	struct rpmem_resp_attr resp;
+
+	int ret = rpmem_obc_open(rpp->obc, &req, &resp, open_attr);
+	if (ret) {
+		RPMEM_LOG(ERR, "!open request failed");
+		goto err_obc_create;
+	}
+
+	if (open_attr == NULL || util_is_zeroed(open_attr, sizeof(*open_attr)))
+		rpp->no_headers = 1;
+
+	rpmem_log_resp("open", &resp);
+
+	ret = rpmem_common_fip_init(rpp, &req, &resp,
+			pool_addr, pool_size, nlanes, buff_size);
+	if (ret)
+		goto err_fip_init;
+
+	ret = os_thread_create(&rpp->monitor, NULL, rpmem_monitor_thread, rpp);
+	if (ret) {
+		errno = ret;
+		ERR("!starting monitor thread");
+		goto err_monitor;
+	}
+
+	return rpp;
+err_monitor:
+	rpmem_common_fip_fini(rpp);
+err_fip_init:
+	rpmem_obc_close(rpp->obc, 0);
+err_obc_create:
+	rpmem_common_fini(rpp, 0);
+err_common_init:
+	return NULL;
+}
+
+/*
+ * rpmem_close -- close the remote pool on the target node
+ */
+int
+rpmem_close(RPMEMpool *rpp)
+{
+	LOG(3, "rpp %p", rpp);
+
+	RPMEM_LOG(INFO, "closing out-of-band connection");
+
+	util_fetch_and_or32(&rpp->closing, 1);
+
+	rpmem_fip_close(rpp->fip);
+
+	int ret = rpmem_obc_close(rpp->obc, 0);
+	if (ret)
+		ERR("!close request failed");
+
+	RPMEM_LOG(NOTICE, "out-of-band connection closed");
+
+	rpmem_common_fip_fini(rpp);
+	rpmem_common_fini(rpp, 1);
+
+	return ret;
+}
+
+/*
+ * rpmem_flush -- flush-to-target-node operation
+ *
+ * rpp -- remote pool handle
+ * offset -- offset in the pool
+ * length -- length of the flush operation
+ * lane -- lane number
+ * flags -- additional flags
+ */
+int
+rpmem_flush(RPMEMpool *rpp, size_t offset, size_t length,
+	unsigned lane, unsigned flags)
+{
+	LOG(3, "rpp %p, offset %zu, length %zu, lane %d, flags 0x%x",
+			rpp, offset, length, lane, flags);
+
+	if (unlikely(rpp->error)) {
+		errno = rpp->error;
+		return -1;
+	}
+
+	if (flags & RPMEM_FLUSH_FLAGS_MASK) {
+		ERR("invalid flags (0x%x)", flags);
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (rpp->no_headers == 0 && offset < RPMEM_HDR_SIZE) {
+		ERR("offset (%zu) in pool is less than %d bytes", offset,
+				RPMEM_HDR_SIZE);
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * By default use the RDMA SEND flush mode, which has atomicity
+	 * guarantees. For a relaxed flush use RDMA WRITE.
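+	 *
+	 * A minimal usage sketch (offset, length and lane values are
+	 * illustrative only); a flush is not guaranteed to be durable
+	 * until a drain on the same lane completes:
+	 *
+	 *	rpmem_flush(rpp, offset, length, lane, RPMEM_FLUSH_RELAXED);
+	 *	rpmem_drain(rpp, lane, 0);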
+ */ + unsigned mode = RPMEM_PERSIST_SEND; + if (flags & RPMEM_FLUSH_RELAXED) + mode = RPMEM_FLUSH_WRITE; + + int ret = rpmem_fip_flush(rpp->fip, offset, length, lane, mode); + if (unlikely(ret)) { + LOG(2, "flush operation failed"); + rpp->error = ret; + errno = rpp->error; + return -1; + } + + return 0; +} + +/* + * rpmem_drain -- drain on target node operation + * + * rpp -- remote pool handle + * lane -- lane number + * flags -- additional flags + */ +int +rpmem_drain(RPMEMpool *rpp, unsigned lane, unsigned flags) +{ + LOG(3, "rpp %p, lane %d, flags 0x%x", rpp, lane, flags); + + if (unlikely(rpp->error)) { + errno = rpp->error; + return -1; + } + + if (flags != 0) { + ERR("invalid flags (0x%x)", flags); + errno = EINVAL; + return -1; + } + + int ret = rpmem_fip_drain(rpp->fip, lane); + if (unlikely(ret)) { + LOG(2, "drain operation failed"); + rpp->error = ret; + errno = rpp->error; + return -1; + } + + return 0; +} + +/* + * rpmem_persist -- persist operation on target node + * + * rpp -- remote pool handle + * offset -- offset in pool + * length -- length of persist operation + * lane -- lane number + */ +int +rpmem_persist(RPMEMpool *rpp, size_t offset, size_t length, + unsigned lane, unsigned flags) +{ + LOG(3, "rpp %p, offset %zu, length %zu, lane %d, flags 0x%x", + rpp, offset, length, lane, flags); + + if (unlikely(rpp->error)) { + errno = rpp->error; + return -1; + } + + if (flags & RPMEM_PERSIST_FLAGS_MASK) { + ERR("invalid flags (0x%x)", flags); + errno = EINVAL; + return -1; + } + + if (rpp->no_headers == 0 && offset < RPMEM_HDR_SIZE) { + ERR("offset (%zu) in pool is less than %d bytes", offset, + RPMEM_HDR_SIZE); + errno = EINVAL; + return -1; + } + + /* + * By default use RDMA SEND persist mode which has atomicity + * guarantees. For relaxed persist use RDMA WRITE. 
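+	 *
+	 * For example (hypothetical offsets and lengths), a batch of relaxed
+	 * persists on one lane can be made durable by a single drain on that
+	 * lane:
+	 *
+	 *	rpmem_persist(rpp, off1, len1, lane, RPMEM_PERSIST_RELAXED);
+	 *	rpmem_persist(rpp, off2, len2, lane, RPMEM_PERSIST_RELAXED);
+	 *	rpmem_drain(rpp, lane, 0);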
+	 */
+	unsigned mode = RPMEM_PERSIST_SEND;
+	if (flags & RPMEM_PERSIST_RELAXED)
+		mode = RPMEM_FLUSH_WRITE;
+
+	int ret = rpmem_fip_persist(rpp->fip, offset, length,
+			lane, mode);
+	if (unlikely(ret)) {
+		LOG(2, "persist operation failed");
+		rpp->error = ret;
+		errno = rpp->error;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmem_deep_persist -- deep flush operation on the target node
+ *
+ * rpp -- remote pool handle
+ * offset -- offset in the pool
+ * length -- length of the deep flush operation
+ * lane -- lane number
+ */
+int
+rpmem_deep_persist(RPMEMpool *rpp, size_t offset, size_t length, unsigned lane)
+{
+	LOG(3, "rpp %p, offset %zu, length %zu, lane %d", rpp, offset, length,
+			lane);
+
+	if (unlikely(rpp->error)) {
+		errno = rpp->error;
+		return -1;
+	}
+
+	if (offset < RPMEM_HDR_SIZE) {
+		ERR("offset (%zu) in pool is less than %d bytes", offset,
+				RPMEM_HDR_SIZE);
+		errno = EINVAL;
+		return -1;
+	}
+
+	int ret = rpmem_fip_persist(rpp->fip, offset, length,
+			lane, RPMEM_DEEP_PERSIST);
+	if (unlikely(ret)) {
+		ERR("persist operation failed");
+		rpp->error = ret;
+		errno = rpp->error;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmem_read -- read data from the remote pool
+ *
+ * rpp -- remote pool handle
+ * buff -- output buffer
+ * offset -- offset in the pool
+ * length -- length of the read operation
+ */
+int
+rpmem_read(RPMEMpool *rpp, void *buff, size_t offset,
+	size_t length, unsigned lane)
+{
+	LOG(3, "rpp %p, buff %p, offset %zu, length %zu, lane %d", rpp, buff,
+			offset, length, lane);
+
+	if (unlikely(rpp->error)) {
+		errno = rpp->error;
+		return -1;
+	}
+
+	if (rpp->no_headers == 0 && offset < RPMEM_HDR_SIZE)
+		LOG(1, "reading from pool at offset (%zu) less than %d bytes",
+				offset, RPMEM_HDR_SIZE);
+
+	int ret = rpmem_fip_read(rpp->fip, buff, length, offset, lane);
+	if (unlikely(ret)) {
+		errno = ret;
+		ERR("!read operation failed");
+		rpp->error = ret;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmem_set_attr -- overwrite the pool attributes on the remote node
+ *
+ * rpp -- remote pool handle
+ * attr -- new pool attributes for the pool on the remote node
+ */
+int
+rpmem_set_attr(RPMEMpool *rpp, const struct rpmem_pool_attr *attr)
+{
+	LOG(3, "rpp %p, attr %p", rpp, attr);
+
+	if (unlikely(rpp->error)) {
+		errno = rpp->error;
+		return -1;
+	}
+
+	int ret = rpmem_obc_set_attr(rpp->obc, attr);
+	if (ret) {
+		RPMEM_LOG(ERR, "!set attributes request failed");
+	}
+	return ret;
+}
+
+/*
+ * rpmem_remove -- remove a pool from the remote node
+ *
+ * target -- target node in the format [<user>@]<hostname>[:<port>]
+ * pool_set -- remote pool set name
+ * flags -- bitwise OR of one or more of the following flags:
+ *  - RPMEM_REMOVE_FORCE
+ *  - RPMEM_REMOVE_POOL_SET
+ */
+int
+rpmem_remove(const char *target, const char *pool_set, int flags)
+{
+	LOG(3, "target %s, pool_set %s, flags %d", target, pool_set, flags);
+
+	if (flags & ~(RPMEM_REMOVE_FLAGS_ALL)) {
+		ERR("invalid flags specified");
+		errno = EINVAL;
+		return -1;
+	}
+
+	struct rpmem_target_info *info = rpmem_target_parse(target);
+	if (!info) {
+		ERR("!parsing target node address failed");
+		goto err_target;
+	}
+
+	const char *argv[5];
+	argv[0] = "--remove";
+	argv[1] = pool_set;
+	const char **cur = &argv[2];
+
+	if (flags & RPMEM_REMOVE_FORCE)
+		*cur++ = "--force";
+
+	if (flags & RPMEM_REMOVE_POOL_SET)
+		*cur++ = "--pool-set";
+
+	*cur = NULL;
+
+	struct rpmem_ssh *ssh = rpmem_ssh_execv(info, argv);
+	if (!ssh) {
+		ERR("!executing ssh command failed");
+		goto err_ssh_exec;
+	}
+
+	int ret;
+
+	ret = rpmem_ssh_monitor(ssh, 0);
+	if (ret) {
+		ERR("!waiting for
remote command failed"); + goto err_ssh_monitor; + } + + ret = rpmem_ssh_close(ssh); + if (ret) { + errno = ret; + ERR("remote command failed"); + goto err_ssh_close; + } + + rpmem_target_free(info); + + return 0; +err_ssh_monitor: + rpmem_ssh_close(ssh); +err_ssh_close: +err_ssh_exec: + rpmem_target_free(info); +err_target: + return -1; +} + +#if FAULT_INJECTION +void +rpmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + return core_inject_fault_at(type, nth, at); +} + +int +rpmem_fault_injection_enabled(void) +{ + return core_fault_injection_enabled(); +} +#endif diff --git a/src/pmdk/src/librpmem/rpmem.h b/src/pmdk/src/librpmem/rpmem.h new file mode 100644 index 000000000..d65da2702 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem.h -- internal definitions for librpmem + */ +#include "alloc.h" +#include "fault_injection.h" + +#define RPMEM_LOG_PREFIX "librpmem" +#define RPMEM_LOG_LEVEL_VAR "RPMEM_LOG_LEVEL" +#define RPMEM_LOG_FILE_VAR "RPMEM_LOG_FILE" + +#if FAULT_INJECTION +void +rpmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at); + +int +rpmem_fault_injection_enabled(void); +#else +static inline void +rpmem_inject_fault_at(enum pmem_allocation_type type, int nth, + const char *at) +{ + abort(); +} + +static inline int +rpmem_fault_injection_enabled(void) +{ + return 0; +} +#endif diff --git a/src/pmdk/src/librpmem/rpmem_cmd.c b/src/pmdk/src/librpmem/rpmem_cmd.c new file mode 100644 index 000000000..f38260a13 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_cmd.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_cmd.c -- simple interface for running an executable in child process + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "out.h" +#include "os.h" +#include "rpmem_common.h" +#include "rpmem_util.h" +#include "rpmem_cmd.h" + +/* + * rpmem_cmd_init -- initialize command + */ +struct rpmem_cmd * +rpmem_cmd_init(void) +{ + struct rpmem_cmd *cmd = calloc(1, sizeof(*cmd)); + if (!cmd) { + RPMEM_LOG(ERR, "allocating command buffer"); + goto err_alloc_cmd; + } + + return cmd; +err_alloc_cmd: + return NULL; +} + +/* + * rpmem_cmd_fini -- deinitialize command + */ +void +rpmem_cmd_fini(struct rpmem_cmd *cmd) +{ + for (int i = 0; i < cmd->args.argc; i++) + free(cmd->args.argv[i]); + free(cmd->args.argv); + free(cmd); +} + +/* + * rpmem_cmd_push -- push back command's argument + */ +int +rpmem_cmd_push(struct rpmem_cmd *cmd, const char *arg) +{ + size_t argv_count = (size_t)cmd->args.argc + 2; + char **argv = realloc(cmd->args.argv, argv_count * sizeof(char *)); + if (!argv) { + RPMEM_LOG(ERR, "reallocating command argv"); + goto err_realloc; + } + + cmd->args.argv = argv; + + char *arg_dup = strdup(arg); + if (!arg_dup) { + RPMEM_LOG(ERR, "allocating argument"); + goto err_strdup; + } + + cmd->args.argv[cmd->args.argc] = arg_dup; + cmd->args.argc++; + cmd->args.argv[cmd->args.argc] = NULL; + + return 0; +err_strdup: +err_realloc: + return -1; +} + +/* + * rpmem_cmd_log -- print executing command + */ +static void +rpmem_cmd_log(struct rpmem_cmd *cmd) +{ + RPMEM_ASSERT(cmd->args.argc > 0); + + size_t size = 0; + for (int i = 0; i < cmd->args.argc; i++) { + size += strlen(cmd->args.argv[i]) + 1; + } + + char *buff = malloc(size); + if (!buff) { + 
RPMEM_LOG(ERR, "allocating log buffer for command");
+		return;
+	}
+
+	size_t pos = 0;
+
+	for (int i = 0; pos < size && i < cmd->args.argc; i++) {
+		int ret = util_snprintf(&buff[pos], size - pos, "%s%s",
+				cmd->args.argv[i], i == cmd->args.argc - 1 ?
+				"" : " ");
+		if (ret < 0) {
+			RPMEM_LOG(ERR, "!snprintf");
+			goto out;
+		}
+
+		pos += (size_t)ret;
+	}
+
+	RPMEM_LOG(INFO, "executing command '%s'", buff);
+
+out:
+	free(buff);
+}
+
+/*
+ * rpmem_cmd_run -- run the command and connect with its stdin, stdout and
+ * stderr using unix sockets.
+ *
+ * The communication with the child process is done via socketpairs on
+ * stdin, stdout and stderr. The socketpairs are used instead of pipes
+ * because writing to a pipe whose read end is closed raises a SIGPIPE
+ * signal, whereas with a socketpair data can be transferred using the
+ * send(3) function with the MSG_NOSIGNAL flag, which suppresses the
+ * signal.
+ */
+int
+rpmem_cmd_run(struct rpmem_cmd *cmd)
+{
+	int fd_in[2];
+	int fd_out[2];
+	int fd_err[2];
+
+	rpmem_cmd_log(cmd);
+
+	/* socketpair for stdin */
+	int ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_in);
+	if (ret < 0) {
+		RPMEM_LOG(ERR, "creating pipe for stdin");
+		goto err_pipe_in;
+	}
+
+	/* parent process stdin socket */
+	cmd->fd_in = fd_in[1];
+
+	/* socketpair for stdout */
+	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_out);
+	if (ret < 0) {
+		RPMEM_LOG(ERR, "creating pipe for stdout");
+		goto err_pipe_out;
+	}
+
+	/* parent process stdout socket */
+	cmd->fd_out = fd_out[0];
+
+	/* socketpair for stderr */
+	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_err);
+	if (ret < 0) {
+		RPMEM_LOG(ERR, "creating pipe for stderr");
+		goto err_pipe_err;
+	}
+
+	/* parent process stderr socket */
+	cmd->fd_err = fd_err[0];
+
+	cmd->pid = fork();
+
+	if (cmd->pid == -1) {
+		RPMEM_LOG(ERR, "forking command");
+		goto err_fork;
+	}
+
+	if (!cmd->pid) {
+		dup2(fd_in[0], 0);
+		dup2(fd_out[1], 1);
+		dup2(fd_err[1], 2);
+
+		execvp(cmd->args.argv[0], cmd->args.argv);
+		exit(EXIT_FAILURE);
+	}
+
+	os_close(fd_in[0]);
+	os_close(fd_out[1]);
+	os_close(fd_err[1]);
+
+	return 0;
+err_fork:
+	os_close(fd_err[0]);
+	os_close(fd_err[1]);
+err_pipe_err:
+	os_close(fd_out[0]);
+	os_close(fd_out[1]);
+err_pipe_out:
+	os_close(fd_in[0]);
+	os_close(fd_in[1]);
+err_pipe_in:
+	return -1;
+}
+
+/*
+ * rpmem_cmd_wait -- wait for the process to change state
+ */
+int
+rpmem_cmd_wait(struct rpmem_cmd *cmd, int *status)
+{
+	if (cmd->pid <= 0) {
+		RPMEM_LOG(ERR, "wrong PID: %i", cmd->pid);
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (waitpid(cmd->pid, status, 0) != cmd->pid) {
+		RPMEM_LOG(ERR, "!waitpid failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmem_cmd_term -- close the child process's unix sockets
+ */
+void
+rpmem_cmd_term(struct rpmem_cmd *cmd)
+{
+	os_close(cmd->fd_in);
+	os_close(cmd->fd_out);
+	os_close(cmd->fd_err);
+
+	RPMEM_ASSERT(cmd->pid > 0);
+}
diff --git a/src/pmdk/src/librpmem/rpmem_cmd.h b/src/pmdk/src/librpmem/rpmem_cmd.h
new file mode 100644
index 000000000..0a16dd28e
--- /dev/null
+++ b/src/pmdk/src/librpmem/rpmem_cmd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * rpmem_cmd.h -- helper module for invoking a separate process
+ */
+
+#ifndef RPMEM_CMD_H
+#define RPMEM_CMD_H 1
+
+#include
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+struct rpmem_cmd {
+	int fd_in;	/* stdin */
+	int fd_out;	/* stdout */
+	int fd_err;	/* stderr */
+	struct {
+		char **argv;
+		int argc;
+	} args;		/* command arguments */
+	pid_t pid;	/* pid of process */
+};
+
+struct rpmem_cmd
*rpmem_cmd_init(void); +int rpmem_cmd_push(struct rpmem_cmd *cmd, const char *arg); +int rpmem_cmd_run(struct rpmem_cmd *cmd); +void rpmem_cmd_term(struct rpmem_cmd *cmd); +int rpmem_cmd_wait(struct rpmem_cmd *cmd, int *status); +void rpmem_cmd_fini(struct rpmem_cmd *cmd); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/librpmem/rpmem_fip.c b/src/pmdk/src/librpmem/rpmem_fip.c new file mode 100644 index 000000000..fe9e2ebdf --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_fip.c @@ -0,0 +1,1987 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_fip.c -- rpmem libfabric provider module source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "out.h" +#include "util.h" +#include "os_thread.h" +#include "os.h" +#include "rpmem_common.h" +#include "rpmem_fip_common.h" +#include "rpmem_proto.h" +#include "rpmem_util.h" +#include "rpmem_fip_msg.h" +#include "rpmem_fip.h" +#include "valgrind_internal.h" + +#define RPMEM_FI_ERR(e, fmt, args...)\ + ERR(fmt ": %s", ## args, fi_strerror((e))) + +#define RPMEM_FI_CLOSE(f, fmt, args...) (\ +{\ + int oerrno = errno;\ + int ret = fi_close(&(f)->fid);\ + if (ret)\ + RPMEM_FI_ERR(ret, fmt, ## args);\ + errno = oerrno;\ + ret;\ +}) + +#define LANE_ALIGN_SIZE 64 +#define LANE_ALIGN __attribute__((aligned(LANE_ALIGN_SIZE))) + +#define RPMEM_RAW_BUFF_SIZE 4096 +#define RPMEM_RAW_SIZE 8 + +typedef ssize_t (*rpmem_fip_flush_fn)(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags); + +typedef int (*rpmem_fip_drain_fn)(struct rpmem_fip *fip, unsigned lane); + +typedef ssize_t (*rpmem_fip_persist_fn)(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags); + +typedef int (*rpmem_fip_init_fn)(struct rpmem_fip *fip); +typedef void (*rpmem_fip_fini_fn)(struct rpmem_fip *fip); + +typedef ssize_t (*cq_read_fn)(struct fid_cq *cq, void *buf, size_t count); + +static ssize_t +cq_read_infinite(struct fid_cq *cq, void *buf, size_t count) +{ + return fi_cq_sread(cq, buf, count, NULL, -1); +} + +/* + * rpmem_fip_ops -- operations specific for persistency method + */ +struct rpmem_fip_ops { + rpmem_fip_flush_fn flush; + rpmem_fip_drain_fn drain; + rpmem_fip_persist_fn persist; + rpmem_fip_init_fn lanes_init; + rpmem_fip_init_fn lanes_init_mem; + rpmem_fip_fini_fn lanes_fini; + rpmem_fip_init_fn lanes_post; +}; + +/* + * rpmem_fip_lane -- base lane structure + */ +struct rpmem_fip_lane { + struct fid_ep *ep; /* endpoint */ + struct fid_cq *cq; /* completion queue */ + uint64_t event; + size_t wq_elems; /* # of elements in work queue */ + int wq_is_flushing; /* work queue is during flush */ +}; + +/* + * rpmem_fip_plane -- persist operation's lane + */ +struct rpmem_fip_plane { + struct rpmem_fip_lane base; /* base lane structure */ + struct rpmem_fip_rma write; /* WRITE message */ + struct rpmem_fip_rma write_cq; /* WRITE message with completion */ + struct rpmem_fip_rma read; /* READ message */ + struct rpmem_fip_msg send; /* SEND message */ + struct rpmem_fip_msg recv; /* RECV message */ +} LANE_ALIGN; + +/* + * rpmem_fip_rlane -- read operation's lane + */ +struct rpmem_fip_rlane { + struct rpmem_fip_lane base; /* base lane structure */ + struct rpmem_fip_rma read; /* READ message */ +}; + +struct rpmem_fip { + struct fi_info *fi; /* fabric interface information */ + struct fid_fabric *fabric; /* fabric domain */ + struct fid_domain 
*domain; /* fabric protection domain */ + struct fid_eq *eq; /* event queue */ + + int closing; /* closing connections in progress */ + + size_t cq_size; /* completion queue size */ + + uint64_t raddr; /* remote memory base address */ + uint64_t rkey; /* remote memory protection key */ + void *laddr; /* local memory base address */ + size_t size; /* memory size */ + struct fid_mr *mr; /* local memory region */ + void *mr_desc; /* local memory descriptor */ + + enum rpmem_persist_method persist_method; + const struct rpmem_fip_ops *ops; + + unsigned nlanes; + size_t buff_size; + struct rpmem_fip_plane *lanes; + + os_thread_t monitor; + + void *pmsg; /* persist message buffer */ + size_t pmsg_size; + struct fid_mr *pmsg_mr; /* persist message memory region */ + void *pmsg_mr_desc; /* persist message memory descriptor */ + + struct rpmem_msg_persist_resp *pres; /* persist response buffer */ + struct fid_mr *pres_mr; /* persist response memory region */ + void *pres_mr_desc; /* persist response memory descriptor */ + + void *raw_buff; /* READ-after-WRITE buffer */ + struct fid_mr *raw_mr; /* RAW memory region */ + void *raw_mr_desc; /* RAW memory descriptor */ + + cq_read_fn cq_read; /* CQ read function */ +}; + +/* + * rpmem_fip_is_closing -- (internal) atomically reads and returns the + * closing flag + */ +static inline int +rpmem_fip_is_closing(struct rpmem_fip *fip) +{ + int ret; + util_atomic_load_explicit32(&fip->closing, &ret, memory_order_acquire); + return ret; +} + +/* + * rpmem_fip_set_closing -- (internal) atomically sets the closing flag + */ +static inline void +rpmem_fip_set_closing(struct rpmem_fip *fip) +{ + /* + * A load and a store without barriers should be good enough here. + * fetch_and_or is used as a workaround for a helgrind issue. + */ + util_fetch_and_or32(&fip->closing, 1); +} + +/* + * rpmem_fip_lane_begin -- (internal) initialize the list of events for a lane + */ +static inline void +rpmem_fip_lane_begin(struct rpmem_fip_lane *lanep, uint64_t event) +{ + lanep->event = event; +} + +/* + * rpmem_fip_lane_init -- (internal) initialize single lane + */ +static int +rpmem_fip_lane_init(struct rpmem_fip *fip, struct rpmem_fip_lane *lanep) +{ + int ret; + + struct fi_cq_attr cq_attr = { + .size = fip->cq_size, + .flags = 0, + .format = FI_CQ_FORMAT_MSG, + .wait_obj = FI_WAIT_UNSPEC, + .signaling_vector = 0, + .wait_cond = FI_CQ_COND_NONE, + .wait_set = NULL, + }; + + /* create a completion queue */ + ret = fi_cq_open(fip->domain, &cq_attr, &lanep->cq, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "opening completion queue"); + goto err_cq_open; + } + + /* create an endpoint */ + ret = fi_endpoint(fip->domain, fip->fi, &lanep->ep, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "allocating endpoint"); + goto err_endpoint; + } + + /* + * Bind an event queue to an endpoint to get + * connection-related events for the endpoint. + */ + ret = fi_ep_bind(lanep->ep, &fip->eq->fid, 0); + if (ret) { + RPMEM_FI_ERR(ret, "binding event queue to endpoint"); + goto err_ep_bind_eq; + } + + /* + * Bind a completion queue to an endpoint to get completion + * events of specified inbound/outbound operations. + * + * FI_SELECTIVE_COMPLETION means all inbound/outbound operations + * must explicitly specify if the completion event should be + * generated or not, using the FI_COMPLETION flag. + * + * The completion events received depend on the persistency + * method in use and are configured during the lanes + * initialization specific to that method.
+ */ + ret = fi_ep_bind(lanep->ep, &lanep->cq->fid, + FI_RECV | FI_TRANSMIT | FI_SELECTIVE_COMPLETION); + if (ret) { + RPMEM_FI_ERR(ret, "binding completion queue to endpoint"); + goto err_ep_bind_cq; + } + + /* + * Enable endpoint so it is possible to post inbound/outbound + * operations if required. + */ + ret = fi_enable(lanep->ep); + if (ret) { + RPMEM_FI_ERR(ret, "activating endpoint"); + goto err_fi_enable; + } + + return 0; +err_fi_enable: +err_ep_bind_cq: +err_ep_bind_eq: +err_endpoint: + RPMEM_FI_CLOSE(lanep->cq, "closing completion queue"); +err_cq_open: + return -1; +} + +/* + * rpmem_fip_lane_fini -- (internal) deinitialize single lane + */ +static int +rpmem_fip_lane_fini(struct rpmem_fip_lane *lanep) +{ + int ret; + int lret = 0; + + ret = RPMEM_FI_CLOSE(lanep->ep, "closing endpoint"); + if (ret) + lret = ret; + + ret = RPMEM_FI_CLOSE(lanep->cq, "closing completion queue"); + if (ret) + lret = ret; + + return lret; +} + +/* + * rpmem_fip_lane_wait -- (internal) wait for specific event on completion queue + */ +static int +rpmem_fip_lane_wait(struct rpmem_fip *fip, struct rpmem_fip_lane *lanep, + uint64_t e) +{ + ssize_t sret = 0; + struct fi_cq_err_entry err; + const char *str_err; + int ret = 0; + struct fi_cq_msg_entry cq_entry; + + while (lanep->event & e) { + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; + + sret = fip->cq_read(lanep->cq, &cq_entry, 1); + + if (unlikely(sret == -FI_EAGAIN) || sret == 0) + continue; + + if (unlikely(sret < 0)) { + ret = (int)sret; + goto err_cq_read; + } + + lanep->event &= ~cq_entry.flags; + } + + return 0; +err_cq_read: + sret = fi_cq_readerr(lanep->cq, &err, 0); + if (sret < 0) { + RPMEM_FI_ERR((int)sret, "error reading from completion queue: " + "cannot read error from event queue"); + goto err; + } + + str_err = fi_cq_strerror(lanep->cq, err.prov_errno, NULL, NULL, 0); + RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); +err: + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + return ret; +} + +/* + * rpmem_fip_set_nlanes -- (internal) set maximum number of lanes supported + */ +static void +rpmem_fip_set_nlanes(struct rpmem_fip *fip, unsigned nlanes) +{ + size_t max_nlanes = rpmem_fip_max_nlanes(fip->fi); + RPMEM_ASSERT(max_nlanes < UINT_MAX); + + fip->nlanes = min((unsigned)max_nlanes, nlanes); +} + +/* + * rpmem_fip_getinfo -- (internal) get fabric interface information + */ +static int +rpmem_fip_getinfo(struct rpmem_fip *fip, const char *node, const char *service, + enum rpmem_provider provider, size_t max_wq_size, + enum rpmem_persist_method pm) +{ + int ret = -1; + struct fi_info *hints = rpmem_fip_get_hints(provider); + if (!hints) { + RPMEM_LOG(ERR, "!getting fabric interface information hints"); + goto err_hints; + } + + /* + * WQ size is: + * - >= size required by persist method (pm_wq_size) + * - >= size forced by environment variable (Rpmem_wq_size) + * - but it has to be <= max_wq_size reported by provider + */ + size_t pm_wq_size = rpmem_fip_wq_size(pm, RPMEM_FIP_NODE_CLIENT); + hints->tx_attr->size = + min( + max(pm_wq_size, Rpmem_wq_size), + max_wq_size); + + hints->rx_attr->size = rpmem_fip_rx_size(pm, RPMEM_FIP_NODE_CLIENT); + + /* get maximum available */ + ret = fi_getinfo(RPMEM_FIVERSION, node, service, 0, hints, &fip->fi); + if (ret) { + RPMEM_FI_ERR(ret, "getting fabric interface information"); + goto err_fi_getinfo; + } + + rpmem_fip_print_info(fip->fi); + + /* fallback to free the hints */ +err_fi_getinfo: + fi_freeinfo(hints); 
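	/*
	 * A worked example of the WQ sizing above (the numbers are
	 * illustrative only): with pm_wq_size == 3, Rpmem_wq_size == 16
	 * and max_wq_size == 8, the requested size is
	 *
	 *	min(max(3, 16), 8) == 8
	 *
	 * i.e. the environment variable may grow the work queue, but
	 * never beyond what the provider reports as its maximum.
	 */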
+err_hints: + return ret; +} + +/* + * rpmem_fip_init_fabric_res -- (internal) initialize common fabric resources + */ +static int +rpmem_fip_init_fabric_res(struct rpmem_fip *fip) +{ + int ret; + ret = fi_fabric(fip->fi->fabric_attr, &fip->fabric, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "opening fabric domain"); + goto err_fi_fabric; + } + + ret = fi_domain(fip->fabric, fip->fi, &fip->domain, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "opening fabric access domain"); + goto err_fi_domain; + } + + struct fi_eq_attr eq_attr = { + .size = 0, /* use default value */ + .flags = 0, + .wait_obj = FI_WAIT_UNSPEC, + .signaling_vector = 0, + .wait_set = NULL, + }; + + ret = fi_eq_open(fip->fabric, &eq_attr, &fip->eq, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "opening event queue"); + goto err_eq_open; + } + + return 0; +err_eq_open: + RPMEM_FI_CLOSE(fip->domain, "closing fabric access domain"); +err_fi_domain: + RPMEM_FI_CLOSE(fip->fabric, "closing fabric domain"); +err_fi_fabric: + return ret; +} + +/* + * rpmem_fip_fini_fabric_res -- (internal) deinitialize common fabric resources + */ +static void +rpmem_fip_fini_fabric_res(struct rpmem_fip *fip) +{ + RPMEM_FI_CLOSE(fip->eq, "closing event queue"); + RPMEM_FI_CLOSE(fip->domain, "closing fabric access domain"); + RPMEM_FI_CLOSE(fip->fabric, "closing fabric domain"); +} + +/* + * rpmem_fip_init_memory -- (internal) initialize common memory resources + */ +static int +rpmem_fip_init_memory(struct rpmem_fip *fip) +{ + ASSERTne(Pagesize, 0); + int ret; + + /* + * Register local memory space. The local memory will be used + * with the WRITE operation in the rpmem_fip_persist function, thus + * the FI_WRITE access flag. + */ + ret = fi_mr_reg(fip->domain, fip->laddr, fip->size, + FI_WRITE, 0, 0, 0, &fip->mr, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "registering memory"); + return ret; + } + + /* get local memory descriptor */ + fip->mr_desc = fi_mr_desc(fip->mr); + + return 0; +} + +/* + * rpmem_fip_fini_memory -- (internal) deinitialize common memory resources + */ +static void +rpmem_fip_fini_memory(struct rpmem_fip *fip) +{ + RPMEM_FI_CLOSE(fip->mr, "unregistering memory"); +} + +/* + * rpmem_fip_lanes_init_common -- (internal) initialize common lanes resources + */ +static int +rpmem_fip_lanes_init_common(struct rpmem_fip *fip) +{ + int ret; + + ret = posix_memalign((void **)&fip->lanes, LANE_ALIGN_SIZE, + fip->nlanes * sizeof(*fip->lanes)); + if (ret) { + RPMEM_LOG(ERR, "!allocating lanes"); + goto err_alloc_lanes; + } + memset(fip->lanes, 0, fip->nlanes * sizeof(*fip->lanes)); + + unsigned i; + for (i = 0; i < fip->nlanes; i++) { + ret = rpmem_fip_lane_init(fip, &fip->lanes[i].base); + if (ret) + goto err_lane_init; + } + + return 0; +err_lane_init: + for (unsigned j = 0; j < i; j++) + rpmem_fip_lane_fini(&fip->lanes[j].base); + free(fip->lanes); +err_alloc_lanes: + return -1; +} + +/* + * rpmem_fip_lanes_fini_common -- (internal) deinitialize common lanes + * resources + */ +static int +rpmem_fip_lanes_fini_common(struct rpmem_fip *fip) +{ + int lret = 0; + int ret; + + for (unsigned i = 0; i < fip->nlanes; i++) { + ret = rpmem_fip_lane_fini(&fip->lanes[i].base); + if (ret) + lret = ret; + } + + free(fip->lanes); + + return lret; +} + +/* + * rpmem_fip_lanes_init -- (internal) initialize lanes + */ +static int +rpmem_fip_lanes_init(struct rpmem_fip *fip) +{ + int ret; + + ret = rpmem_fip_lanes_init_common(fip); + if (ret) + return ret; + + ret = fip->ops->lanes_init(fip); + if (ret) + goto err_init_lanes; + + return 0; +err_init_lanes: +
rpmem_fip_lanes_fini_common(fip); + return ret; +} + +/* + * rpmem_fip_lane_connect -- (internal) connect on a single lane + */ +static int +rpmem_fip_lane_connect(struct rpmem_fip *fip, struct rpmem_fip_lane *lanep) +{ + struct fi_eq_cm_entry entry; + int ret; + + ret = fi_connect(lanep->ep, fip->fi->dest_addr, NULL, 0); + if (ret) { + RPMEM_FI_ERR(ret, "initiating connection request"); + return ret; + } + + return rpmem_fip_read_eq_check(fip->eq, &entry, FI_CONNECTED, + &lanep->ep->fid, + RPMEM_CONNECT_TIMEOUT); +} + +/* + * rpmem_fip_lanes_connect -- (internal) establish connections on all lanes + */ +static int +rpmem_fip_lanes_connect(struct rpmem_fip *fip) +{ + int ret; + + for (unsigned i = 0; i < fip->nlanes; i++) { + struct rpmem_fip_lane *lanep = &fip->lanes[i].base; + ret = rpmem_fip_lane_connect(fip, lanep); + if (ret) + return ret; + } + + return 0; +} + +/* + * rpmem_fip_lanes_shutdown -- shutdown all endpoints + */ +static int +rpmem_fip_lanes_shutdown(struct rpmem_fip *fip) +{ + int ret; + int lret = 0; + + for (unsigned i = 0; i < fip->nlanes; i++) { + ret = fi_shutdown(fip->lanes[i].base.ep, 0); + if (ret) { + RPMEM_FI_ERR(ret, "disconnecting endpoint"); + lret = ret; + } + } + + return lret; +} + +/* + * rpmem_fip_lane_prep_write -- (internal) choose the right WRITE structure + * according to flags and prepare for collecting its completion + */ +static inline struct rpmem_fip_rma * +rpmem_fip_lane_prep_write(struct rpmem_fip_plane *lanep, unsigned flags) +{ + if (flags & RPMEM_COMPLETION) { + rpmem_fip_lane_begin(&lanep->base, FI_WRITE); + return &lanep->write_cq; + } + + return &lanep->write; +} + +/* + * rpmem_fip_monitor_thread -- (internal) monitor in-band connection + */ +static void * +rpmem_fip_monitor_thread(void *arg) +{ + struct rpmem_fip *fip = (struct rpmem_fip *)arg; + struct fi_eq_cm_entry entry; + uint32_t event; + int ret; + + while (!rpmem_fip_is_closing(fip)) { + ret = rpmem_fip_read_eq(fip->eq, &entry, &event, + RPMEM_MONITOR_TIMEOUT); + if (unlikely(ret == 0) && event == FI_SHUTDOWN) { + RPMEM_LOG(ERR, "event queue got FI_SHUTDOWN"); + + /* mark in-band connection as closing */ + rpmem_fip_set_closing(fip); + + for (unsigned i = 0; i < fip->nlanes; i++) { + fi_cq_signal(fip->lanes[i].base.cq); + } + } + } + + return NULL; +} + +/* + * rpmem_fip_monitor_init -- (internal) initialize in-band monitor + */ +static int +rpmem_fip_monitor_init(struct rpmem_fip *fip) +{ + errno = os_thread_create(&fip->monitor, NULL, rpmem_fip_monitor_thread, + fip); + if (errno) { + RPMEM_LOG(ERR, "!connection monitor thread"); + return -1; + } + + return 0; +} + +/* + * rpmem_fip_monitor_fini -- (internal) finalize in-band monitor + */ +static int +rpmem_fip_monitor_fini(struct rpmem_fip *fip) +{ + rpmem_fip_set_closing(fip); + + int ret = os_thread_join(&fip->monitor, NULL); + if (ret) { + RPMEM_LOG(ERR, "joining monitor thread failed"); + } + + return ret; +} + +/* + * rpmem_fip_init_lanes_common -- (internal) initialize lanes + */ +static int +rpmem_fip_init_lanes_common(struct rpmem_fip *fip) +{ + ASSERTne(Pagesize, 0); + + int ret = 0; + + /* allocate persist messages buffer */ + fip->pmsg_size = roundup(sizeof(struct rpmem_msg_persist) + + fip->buff_size, (size_t)64); + size_t msg_size = fip->nlanes * fip->pmsg_size; + msg_size = PAGE_ALIGNED_UP_SIZE(msg_size); + errno = posix_memalign((void **)&fip->pmsg, Pagesize, msg_size); + if (errno) { + RPMEM_LOG(ERR, "!allocating messages buffer"); + ret = -1; + goto err_malloc_pmsg; + } + + /* + * Register persist messages 
buffer. The persist messages + * are sent to daemon thus the FI_SEND access flag. + */ + ret = fi_mr_reg(fip->domain, fip->pmsg, msg_size, FI_SEND, + 0, 0, 0, &fip->pmsg_mr, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "registering messages buffer"); + goto err_fi_mr_reg_pmsg; + } + + /* get persist messages buffer local descriptor */ + fip->pmsg_mr_desc = fi_mr_desc(fip->pmsg_mr); + + /* allocate persist response messages buffer */ + size_t msg_resp_size = fip->nlanes * + sizeof(struct rpmem_msg_persist_resp); + msg_resp_size = PAGE_ALIGNED_UP_SIZE(msg_resp_size); + errno = posix_memalign((void **)&fip->pres, Pagesize, msg_resp_size); + if (errno) { + RPMEM_LOG(ERR, "!allocating messages response buffer"); + ret = -1; + goto err_malloc_pres; + } + + /* + * Register persist messages response buffer. The persist response + * messages are received from daemon thus the FI_RECV access flag. + */ + ret = fi_mr_reg(fip->domain, fip->pres, msg_resp_size, FI_RECV, + 0, 0, 0, &fip->pres_mr, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "registering messages response buffer"); + goto err_fi_mr_reg_pres; + } + + /* get persist response messages buffer local descriptor */ + fip->pres_mr_desc = fi_mr_desc(fip->pres_mr); + + return 0; +err_fi_mr_reg_pres: + free(fip->pres); +err_malloc_pres: + RPMEM_FI_CLOSE(fip->pmsg_mr, "unregistering messages buffer"); +err_fi_mr_reg_pmsg: + free(fip->pmsg); +err_malloc_pmsg: + return ret; +} + +/* + * rpmem_fip_get_pmsg -- return persist message buffer + */ +static inline struct rpmem_msg_persist * +rpmem_fip_get_pmsg(struct rpmem_fip *fip, size_t idx) +{ + return (struct rpmem_msg_persist *) + ((uintptr_t)fip->pmsg + idx * fip->pmsg_size); +} + +/* + * rpmem_fip_init_mem_lanes_gpspm -- initialize lanes rma structures + */ +static int +rpmem_fip_init_mem_lanes_gpspm(struct rpmem_fip *fip) +{ + /* + * Initialize all required structures for: + * WRITE, SEND and RECV operations. + * + * If the completion is required the FI_COMPLETION flag and + * appropriate context should be used. + * + * In GPSPM only the RECV and SEND completions are required. + * + * For RECV the context is RECV operation structure used for + * fi_recvmsg(3) function call. + * + * For SEND the context is lane structure. + * + * The received buffer contains a lane id which is used + * to obtain a lane which must be signaled that operation + * has been completed. 
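 *
 * For illustration, the routing step on the receive path could look
 * like this (a sketch only; rpmem_fip_lane_signal() is a hypothetical
 * helper, not a function defined in this file):
 *
 *	struct rpmem_msg_persist_resp *resp = ...; -- received buffer
 *	unsigned lane = (unsigned)resp->lane;
 *	rpmem_fip_lane_signal(&fip->lanes[lane].base, FI_RECV);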
+ */ + unsigned i; + for (i = 0; i < fip->nlanes; i++) { + /* WRITE */ + rpmem_fip_rma_init(&fip->lanes[i].write, + fip->mr_desc, 0, + fip->rkey, + &fip->lanes[i], + 0); + + /* SEND */ + rpmem_fip_msg_init(&fip->lanes[i].send, + fip->pmsg_mr_desc, 0, + &fip->lanes[i], + rpmem_fip_get_pmsg(fip, i), + 0 /* size must be provided when sending msg */, + FI_COMPLETION); + + /* RECV */ + rpmem_fip_msg_init(&fip->lanes[i].recv, + fip->pres_mr_desc, 0, + &fip->lanes[i].recv, + &fip->pres[i], + sizeof(fip->pres[i]), + FI_COMPLETION); + } + + return 0; +} + +/* + * rpmem_fip_fini_lanes_common -- (internal) deinitialize lanes for GPSPM + */ +static void +rpmem_fip_fini_lanes_common(struct rpmem_fip *fip) +{ + RPMEM_FI_CLOSE(fip->pmsg_mr, "unregistering messages buffer"); + RPMEM_FI_CLOSE(fip->pres_mr, "unregistering messages " + "response buffer"); + free(fip->pmsg); + free(fip->pres); +} + +/* + * rpmem_fip_init_lanes_apm -- (internal) initialize lanes for APM + */ +static int +rpmem_fip_init_lanes_apm(struct rpmem_fip *fip) +{ + ASSERTne(Pagesize, 0); + int ret; + + ret = rpmem_fip_init_lanes_common(fip); + if (ret) + goto err_init_lanes_common; + + ASSERT(IS_PAGE_ALIGNED(RPMEM_RAW_BUFF_SIZE)); + errno = posix_memalign((void **)&fip->raw_buff, Pagesize, + RPMEM_RAW_BUFF_SIZE); + if (errno) { + RPMEM_LOG(ERR, "!allocating APM RAW buffer"); + goto err_malloc_raw; + } + + /* register read-after-write buffer */ + ret = fi_mr_reg(fip->domain, fip->raw_buff, RPMEM_RAW_BUFF_SIZE, + FI_REMOTE_WRITE, 0, 0, 0, &fip->raw_mr, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "registering APM read buffer"); + goto err_fi_raw_mr; + } + + /* get read-after-write buffer local descriptor */ + fip->raw_mr_desc = fi_mr_desc(fip->raw_mr); + + return 0; +err_fi_raw_mr: + free(fip->raw_buff); +err_malloc_raw: + rpmem_fip_fini_lanes_common(fip); +err_init_lanes_common: + return -1; +} + +/* + * rpmem_fip_init_mem_lanes_apm -- initialize lanes rma structures + */ +static int +rpmem_fip_init_mem_lanes_apm(struct rpmem_fip *fip) +{ + /* + * Initialize all required structures for: + * WRITE and READ operations. + * + * If the completion is required the FI_COMPLETION flag and + * appropriate context should be used. + * + * In APM only the READ completion is required. + * The context is a lane structure. 
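 *
 * The resulting READ-after-WRITE sequence per persist is roughly
 * (see rpmem_fip_persist_raw() and rpmem_fip_drain_raw() below):
 *
 *	WRITE laddr+offset -> raddr+offset   -- no completion requested
 *	READ raddr -> raw_buff               -- RPMEM_RAW_SIZE bytes
 *	wait for the FI_READ completion      -- the READ is ordered after
 *	                                        the WRITE, so the data has
 *	                                        reached the target by then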
+ */ + for (unsigned i = 0; i < fip->nlanes; i++) { + + /* WRITE */ + rpmem_fip_rma_init(&fip->lanes[i].write, + fip->mr_desc, 0, + fip->rkey, + &fip->lanes[i], + 0); + + /* WRITE + FI_COMPLETION */ + rpmem_fip_rma_init(&fip->lanes[i].write_cq, + fip->mr_desc, 0, + fip->rkey, + &fip->lanes[i], + FI_COMPLETION); + + /* READ */ + rpmem_fip_rma_init(&fip->lanes[i].read, + fip->raw_mr_desc, 0, + fip->rkey, + &fip->lanes[i], + FI_COMPLETION); + + /* SEND */ + rpmem_fip_msg_init(&fip->lanes[i].send, + fip->pmsg_mr_desc, 0, + &fip->lanes[i], + rpmem_fip_get_pmsg(fip, i), + fip->pmsg_size, + FI_COMPLETION); + + /* RECV */ + rpmem_fip_msg_init(&fip->lanes[i].recv, + fip->pres_mr_desc, 0, + &fip->lanes[i].recv, + &fip->pres[i], + sizeof(fip->pres[i]), + FI_COMPLETION); + } + + return 0; +} + +/* + * rpmem_fip_fini_lanes_apm -- (internal) deinitialize lanes for APM + */ +static void +rpmem_fip_fini_lanes_apm(struct rpmem_fip *fip) +{ + RPMEM_FI_CLOSE(fip->raw_mr, "unregistering APM read buffer"); + free(fip->raw_buff); + + rpmem_fip_fini_lanes_common(fip); +} + +/* + * rpmem_fip_wq_inc -- (internal) increment number of elements in WQ + */ +static inline void +rpmem_fip_wq_inc(struct rpmem_fip_plane *lanep) +{ + ++lanep->base.wq_elems; +} + +/* + * rpmem_fip_wq_set_empty -- (internal) zero number of elements in WQ + */ +static inline void +rpmem_fip_wq_set_empty(struct rpmem_fip_plane *lanep) +{ + RPMEM_ASSERT(!lanep->base.wq_is_flushing); + lanep->base.wq_elems = 0; +} + +/* + * rpmem_fip_wq_require_flush -- (internal) is WQ almost full + */ +static inline int +rpmem_fip_wq_require_flush(struct rpmem_fip *fip, struct rpmem_fip_plane *lanep) +{ + RPMEM_ASSERT(lanep->base.wq_elems < fip->fi->tx_attr->size); + + return lanep->base.wq_elems + 1 == fip->fi->tx_attr->size; +} + +/* + * rpmem_fip_wq_is_flushing -- (internal) is WQ flush started + */ +static inline int +rpmem_fip_wq_is_flushing(struct rpmem_fip_plane *lanep) +{ + return lanep->base.wq_is_flushing; +} + +/* + * rpmem_fip_wq_set_flushing -- (internal) mark WQ flush start + */ +static inline void +rpmem_fip_wq_set_flushing(struct rpmem_fip_plane *lanep) +{ + lanep->base.wq_is_flushing = 1; +} + +/* + * if WQ is almost full last WRITE has to report its completion + * otherwise it is unknown when subsequent commands can be posted + */ +#define RPMEM_FIP_WQ_FLUSH_REQ RPMEM_COMPLETION + +/* + * rpmem_fip_wq_flush_wait -- (internal) wait for WRITE completion + * to make sure WQ can accept subsequent commands + */ +static inline int +rpmem_fip_wq_flush_wait(struct rpmem_fip *fip, struct rpmem_fip_plane *lanep) +{ + RPMEM_ASSERT(lanep->base.wq_elems == fip->fi->tx_attr->size); + RPMEM_ASSERT(lanep->base.wq_is_flushing); + + /* wait for WRITE completion */ + int ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_WRITE); + if (unlikely(ret)) { + LOG(2, "waiting for WRITE completion failed"); + return ret; + } + + /* when WRITE completion is reaped WQ is empty */ + lanep->base.wq_is_flushing = 0; + rpmem_fip_wq_set_empty(lanep); + + return 0; +} + +/* + * rpmem_fip_wq_inc_and_flush -- (internal) increment number of elements in WQ + * and flush it + */ +static inline int +rpmem_fip_wq_inc_and_flush(struct rpmem_fip *fip, struct rpmem_fip_plane *lanep) +{ + rpmem_fip_wq_inc(lanep); + rpmem_fip_wq_set_flushing(lanep); + + return rpmem_fip_wq_flush_wait(fip, lanep); +} + +/* + * rpmem_fip_wq_flush_check -- (internal) check if WQ requires flush or it is + * during flushing and handle each case + */ +static inline int +rpmem_fip_wq_flush_check(struct rpmem_fip 
*fip, struct rpmem_fip_plane *lanep, + unsigned *flags) +{ + if (rpmem_fip_wq_is_flushing(lanep)) + return rpmem_fip_wq_flush_wait(fip, lanep); + + if (rpmem_fip_wq_require_flush(fip, lanep)) + *flags |= RPMEM_FIP_WQ_FLUSH_REQ; + + return 0; +} + +/* + * rpmem_fip_get_wq_size -- get WQ size (for validation purposes only) + */ +inline size_t +rpmem_fip_get_wq_size(struct rpmem_fip *fip) +{ + RPMEM_ASSERT(fip); + RPMEM_ASSERT(fip->fi); + RPMEM_ASSERT(fip->fi->tx_attr); + + return fip->fi->tx_attr->size; +} + +/* + * rpmem_fip_flush_raw -- (internal) perform flush operation using rma WRITE + */ +static int +rpmem_fip_flush_raw(struct rpmem_fip *fip, size_t offset, size_t len, + unsigned lane, unsigned flags) +{ + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + + int ret; + void *laddr = (void *)((uintptr_t)fip->laddr + offset); + uint64_t raddr = fip->raddr + offset; + + struct rpmem_fip_rma *write = rpmem_fip_lane_prep_write(lanep, flags); + + /* WRITE for requested memory region */ + ret = rpmem_fip_writemsg(lanep->base.ep, write, laddr, len, raddr); + if (unlikely(ret)) { + RPMEM_FI_ERR(ret, "RMA write"); + return ret; + } + + if (flags & RPMEM_FIP_WQ_FLUSH_REQ) + rpmem_fip_wq_set_flushing(lanep); + + return 0; +} + +/* + * rpmem_fip_drain_raw -- (internal) perform drain operation using rma READ + */ +static int +rpmem_fip_drain_raw(struct rpmem_fip *fip, unsigned lane) +{ + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + int ret; + + rpmem_fip_lane_begin(&lanep->base, FI_READ); + + /* READ to read-after-write buffer */ + ret = rpmem_fip_readmsg(lanep->base.ep, &lanep->read, fip->raw_buff, + RPMEM_RAW_SIZE, fip->raddr); + if (unlikely(ret)) { + RPMEM_FI_ERR(ret, "RMA read"); + return ret; + } + + /* wait for READ completion */ + ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_READ); + if (unlikely(ret)) { + ERR("waiting for READ completion failed"); + return ret; + } + + return 0; +} + +/* + * rpmem_fip_persist_raw -- (internal) perform persist operation using + * READ after WRITE mechanism + */ +static int +rpmem_fip_persist_raw(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + int ret; + + ret = rpmem_fip_flush_raw(fip, offset, len, lane, flags); + if (unlikely(ret)) + return ret; + + /* flush WQ prior to posting subsequent message */ + if (flags & RPMEM_FIP_WQ_FLUSH_REQ) { + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + ret = rpmem_fip_wq_inc_and_flush(fip, lanep); + if (unlikely(ret)) + return ret; + } + + return rpmem_fip_drain_raw(fip, lane); +} + +/* + * rpmem_fip_post_resp -- (internal) post persist response message buffer + */ +static inline int +rpmem_fip_post_resp(struct rpmem_fip *fip, + struct rpmem_fip_plane *lanep) +{ + int ret = rpmem_fip_recvmsg(lanep->base.ep, &lanep->recv); + if (unlikely(ret)) { + RPMEM_FI_ERR(ret, "posting recv buffer"); + return ret; + } + + return 0; +} + +/* + * rpmem_fip_persist_saw -- (internal) perform persist operation using + * SEND after WRITE mechanism + */ +static int +rpmem_fip_persist_saw(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + void *laddr = (void *)((uintptr_t)fip->laddr + offset); + uint64_t raddr = fip->raddr + offset; + struct rpmem_msg_persist *msg; + int ret; + + ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_SEND); + if (unlikely(ret)) { + ERR("waiting for SEND completion failed"); + return ret; + } + + struct rpmem_fip_rma *write = rpmem_fip_lane_prep_write(lanep, flags); + + 
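	/*
	 * rpmem_fip_lane_prep_write() picked either lanep->write (no
	 * completion) or lanep->write_cq (RPMEM_COMPLETION requested,
	 * FI_WRITE event armed), so the WRITE below is posted with or
	 * without a completion accordingly.
	 */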
/* WRITE for requested memory region */ + ret = rpmem_fip_writemsg(lanep->base.ep, write, laddr, len, raddr); + if (unlikely(ret)) { + RPMEM_FI_ERR((int)ret, "RMA write"); + return ret; + } + + /* flush WQ prior to posting subsequent message */ + if (flags & RPMEM_FIP_WQ_FLUSH_REQ) { + ret = rpmem_fip_wq_inc_and_flush(fip, lanep); + if (unlikely(ret)) + return ret; + } + + rpmem_fip_lane_begin(&lanep->base, FI_RECV | FI_SEND); + + /* SEND persist message */ + msg = rpmem_fip_msg_get_pmsg(&lanep->send); + msg->flags = (flags & RPMEM_FLUSH_PERSIST_MASK); + msg->lane = lane; + msg->addr = raddr; + msg->size = len; + + ret = rpmem_fip_sendmsg(lanep->base.ep, &lanep->send, sizeof(*msg)); + if (unlikely(ret)) { + RPMEM_FI_ERR(ret, "MSG send"); + return ret; + } + + /* wait for persist operation completion */ + ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_RECV); + if (unlikely(ret)) { + ERR("waiting for RECV completion failed"); + return ret; + } + + ret = rpmem_fip_post_resp(fip, lanep); + if (unlikely(ret)) { + ERR("posting RECV buffer failed"); + return ret; + } + + return 0; +} + +/* + * rpmem_fip_persist_send -- (internal) perform persist operation using + * RDMA SEND operation with data inlined in the message buffer. + */ +static int +rpmem_fip_persist_send(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + RPMEM_ASSERT(len <= fip->buff_size); + + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + void *laddr = (void *)((uintptr_t)fip->laddr + offset); + uint64_t raddr = fip->raddr + offset; + struct rpmem_msg_persist *msg; + int ret; + + ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_SEND); + if (unlikely(ret)) { + ERR("waiting for SEND completion failed"); + return ret; + } + + rpmem_fip_lane_begin(&lanep->base, FI_RECV | FI_SEND); + + /* SEND persist message */ + msg = rpmem_fip_msg_get_pmsg(&lanep->send); + msg->flags = flags; + msg->lane = lane; + msg->addr = raddr; + msg->size = len; + + memcpy(msg->data, laddr, len); + + ret = rpmem_fip_sendmsg(lanep->base.ep, &lanep->send, + sizeof(*msg) + len); + if (unlikely(ret)) { + RPMEM_FI_ERR(ret, "MSG send"); + return ret; + } + + /* wait for persist operation completion */ + ret = rpmem_fip_lane_wait(fip, &lanep->base, FI_RECV); + if (unlikely(ret)) { + ERR("waiting for RECV completion failed"); + return ret; + } + + ret = rpmem_fip_post_resp(fip, lanep); + if (unlikely(ret)) { + ERR("posting RECV buffer failed"); + return ret; + } + + return 0; +} + +/* + * rpmem_fip_persist_gpspm_sockets -- (internal) perform persist operation + * for GPSPM - sockets provider implementation which doesn't use the + * inline persist operation + */ +static ssize_t +rpmem_fip_persist_gpspm_sockets(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + unsigned mode = flags & RPMEM_FLUSH_PERSIST_MASK; + if (mode == RPMEM_PERSIST_SEND) + flags = (flags & ~RPMEM_FLUSH_PERSIST_MASK) | RPMEM_FLUSH_WRITE; + + int ret = rpmem_fip_wq_flush_check(fip, &fip->lanes[lane], &flags); + if (unlikely(ret)) + return -abs(ret); + + /* Limit len to the max value of the return type. 
*/ + len = min(len, SSIZE_MAX); + + ret = rpmem_fip_persist_saw(fip, offset, len, lane, flags); + if (ret) + return -abs(ret); + + rpmem_fip_wq_set_empty(&fip->lanes[lane]); + + return (ssize_t)len; +} + +/* + * rpmem_fip_persist_apm_sockets -- (internal) perform persist operation + * for APM - sockets provider implementation which doesn't use the + * inline persist operation + */ +static ssize_t +rpmem_fip_persist_apm_sockets(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + /* Limit len to the max value of the return type. */ + len = min(len, SSIZE_MAX); + + int ret = rpmem_fip_wq_flush_check(fip, &fip->lanes[lane], &flags); + if (unlikely(ret)) + return -abs(ret); + + ret = rpmem_fip_persist_raw(fip, offset, len, lane, flags); + if (unlikely(ret)) + return -abs(ret); + + rpmem_fip_wq_set_empty(&fip->lanes[lane]); + + return (ssize_t)len; +} + +/* + * rpmem_fip_persist_gpspm -- (internal) perform persist operation for GPSPM + */ +static ssize_t +rpmem_fip_persist_gpspm(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + /* Limit len to the max value of the return type. */ + len = min(len, SSIZE_MAX); + unsigned mode = flags & RPMEM_FLUSH_PERSIST_MASK; + + int ret = rpmem_fip_wq_flush_check(fip, &fip->lanes[lane], &flags); + if (unlikely(ret)) + return -abs(ret); + + if (mode == RPMEM_PERSIST_SEND) { + len = min(len, fip->buff_size); + ret = rpmem_fip_persist_send(fip, offset, len, lane, flags); + } else { + ret = rpmem_fip_persist_saw(fip, offset, len, lane, flags); + } + + if (ret) + return -abs(ret); + + rpmem_fip_wq_set_empty(&fip->lanes[lane]); + + return (ssize_t)len; +} + +/* + * rpmem_fip_drain_nop -- (internal) perform drain operation as NOP + */ +static int +rpmem_fip_drain_nop(struct rpmem_fip *fip, unsigned lane) +{ + (void) fip; + (void) lane; + return 0; +} + +/* + * rpmem_fip_flush_apm -- (internal) perform flush operation for APM + */ +static ssize_t +rpmem_fip_flush_apm(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + int ret; + + /* Limit len to the max value of the return type. */ + len = min(len, SSIZE_MAX); + unsigned mode = flags & RPMEM_FLUSH_PERSIST_MASK; + + ret = rpmem_fip_wq_flush_check(fip, lanep, &flags); + if (unlikely(ret)) + return ret; + + if (mode == RPMEM_PERSIST_SEND) { + /* + * XXX: Probably posting Send in the flush and waiting for the + * response in the drain will give some performance gains. 
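 * Such a split could look roughly like this (hypothetical, not
 * implemented here):
 *
 *	flush: post the SEND, return without waiting
 *	drain: wait for the FI_RECV completion (the response)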
+ */ + len = min(len, fip->buff_size); + ret = rpmem_fip_persist_send(fip, offset, len, lane, flags); + } else { + ret = rpmem_fip_flush_raw(fip, offset, len, lane, flags); + } + + if (ret) + return -abs(ret); + + rpmem_fip_wq_inc(lanep); + + return (ssize_t)len; +} + +/* + * rpmem_fip_drain_apm -- (internal) perform drain operation for APM + */ +static int +rpmem_fip_drain_apm(struct rpmem_fip *fip, unsigned lane) +{ + struct rpmem_fip_plane *lanep = &fip->lanes[lane]; + int ret; + + if (unlikely(rpmem_fip_wq_is_flushing(lanep))) { + ret = rpmem_fip_wq_flush_wait(fip, lanep); + if (unlikely(ret)) + return ret; + } + + ret = rpmem_fip_drain_raw(fip, lane); + /* successful drain means WQ is empty */ + if (likely(!ret)) + rpmem_fip_wq_set_empty(lanep); + + return ret; +} + +/* + * rpmem_fip_persist_apm -- (internal) perform persist operation for APM + */ +static ssize_t +rpmem_fip_persist_apm(struct rpmem_fip *fip, size_t offset, + size_t len, unsigned lane, unsigned flags) +{ + /* Limit len to the max value of the return type. */ + len = min(len, SSIZE_MAX); + unsigned mode = flags & RPMEM_FLUSH_PERSIST_MASK; + + int ret = rpmem_fip_wq_flush_check(fip, &fip->lanes[lane], &flags); + if (unlikely(ret)) + return -abs(ret); + + if (unlikely(mode == RPMEM_DEEP_PERSIST)) + ret = rpmem_fip_persist_saw(fip, offset, len, lane, flags); + else if (mode == RPMEM_PERSIST_SEND) { + len = min(len, fip->buff_size); + ret = rpmem_fip_persist_send(fip, offset, len, lane, flags); + } else { + ret = rpmem_fip_persist_raw(fip, offset, len, lane, flags); + } + + if (unlikely(ret)) + return -abs(ret); + + rpmem_fip_wq_set_empty(&fip->lanes[lane]); + + return (ssize_t)len; +} + +/* + * rpmem_fip_post_lanes_common -- (internal) post all persist response message + * buffers + */ +static int +rpmem_fip_post_lanes_common(struct rpmem_fip *fip) +{ + int ret = 0; + for (unsigned i = 0; i < fip->nlanes; i++) { + ret = rpmem_fip_post_resp(fip, &fip->lanes[i]); + if (ret) + break; + } + + return ret; +} + +/* + * rpmem_fip_ops -- some operations specific for persistency method used + * + * Note: GPSPM flush is emulated by persist whereas drain is a nop. + * + * Probably splitting Send-after-Write into two stages (flush + drain) + * will give some performance gains for GPSPM mode. 
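 *
 * The table below is indexed by provider and persistency method, and
 * dispatch is a plain function-pointer call, e.g.:
 *
 *	fip->ops = &rpmem_fip_ops[attr->provider][fip->persist_method];
 *	ret = fip->ops->persist(fip, offset, len, lane, flags);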
+ */ +static const struct rpmem_fip_ops +rpmem_fip_ops[MAX_RPMEM_PROV][MAX_RPMEM_PM] = { + [RPMEM_PROV_LIBFABRIC_VERBS] = { + [RPMEM_PM_GPSPM] = { + .flush = rpmem_fip_persist_gpspm, + .drain = rpmem_fip_drain_nop, + .persist = rpmem_fip_persist_gpspm, + .lanes_init = rpmem_fip_init_lanes_common, + .lanes_init_mem = rpmem_fip_init_mem_lanes_gpspm, + .lanes_fini = rpmem_fip_fini_lanes_common, + .lanes_post = rpmem_fip_post_lanes_common, + }, + [RPMEM_PM_APM] = { + .flush = rpmem_fip_flush_apm, + .drain = rpmem_fip_drain_apm, + .persist = rpmem_fip_persist_apm, + .lanes_init = rpmem_fip_init_lanes_apm, + .lanes_init_mem = rpmem_fip_init_mem_lanes_apm, + .lanes_fini = rpmem_fip_fini_lanes_apm, + .lanes_post = rpmem_fip_post_lanes_common, + }, + }, + [RPMEM_PROV_LIBFABRIC_SOCKETS] = { + [RPMEM_PM_GPSPM] = { + .flush = rpmem_fip_persist_gpspm_sockets, + .drain = rpmem_fip_drain_nop, + .persist = rpmem_fip_persist_gpspm_sockets, + .lanes_init = rpmem_fip_init_lanes_common, + .lanes_init_mem = rpmem_fip_init_mem_lanes_gpspm, + .lanes_fini = rpmem_fip_fini_lanes_common, + .lanes_post = rpmem_fip_post_lanes_common, + }, + [RPMEM_PM_APM] = { + .flush = rpmem_fip_flush_apm, + .drain = rpmem_fip_drain_apm, + .persist = rpmem_fip_persist_apm_sockets, + .lanes_init = rpmem_fip_init_lanes_apm, + .lanes_init_mem = rpmem_fip_init_mem_lanes_apm, + .lanes_fini = rpmem_fip_fini_lanes_apm, + .lanes_post = rpmem_fip_post_lanes_common, + }, + } +}; + +/* + * rpmem_fip_set_attr -- (internal) set required attributes + */ +static void +rpmem_fip_set_attr(struct rpmem_fip *fip, struct rpmem_fip_attr *attr) +{ + fip->raddr = (uint64_t)attr->raddr; + fip->rkey = attr->rkey; + fip->laddr = attr->laddr; + fip->size = attr->size; + fip->buff_size = attr->buff_size; + fip->persist_method = attr->persist_method; + + rpmem_fip_set_nlanes(fip, attr->nlanes); + + /* one for read operation */ + fip->cq_size = rpmem_fip_cq_size(fip->persist_method, + RPMEM_FIP_NODE_CLIENT); + + fip->ops = &rpmem_fip_ops[attr->provider][fip->persist_method]; +} + +/* + * rpmem_fip_init -- initialize fabric provider + */ +struct rpmem_fip * +rpmem_fip_init(const char *node, const char *service, + struct rpmem_fip_attr *attr, unsigned *nlanes) +{ + int ret; + + struct rpmem_fip *fip = calloc(1, sizeof(*fip)); + if (!fip) { + RPMEM_LOG(ERR, "!allocating fabric handle"); + return NULL; + } + + ret = rpmem_fip_getinfo(fip, node, service, + attr->provider, attr->max_wq_size, attr->persist_method); + if (ret) + goto err_getinfo; + + fip->cq_read = attr->provider == RPMEM_PROV_LIBFABRIC_VERBS ? 
+ fi_cq_read : cq_read_infinite; + + rpmem_fip_set_attr(fip, attr); + + *nlanes = fip->nlanes; + + ret = rpmem_fip_init_fabric_res(fip); + if (ret) + goto err_init_fabric_res; + + ret = rpmem_fip_lanes_init(fip); + if (ret) + goto err_init_lanes; + + return fip; +err_init_lanes: + rpmem_fip_fini_fabric_res(fip); +err_init_fabric_res: + fi_freeinfo(fip->fi); +err_getinfo: + free(fip); + return NULL; +} + +/* + * rpmem_fip_fini -- deinitialize fabric provider + */ +void +rpmem_fip_fini(struct rpmem_fip *fip) +{ + fip->ops->lanes_fini(fip); + rpmem_fip_lanes_fini_common(fip); + rpmem_fip_fini_fabric_res(fip); + fi_freeinfo(fip->fi); + free(fip); +} + +/* + * rpmem_fip_connect -- connect to remote peer + */ +int +rpmem_fip_connect(struct rpmem_fip *fip) +{ + int ret; + + ret = rpmem_fip_lanes_connect(fip); + if (ret) + goto err_lanes_connect; + + ret = rpmem_fip_monitor_init(fip); + if (ret) + goto err_monitor; + + ret = rpmem_fip_init_memory(fip); + if (ret) + goto err_init_memory; + + ret = fip->ops->lanes_init_mem(fip); + if (ret) + goto err_init_lanes_mem; + + ret = fip->ops->lanes_post(fip); + if (ret) + goto err_lanes_post; + + return 0; +err_lanes_post: +err_init_lanes_mem: + rpmem_fip_fini_memory(fip); +err_init_memory: + rpmem_fip_monitor_fini(fip); +err_monitor: + rpmem_fip_lanes_shutdown(fip); +err_lanes_connect: + return ret; +} + +/* + * rpmem_fip_close -- close connection to remote peer + */ +int +rpmem_fip_close(struct rpmem_fip *fip) +{ + int ret; + int lret = 0; + + if (unlikely(rpmem_fip_is_closing(fip))) + goto close_monitor; + + rpmem_fip_fini_memory(fip); + + ret = rpmem_fip_lanes_shutdown(fip); + if (ret) + lret = ret; + +close_monitor: + /* close fip monitor */ + ret = rpmem_fip_monitor_fini(fip); + if (ret) + lret = ret; + + return lret; +} + +/* + * rpmem_fip_flush -- perform remote flush operation + */ +int +rpmem_fip_flush(struct rpmem_fip *fip, size_t offset, size_t len, + unsigned lane, unsigned flags) +{ + RPMEM_ASSERT((flags & RPMEM_FLUSH_PERSIST_MASK) <= RPMEM_PERSIST_MAX); + RPMEM_ASSERT(flags != RPMEM_DEEP_PERSIST); + + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + RPMEM_ASSERT(lane < fip->nlanes); + if (unlikely(lane >= fip->nlanes)) + return EINVAL; /* it will be passed to errno */ + + if (unlikely(offset >= fip->size || offset + len > fip->size)) + return EINVAL; /* it will be passed to errno */ + + if (unlikely(len == 0)) + return 0; + + int ret = 0; + while (len > 0) { + size_t tmplen = min(len, fip->fi->ep_attr->max_msg_size); + + ssize_t r = fip->ops->flush(fip, offset, tmplen, lane, flags); + if (r < 0) { + RPMEM_LOG(ERR, "flush operation failed"); + ret = (int)r; + goto err; + } + + tmplen = (size_t)r; + + offset += tmplen; + len -= tmplen; + } +err: + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + return ret; +} + +/* + * rpmem_fip_drain -- perform remote drain operation + */ +int +rpmem_fip_drain(struct rpmem_fip *fip, unsigned lane) +{ + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + RPMEM_ASSERT(lane < fip->nlanes); + if (unlikely(lane >= fip->nlanes)) + return EINVAL; /* it will be passed to errno */ + + int ret = fip->ops->drain(fip, lane); + + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + return ret; +} + +/* + * rpmem_fip_persist -- perform remote persist operation + */ +int +rpmem_fip_persist(struct rpmem_fip *fip, size_t offset, size_t 
len, + unsigned lane, unsigned flags) +{ + RPMEM_ASSERT((flags & RPMEM_FLUSH_PERSIST_MASK) <= RPMEM_PERSIST_MAX); + + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + RPMEM_ASSERT(lane < fip->nlanes); + if (unlikely(lane >= fip->nlanes)) + return EINVAL; /* it will be passed to errno */ + + if (unlikely(offset >= fip->size || offset + len > fip->size)) + return EINVAL; /* it will be passed to errno */ + + if (unlikely(len == 0)) + return 0; + + int ret = 0; + while (len > 0) { + size_t tmplen = min(len, fip->fi->ep_attr->max_msg_size); + + ssize_t r = fip->ops->persist(fip, offset, tmplen, lane, flags); + if (r < 0) { + RPMEM_LOG(ERR, "persist operation failed"); + ret = (int)r; + goto err; + } + tmplen = (size_t)r; + + offset += tmplen; + len -= tmplen; + } +err: + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + return ret; +} + +/* + * rpmem_fip_read -- perform read operation + */ +int +rpmem_fip_read(struct rpmem_fip *fip, void *buff, size_t len, + size_t off, unsigned lane) +{ + int ret; + + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + RPMEM_ASSERT(lane < fip->nlanes); + if (unlikely(lane >= fip->nlanes)) + return EINVAL; /* it will be passed to errno */ + + if (unlikely(len == 0)) { + return 0; + } + + size_t rd_buff_len = len < fip->fi->ep_attr->max_msg_size ? + len : fip->fi->ep_attr->max_msg_size; + + void *rd_buff; /* buffer for read operation */ + struct fid_mr *rd_mr; /* read buffer memory region */ + void *rd_mr_desc; /* read buffer memory descriptor */ + struct rpmem_fip_rlane rd_lane; + + /* allocate buffer for read operation */ + errno = posix_memalign((void **)&rd_buff, Pagesize, + rd_buff_len); + if (errno) { + RPMEM_LOG(ERR, "!allocating read buffer"); + ret = errno; + goto err_malloc_rd_buff; + } + + /* + * Register buffer for read operation. + * The read operation utilizes the READ operation, thus + * the FI_REMOTE_WRITE flag. + */ + ret = fi_mr_reg(fip->domain, rd_buff, + rd_buff_len, FI_REMOTE_WRITE, + 0, 0, 0, &rd_mr, NULL); + if (ret) { + RPMEM_FI_ERR(ret, "registering read buffer"); + goto err_rd_mr; + } + + /* get read buffer local memory descriptor */ + rd_mr_desc = fi_mr_desc(rd_mr); + + /* + * Initialize READ message. The completion is required in order + * to signal the thread that the READ operation has completed. + */ + rpmem_fip_rma_init(&rd_lane.read, rd_mr_desc, 0, + fip->rkey, &rd_lane, FI_COMPLETION); + + size_t rd = 0; + uint8_t *cbuff = buff; + struct rpmem_fip_lane *lanep = &fip->lanes[lane].base; + + while (rd < len) { + size_t rd_len = len - rd < rd_buff_len ? 
+ len - rd : rd_buff_len; + size_t rd_off = off + rd; + uint64_t raddr = fip->raddr + rd_off; + + rpmem_fip_lane_begin(lanep, FI_READ); + + ret = rpmem_fip_readmsg(lanep->ep, &rd_lane.read, + rd_buff, rd_len, raddr); + if (ret) { + RPMEM_FI_ERR(ret, "RMA read"); + goto err_readmsg; + } + + VALGRIND_DO_MAKE_MEM_DEFINED(rd_buff, rd_len); + + ret = rpmem_fip_lane_wait(fip, lanep, FI_READ); + if (ret) { + ERR("error when processing read request"); + goto err_lane_wait; + } + + memcpy(&cbuff[rd], rd_buff, rd_len); + + rd += rd_len; + } + + ret = 0; +err_lane_wait: +err_readmsg: + RPMEM_FI_CLOSE(rd_mr, "unregistering memory"); +err_rd_mr: + free(rd_buff); +err_malloc_rd_buff: + if (unlikely(rpmem_fip_is_closing(fip))) + return ECONNRESET; /* it will be passed to errno */ + + return ret; +} + +/* + * parse_bool -- convert string value to boolean + */ +static int +parse_bool(const char *str_value) +{ + if (strcmp(str_value, "0") == 0 || + strcasecmp(str_value, "false") == 0 || + strcasecmp(str_value, "no") == 0 || + strcasecmp(str_value, "off") == 0) { + return 0; + } + + if (strcmp(str_value, "1") == 0 || + strcasecmp(str_value, "true") == 0 || + strcasecmp(str_value, "yes") == 0 || + strcasecmp(str_value, "on") == 0) { + return 1; + } + + return -1; +} + +/* + * rpmem_fip_param_get -- read environment variable in the libfabric way + * + * - If parameter does not exist the output value is not changed. + * - If the environment variable is not set the output value is not changed. + * - If the environment variable is set and its value is not correct the output + * value is set to error value. + * - If the environment variable is set and its value is correct the output + * value is set according to the environment variable value. + */ +static void +rpmem_fip_param_get(const char *var_name, int *value) +{ + struct fi_param *params; + int count; + int ret = fi_getparams(&params, &count); + if (ret != FI_SUCCESS) { + RPMEM_FI_ERR(ret, "getting fabric parameters list"); + return; + } + + for (int i = 0; i < count; ++i) { + if (strcmp(params[i].name, var_name) != 0) + continue; + if (!params[i].value) { + break; + } + *value = parse_bool(params[i].value); + break; + } + + fi_freeparams(params); +} + +#define LIBFABRIC_FORK_UNSAFE_VAR "FI_FORK_UNSAFE" + +/* + * rpmem_fip_probe_fork_safety -- probe if libfabric is fork safe + */ +void +rpmem_fip_probe_fork_safety(void) +{ + int *fork_unsafe = &Rpmem_fork_unsafe; /* false by default */ + rpmem_fip_param_get(LIBFABRIC_FORK_UNSAFE_VAR, fork_unsafe); +} diff --git a/src/pmdk/src/librpmem/rpmem_fip.h b/src/pmdk/src/librpmem/rpmem_fip.h new file mode 100644 index 000000000..406b61daa --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_fip.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_fip.h -- rpmem libfabric provider module header file + */ + +#ifndef RPMEM_FIP_H +#define RPMEM_FIP_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct rpmem_fip; + +struct rpmem_fip_attr { + enum rpmem_provider provider; + size_t max_wq_size; + enum rpmem_persist_method persist_method; + void *laddr; + size_t size; + size_t buff_size; + unsigned nlanes; + void *raddr; + uint64_t rkey; +}; + +struct rpmem_fip *rpmem_fip_init(const char *node, const char *service, + struct rpmem_fip_attr *attr, unsigned *nlanes); +void rpmem_fip_fini(struct rpmem_fip *fip); + +int rpmem_fip_connect(struct rpmem_fip *fip); +int rpmem_fip_close(struct rpmem_fip *fip); +int 
rpmem_fip_process_start(struct rpmem_fip *fip); +int rpmem_fip_process_stop(struct rpmem_fip *fip); + +int rpmem_fip_flush(struct rpmem_fip *fip, size_t offset, size_t len, + unsigned lane, unsigned flags); + +int rpmem_fip_drain(struct rpmem_fip *fip, unsigned lane); + +int rpmem_fip_persist(struct rpmem_fip *fip, size_t offset, size_t len, + unsigned lane, unsigned flags); + +int rpmem_fip_read(struct rpmem_fip *fip, void *buff, + size_t len, size_t off, unsigned lane); +void rpmem_fip_probe_fork_safety(void); + +size_t rpmem_fip_get_wq_size(struct rpmem_fip *fip); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/librpmem/rpmem_obc.c b/src/pmdk/src/librpmem/rpmem_obc.c new file mode 100644 index 000000000..ace16d7a1 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_obc.c @@ -0,0 +1,677 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * rpmem_obc.c -- rpmem out-of-band connection client source file + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "librpmem.h" +#include "rpmem.h" +#include "rpmem_common.h" +#include "rpmem_obc.h" +#include "rpmem_proto.h" +#include "rpmem_util.h" +#include "rpmem_ssh.h" +#include "out.h" +#include "sys_util.h" +#include "util.h" + +/* + * rpmem_obc -- rpmem out-of-band client connection handle + */ +struct rpmem_obc { + struct rpmem_ssh *ssh; +}; + +/* + * rpmem_obc_is_connected -- (internal) return non-zero value if client is + * connected + */ +static inline int +rpmem_obc_is_connected(struct rpmem_obc *rpc) +{ + return rpc->ssh != NULL; +} + +/* + * rpmem_obc_check_ibc_attr -- (internal) check in-band connection + * attributes + */ +static int +rpmem_obc_check_ibc_attr(struct rpmem_msg_ibc_attr *ibc) +{ + if (ibc->port == 0 || ibc->port > UINT16_MAX) { + ERR("invalid port number received -- %u", ibc->port); + errno = EPROTO; + return -1; + } + + if (ibc->persist_method != RPMEM_PM_GPSPM && + ibc->persist_method != RPMEM_PM_APM) { + ERR("invalid persistency method received -- %u", + ibc->persist_method); + errno = EPROTO; + return -1; + } + + return 0; +} + +/* + * rpmem_obc_check_port -- (internal) verify target node port number + */ +static int +rpmem_obc_check_port(const struct rpmem_target_info *info) +{ + if (!(info->flags & RPMEM_HAS_SERVICE)) + return 0; + + if (*info->service == '\0') { + ERR("invalid port number -- '%s'", info->service); + goto err; + } + + errno = 0; + char *endptr; + long port = strtol(info->service, &endptr, 10); + if (errno || *endptr != '\0') { + ERR("invalid port number -- '%s'", info->service); + goto err; + } + + if (port < 1) { + ERR("port number must be positive -- '%s'", info->service); + goto err; + } + + if (port > UINT16_MAX) { + ERR("port number too large -- '%s'", info->service); + goto err; + } + + return 0; +err: + errno = EINVAL; + return -1; +} + +/* + * rpmem_obc_close_conn -- (internal) close connection + */ +static void +rpmem_obc_close_conn(struct rpmem_obc *rpc) +{ + rpmem_ssh_close(rpc->ssh); + + (void) util_fetch_and_and64(&rpc->ssh, 0); +} + +/* + * rpmem_obc_init_msg_hdr -- (internal) initialize message header + */ +static void +rpmem_obc_set_msg_hdr(struct rpmem_msg_hdr *hdrp, + enum rpmem_msg_type type, size_t size) +{ + hdrp->type = type; + hdrp->size = size; +} + +/* + * rpmem_obc_set_pool_desc -- (internal) fill the pool descriptor field + */ +static void +rpmem_obc_set_pool_desc(struct rpmem_msg_pool_desc *pool_desc, + const char *desc, size_t size) +{ + RPMEM_ASSERT(size <= UINT32_MAX); 
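	/*
	 * The pool descriptor is carried inline, right after the fixed
	 * part of the request message, so callers size the whole message
	 * as one block (see rpmem_obc_alloc_create_msg() below):
	 *
	 *	msg_size = sizeof(struct rpmem_msg_create)
	 *			+ strlen(pool_desc) + 1;
	 */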
+ RPMEM_ASSERT(size > 0); + + pool_desc->size = (uint32_t)size; + memcpy(pool_desc->desc, desc, size); + pool_desc->desc[size - 1] = '\0'; +} + +/* + * rpmem_obc_alloc_create_msg -- (internal) allocate and fill create request + * message + */ +static struct rpmem_msg_create * +rpmem_obc_alloc_create_msg(const struct rpmem_req_attr *req, + const struct rpmem_pool_attr *pool_attr, size_t *msg_sizep) +{ + size_t pool_desc_size = strlen(req->pool_desc) + 1; + size_t msg_size = sizeof(struct rpmem_msg_create) + pool_desc_size; + struct rpmem_msg_create *msg = malloc(msg_size); + if (!msg) { + ERR("!cannot allocate create request message"); + return NULL; + } + + rpmem_obc_set_msg_hdr(&msg->hdr, RPMEM_MSG_TYPE_CREATE, msg_size); + + msg->c.major = RPMEM_PROTO_MAJOR; + msg->c.minor = RPMEM_PROTO_MINOR; + msg->c.pool_size = req->pool_size; + msg->c.nlanes = req->nlanes; + msg->c.provider = req->provider; + msg->c.buff_size = req->buff_size; + + rpmem_obc_set_pool_desc(&msg->pool_desc, + req->pool_desc, pool_desc_size); + + if (pool_attr) { + pack_rpmem_pool_attr(pool_attr, &msg->pool_attr); + } else { + RPMEM_LOG(INFO, "using zeroed pool attributes"); + memset(&msg->pool_attr, 0, sizeof(msg->pool_attr)); + } + + *msg_sizep = msg_size; + return msg; +} + +/* + * rpmem_obc_check_req -- (internal) check request attributes + */ +static int +rpmem_obc_check_req(const struct rpmem_req_attr *req) +{ + if (req->provider >= MAX_RPMEM_PROV) { + ERR("invalid provider specified -- %u", req->provider); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * rpmem_obc_check_hdr_resp -- (internal) check response message header + */ +static int +rpmem_obc_check_hdr_resp(struct rpmem_msg_hdr_resp *resp, + enum rpmem_msg_type type, size_t size) +{ + if (resp->type != type) { + ERR("invalid message type received -- %u", resp->type); + errno = EPROTO; + return -1; + } + + if (resp->size != size) { + ERR("invalid message size received -- %lu", resp->size); + errno = EPROTO; + return -1; + } + + if (resp->status >= MAX_RPMEM_ERR) { + ERR("invalid status received -- %u", resp->status); + errno = EPROTO; + return -1; + } + + if (resp->status) { + enum rpmem_err status = (enum rpmem_err)resp->status; + ERR("%s", rpmem_util_proto_errstr(status)); + errno = rpmem_util_proto_errno(status); + return -1; + } + + return 0; +} + +/* + * rpmem_obc_check_create_resp -- (internal) check create response message + */ +static int +rpmem_obc_check_create_resp(struct rpmem_msg_create_resp *resp) +{ + if (rpmem_obc_check_hdr_resp(&resp->hdr, RPMEM_MSG_TYPE_CREATE_RESP, + sizeof(struct rpmem_msg_create_resp))) + return -1; + + if (rpmem_obc_check_ibc_attr(&resp->ibc)) + return -1; + + return 0; +} + +/* + * rpmem_obc_get_res -- (internal) read response attributes + */ +static void +rpmem_obc_get_res(struct rpmem_resp_attr *res, + struct rpmem_msg_ibc_attr *ibc) +{ + res->port = (unsigned short)ibc->port; + res->rkey = ibc->rkey; + res->raddr = ibc->raddr; + res->persist_method = + (enum rpmem_persist_method)ibc->persist_method; + res->nlanes = ibc->nlanes; +} + +/* + * rpmem_obc_alloc_open_msg -- (internal) allocate and fill open request message + */ +static struct rpmem_msg_open * +rpmem_obc_alloc_open_msg(const struct rpmem_req_attr *req, + const struct rpmem_pool_attr *pool_attr, size_t *msg_sizep) +{ + size_t pool_desc_size = strlen(req->pool_desc) + 1; + size_t msg_size = sizeof(struct rpmem_msg_open) + pool_desc_size; + struct rpmem_msg_open *msg = malloc(msg_size); + if (!msg) { + ERR("!cannot allocate open request message"); 
+ return NULL; + } + + rpmem_obc_set_msg_hdr(&msg->hdr, RPMEM_MSG_TYPE_OPEN, msg_size); + + msg->c.major = RPMEM_PROTO_MAJOR; + msg->c.minor = RPMEM_PROTO_MINOR; + msg->c.pool_size = req->pool_size; + msg->c.nlanes = req->nlanes; + msg->c.provider = req->provider; + msg->c.buff_size = req->buff_size; + + rpmem_obc_set_pool_desc(&msg->pool_desc, + req->pool_desc, pool_desc_size); + + *msg_sizep = msg_size; + return msg; +} + +/* + * rpmem_obc_check_open_resp -- (internal) check open response message + */ +static int +rpmem_obc_check_open_resp(struct rpmem_msg_open_resp *resp) +{ + if (rpmem_obc_check_hdr_resp(&resp->hdr, RPMEM_MSG_TYPE_OPEN_RESP, + sizeof(struct rpmem_msg_open_resp))) + return -1; + + if (rpmem_obc_check_ibc_attr(&resp->ibc)) + return -1; + + return 0; +} + +/* + * rpmem_obc_check_close_resp -- (internal) check close response message + */ +static int +rpmem_obc_check_close_resp(struct rpmem_msg_close_resp *resp) +{ + if (rpmem_obc_check_hdr_resp(&resp->hdr, RPMEM_MSG_TYPE_CLOSE_RESP, + sizeof(struct rpmem_msg_close_resp))) + return -1; + + return 0; +} + +/* + * rpmem_obc_check_set_attr_resp -- (internal) check set attributes response + * message + */ +static int +rpmem_obc_check_set_attr_resp(struct rpmem_msg_set_attr_resp *resp) +{ + if (rpmem_obc_check_hdr_resp(&resp->hdr, RPMEM_MSG_TYPE_SET_ATTR_RESP, + sizeof(struct rpmem_msg_set_attr_resp))) + return -1; + + return 0; +} + +/* + * rpmem_obc_init -- initialize rpmem obc handle + */ +struct rpmem_obc * +rpmem_obc_init(void) +{ + struct rpmem_obc *rpc = calloc(1, sizeof(*rpc)); + if (!rpc) { + RPMEM_LOG(ERR, "!allocation of rpmem obc failed"); + return NULL; + } + + return rpc; +} + +/* + * rpmem_obc_fini -- destroy rpmem obc handle + * + * This function must be called with the connection already closed - after + * calling rpmem_obc_disconnect or after receiving a relevant value from + * rpmem_obc_monitor. + */ +void +rpmem_obc_fini(struct rpmem_obc *rpc) +{ + free(rpc); +} + +/* + * rpmem_obc_connect -- connect to target node + * + * Connects to the target node; the target must be in the following format: + * <addr>[:<port>]. If the port number is not specified the default + * ssh port will be used. The <addr> is translated into IP address. + * + * Returns an error if the connection is already established. + */ +int +rpmem_obc_connect(struct rpmem_obc *rpc, const struct rpmem_target_info *info) +{ + if (rpmem_obc_is_connected(rpc)) { + errno = EALREADY; + goto err_notconnected; + } + + if (rpmem_obc_check_port(info)) + goto err_port; + + rpc->ssh = rpmem_ssh_open(info); + if (!rpc->ssh) + goto err_ssh_open; + + return 0; +err_ssh_open: +err_port: +err_notconnected: + return -1; +} + +/* + * rpmem_obc_disconnect -- close the connection to target node + * + * Returns error if socket is not connected. + */ +int +rpmem_obc_disconnect(struct rpmem_obc *rpc) +{ + if (rpmem_obc_is_connected(rpc)) { + rpmem_obc_close_conn(rpc); + return 0; + } + + errno = ENOTCONN; + return -1; +} + +/* + * rpmem_obc_monitor -- monitor connection with target node + * + * The nonblock variable indicates whether this function should return + * immediately (= 1) or may block (= 0). + * + * If the function detects that the socket was closed by the remote peer, + * it is closed on the local side and set to -1, so there is no need to call + * rpmem_obc_disconnect function. Please take a look at functions' + * descriptions to see which functions cannot be used if the connection + * has already been closed. 
+ * + * This function expects there is no data pending on socket, if any data + * is pending this function returns an error and sets errno to EPROTO. + * + * Return values: + * 0 - not connected + * 1 - connected + * < 0 - error + */ +int +rpmem_obc_monitor(struct rpmem_obc *rpc, int nonblock) +{ + if (!rpmem_obc_is_connected(rpc)) + return 0; + + return rpmem_ssh_monitor(rpc->ssh, nonblock); +} + +/* + * rpmem_obc_create -- perform create request operation + * + * Returns error if connection has not been established yet. + */ +int +rpmem_obc_create(struct rpmem_obc *rpc, + const struct rpmem_req_attr *req, + struct rpmem_resp_attr *res, + const struct rpmem_pool_attr *pool_attr) +{ + if (!rpmem_obc_is_connected(rpc)) { + ERR("out-of-band connection not established"); + errno = ENOTCONN; + goto err_notconnected; + } + + if (rpmem_obc_check_req(req)) + goto err_req; + + size_t msg_size; + struct rpmem_msg_create *msg = + rpmem_obc_alloc_create_msg(req, pool_attr, &msg_size); + if (!msg) + goto err_alloc_msg; + + RPMEM_LOG(INFO, "sending create request message"); + + rpmem_hton_msg_create(msg); + if (rpmem_ssh_send(rpc->ssh, msg, msg_size)) { + ERR("!sending create request message failed"); + goto err_msg_send; + } + + RPMEM_LOG(NOTICE, "create request message sent"); + RPMEM_LOG(INFO, "receiving create request response"); + + struct rpmem_msg_create_resp resp; + if (rpmem_ssh_recv(rpc->ssh, &resp, + sizeof(resp))) { + ERR("!receiving create request response failed"); + goto err_msg_recv; + } + + RPMEM_LOG(NOTICE, "create request response received"); + + rpmem_ntoh_msg_create_resp(&resp); + + if (rpmem_obc_check_create_resp(&resp)) + goto err_msg_resp; + + rpmem_obc_get_res(res, &resp.ibc); + + free(msg); + return 0; +err_msg_resp: +err_msg_recv: +err_msg_send: + free(msg); +err_alloc_msg: +err_req: +err_notconnected: + return -1; +} + +/* + * rpmem_obc_open -- perform open request operation + * + * Returns error if connection is not already established. + */ +int +rpmem_obc_open(struct rpmem_obc *rpc, + const struct rpmem_req_attr *req, + struct rpmem_resp_attr *res, + struct rpmem_pool_attr *pool_attr) +{ + if (!rpmem_obc_is_connected(rpc)) { + ERR("out-of-band connection not established"); + errno = ENOTCONN; + goto err_notconnected; + } + + if (rpmem_obc_check_req(req)) + goto err_req; + + size_t msg_size; + struct rpmem_msg_open *msg = + rpmem_obc_alloc_open_msg(req, pool_attr, &msg_size); + if (!msg) + goto err_alloc_msg; + + RPMEM_LOG(INFO, "sending open request message"); + + rpmem_hton_msg_open(msg); + if (rpmem_ssh_send(rpc->ssh, msg, msg_size)) { + ERR("!sending open request message failed"); + goto err_msg_send; + } + + RPMEM_LOG(NOTICE, "open request message sent"); + RPMEM_LOG(INFO, "receiving open request response"); + + struct rpmem_msg_open_resp resp; + if (rpmem_ssh_recv(rpc->ssh, &resp, sizeof(resp))) { + ERR("!receiving open request response failed"); + goto err_msg_recv; + } + + RPMEM_LOG(NOTICE, "open request response received"); + + rpmem_ntoh_msg_open_resp(&resp); + + if (rpmem_obc_check_open_resp(&resp)) + goto err_msg_resp; + + rpmem_obc_get_res(res, &resp.ibc); + if (pool_attr) + unpack_rpmem_pool_attr(&resp.pool_attr, pool_attr); + + free(msg); + return 0; +err_msg_resp: +err_msg_recv: +err_msg_send: + free(msg); +err_alloc_msg: +err_req: +err_notconnected: + return -1; +} + +/* + * rpmem_obc_set_attr -- perform set attributes request operation + * + * Returns error if connection is not already established. 
+ */ +int +rpmem_obc_set_attr(struct rpmem_obc *rpc, + const struct rpmem_pool_attr *pool_attr) +{ + if (!rpmem_obc_is_connected(rpc)) { + ERR("out-of-band connection not established"); + errno = ENOTCONN; + goto err_notconnected; + } + + struct rpmem_msg_set_attr msg; + rpmem_obc_set_msg_hdr(&msg.hdr, RPMEM_MSG_TYPE_SET_ATTR, sizeof(msg)); + if (pool_attr) { + memcpy(&msg.pool_attr, pool_attr, sizeof(msg.pool_attr)); + } else { + RPMEM_LOG(INFO, "using zeroed pool attributes"); + memset(&msg.pool_attr, 0, sizeof(msg.pool_attr)); + } + + RPMEM_LOG(INFO, "sending set attributes request message"); + + rpmem_hton_msg_set_attr(&msg); + if (rpmem_ssh_send(rpc->ssh, &msg, sizeof(msg))) { + ERR("!sending set attributes request message failed"); + goto err_msg_send; + } + + RPMEM_LOG(NOTICE, "set attributes request message sent"); + RPMEM_LOG(INFO, "receiving set attributes request response"); + + struct rpmem_msg_set_attr_resp resp; + if (rpmem_ssh_recv(rpc->ssh, &resp, + sizeof(resp))) { + ERR("!receiving set attributes request response failed"); + goto err_msg_recv; + } + + RPMEM_LOG(NOTICE, "set attributes request response received"); + + rpmem_ntoh_msg_set_attr_resp(&resp); + + if (rpmem_obc_check_set_attr_resp(&resp)) + goto err_msg_resp; + + return 0; +err_msg_resp: +err_msg_recv: +err_msg_send: +err_notconnected: + return -1; +} + +/* + * rpmem_obc_close -- perform close request operation + * + * Returns error if connection is not already established. + * + * NOTE: this function does not close the connection, but sends close request + * message to remote node and receives a response. The connection must be + * closed using rpmem_obc_disconnect function. + */ +int +rpmem_obc_close(struct rpmem_obc *rpc, int flags) +{ + if (!rpmem_obc_is_connected(rpc)) { + errno = ENOTCONN; + return -1; + } + + struct rpmem_msg_close msg; + rpmem_obc_set_msg_hdr(&msg.hdr, RPMEM_MSG_TYPE_CLOSE, sizeof(msg)); + msg.flags = (uint32_t)flags; + + RPMEM_LOG(INFO, "sending close request message"); + + rpmem_hton_msg_close(&msg); + if (rpmem_ssh_send(rpc->ssh, &msg, sizeof(msg))) { + RPMEM_LOG(ERR, "!sending close request failed"); + return -1; + } + + RPMEM_LOG(NOTICE, "close request message sent"); + RPMEM_LOG(INFO, "receiving close request response"); + + struct rpmem_msg_close_resp resp; + if (rpmem_ssh_recv(rpc->ssh, &resp, + sizeof(resp))) { + RPMEM_LOG(ERR, "!receiving close request response failed"); + return -1; + } + + RPMEM_LOG(NOTICE, "close request response received"); + + rpmem_ntoh_msg_close_resp(&resp); + + if (rpmem_obc_check_close_resp(&resp)) + return -1; + + return 0; +} diff --git a/src/pmdk/src/librpmem/rpmem_obc.h b/src/pmdk/src/librpmem/rpmem_obc.h new file mode 100644 index 000000000..c372baa5a --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_obc.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_obc.h -- rpmem out-of-band connection client header file + */ + +#ifndef RPMEM_OBC_H +#define RPMEM_OBC_H 1 + +#include +#include + +#include "librpmem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rpmem_obc; + +struct rpmem_obc *rpmem_obc_init(void); +void rpmem_obc_fini(struct rpmem_obc *rpc); + +int rpmem_obc_connect(struct rpmem_obc *rpc, + const struct rpmem_target_info *info); +int rpmem_obc_disconnect(struct rpmem_obc *rpc); + +int rpmem_obc_monitor(struct rpmem_obc *rpc, int nonblock); + +int rpmem_obc_create(struct rpmem_obc *rpc, + const struct rpmem_req_attr *req, + struct rpmem_resp_attr *res, + const 
struct rpmem_pool_attr *pool_attr); +int rpmem_obc_open(struct rpmem_obc *rpc, + const struct rpmem_req_attr *req, + struct rpmem_resp_attr *res, + struct rpmem_pool_attr *pool_attr); +int rpmem_obc_set_attr(struct rpmem_obc *rpc, + const struct rpmem_pool_attr *pool_attr); +int rpmem_obc_close(struct rpmem_obc *rpc, int flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/librpmem/rpmem_ssh.c b/src/pmdk/src/librpmem/rpmem_ssh.c new file mode 100644 index 000000000..7157958bf --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_ssh.c @@ -0,0 +1,442 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_ssh.c -- rpmem ssh transport layer source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "os.h" +#include "out.h" +#include "rpmem_common.h" +#include "rpmem_ssh.h" +#include "rpmem_cmd.h" +#include "rpmem_util.h" + +#define ERR_BUFF_LEN 4095 + +/* +1 in order to be sure it is always null-terminated */ +static char error_str[ERR_BUFF_LEN + 1]; + +struct rpmem_ssh { + struct rpmem_cmd *cmd; +}; + +/* + * get_ssh -- return ssh command name + */ +static const char * +get_ssh(void) +{ + char *cmd = os_getenv(RPMEM_SSH_ENV); + if (!cmd) + cmd = RPMEM_DEF_SSH; + + return cmd; +} + +/* + * get_user_at_node -- returns string containing user@node + */ +static char * +get_user_at_node(const struct rpmem_target_info *info) +{ + char *user_at_node = NULL; + + if (info->flags & RPMEM_HAS_USER) { + size_t ulen = strlen(info->user); + size_t nlen = strlen(info->node); + size_t len = ulen + 1 + nlen + 1; + user_at_node = malloc(len); + if (!user_at_node) + goto err_malloc; + int ret = util_snprintf(user_at_node, len, "%s@%s", + info->user, info->node); + if (ret < 0) + goto err_printf; + } else { + user_at_node = strdup(info->node); + if (!user_at_node) + goto err_malloc; + } + + return user_at_node; +err_printf: + free(user_at_node); +err_malloc: + return NULL; +} + +/* + * get_cmd -- return an RPMEM_CMD with appended list of arguments + */ +static char * +get_cmd(const char **argv) +{ + const char *env_cmd = rpmem_util_cmd_get(); + char *cmd = strdup(env_cmd); + if (!cmd) + return NULL; + + size_t cmd_len = strlen(cmd) + 1; + + const char *arg; + while ((arg = *argv++) != NULL) { + size_t len = strlen(arg); + size_t new_cmd_len = cmd_len + len + 1; + char *tmp = realloc(cmd, new_cmd_len); + if (!tmp) + goto err; + + cmd = tmp; + + /* append the argument to the command */ + cmd[cmd_len - 1] = ' '; + memcpy(&cmd[cmd_len], arg, len); + cmd[cmd_len + len] = '\0'; + + cmd_len = new_cmd_len; + } + + return cmd; +err: + free(cmd); + return NULL; +} + +/* + * valist_to_argv -- convert va_list to argv array + */ +static const char ** +valist_to_argv(va_list args) +{ + const char **argv = malloc(sizeof(const char *)); + if (!argv) + return NULL; + + argv[0] = NULL; + size_t nargs = 0; + + const char *arg; + while ((arg = va_arg(args, const char *)) != NULL) { + nargs++; + const char **tmp = realloc(argv, + (nargs + 1) * sizeof(const char *)); + if (!tmp) + goto err; + + argv = tmp; + argv[nargs - 1] = arg; + argv[nargs] = NULL; + } + + return argv; +err: + free(argv); + return NULL; +} + +/* + * rpmem_ssh_execv -- open ssh connection and run $RPMEMD_CMD with + * additional NULL-terminated list of arguments. 
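+ *
+ * For illustration only (hypothetical target and port): with get_ssh()
+ * returning the default "ssh", get_cmd() returning "rpmemd ...", a target
+ * of user@node with service "2222" and RPMEM_FLAGS_USE_IPV4 unset, the
+ * command assembled below is roughly equivalent to:
+ *
+ *	ssh -p 2222 -T -oBatchMode=yes user@node "rpmemd ..."
+ *
+ * with the remote command passed as a single argument.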
+ */ +struct rpmem_ssh * +rpmem_ssh_execv(const struct rpmem_target_info *info, const char **argv) +{ + struct rpmem_ssh *rps = calloc(1, sizeof(*rps)); + if (!rps) + goto err_zalloc; + + char *user_at_node = get_user_at_node(info); + if (!user_at_node) + goto err_user_node; + + rps->cmd = rpmem_cmd_init(); + if (!rps->cmd) + goto err_cmd_init; + + char *cmd = get_cmd(argv); + if (!cmd) + goto err_cmd; + + int ret = rpmem_cmd_push(rps->cmd, get_ssh()); + if (ret) + goto err_push; + + if (info->flags & RPMEM_HAS_SERVICE) { + /* port number is optional */ + ret = rpmem_cmd_push(rps->cmd, "-p"); + if (ret) + goto err_push; + ret = rpmem_cmd_push(rps->cmd, info->service); + if (ret) + goto err_push; + } + + /* + * Disable allocating pseudo-terminal in order to transfer binary + * data safely. + */ + ret = rpmem_cmd_push(rps->cmd, "-T"); + if (ret) + goto err_push; + + if (info->flags & RPMEM_FLAGS_USE_IPV4) { + ret = rpmem_cmd_push(rps->cmd, "-4"); + if (ret) + goto err_push; + } + + /* fail if password required for authentication */ + ret = rpmem_cmd_push(rps->cmd, "-oBatchMode=yes"); + if (ret) + goto err_push; + + ret = rpmem_cmd_push(rps->cmd, user_at_node); + if (ret) + goto err_push; + + ret = rpmem_cmd_push(rps->cmd, cmd); + if (ret) + goto err_push; + + ret = rpmem_cmd_run(rps->cmd); + if (ret) + goto err_run; + + free(user_at_node); + free(cmd); + + return rps; +err_run: +err_push: + free(cmd); +err_cmd: + rpmem_cmd_fini(rps->cmd); +err_cmd_init: + free(user_at_node); +err_user_node: + free(rps); +err_zalloc: + return NULL; +} + +/* + * rpmem_ssh_exec -- open ssh connection and run $RPMEMD_CMD with + * additional NULL-terminated list of arguments. + */ +struct rpmem_ssh * +rpmem_ssh_exec(const struct rpmem_target_info *info, ...) +{ + struct rpmem_ssh *ssh; + + va_list args; + va_start(args, info); + + const char **argv = valist_to_argv(args); + if (argv) + ssh = rpmem_ssh_execv(info, argv); + else + ssh = NULL; + + va_end(args); + + free(argv); + + return ssh; +} + +/* + * rpmem_ssh_open -- open ssh connection with specified node and wait for status + */ +struct rpmem_ssh * +rpmem_ssh_open(const struct rpmem_target_info *info) +{ + struct rpmem_ssh *ssh = rpmem_ssh_exec(info, NULL); + if (!ssh) + return NULL; + + /* + * Read initial status from invoked command. + * This is for synchronization purposes and to make it possible + * to inform client that command's initialization failed. + */ + int32_t status; + int ret = rpmem_ssh_recv(ssh, &status, sizeof(status)); + if (ret) { + if (ret == 1 || errno == ECONNRESET) + ERR("%s", rpmem_ssh_strerror(ssh, errno)); + else + ERR("!%s", info->node); + goto err_recv_status; + } + + if (status) { + ERR("%s: unexpected status received -- '%d'", + info->node, status); + errno = status; + goto err_status; + } + + RPMEM_LOG(INFO, "received status: %u", status); + + return ssh; +err_recv_status: +err_status: + rpmem_ssh_close(ssh); + return NULL; +} + +/* + * rpmem_ssh_close -- close ssh connection + */ +int +rpmem_ssh_close(struct rpmem_ssh *rps) +{ + int ret, rv; + + rpmem_cmd_term(rps->cmd); + rv = rpmem_cmd_wait(rps->cmd, &ret); + if (rv) + return rv; + + rpmem_cmd_fini(rps->cmd); + free(rps); + + if (WIFEXITED(ret)) + return WEXITSTATUS(ret); + + if (WIFSIGNALED(ret)) { + ERR("signal received -- %d", WTERMSIG(ret)); + return -1; + } + + ERR("exit status -- %d", WEXITSTATUS(ret)); + + return -1; +} + +/* + * rpmem_ssh_send -- send data using ssh transport layer + * + * The data is encoded using base64. 
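+ *
+ * Returns zero if the whole buffer was sent; otherwise returns non-zero
+ * with errno set, and a connection closed by the peer is reported as
+ * ECONNRESET (see the EPIPE handling below).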
+ */ +int +rpmem_ssh_send(struct rpmem_ssh *rps, const void *buff, size_t len) +{ + int ret = rpmem_xwrite(rps->cmd->fd_in, buff, len, MSG_NOSIGNAL); + if (ret == 1) { + errno = ECONNRESET; + } else if (ret < 0) { + if (errno == EPIPE) + errno = ECONNRESET; + } + + return ret; +} + +/* + * rpmem_ssh_recv -- receive data using ssh transport layer + * + * The received data is decoded using base64. + */ +int +rpmem_ssh_recv(struct rpmem_ssh *rps, void *buff, size_t len) +{ + int ret = rpmem_xread(rps->cmd->fd_out, buff, + len, MSG_NOSIGNAL); + if (ret == 1) { + errno = ECONNRESET; + } else if (ret < 0) { + if (errno == EPIPE) + errno = ECONNRESET; + } + + return ret; +} + +/* + * rpmem_ssh_monitor -- check connection state of ssh + * + * Return value: + * 0 - disconnected + * 1 - connected + * <0 - error + */ +int +rpmem_ssh_monitor(struct rpmem_ssh *rps, int nonblock) +{ + uint32_t buff; + int flags = MSG_PEEK; + if (nonblock) + flags |= MSG_DONTWAIT; + + int ret = rpmem_xread(rps->cmd->fd_out, &buff, sizeof(buff), flags); + + if (!ret) { + errno = EPROTO; + return -1; + } + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + return 1; + else + return ret; + } + + return 0; +} + +/* + * rpmem_ssh_strerror -- read error using stderr channel + */ +const char * +rpmem_ssh_strerror(struct rpmem_ssh *rps, int oerrno) +{ + size_t len = 0; + ssize_t ret; + while ((ret = read(rps->cmd->fd_err, error_str + len, + ERR_BUFF_LEN - len))) { + if (ret < 0) + return "reading error string failed"; + + len += (size_t)ret; + } + error_str[len] = '\0'; + + if (len == 0) { + int ret; + if (oerrno) { + char buff[UTIL_MAX_ERR_MSG]; + util_strerror(oerrno, buff, UTIL_MAX_ERR_MSG); + ret = util_snprintf(error_str, ERR_BUFF_LEN, + "%s", buff); + } else { + ret = util_snprintf(error_str, ERR_BUFF_LEN, + "unknown error"); + } + if (ret < 0) + FATAL("!snprintf"); + } else { + /* get rid of new line and carriage return chars */ + char *cr = strchr(error_str, '\r'); + if (cr) + *cr = '\0'; + + char *nl = strchr(error_str, '\n'); + if (nl) + *nl = '\0'; + } + + return error_str; +} diff --git a/src/pmdk/src/librpmem/rpmem_ssh.h b/src/pmdk/src/librpmem/rpmem_ssh.h new file mode 100644 index 000000000..97a76ccd1 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_ssh.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_ssh.h -- rpmem ssh transport layer header file + */ +#ifndef RPMEM_SSH_H +#define RPMEM_SSH_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct rpmem_ssh; + +struct rpmem_ssh *rpmem_ssh_open(const struct rpmem_target_info *info); +struct rpmem_ssh *rpmem_ssh_exec(const struct rpmem_target_info *info, ...); +struct rpmem_ssh *rpmem_ssh_execv(const struct rpmem_target_info *info, + const char **argv); +int rpmem_ssh_close(struct rpmem_ssh *rps); + +int rpmem_ssh_send(struct rpmem_ssh *rps, const void *buff, size_t len); +int rpmem_ssh_recv(struct rpmem_ssh *rps, void *buff, size_t len); +int rpmem_ssh_monitor(struct rpmem_ssh *rps, int nonblock); + +const char *rpmem_ssh_strerror(struct rpmem_ssh *rps, int oerrno); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/librpmem/rpmem_util.c b/src/pmdk/src/librpmem/rpmem_util.c new file mode 100644 index 000000000..6709d7298 --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_util.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * rpmem_util.c -- util functions for librpmem source file + 
*/ + +#include +#include +#include +#include + +#include "out.h" +#include "os.h" +#include "librpmem.h" +#include "rpmem_proto.h" +#include "rpmem_common.h" +#include "rpmem_util.h" + +static const struct rpmem_err_str_errno { + int err; + const char *str; +} rpmem_err_str_errno[MAX_RPMEM_ERR] = { + [RPMEM_SUCCESS] = { + .err = 0, + .str = "Success", + }, + [RPMEM_ERR_BADPROTO] = { + .err = EPROTONOSUPPORT, + .str = "Protocol version number mismatch", + }, + [RPMEM_ERR_BADNAME] = { + .err = EINVAL, + .str = "Invalid pool descriptor", + }, + [RPMEM_ERR_BADSIZE] = { + .err = EFBIG, + .str = "Invalid pool size", + }, + [RPMEM_ERR_BADNLANES] = { + .err = EINVAL, + .str = "Invalid number of lanes", + }, + [RPMEM_ERR_BADPROVIDER] = { + .err = EINVAL, + .str = "Invalid provider", + }, + [RPMEM_ERR_FATAL] = { + .err = EREMOTEIO, + .str = "Fatal error", + }, + [RPMEM_ERR_FATAL_CONN] = { + .err = ECONNABORTED, + .str = "Fatal in-band connection error", + }, + [RPMEM_ERR_BUSY] = { + .err = EBUSY, + .str = "Pool already in use", + }, + [RPMEM_ERR_EXISTS] = { + .err = EEXIST, + .str = "Pool already exists", + }, + [RPMEM_ERR_PROVNOSUP] = { + .err = EMEDIUMTYPE, + .str = "Provider not supported", + }, + [RPMEM_ERR_NOEXIST] = { + .err = ENOENT, + .str = "Pool set or its part doesn't exist or it is " + "unavailable", + }, + [RPMEM_ERR_NOACCESS] = { + .err = EACCES, + .str = "Pool set permission denied", + }, + [RPMEM_ERR_POOL_CFG] = { + .err = EINVAL, + .str = "Invalid pool set configuration", + }, +}; + +static char *Rpmem_cmds; +static char **Rpmem_cmd_arr; +static size_t Rpmem_current_cmd; +static size_t Rpmem_ncmds; + +#define RPMEM_CMD_SEPARATOR '|' + +/* + * rpmem_util_proto_errstr -- return error string for error code + */ +const char * +rpmem_util_proto_errstr(enum rpmem_err err) +{ + RPMEM_ASSERT(err < MAX_RPMEM_ERR); + + const char *ret = rpmem_err_str_errno[err].str; + RPMEM_ASSERT(ret); + + return ret; +} + +/* + * rpmem_util_proto_errno -- return appropriate errno value for error code + */ +int +rpmem_util_proto_errno(enum rpmem_err err) +{ + RPMEM_ASSERT(err < MAX_RPMEM_ERR); + + return rpmem_err_str_errno[err].err; +} + +/* + * rpmem_util_cmds_inc -- increase size of array for rpmem commands + */ +static void +rpmem_util_cmds_inc(void) +{ + Rpmem_ncmds++; + Rpmem_cmd_arr = realloc(Rpmem_cmd_arr, + Rpmem_ncmds * sizeof(*Rpmem_cmd_arr)); + if (!Rpmem_cmd_arr) + RPMEM_FATAL("!realloc"); + +} + +/* + * rpmem_util_cmds_init -- read a RPMEM_CMD from the environment variable + */ +void +rpmem_util_cmds_init(void) +{ + char *cmd = os_getenv(RPMEM_CMD_ENV); + if (!cmd) + cmd = RPMEM_DEF_CMD; + + Rpmem_cmds = strdup(cmd); + if (!Rpmem_cmds) + RPMEM_FATAL("!strdup"); + + char *next = Rpmem_cmds; + while (next) { + rpmem_util_cmds_inc(); + Rpmem_cmd_arr[Rpmem_ncmds - 1] = next; + + next = strchr(next, RPMEM_CMD_SEPARATOR); + if (next) { + *next = '\0'; + next++; + } + } +} + +/* + * rpmem_util_env_fini -- release RPMEM_CMD copy + */ +void +rpmem_util_cmds_fini(void) +{ + RPMEM_ASSERT(Rpmem_cmds); + RPMEM_ASSERT(Rpmem_cmd_arr); + RPMEM_ASSERT(Rpmem_current_cmd < Rpmem_ncmds); + + free(Rpmem_cmds); + Rpmem_cmds = NULL; + + free(Rpmem_cmd_arr); + Rpmem_cmd_arr = NULL; + + Rpmem_ncmds = 0; + Rpmem_current_cmd = 0; +} + +/* + * rpmem_util_cmd_get -- get a next command from RPMEM_CMD + * + * RPMEM_CMD can contain multiple commands separated by RPMEM_CMD_SEPARATOR. 
+ * Commands from RPMEM_CMD are read sequentially and used to establish out of + * band connections to remote nodes in the order read from a poolset file. + * + */ +const char * +rpmem_util_cmd_get(void) +{ + RPMEM_ASSERT(Rpmem_cmds); + RPMEM_ASSERT(Rpmem_cmd_arr); + RPMEM_ASSERT(Rpmem_current_cmd < Rpmem_ncmds); + + char *ret = Rpmem_cmd_arr[Rpmem_current_cmd]; + + Rpmem_current_cmd = (Rpmem_current_cmd + 1) % Rpmem_ncmds; + + return ret; +} + +/* + * rpmem_util_get_env_uint -- read the unsigned value from environment + */ +static void +rpmem_util_get_env_uint(const char *env, unsigned *pval) +{ + char *env_val = os_getenv(env); + if (env_val && env_val[0] != '\0') { + char *endptr; + errno = 0; + + long val = strtol(env_val, &endptr, 10); + + if (endptr[0] != '\0' || val <= 0 || + (errno == ERANGE && + (val == LONG_MAX || val == LONG_MIN))) { + RPMEM_LOG(ERR, "%s variable must be a positive integer", + env); + } else { + *pval = val < UINT_MAX ? (unsigned)val: UINT_MAX; + } + } +} + +/* + * rpmem_util_get_env_max_nlanes -- read the maximum number of lanes from + * RPMEM_MAX_NLANES + */ +void +rpmem_util_get_env_max_nlanes(unsigned *max_nlanes) +{ + rpmem_util_get_env_uint(RPMEM_MAX_NLANES_ENV, max_nlanes); +} + +/* + * rpmem_util_get_env_wq_size -- read the required WQ size from env + */ +void +rpmem_util_get_env_wq_size(unsigned *wq_size) +{ + rpmem_util_get_env_uint(RPMEM_WQ_SIZE_ENV, wq_size); +} diff --git a/src/pmdk/src/librpmem/rpmem_util.h b/src/pmdk/src/librpmem/rpmem_util.h new file mode 100644 index 000000000..d6006d8bd --- /dev/null +++ b/src/pmdk/src/librpmem/rpmem_util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_util.h -- util functions for librpmem header file + */ + +#ifndef RPMEM_UTIL_H +#define RPMEM_UTIL_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + LERR = 1, + LWARN = 2, + LNOTICE = 3, + LINFO = 4, + _LDBG = 10, +}; + +#define RPMEM_LOG(level, fmt, args...) LOG(L##level, fmt, ## args) +#define RPMEM_DBG(fmt, args...) LOG(_LDBG, fmt, ## args) +#define RPMEM_FATAL(fmt, args...) FATAL(fmt, ## args) +#define RPMEM_ASSERT(cond) ASSERT(cond) + +#define RPMEM_PERSIST_FLAGS_ALL RPMEM_PERSIST_RELAXED +#define RPMEM_PERSIST_FLAGS_MASK ((unsigned)(~RPMEM_PERSIST_FLAGS_ALL)) + +#define RPMEM_FLUSH_FLAGS_ALL RPMEM_FLUSH_RELAXED +#define RPMEM_FLUSH_FLAGS_MASK ((unsigned)(~RPMEM_FLUSH_FLAGS_ALL)) + +const char *rpmem_util_proto_errstr(enum rpmem_err err); +int rpmem_util_proto_errno(enum rpmem_err err); + +void rpmem_util_cmds_init(void); +void rpmem_util_cmds_fini(void); +const char *rpmem_util_cmd_get(void); +void rpmem_util_get_env_max_nlanes(unsigned *max_nlanes); +void rpmem_util_get_env_wq_size(unsigned *wq_size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/libvmem/README.md b/src/pmdk/src/libvmem/README.md new file mode 100644 index 000000000..97deea155 --- /dev/null +++ b/src/pmdk/src/libvmem/README.md @@ -0,0 +1,2 @@ +This library has been moved to a +[separate repository](https://github.com/pmem/vmem). diff --git a/src/pmdk/src/libvmmalloc/README.md b/src/pmdk/src/libvmmalloc/README.md new file mode 100644 index 000000000..97deea155 --- /dev/null +++ b/src/pmdk/src/libvmmalloc/README.md @@ -0,0 +1,2 @@ +This library has been moved to a +[separate repository](https://github.com/pmem/vmem). 
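The RPMEM_CMD handling in rpmem_util.c above splits the variable on '|' and hands the commands out round-robin, one per out-of-band connection. A hedged sketch of the observable behaviour (the command strings and the use of setenv(3) are purely illustrative; only functions from that file are called):

	setenv("RPMEM_CMD", "rpmemd-a|rpmemd-b", 1);
	rpmem_util_cmds_init();
	const char *c0 = rpmem_util_cmd_get();	/* "rpmemd-a" */
	const char *c1 = rpmem_util_cmd_get();	/* "rpmemd-b" */
	const char *c2 = rpmem_util_cmd_get();	/* wraps around to "rpmemd-a" */
	rpmem_util_cmds_fini();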
diff --git a/src/pmdk/src/rpmem_common/Makefile b/src/pmdk/src/rpmem_common/Makefile new file mode 100644 index 000000000..006de394f --- /dev/null +++ b/src/pmdk/src/rpmem_common/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2016, Intel Corporation + +# +# src/rpmem_common/Makefile -- Makefile for rpmem_common +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST)))../.. +HEADERS = $(wildcard *.h) + +include $(TOP)/src/common.inc + +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addsuffix tmp, $(HEADERS)) +endif + +all: $(TMP_HEADERS) + +cstyle: + $(CSTYLE) *.[ch] + +%.htmp: %.h + $(call check-cstyle, $<, $@) + +clean: + $(RM) $(TMP_HEADERS) + +clobber: clean + +sparse: +ifeq ($(BUILD_RPMEM),y) + $(sparse-c) +endif diff --git a/src/pmdk/src/rpmem_common/rpmem_common.c b/src/pmdk/src/rpmem_common/rpmem_common.c new file mode 100644 index 000000000..db4f4f2a6 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_common.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_common.c -- common definitions for librpmem and rpmemd + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "rpmem_common.h" +#include "rpmem_proto.h" +#include "rpmem_common_log.h" +#include "os.h" + +unsigned Rpmem_max_nlanes = UINT_MAX; + +/* + * work queue of size 50 gives best performance of consecutive rpmem_flush + * operations with smallest used resources. Default value obtained empirically. + */ +unsigned Rpmem_wq_size = 50; + +/* + * If set, indicates libfabric does not support fork() and consecutive calls to + * rpmem_create/rpmem_open must fail. + */ +int Rpmem_fork_unsafe; + +/* + * rpmem_xwrite -- send entire buffer or fail + * + * Returns 1 if send returned 0. + */ +int +rpmem_xwrite(int fd, const void *buf, size_t len, int flags) +{ + size_t wr = 0; + const uint8_t *cbuf = buf; + while (wr < len) { + ssize_t sret; + if (!flags) + sret = write(fd, &cbuf[wr], len - wr); + else + sret = send(fd, &cbuf[wr], len - wr, flags); + + if (sret == 0) + return 1; + + if (sret < 0) + return (int)sret; + + wr += (size_t)sret; + } + + return 0; +} + +/* + * rpmem_xread -- read entire buffer or fail + * + * Returns 1 if recv returned 0. + */ +int +rpmem_xread(int fd, void *buf, size_t len, int flags) +{ + size_t rd = 0; + uint8_t *cbuf = buf; + while (rd < len) { + ssize_t sret; + + if (!flags) + sret = read(fd, &cbuf[rd], len - rd); + else + sret = recv(fd, &cbuf[rd], len - rd, flags); + + if (sret == 0) { + RPMEMC_DBG(ERR, "recv/read returned 0"); + return 1; + } + + if (sret < 0) + return (int)sret; + + rd += (size_t)sret; + } + + return 0; +} + +static const char *pm2str[MAX_RPMEM_PM] = { + [RPMEM_PM_APM] = "Appliance Persistency Method", + [RPMEM_PM_GPSPM] = "General Purpose Server Persistency Method", +}; + +/* + * rpmem_persist_method_to_str -- convert enum rpmem_persist_method to string + */ +const char * +rpmem_persist_method_to_str(enum rpmem_persist_method pm) +{ + if (pm >= MAX_RPMEM_PM) + return NULL; + + return pm2str[pm]; +} + +static const char *provider2str[MAX_RPMEM_PROV] = { + [RPMEM_PROV_LIBFABRIC_VERBS] = "verbs", + [RPMEM_PROV_LIBFABRIC_SOCKETS] = "sockets", +}; + +/* + * rpmem_provider_from_str -- convert string to enum rpmem_provider + * + * Returns RPMEM_PROV_UNKNOWN if provider is not known. 
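+ *
+ * Example mappings, per the provider2str table above:
+ *
+ *	"verbs"       -> RPMEM_PROV_LIBFABRIC_VERBS
+ *	"sockets"     -> RPMEM_PROV_LIBFABRIC_SOCKETS
+ *	anything else -> RPMEM_PROV_UNKNOWN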
+ */ +enum rpmem_provider +rpmem_provider_from_str(const char *str) +{ + for (enum rpmem_provider p = 0; p < MAX_RPMEM_PROV; p++) { + if (provider2str[p] && strcmp(str, provider2str[p]) == 0) + return p; + } + + return RPMEM_PROV_UNKNOWN; +} + +/* + * rpmem_provider_to_str -- convert enum rpmem_provider to string + */ +const char * +rpmem_provider_to_str(enum rpmem_provider provider) +{ + if (provider >= MAX_RPMEM_PROV) + return NULL; + + return provider2str[provider]; +} + +/* + * rpmem_get_ip_str -- converts socket address to string + */ +const char * +rpmem_get_ip_str(const struct sockaddr *addr) +{ + static char str[INET6_ADDRSTRLEN + NI_MAXSERV + 1]; + char ip[INET6_ADDRSTRLEN]; + struct sockaddr_in *in4; + struct sockaddr_in6 *in6; + + switch (addr->sa_family) { + case AF_INET: + in4 = (struct sockaddr_in *)addr; + if (!inet_ntop(AF_INET, &in4->sin_addr, ip, sizeof(ip))) + return NULL; + if (util_snprintf(str, sizeof(str), "%s:%u", + ip, ntohs(in4->sin_port)) < 0) + return NULL; + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)addr; + if (!inet_ntop(AF_INET6, &in6->sin6_addr, ip, sizeof(ip))) + return NULL; + if (util_snprintf(str, sizeof(str), "%s:%u", + ip, ntohs(in6->sin6_port)) < 0) + return NULL; + break; + default: + return NULL; + } + + return str; +} + +/* + * rpmem_target_parse -- parse target info + */ +struct rpmem_target_info * +rpmem_target_parse(const char *target) +{ + struct rpmem_target_info *info = calloc(1, sizeof(*info)); + if (!info) + return NULL; + + char *str = strdup(target); + if (!str) + goto err_strdup; + + char *tmp = strchr(str, '@'); + if (tmp) { + *tmp = '\0'; + info->flags |= RPMEM_HAS_USER; + strncpy(info->user, str, sizeof(info->user) - 1); + tmp++; + } else { + tmp = str; + } + + if (*tmp == '[') { + tmp++; + /* IPv6 */ + char *end = strchr(tmp, ']'); + if (!end) { + errno = EINVAL; + goto err_ipv6; + } + + *end = '\0'; + strncpy(info->node, tmp, sizeof(info->node) - 1); + tmp = end + 1; + + end = strchr(tmp, ':'); + if (end) { + *end = '\0'; + end++; + info->flags |= RPMEM_HAS_SERVICE; + strncpy(info->service, end, sizeof(info->service) - 1); + } + } else { + char *first = strchr(tmp, ':'); + char *last = strrchr(tmp, ':'); + if (first == last) { + /* IPv4 - one colon */ + if (first) { + *first = '\0'; + first++; + info->flags |= RPMEM_HAS_SERVICE; + strncpy(info->service, first, + sizeof(info->service) - 1); + } + } + + strncpy(info->node, tmp, sizeof(info->node) - 1); + } + + if (*info->node == '\0') { + errno = EINVAL; + goto err_node; + } + + free(str); + + /* make sure that user, node and service are NULL-terminated */ + info->user[sizeof(info->user) - 1] = '\0'; + info->node[sizeof(info->node) - 1] = '\0'; + info->service[sizeof(info->service) - 1] = '\0'; + + return info; +err_node: +err_ipv6: + free(str); +err_strdup: + free(info); + return NULL; +} + +/* + * rpmem_target_free -- free target info + */ +void +rpmem_target_free(struct rpmem_target_info *info) +{ + free(info); +} + +/* + * rpmem_get_ssh_conn_addr -- returns an address which the ssh connection is + * established on + * + * This function utilizes the SSH_CONNECTION environment variable to retrieve + * the server IP address. See ssh(1) for details. 
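+ *
+ * SSH_CONNECTION holds four space-separated fields:
+ *
+ *	<client_ip> <client_port> <server_ip> <server_port>
+ *
+ * and the code below returns the third one, e.g. for the illustrative
+ * value "198.51.100.1 51234 198.51.100.7 22" it returns "198.51.100.7".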
+ */ +char * +rpmem_get_ssh_conn_addr(void) +{ + char *ssh_conn = os_getenv("SSH_CONNECTION"); + if (!ssh_conn) { + RPMEMC_LOG(ERR, "SSH_CONNECTION variable is not set"); + return NULL; + } + + char *sp = strchr(ssh_conn, ' '); + if (!sp) + goto err_fmt; + + char *addr = strchr(sp + 1, ' '); + if (!addr) + goto err_fmt; + + addr++; + + sp = strchr(addr, ' '); + if (!sp) + goto err_fmt; + + *sp = '\0'; + + return addr; +err_fmt: + RPMEMC_LOG(ERR, "invalid format of SSH_CONNECTION variable"); + return NULL; +} diff --git a/src/pmdk/src/rpmem_common/rpmem_common.h b/src/pmdk/src/rpmem_common/rpmem_common.h new file mode 100644 index 000000000..20d183302 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_common.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_common.h -- common definitions for librpmem and rpmemd + */ + +#ifndef RPMEM_COMMON_H +#define RPMEM_COMMON_H 1 + +/* + * Values for SO_KEEPALIVE socket option + */ + +#define RPMEM_CMD_ENV "RPMEM_CMD" +#define RPMEM_SSH_ENV "RPMEM_SSH" +#define RPMEM_DEF_CMD "rpmemd" +#define RPMEM_DEF_SSH "ssh" +#define RPMEM_PROV_SOCKET_ENV "RPMEM_ENABLE_SOCKETS" +#define RPMEM_PROV_VERBS_ENV "RPMEM_ENABLE_VERBS" +#define RPMEM_MAX_NLANES_ENV "RPMEM_MAX_NLANES" +#define RPMEM_WQ_SIZE_ENV "RPMEM_WORK_QUEUE_SIZE" +#define RPMEM_ACCEPT_TIMEOUT 30000 +#define RPMEM_CONNECT_TIMEOUT 30000 +#define RPMEM_MONITOR_TIMEOUT 1000 + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * rpmem_err -- error codes + */ +enum rpmem_err { + RPMEM_SUCCESS = 0, + RPMEM_ERR_BADPROTO = 1, + RPMEM_ERR_BADNAME = 2, + RPMEM_ERR_BADSIZE = 3, + RPMEM_ERR_BADNLANES = 4, + RPMEM_ERR_BADPROVIDER = 5, + RPMEM_ERR_FATAL = 6, + RPMEM_ERR_FATAL_CONN = 7, + RPMEM_ERR_BUSY = 8, + RPMEM_ERR_EXISTS = 9, + RPMEM_ERR_PROVNOSUP = 10, + RPMEM_ERR_NOEXIST = 11, + RPMEM_ERR_NOACCESS = 12, + RPMEM_ERR_POOL_CFG = 13, + + MAX_RPMEM_ERR, +}; + +/* + * rpmem_persist_method -- remote persist operation method + */ +enum rpmem_persist_method { + RPMEM_PM_GPSPM = 1, /* General Purpose Server Persistency Method */ + RPMEM_PM_APM = 2, /* Appliance Persistency Method */ + + MAX_RPMEM_PM, +}; + +const char *rpmem_persist_method_to_str(enum rpmem_persist_method pm); + +/* + * rpmem_provider -- supported providers + */ +enum rpmem_provider { + RPMEM_PROV_UNKNOWN = 0, + RPMEM_PROV_LIBFABRIC_VERBS = 1, + RPMEM_PROV_LIBFABRIC_SOCKETS = 2, + + MAX_RPMEM_PROV, +}; + +enum rpmem_provider rpmem_provider_from_str(const char *str); +const char *rpmem_provider_to_str(enum rpmem_provider provider); + +/* + * rpmem_req_attr -- arguments for open/create request + */ +struct rpmem_req_attr { + size_t pool_size; + unsigned nlanes; + size_t buff_size; + enum rpmem_provider provider; + const char *pool_desc; +}; + +/* + * rpmem_resp_attr -- return arguments from open/create request + */ +struct rpmem_resp_attr { + unsigned short port; + uint64_t rkey; + uint64_t raddr; + unsigned nlanes; + enum rpmem_persist_method persist_method; +}; + +#define RPMEM_HAS_USER 0x1 +#define RPMEM_HAS_SERVICE 0x2 +#define RPMEM_FLAGS_USE_IPV4 0x4 +#define RPMEM_MAX_USER (32 + 1) /* see useradd(8) + 1 for '\0' */ +#define RPMEM_MAX_NODE (255 + 1) /* see gethostname(2) + 1 for '\0' */ +#define RPMEM_MAX_SERVICE (NI_MAXSERV + 1) /* + 1 for '\0' */ +#define RPMEM_HDR_SIZE 4096 +#define RPMEM_CLOSE_FLAGS_REMOVE 0x1 +#define RPMEM_DEF_BUFF_SIZE 8192 + +struct rpmem_target_info { + char user[RPMEM_MAX_USER]; + char node[RPMEM_MAX_NODE]; + char 
service[RPMEM_MAX_SERVICE]; + unsigned flags; +}; + +extern unsigned Rpmem_max_nlanes; +extern unsigned Rpmem_wq_size; +extern int Rpmem_fork_unsafe; + +int rpmem_b64_write(int sockfd, const void *buf, size_t len, int flags); +int rpmem_b64_read(int sockfd, void *buf, size_t len, int flags); +const char *rpmem_get_ip_str(const struct sockaddr *addr); +struct rpmem_target_info *rpmem_target_parse(const char *target); +void rpmem_target_free(struct rpmem_target_info *info); +int rpmem_xwrite(int fd, const void *buf, size_t len, int flags); +int rpmem_xread(int fd, void *buf, size_t len, int flags); +char *rpmem_get_ssh_conn_addr(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/rpmem_common/rpmem_common_log.h b/src/pmdk/src/rpmem_common/rpmem_common_log.h new file mode 100644 index 000000000..c245a8e26 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_common_log.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_common_log.h -- common log macros for librpmem and rpmemd + */ + +#if defined(RPMEMC_LOG_RPMEM) && defined(RPMEMC_LOG_RPMEMD) + +#error Both RPMEMC_LOG_RPMEM and RPMEMC_LOG_RPMEMD defined + +#elif !defined(RPMEMC_LOG_RPMEM) && !defined(RPMEMC_LOG_RPMEMD) + +#define RPMEMC_LOG(level, fmt, args...) do {} while (0) +#define RPMEMC_DBG(level, fmt, args...) do {} while (0) +#define RPMEMC_FATAL(fmt, args...) do {} while (0) +#define RPMEMC_ASSERT(cond) do {} while (0) + +#elif defined(RPMEMC_LOG_RPMEM) + +#include "out.h" +#include "rpmem_util.h" + +#define RPMEMC_LOG(level, fmt, args...) RPMEM_LOG(level, fmt, ## args) +#define RPMEMC_DBG(level, fmt, args...) RPMEM_DBG(fmt, ## args) +#define RPMEMC_FATAL(fmt, args...) RPMEM_FATAL(fmt, ## args) +#define RPMEMC_ASSERT(cond) RPMEM_ASSERT(cond) + +#else + +#include "rpmemd_log.h" + +#define RPMEMC_LOG(level, fmt, args...) RPMEMD_LOG(level, fmt, ## args) +#define RPMEMC_DBG(level, fmt, args...) RPMEMD_DBG(fmt, ## args) +#define RPMEMC_FATAL(fmt, args...) RPMEMD_FATAL(fmt, ## args) +#define RPMEMC_ASSERT(cond) RPMEMD_ASSERT(cond) + +#endif diff --git a/src/pmdk/src/rpmem_common/rpmem_fip_common.c b/src/pmdk/src/rpmem_common/rpmem_fip_common.c new file mode 100644 index 000000000..dbb2d5a92 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_fip_common.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * rpmem_common.c -- common definitions for librpmem and rpmemd + */ +#include +#include +#include +#include +#include + +#include "rpmem_common.h" +#include "rpmem_fip_common.h" +#include "rpmem_proto.h" + +#include "rpmem_common_log.h" + +#include "valgrind_internal.h" + +#include + +/* + * rpmem_fip_get_hints -- return fabric interface information hints + */ +struct fi_info * +rpmem_fip_get_hints(enum rpmem_provider provider) +{ + RPMEMC_ASSERT(provider < MAX_RPMEM_PROV); + + struct fi_info *hints = fi_allocinfo(); + if (!hints) { + RPMEMC_LOG(ERR, "!fi_allocinfo"); + return NULL; + } + + /* connection-oriented endpoint */ + hints->ep_attr->type = FI_EP_MSG; + + /* + * Basic memory registration mode indicates that MR attributes + * (rkey, lkey) are selected by provider. 
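+ * (FI_MR_BASIC is the legacy libfabric registration mode: the application
+ * cannot request specific key values under it.)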
+ */ + hints->domain_attr->mr_mode = FI_MR_BASIC; + + /* + * FI_THREAD_SAFE indicates MT applications can access any + * resources through interface without any restrictions + */ + hints->domain_attr->threading = FI_THREAD_SAFE; + + /* + * FI_MSG - SEND and RECV + * FI_RMA - WRITE and READ + */ + hints->caps = FI_MSG | FI_RMA; + + /* must register locally accessed buffers */ + hints->mode = FI_CONTEXT | FI_LOCAL_MR | FI_RX_CQ_DATA; + + /* READ-after-WRITE and SEND-after-WRITE message ordering required */ + hints->tx_attr->msg_order = FI_ORDER_RAW | FI_ORDER_SAW; + + hints->addr_format = FI_SOCKADDR; + + if (provider != RPMEM_PROV_UNKNOWN) { + const char *prov_name = rpmem_provider_to_str(provider); + RPMEMC_ASSERT(prov_name != NULL); + + hints->fabric_attr->prov_name = strdup(prov_name); + if (!hints->fabric_attr->prov_name) { + RPMEMC_LOG(ERR, "!strdup(provider)"); + goto err_strdup; + } + } + + return hints; +err_strdup: + fi_freeinfo(hints); + return NULL; +} + +/* + * rpmem_fip_probe_get -- return list of available providers + */ +int +rpmem_fip_probe_get(const char *target, struct rpmem_fip_probe *probe) +{ + struct fi_info *hints = rpmem_fip_get_hints(RPMEM_PROV_UNKNOWN); + if (!hints) + return -1; + + int ret; + struct fi_info *fi; + ret = fi_getinfo(RPMEM_FIVERSION, target, NULL, 0, hints, &fi); + if (ret) { + goto err_getinfo; + } + + if (probe) { + memset(probe, 0, sizeof(*probe)); + + struct fi_info *prov = fi; + while (prov) { + enum rpmem_provider p = rpmem_provider_from_str( + prov->fabric_attr->prov_name); + if (p == RPMEM_PROV_UNKNOWN) { + prov = prov->next; + continue; + } + + probe->providers |= (1U << p); + probe->max_wq_size[p] = prov->tx_attr->size; + prov = prov->next; + } + } + + fi_freeinfo(fi); +err_getinfo: + fi_freeinfo(hints); + return ret; +} + +/* + * rpmem_fip_read_eq -- read event queue entry with specified timeout + */ +int +rpmem_fip_read_eq(struct fid_eq *eq, struct fi_eq_cm_entry *entry, + uint32_t *event, int timeout) +{ + int ret; + ssize_t sret; + struct fi_eq_err_entry err; + + sret = fi_eq_sread(eq, event, entry, sizeof(*entry), timeout, 0); + VALGRIND_DO_MAKE_MEM_DEFINED(&sret, sizeof(sret)); + + if (timeout != -1 && (sret == -FI_ETIMEDOUT || sret == -FI_EAGAIN)) { + errno = ETIMEDOUT; + return 1; + } + + if (sret < 0 || (size_t)sret != sizeof(*entry)) { + if (sret < 0) + ret = (int)sret; + else + ret = -1; + + sret = fi_eq_readerr(eq, &err, 0); + if (sret < 0) { + errno = EIO; + RPMEMC_LOG(ERR, "error reading from event queue: " + "cannot read error from event queue: %s", + fi_strerror((int)sret)); + } else if (sret > 0) { + RPMEMC_ASSERT(sret == sizeof(err)); + errno = -err.prov_errno; + RPMEMC_LOG(ERR, "error reading from event queue: %s", + fi_eq_strerror(eq, err.prov_errno, + NULL, NULL, 0)); + } + + return ret; + } + + return 0; +} + +/* + * rpmem_fip_read_eq -- read event queue entry and expect specified event + * and fid + * + * Returns: + * 1 - timeout + * 0 - success + * otherwise - error + */ +int +rpmem_fip_read_eq_check(struct fid_eq *eq, struct fi_eq_cm_entry *entry, + uint32_t exp_event, fid_t exp_fid, int timeout) +{ + uint32_t event; + int ret = rpmem_fip_read_eq(eq, entry, &event, timeout); + if (ret) + return ret; + + if (event != exp_event || entry->fid != exp_fid) { + errno = EIO; + RPMEMC_LOG(ERR, "unexpected event received (%u) " + "expected (%u)%s", event, exp_event, + entry->fid != exp_fid ? 
+ " invalid endpoint" : ""); + + return -1; + } + + return 0; +} + +/* + * rpmem_fip_lane_attr -- lane attributes + * + * This structure describes how many SQ, RQ and CQ entries are + * required for a single lane. + * + * NOTE: + * - WRITE, READ and SEND requests are placed in SQ, + * - RECV requests are placed in RQ. + */ +struct rpmem_fip_lane_attr { + size_t n_per_sq; /* number of entries per lane in send queue */ + size_t n_per_rq; /* number of entries per lane in receive queue */ + size_t n_per_cq; /* number of entries per lane in completion queue */ +}; + +/* queues size required by remote persist operation methods */ +static const struct rpmem_fip_lane_attr +rpmem_fip_lane_attrs[MAX_RPMEM_FIP_NODE][MAX_RPMEM_PM] = { + [RPMEM_FIP_NODE_CLIENT][RPMEM_PM_GPSPM] = { + .n_per_sq = 2, /* WRITE + SEND */ + .n_per_rq = 1, /* RECV */ + .n_per_cq = 3, + }, + [RPMEM_FIP_NODE_CLIENT][RPMEM_PM_APM] = { + /* WRITE + READ for persist, WRITE + SEND for deep persist */ + .n_per_sq = 2, /* WRITE + SEND */ + .n_per_rq = 1, /* RECV */ + .n_per_cq = 3, + }, + [RPMEM_FIP_NODE_SERVER][RPMEM_PM_GPSPM] = { + .n_per_sq = 1, /* SEND */ + .n_per_rq = 1, /* RECV */ + .n_per_cq = 3, + }, + [RPMEM_FIP_NODE_SERVER][RPMEM_PM_APM] = { + .n_per_sq = 1, /* SEND */ + .n_per_rq = 1, /* RECV */ + .n_per_cq = 3, + }, +}; + +/* + * rpmem_fip_cq_size -- returns completion queue size based on + * persist method and node type + */ +size_t +rpmem_fip_cq_size(enum rpmem_persist_method pm, enum rpmem_fip_node node) +{ + RPMEMC_ASSERT(pm < MAX_RPMEM_PM); + RPMEMC_ASSERT(node < MAX_RPMEM_FIP_NODE); + + const struct rpmem_fip_lane_attr *attr = + &rpmem_fip_lane_attrs[node][pm]; + return attr->n_per_cq ? : 1; +} + +/* + * rpmem_fip_wq_size -- returns submission queue (transmit queue) size based + * on persist method and node type + */ +size_t +rpmem_fip_wq_size(enum rpmem_persist_method pm, enum rpmem_fip_node node) +{ + RPMEMC_ASSERT(pm < MAX_RPMEM_PM); + RPMEMC_ASSERT(node < MAX_RPMEM_FIP_NODE); + + const struct rpmem_fip_lane_attr *attr = + &rpmem_fip_lane_attrs[node][pm]; + return attr->n_per_sq ? : 1; +} + +/* + * rpmem_fip_rx_size -- returns receive queue size based + * on persist method and node type + */ +size_t +rpmem_fip_rx_size(enum rpmem_persist_method pm, enum rpmem_fip_node node) +{ + RPMEMC_ASSERT(pm < MAX_RPMEM_PM); + RPMEMC_ASSERT(node < MAX_RPMEM_FIP_NODE); + + const struct rpmem_fip_lane_attr *attr = + &rpmem_fip_lane_attrs[node][pm]; + return attr->n_per_rq ? 
: 1; +} + +/* + * rpmem_fip_max_nlanes -- returns maximum number of lanes + */ +size_t +rpmem_fip_max_nlanes(struct fi_info *fi) +{ + return min(min(fi->domain_attr->tx_ctx_cnt, + fi->domain_attr->rx_ctx_cnt), + fi->domain_attr->cq_cnt); +} + +/* + * rpmem_fip_print_info -- print some useful info about fabric interface + */ +void +rpmem_fip_print_info(struct fi_info *fi) +{ + RPMEMC_LOG(INFO, "libfabric version: %s", + fi_tostr(fi, FI_TYPE_VERSION)); + + char *str = fi_tostr(fi, FI_TYPE_INFO); + char *buff = strdup(str); + if (!buff) { + RPMEMC_LOG(ERR, "!allocating string buffer for " + "libfabric interface information"); + return; + } + + RPMEMC_LOG(INFO, "libfabric interface info:"); + + char *nl; + char *last = buff; + while (last != NULL) { + nl = strchr(last, '\n'); + if (nl) { + *nl = '\0'; + nl++; + } + + RPMEMC_LOG(INFO, "%s", last); + + last = nl; + } + + free(buff); +} diff --git a/src/pmdk/src/rpmem_common/rpmem_fip_common.h b/src/pmdk/src/rpmem_common/rpmem_fip_common.h new file mode 100644 index 000000000..6eb801b3e --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_fip_common.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_fip_common.h -- common definitions for librpmem and rpmemd + */ + +#ifndef RPMEM_FIP_COMMON_H +#define RPMEM_FIP_COMMON_H 1 + +#include +#include + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RPMEM_FIVERSION FI_VERSION(1, 4) +#define RPMEM_FIP_CQ_WAIT_MS 100 + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) + +/* + * rpmem_fip_node -- client or server node type + */ +enum rpmem_fip_node { + RPMEM_FIP_NODE_CLIENT, + RPMEM_FIP_NODE_SERVER, + + MAX_RPMEM_FIP_NODE, +}; + +/* + * rpmem_fip_probe -- list of providers + */ +struct rpmem_fip_probe { + unsigned providers; + size_t max_wq_size[MAX_RPMEM_PROV]; +}; + +/* + * rpmem_fip_probe -- returns true if specified provider is available + */ +static inline int +rpmem_fip_probe(struct rpmem_fip_probe probe, enum rpmem_provider provider) +{ + return (probe.providers & (1U << provider)) != 0; +} + +/* + * rpmem_fip_probe_any -- returns true if any provider is available + */ +static inline int +rpmem_fip_probe_any(struct rpmem_fip_probe probe) +{ + return probe.providers != 0; +} + +int rpmem_fip_probe_get(const char *target, struct rpmem_fip_probe *probe); + +struct fi_info *rpmem_fip_get_hints(enum rpmem_provider provider); + +int rpmem_fip_read_eq_check(struct fid_eq *eq, struct fi_eq_cm_entry *entry, + uint32_t exp_event, fid_t exp_fid, int timeout); + +int rpmem_fip_read_eq(struct fid_eq *eq, struct fi_eq_cm_entry *entry, + uint32_t *event, int timeout); + +size_t rpmem_fip_cq_size(enum rpmem_persist_method pm, + enum rpmem_fip_node node); +size_t rpmem_fip_wq_size(enum rpmem_persist_method pm, + enum rpmem_fip_node node); +size_t rpmem_fip_rx_size(enum rpmem_persist_method pm, + enum rpmem_fip_node node); + +size_t rpmem_fip_max_nlanes(struct fi_info *fi); + +void rpmem_fip_print_info(struct fi_info *fi); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/rpmem_common/rpmem_fip_lane.h b/src/pmdk/src/rpmem_common/rpmem_fip_lane.h new file mode 100644 index 000000000..c0c6bac80 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_fip_lane.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_fip_lane.h -- rpmem fabric provider lane definition + */ + +#include +#include 
+#include "sys_util.h" +/* + * rpmem_fip_lane -- basic lane structure + * + * This structure consist of a synchronization object and a return value. + * It is possible to wait on the lane for specified event. The event can be + * signalled by another thread which can pass the return value if required. + * + * The sync variable can store up to 64 different events, each event on + * separate bit. + */ +struct rpmem_fip_lane { + os_spinlock_t lock; + int ret; + uint64_t sync; +}; + +/* + * rpmem_fip_lane_init -- initialize basic lane structure + */ +static inline int +rpmem_fip_lane_init(struct rpmem_fip_lane *lanep) +{ + lanep->ret = 0; + lanep->sync = 0; + + return util_spin_init(&lanep->lock, PTHREAD_PROCESS_PRIVATE); +} + +/* + * rpmem_fip_lane_fini -- deinitialize basic lane structure + */ +static inline void +rpmem_fip_lane_fini(struct rpmem_fip_lane *lanep) +{ + util_spin_destroy(&lanep->lock); +} + +/* + * rpmem_fip_lane_busy -- return true if lane has pending events + */ +static inline int +rpmem_fip_lane_busy(struct rpmem_fip_lane *lanep) +{ + util_spin_lock(&lanep->lock); + int ret = lanep->sync != 0; + util_spin_unlock(&lanep->lock); + + return ret; +} + +/* + * rpmem_fip_lane_begin -- begin waiting for specified event(s) + */ +static inline void +rpmem_fip_lane_begin(struct rpmem_fip_lane *lanep, uint64_t sig) +{ + util_spin_lock(&lanep->lock); + lanep->ret = 0; + lanep->sync |= sig; + util_spin_unlock(&lanep->lock); +} + +static inline int +rpmem_fip_lane_is_busy(struct rpmem_fip_lane *lanep, uint64_t sig) +{ + util_spin_lock(&lanep->lock); + int ret = (lanep->sync & sig) != 0; + util_spin_unlock(&lanep->lock); + + return ret; +} + +static inline int +rpmem_fip_lane_ret(struct rpmem_fip_lane *lanep) +{ + util_spin_lock(&lanep->lock); + int ret = lanep->ret; + util_spin_unlock(&lanep->lock); + + return ret; +} + +/* + * rpmem_fip_lane_wait -- wait for specified event(s) + */ +static inline int +rpmem_fip_lane_wait(struct rpmem_fip_lane *lanep, uint64_t sig) +{ + while (rpmem_fip_lane_is_busy(lanep, sig)) + sched_yield(); + + return rpmem_fip_lane_ret(lanep); +} + +/* + * rpmem_fip_lane_signal -- signal lane about specified event + */ +static inline void +rpmem_fip_lane_signal(struct rpmem_fip_lane *lanep, uint64_t sig) +{ + util_spin_lock(&lanep->lock); + lanep->sync &= ~sig; + util_spin_unlock(&lanep->lock); +} + +/* + * rpmem_fip_lane_signal -- signal lane about specified event and store + * return value + */ +static inline void +rpmem_fip_lane_sigret(struct rpmem_fip_lane *lanep, uint64_t sig, int ret) +{ + util_spin_lock(&lanep->lock); + lanep->ret = ret; + lanep->sync &= ~sig; + util_spin_unlock(&lanep->lock); +} diff --git a/src/pmdk/src/rpmem_common/rpmem_fip_msg.h b/src/pmdk/src/rpmem_common/rpmem_fip_msg.h new file mode 100644 index 000000000..7e587ded0 --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_fip_msg.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_fip_msg.h -- simple wrappers for fi_rma(3) and fi_msg(3) functions + */ + +#ifndef RPMEM_FIP_MSG_H +#define RPMEM_FIP_MSG_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * rpmem_fip_rma -- helper struct for RMA operation + */ +struct rpmem_fip_rma { + struct fi_msg_rma msg; /* message structure */ + struct iovec msg_iov; /* IO vector buffer */ + struct fi_rma_iov rma_iov; /* RMA IO vector buffer */ + void *desc; /* local memory descriptor */ + uint64_t flags; /* RMA operation flags */ +}; + +/* + * rpmem_fip_msg -- 
helper struct for MSG operation + */ +struct rpmem_fip_msg { + struct fi_msg msg; /* message structure */ + struct iovec iov; /* IO vector buffer */ + void *desc; /* local memory descriptor */ + uint64_t flags; /* MSG operation flags */ +}; + +/* + * rpmem_fip_rma_init -- initialize RMA helper struct + */ +static inline void +rpmem_fip_rma_init(struct rpmem_fip_rma *rma, void *desc, + fi_addr_t addr, uint64_t rkey, void *context, uint64_t flags) +{ + memset(rma, 0, sizeof(*rma)); + rma->desc = desc; + rma->flags = flags; + rma->rma_iov.key = rkey; + rma->msg.context = context; + rma->msg.addr = addr; + rma->msg.desc = &rma->desc; + rma->msg.rma_iov = &rma->rma_iov; + rma->msg.rma_iov_count = 1; + rma->msg.msg_iov = &rma->msg_iov; + rma->msg.iov_count = 1; +} + +/* + * rpmem_fip_msg_init -- initialize MSG helper struct + */ +static inline void +rpmem_fip_msg_init(struct rpmem_fip_msg *msg, void *desc, fi_addr_t addr, + void *context, void *buff, size_t len, uint64_t flags) +{ + memset(msg, 0, sizeof(*msg)); + msg->desc = desc; + msg->flags = flags; + msg->iov.iov_base = buff; + msg->iov.iov_len = len; + msg->msg.context = context; + msg->msg.addr = addr; + msg->msg.desc = &msg->desc; + msg->msg.msg_iov = &msg->iov; + msg->msg.iov_count = 1; +} + +/* + * rpmem_fip_writemsg -- wrapper for fi_writemsg + */ +static inline int +rpmem_fip_writemsg(struct fid_ep *ep, struct rpmem_fip_rma *rma, + const void *buff, size_t len, uint64_t addr) +{ + rma->rma_iov.addr = addr; + rma->rma_iov.len = len; + rma->msg_iov.iov_base = (void *)buff; + rma->msg_iov.iov_len = len; + return (int)fi_writemsg(ep, &rma->msg, rma->flags); +} + +/* + * rpmem_fip_readmsg -- wrapper for fi_readmsg + */ +static inline int +rpmem_fip_readmsg(struct fid_ep *ep, struct rpmem_fip_rma *rma, + void *buff, size_t len, uint64_t addr) +{ + rma->rma_iov.addr = addr; + rma->rma_iov.len = len; + rma->msg_iov.iov_base = buff; + rma->msg_iov.iov_len = len; + return (int)fi_readmsg(ep, &rma->msg, rma->flags); +} + +/* + * rpmem_fip_sendmsg -- wrapper for fi_sendmsg + */ +static inline int +rpmem_fip_sendmsg(struct fid_ep *ep, struct rpmem_fip_msg *msg, size_t len) +{ + msg->iov.iov_len = len; + return (int)fi_sendmsg(ep, &msg->msg, msg->flags); +} + +/* + * rpmem_fip_recvmsg -- wrapper for fi_recvmsg + */ +static inline int +rpmem_fip_recvmsg(struct fid_ep *ep, struct rpmem_fip_msg *msg) +{ + return (int)fi_recvmsg(ep, &msg->msg, msg->flags); +} + +/* + * rpmem_fip_msg_get_pmsg -- returns message buffer as a persist message + */ +static inline struct rpmem_msg_persist * +rpmem_fip_msg_get_pmsg(struct rpmem_fip_msg *msg) +{ + return (struct rpmem_msg_persist *)msg->iov.iov_base; +} + +/* + * rpmem_fip_msg_get_pres -- returns message buffer as a persist response + */ +static inline struct rpmem_msg_persist_resp * +rpmem_fip_msg_get_pres(struct rpmem_fip_msg *msg) +{ + return (struct rpmem_msg_persist_resp *)msg->iov.iov_base; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/rpmem_common/rpmem_proto.h b/src/pmdk/src/rpmem_common/rpmem_proto.h new file mode 100644 index 000000000..b07b8aaef --- /dev/null +++ b/src/pmdk/src/rpmem_common/rpmem_proto.h @@ -0,0 +1,545 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmem_proto.h -- rpmem protocol definitions + */ + +#ifndef RPMEM_PROTO_H +#define RPMEM_PROTO_H 1 + +#include +#include + +#include "librpmem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PACKED __attribute__((packed)) + +#define RPMEM_PROTO 
"tcp" +#define RPMEM_PROTO_MAJOR 0 +#define RPMEM_PROTO_MINOR 1 +#define RPMEM_SIG_SIZE 8 +#define RPMEM_UUID_SIZE 16 +#define RPMEM_PROV_SIZE 32 +#define RPMEM_USER_SIZE 16 + +/* + * rpmem_msg_type -- type of messages + */ +enum rpmem_msg_type { + RPMEM_MSG_TYPE_CREATE = 1, /* create request */ + RPMEM_MSG_TYPE_CREATE_RESP = 2, /* create request response */ + RPMEM_MSG_TYPE_OPEN = 3, /* open request */ + RPMEM_MSG_TYPE_OPEN_RESP = 4, /* open request response */ + RPMEM_MSG_TYPE_CLOSE = 5, /* close request */ + RPMEM_MSG_TYPE_CLOSE_RESP = 6, /* close request response */ + RPMEM_MSG_TYPE_SET_ATTR = 7, /* set attributes request */ + /* set attributes request response */ + RPMEM_MSG_TYPE_SET_ATTR_RESP = 8, + MAX_RPMEM_MSG_TYPE, +}; + +/* + * rpmem_pool_attr_packed -- a packed version + */ +struct rpmem_pool_attr_packed { + char signature[RPMEM_POOL_HDR_SIG_LEN]; /* pool signature */ + uint32_t major; /* format major version number */ + uint32_t compat_features; /* mask: compatible "may" features */ + uint32_t incompat_features; /* mask: "must support" features */ + uint32_t ro_compat_features; /* mask: force RO if unsupported */ + unsigned char poolset_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* pool uuid */ + unsigned char uuid[RPMEM_POOL_HDR_UUID_LEN]; /* first part uuid */ + unsigned char next_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* next pool uuid */ + unsigned char prev_uuid[RPMEM_POOL_HDR_UUID_LEN]; /* prev pool uuid */ + unsigned char user_flags[RPMEM_POOL_USER_FLAGS_LEN]; /* user flags */ +} PACKED; + +/* + * rpmem_msg_ibc_attr -- in-band connection attributes + * + * Used by create request response and open request response. + * Contains essential information to proceed with in-band connection + * initialization. + */ +struct rpmem_msg_ibc_attr { + uint32_t port; /* RDMA connection port */ + uint32_t persist_method; /* persist method */ + uint64_t rkey; /* remote key */ + uint64_t raddr; /* remote address */ + uint32_t nlanes; /* number of lanes */ +} PACKED; + +/* + * rpmem_msg_pool_desc -- remote pool descriptor + */ +struct rpmem_msg_pool_desc { + uint32_t size; /* size of pool descriptor */ + uint8_t desc[0]; /* pool descriptor, null-terminated string */ +} PACKED; + +/* + * rpmem_msg_hdr -- message header which consists of type and size of message + * + * The type must be one of the rpmem_msg_type values. + */ +struct rpmem_msg_hdr { + uint32_t type; /* type of message */ + uint64_t size; /* size of message */ + uint8_t body[0]; +} PACKED; + +/* + * rpmem_msg_hdr_resp -- message response header which consists of type, size + * and status. + * + * The type must be one of the rpmem_msg_type values. + */ +struct rpmem_msg_hdr_resp { + uint32_t status; /* response status */ + uint32_t type; /* type of message */ + uint64_t size; /* size of message */ +} PACKED; + +/* + * rpmem_msg_common -- common fields for open/create messages + */ +struct rpmem_msg_common { + uint16_t major; /* protocol version major number */ + uint16_t minor; /* protocol version minor number */ + uint64_t pool_size; /* minimum required size of a pool */ + uint32_t nlanes; /* number of lanes used by initiator */ + uint32_t provider; /* provider */ + uint64_t buff_size; /* buffer size for inline persist */ +} PACKED; + +/* + * rpmem_msg_create -- create request message + * + * The type of message must be set to RPMEM_MSG_TYPE_CREATE. 
+ * The size of message must be set to + * sizeof(struct rpmem_msg_create) + pool_desc_size + */ +struct rpmem_msg_create { + struct rpmem_msg_hdr hdr; /* message header */ + struct rpmem_msg_common c; + struct rpmem_pool_attr_packed pool_attr; /* pool attributes */ + struct rpmem_msg_pool_desc pool_desc; /* pool descriptor */ +} PACKED; + +/* + * rpmem_msg_create_resp -- create request response message + * + * The type of message must be set to RPMEM_MSG_TYPE_CREATE_RESP. + * The size of message must be set to sizeof(struct rpmem_msg_create_resp). + */ +struct rpmem_msg_create_resp { + struct rpmem_msg_hdr_resp hdr; /* message header */ + struct rpmem_msg_ibc_attr ibc; /* in-band connection attributes */ +} PACKED; + +/* + * rpmem_msg_open -- open request message + * + * The type of message must be set to RPMEM_MSG_TYPE_OPEN. + * The size of message must be set to + * sizeof(struct rpmem_msg_open) + pool_desc_size + */ +struct rpmem_msg_open { + struct rpmem_msg_hdr hdr; /* message header */ + struct rpmem_msg_common c; + struct rpmem_msg_pool_desc pool_desc; /* pool descriptor */ +} PACKED; + +/* + * rpmem_msg_open_resp -- open request response message + * + * The type of message must be set to RPMEM_MSG_TYPE_OPEN_RESP. + * The size of message must be set to sizeof(struct rpmem_msg_open_resp) + */ +struct rpmem_msg_open_resp { + struct rpmem_msg_hdr_resp hdr; /* message header */ + struct rpmem_msg_ibc_attr ibc; /* in-band connection attributes */ + struct rpmem_pool_attr_packed pool_attr; /* pool attributes */ +} PACKED; + +/* + * rpmem_msg_close -- close request message + * + * The type of message must be set to RPMEM_MSG_TYPE_CLOSE + * The size of message must be set to sizeof(struct rpmem_msg_close) + */ +struct rpmem_msg_close { + struct rpmem_msg_hdr hdr; /* message header */ + uint32_t flags; /* flags */ +} PACKED; + +/* + * rpmem_msg_close_resp -- close request response message + * + * The type of message must be set to RPMEM_MSG_TYPE_CLOSE_RESP + * The size of message must be set to sizeof(struct rpmem_msg_close_resp) + */ +struct rpmem_msg_close_resp { + struct rpmem_msg_hdr_resp hdr; /* message header */ + /* no more fields */ +} PACKED; + +#define RPMEM_FLUSH_WRITE 0U /* flush / persist using RDMA WRITE */ +#define RPMEM_DEEP_PERSIST 1U /* deep persist operation */ +#define RPMEM_PERSIST_SEND 2U /* persist using RDMA SEND */ +#define RPMEM_COMPLETION 4U /* schedule command with a completion */ + +/* the two least significant bits are reserved for mode of persist */ +#define RPMEM_FLUSH_PERSIST_MASK 0x3U + +#define RPMEM_PERSIST_MAX 2U /* maximum valid persist value */ + +/* + * rpmem_msg_persist -- remote persist message + */ +struct rpmem_msg_persist { + uint32_t flags; /* lane flags */ + uint32_t lane; /* lane identifier */ + uint64_t addr; /* remote memory address */ + uint64_t size; /* remote memory size */ + uint8_t data[]; +}; + +/* + * rpmem_msg_persist_resp -- remote persist response message + */ +struct rpmem_msg_persist_resp { + uint32_t flags; /* lane flags */ + uint32_t lane; /* lane identifier */ +}; + +/* + * rpmem_msg_set_attr -- set attributes request message + * + * The type of message must be set to RPMEM_MSG_TYPE_SET_ATTR. 
+ * The size of message must be set to sizeof(struct rpmem_msg_set_attr) + */ +struct rpmem_msg_set_attr { + struct rpmem_msg_hdr hdr; /* message header */ + struct rpmem_pool_attr_packed pool_attr; /* pool attributes */ +} PACKED; + +/* + * rpmem_msg_set_attr_resp -- set attributes request response message + * + * The type of message must be set to RPMEM_MSG_TYPE_SET_ATTR_RESP. + * The size of message must be set to sizeof(struct rpmem_msg_set_attr_resp). + */ +struct rpmem_msg_set_attr_resp { + struct rpmem_msg_hdr_resp hdr; /* message header */ +} PACKED; + +/* + * XXX Begin: Suppress gcc conversion warnings for FreeBSD be*toh macros. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +/* + * rpmem_ntoh_msg_ibc_attr -- convert rpmem_msg_ibc attr to host byte order + */ +static inline void +rpmem_ntoh_msg_ibc_attr(struct rpmem_msg_ibc_attr *ibc) +{ + ibc->port = be32toh(ibc->port); + ibc->persist_method = be32toh(ibc->persist_method); + ibc->rkey = be64toh(ibc->rkey); + ibc->raddr = be64toh(ibc->raddr); +} + +/* + * rpmem_ntoh_msg_pool_desc -- convert rpmem_msg_pool_desc to host byte order + */ +static inline void +rpmem_ntoh_msg_pool_desc(struct rpmem_msg_pool_desc *pool_desc) +{ + pool_desc->size = be32toh(pool_desc->size); +} + +/* + * rpmem_ntoh_pool_attr -- convert rpmem_pool_attr to host byte order + */ +static inline void +rpmem_ntoh_pool_attr(struct rpmem_pool_attr_packed *attr) +{ + attr->major = be32toh(attr->major); + attr->ro_compat_features = be32toh(attr->ro_compat_features); + attr->incompat_features = be32toh(attr->incompat_features); + attr->compat_features = be32toh(attr->compat_features); +} + +/* + * rpmem_ntoh_msg_hdr -- convert rpmem_msg_hdr to host byte order + */ +static inline void +rpmem_ntoh_msg_hdr(struct rpmem_msg_hdr *hdrp) +{ + hdrp->type = be32toh(hdrp->type); + hdrp->size = be64toh(hdrp->size); +} + +/* + * rpmem_hton_msg_hdr -- convert rpmem_msg_hdr to network byte order + */ +static inline void +rpmem_hton_msg_hdr(struct rpmem_msg_hdr *hdrp) +{ + rpmem_ntoh_msg_hdr(hdrp); +} + +/* + * rpmem_ntoh_msg_hdr_resp -- convert rpmem_msg_hdr_resp to host byte order + */ +static inline void +rpmem_ntoh_msg_hdr_resp(struct rpmem_msg_hdr_resp *hdrp) +{ + hdrp->status = be32toh(hdrp->status); + hdrp->type = be32toh(hdrp->type); + hdrp->size = be64toh(hdrp->size); +} + +/* + * rpmem_hton_msg_hdr_resp -- convert rpmem_msg_hdr_resp to network byte order + */ +static inline void +rpmem_hton_msg_hdr_resp(struct rpmem_msg_hdr_resp *hdrp) +{ + rpmem_ntoh_msg_hdr_resp(hdrp); +} + +/* + * rpmem_ntoh_msg_common -- convert rpmem_msg_common to host byte order + */ +static inline void +rpmem_ntoh_msg_common(struct rpmem_msg_common *msg) +{ + msg->major = be16toh(msg->major); + msg->minor = be16toh(msg->minor); + msg->pool_size = be64toh(msg->pool_size); + msg->nlanes = be32toh(msg->nlanes); + msg->provider = be32toh(msg->provider); + msg->buff_size = be64toh(msg->buff_size); +} + +/* + * rpmem_hton_msg_common -- convert rpmem_msg_common to network byte order + */ +static inline void +rpmem_hton_msg_common(struct rpmem_msg_common *msg) +{ + rpmem_ntoh_msg_common(msg); +} + +/* + * rpmem_ntoh_msg_create -- convert rpmem_msg_create to host byte order + */ +static inline void +rpmem_ntoh_msg_create(struct rpmem_msg_create *msg) +{ + rpmem_ntoh_msg_hdr(&msg->hdr); + rpmem_ntoh_msg_common(&msg->c); + rpmem_ntoh_pool_attr(&msg->pool_attr); + rpmem_ntoh_msg_pool_desc(&msg->pool_desc); +} + +/* + * rpmem_hton_msg_create -- convert rpmem_msg_create to network 
byte order + */ +static inline void +rpmem_hton_msg_create(struct rpmem_msg_create *msg) +{ + rpmem_ntoh_msg_create(msg); +} + +/* + * rpmem_ntoh_msg_create_resp -- convert rpmem_msg_create_resp to host byte + * order + */ +static inline void +rpmem_ntoh_msg_create_resp(struct rpmem_msg_create_resp *msg) +{ + rpmem_ntoh_msg_hdr_resp(&msg->hdr); + rpmem_ntoh_msg_ibc_attr(&msg->ibc); +} + +/* + * rpmem_hton_msg_create_resp -- convert rpmem_msg_create_resp to network byte + * order + */ +static inline void +rpmem_hton_msg_create_resp(struct rpmem_msg_create_resp *msg) +{ + rpmem_ntoh_msg_create_resp(msg); +} + +/* + * rpmem_ntoh_msg_open -- convert rpmem_msg_open to host byte order + */ +static inline void +rpmem_ntoh_msg_open(struct rpmem_msg_open *msg) +{ + rpmem_ntoh_msg_hdr(&msg->hdr); + rpmem_ntoh_msg_common(&msg->c); + rpmem_ntoh_msg_pool_desc(&msg->pool_desc); +} +/* + * XXX End: Suppress gcc conversion warnings for FreeBSD be*toh macros + */ +#pragma GCC diagnostic pop +/* + * rpmem_hton_msg_open -- convert rpmem_msg_open to network byte order + */ +static inline void +rpmem_hton_msg_open(struct rpmem_msg_open *msg) +{ + rpmem_ntoh_msg_open(msg); +} + +/* + * rpmem_ntoh_msg_open_resp -- convert rpmem_msg_open_resp to host byte order + */ +static inline void +rpmem_ntoh_msg_open_resp(struct rpmem_msg_open_resp *msg) +{ + rpmem_ntoh_msg_hdr_resp(&msg->hdr); + rpmem_ntoh_msg_ibc_attr(&msg->ibc); + rpmem_ntoh_pool_attr(&msg->pool_attr); +} + +/* + * rpmem_hton_msg_open_resp -- convert rpmem_msg_open_resp to network byte order + */ +static inline void +rpmem_hton_msg_open_resp(struct rpmem_msg_open_resp *msg) +{ + rpmem_ntoh_msg_open_resp(msg); +} + +/* + * rpmem_ntoh_msg_set_attr -- convert rpmem_msg_set_attr to host byte order + */ +static inline void +rpmem_ntoh_msg_set_attr(struct rpmem_msg_set_attr *msg) +{ + rpmem_ntoh_msg_hdr(&msg->hdr); + rpmem_ntoh_pool_attr(&msg->pool_attr); +} + +/* + * rpmem_hton_msg_set_attr -- convert rpmem_msg_set_attr to network byte order + */ +static inline void +rpmem_hton_msg_set_attr(struct rpmem_msg_set_attr *msg) +{ + rpmem_ntoh_msg_set_attr(msg); +} + +/* + * rpmem_ntoh_msg_set_attr_resp -- convert rpmem_msg_set_attr_resp to host byte + * order + */ +static inline void +rpmem_ntoh_msg_set_attr_resp(struct rpmem_msg_set_attr_resp *msg) +{ + rpmem_ntoh_msg_hdr_resp(&msg->hdr); +} + +/* + * rpmem_hton_msg_set_attr_resp -- convert rpmem_msg_set_attr_resp to network + * byte order + */ +static inline void +rpmem_hton_msg_set_attr_resp(struct rpmem_msg_set_attr_resp *msg) +{ + rpmem_hton_msg_hdr_resp(&msg->hdr); +} + +/* + * rpmem_ntoh_msg_close -- convert rpmem_msg_close to host byte order + */ +static inline void +rpmem_ntoh_msg_close(struct rpmem_msg_close *msg) +{ + rpmem_ntoh_msg_hdr(&msg->hdr); +} + +/* + * rpmem_hton_msg_close -- convert rpmem_msg_close to network byte order + */ +static inline void +rpmem_hton_msg_close(struct rpmem_msg_close *msg) +{ + rpmem_ntoh_msg_close(msg); +} + +/* + * rpmem_ntoh_msg_close_resp -- convert rpmem_msg_close_resp to host byte order + */ +static inline void +rpmem_ntoh_msg_close_resp(struct rpmem_msg_close_resp *msg) +{ + rpmem_ntoh_msg_hdr_resp(&msg->hdr); +} + +/* + * rpmem_hton_msg_close_resp -- convert rpmem_msg_close_resp to network byte + * order + */ +static inline void +rpmem_hton_msg_close_resp(struct rpmem_msg_close_resp *msg) +{ + rpmem_ntoh_msg_close_resp(msg); +} + +/* + * pack_rpmem_pool_attr -- copy pool attributes to a packed structure + */ +static inline void +pack_rpmem_pool_attr(const struct 
rpmem_pool_attr *src, + struct rpmem_pool_attr_packed *dst) +{ + memcpy(dst->signature, src->signature, sizeof(src->signature)); + dst->major = src->major; + dst->compat_features = src->compat_features; + dst->incompat_features = src->incompat_features; + dst->ro_compat_features = src->ro_compat_features; + memcpy(dst->poolset_uuid, src->poolset_uuid, sizeof(dst->poolset_uuid)); + memcpy(dst->uuid, src->uuid, sizeof(dst->uuid)); + memcpy(dst->next_uuid, src->next_uuid, sizeof(dst->next_uuid)); + memcpy(dst->prev_uuid, src->prev_uuid, sizeof(dst->prev_uuid)); + memcpy(dst->user_flags, src->user_flags, sizeof(dst->user_flags)); +} + +/* + * unpack_rpmem_pool_attr -- copy pool attributes to an unpacked structure + */ +static inline void +unpack_rpmem_pool_attr(const struct rpmem_pool_attr_packed *src, + struct rpmem_pool_attr *dst) +{ + memcpy(dst->signature, src->signature, sizeof(src->signature)); + dst->major = src->major; + dst->compat_features = src->compat_features; + dst->incompat_features = src->incompat_features; + dst->ro_compat_features = src->ro_compat_features; + memcpy(dst->poolset_uuid, src->poolset_uuid, sizeof(dst->poolset_uuid)); + memcpy(dst->uuid, src->uuid, sizeof(dst->uuid)); + memcpy(dst->next_uuid, src->next_uuid, sizeof(dst->next_uuid)); + memcpy(dst->prev_uuid, src->prev_uuid, sizeof(dst->prev_uuid)); + memcpy(dst->user_flags, src->user_flags, sizeof(dst->user_flags)); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/pmdk/src/tools/.gitignore b/src/pmdk/src/tools/.gitignore new file mode 100644 index 000000000..11dc3fb76 --- /dev/null +++ b/src/pmdk/src/tools/.gitignore @@ -0,0 +1,7 @@ +TAGS +cscope.in.out +cscope.po.out +cscope.out +*.static-debug +*.static-nondebug +.synced diff --git a/src/pmdk/src/tools/Makefile b/src/pmdk/src/tools/Makefile new file mode 100644 index 000000000..f94bcb7d5 --- /dev/null +++ b/src/pmdk/src/tools/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2018, Intel Corporation +# +# Makefile -- top Makefile for tools +# + +TOP = ../.. + +TESTCONFIG=$(TOP)/src/test/testconfig.sh + +TARGETS = pmempool rpmemd daxio pmreorder +SCOPEDIRS=$(TARGETS) +SCOPEFILES=$(foreach dir, $(SCOPEDIRS), $(shell find $(dir) -name *.[ch] )) + +all : TARGET = all +check : TARGET = check +test : TARGET = test +clean : TARGET = clean +clobber: TARGET = clobber +cstyle : TARGET = cstyle +format : TARGET = format +install: TARGET = install +uninstall: TARGET = uninstall +sync-remotes: TARGET = sync-remotes +sparse: TARGET = sparse + +all clean clobber cstyle install uninstall check format test sparse: $(TARGETS) + +$(TESTCONFIG): + +sync-remotes: $(TARGETS) $(TESTCONFIG) + +$(TARGETS): + $(MAKE) -C $@ $(TARGET) + +clean: + $(RM) TAGS cscope.in.out cscope.out cscope.po.out + +clobber: clean + +cscope: + cscope -q -b $(SCOPEFILES) + ctags -e $(SCOPEFILES) + +.PHONY: all clean clobber cstyle format install uninstall common cscope sync-remotes $(TARGETS) diff --git a/src/pmdk/src/tools/Makefile.inc b/src/pmdk/src/tools/Makefile.inc new file mode 100644 index 000000000..4713952cf --- /dev/null +++ b/src/pmdk/src/tools/Makefile.inc @@ -0,0 +1,342 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation +# +# src/tools/Makefile.inc -- Makefile include for all tools +# + +TOP := $(dir $(lastword $(MAKEFILE_LIST)))../.. + +include $(TOP)/src/common.inc + +INSTALL_TARGET ?= y + +INCS += -I. 
+INCS += -I$(TOP)/src/include +INCS += $(OS_INCS) +CFLAGS += -std=gnu99 +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wmissing-prototypes +CFLAGS += -Wpointer-arith +CFLAGS += -Wsign-conversion +CFLAGS += -Wsign-compare + +ifeq ($(WCONVERSION_AVAILABLE), y) +CFLAGS += -Wconversion +endif + +CFLAGS += -fno-common + +CFLAGS += -DSRCVERSION='"$(SRCVERSION)"' + +ifeq ($(OS_DIMM),ndctl) +CFLAGS += -DSDS_ENABLED +endif + +ifeq ($(IS_ICC), n) +CFLAGS += -Wunused-macros +CFLAGS += -Wmissing-field-initializers +endif + +ifeq ($(WUNREACHABLE_CODE_RETURN_AVAILABLE), y) +CFLAGS += -Wunreachable-code-return +endif + +ifeq ($(WMISSING_VARIABLE_DECLARATIONS_AVAILABLE), y) +CFLAGS += -Wmissing-variable-declarations +endif + +ifeq ($(WFLOAT_EQUAL_AVAILABLE), y) +CFLAGS += -Wfloat-equal +endif + +ifeq ($(WSWITCH_DEFAULT_AVAILABLE), y) +CFLAGS += -Wswitch-default +endif + +ifeq ($(WCAST_FUNCTION_TYPE_AVAILABLE), y) +CFLAGS += -Wcast-function-type +endif + +ifeq ($(DEBUG),1) +CFLAGS += -ggdb $(EXTRA_CFLAGS_DEBUG) +else +CFLAGS += -O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $(EXTRA_CFLAGS_RELEASE) +endif + +ifeq ($(VALGRIND),0) +CFLAGS += -DVALGRIND_ENABLED=0 +CXXFLAGS += -DVALGRIND_ENABLED=0 +endif + +ifeq ($(FAULT_INJECTION),1) +CFLAGS += -DFAULT_INJECTION=1 +CXXFLAGS += -DFAULT_INJECTION=1 +endif + +ifneq ($(SANITIZE),) +CFLAGS += -fsanitize=$(SANITIZE) +LDFLAGS += -fsanitize=$(SANITIZE) +endif +LDFLAGS += $(OS_LIBS) + +CFLAGS += $(EXTRA_CFLAGS) + +LDFLAGS += -Wl,-z,relro -Wl,--warn-common -Wl,--fatal-warnings $(EXTRA_LDFLAGS) +ifeq ($(DEBUG),1) +LDFLAGS += -L$(TOP)/src/debug +else +LDFLAGS += -L$(TOP)/src/nondebug +endif +TARGET_DIR=$(DESTDIR)$(bindir) +BASH_COMP_FILES ?= +BASH_COMP_DESTDIR = $(DESTDIR)$(bashcompdir) + +ifneq ($(DEBUG),1) +TARGET_STATIC_NONDEBUG=$(TARGET).static-nondebug +endif +TARGET_STATIC_DEBUG=$(TARGET).static-debug + +LIBSDIR=$(TOP)/src +LIBSDIR_DEBUG=$(LIBSDIR)/debug +LIBSDIR_NONDEBUG=$(LIBSDIR)/nondebug + +ifneq ($(DEBUG),) +LIBSDIR_PRIV=$(LIBSDIR_DEBUG) +else +LIBSDIR_PRIV=$(LIBSDIR_NONDEBUG) +endif + +PMEMLOG_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemlog/libpmemlog_unscoped.o +PMEMOBJ_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemobj/libpmemobj_unscoped.o +PMEMBLK_PRIV_OBJ=$(LIBSDIR_PRIV)/libpmemblk/libpmemblk_unscoped.o + +LIBS += $(LIBUUID) + +ifeq ($(LIBRT_NEEDED), y) +LIBS += -lrt +endif + +ifeq ($(TOOLS_COMMON), y) +LIBPMEMCOMMON=y +endif + +ifeq ($(LIBPMEMCOMMON), y) +DYNAMIC_LIBS += -lpmemcommon +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemcommon.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemcommon.a +CFLAGS += -I$(TOP)/src/common +LIBPMEMCORE=y +endif + +ifeq ($(LIBPMEMCORE), y) +DYNAMIC_LIBS += -lpmemcore +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemcore.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemcore.a +CFLAGS += -I$(TOP)/src/core +CFLAGS += $(LIBNDCTL_CFLAGS) +LIBS += $(LIBNDCTL_LIBS) +endif + +ifeq ($(LIBPMEMPOOL), y) +LIBPMEM=y +DYNAMIC_LIBS += -lpmempool +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmempool.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmempool.a +endif + +ifeq ($(LIBPMEMBLK), y) +LIBPMEM=y +DYNAMIC_LIBS += -lpmemblk +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemblk.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemblk.a +endif + +ifeq ($(LIBPMEMLOG), y) +LIBPMEM=y +DYNAMIC_LIBS += -lpmemlog +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemlog.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmemlog.a +endif + +ifeq ($(LIBPMEMOBJ), y) +LIBPMEM=y +DYNAMIC_LIBS += -lpmemobj +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmemobj.a +STATIC_NONDEBUG_LIBS += 
$(LIBSDIR_NONDEBUG)/libpmemobj.a +endif + +ifeq ($(LIBPMEM),y) +DYNAMIC_LIBS += -lpmem +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmem.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmem.a +endif + +ifeq ($(LIBPMEM2),y) +DYNAMIC_LIBS += -lpmem2 +STATIC_DEBUG_LIBS += $(LIBSDIR_DEBUG)/libpmem2.a +STATIC_NONDEBUG_LIBS += $(LIBSDIR_NONDEBUG)/libpmem2.a +CFLAGS += $(LIBNDCTL_CFLAGS) +LIBS += $(LIBNDCTL_LIBS) +endif + +# If any of these libraries is required, we need to link libpthread +ifneq ($(LIBPMEMCORE)$(LIBPMEMCOMMON)$(LIBPMEM)$(LIBPMEM2)$(LIBPMEMPOOL)$(LIBPMEMBLK)$(LIBPMEMLOG)$(LIBPMEMOBJ),) +LIBS += -pthread +endif + +# If any of these libraries is required, we need to link libdl +ifneq ($(LIBPMEMCOMMON)$(LIBPMEMPOOL)$(LIBPMEMOBJ),) +LIBS += $(LIBDL) +endif + +ifeq ($(TOOLS_COMMON), y) +vpath %.c $(TOP)/src/tools/pmempool + +OBJS += common.o output.o + +CFLAGS += -I$(TOP)/src/core +CFLAGS += -I$(TOP)/src/common +CFLAGS += -I$(TOP)/src/libpmemlog +CFLAGS += -I$(TOP)/src/libpmemblk +CFLAGS += -I$(TOP)/src/libpmemobj +CFLAGS += -I$(TOP)/src/tools/pmempool +CFLAGS += $(UNIX98_CFLAGS) + +endif + +ifneq ($(LIBPMEMLOG_PRIV),) +OBJS += pmemlog_priv.o +endif + +ifneq ($(LIBPMEMOBJ_PRIV),) +OBJS += pmemobj_priv.o +endif + +ifneq ($(LIBPMEMBLK_PRIV),) +OBJS += pmemblk_priv.o +endif + +ifneq ($(HEADERS),) +ifneq ($(filter 1 2, $(CSTYLEON)),) +TMP_HEADERS := $(addsuffix tmp, $(HEADERS)) +endif +endif + +ifeq ($(COVERAGE),1) +CFLAGS += $(GCOV_CFLAGS) +LDFLAGS += $(GCOV_LDFLAGS) +LIBS += $(GCOV_LIBS) +endif + +MAKEFILE_DEPS=$(TOP)/src/tools/Makefile.inc $(TOP)/src/common.inc + +ifneq ($(TARGET),) +all: $(TARGET) $(TARGET_STATIC_NONDEBUG) $(TARGET_STATIC_DEBUG) +else +all: +endif + +SYNC_FILE=.synced + +clean: + $(RM) $(OBJS) $(CLEAN_FILES) $(SYNC_FILE) $(TMP_HEADERS) + +clobber: clean +ifneq ($(TARGET),) + $(RM) $(TARGET) + $(RM) $(TARGET_STATIC_NONDEBUG) + $(RM) $(TARGET_STATIC_DEBUG) + $(RM) -r .deps +endif + +install: all +ifeq ($(INSTALL_TARGET),y) +ifneq ($(TARGET),) + install -d $(TARGET_DIR) + install -p -m 0755 $(TARGET) $(TARGET_DIR) +endif +ifneq ($(BASH_COMP_FILES),) + install -d $(BASH_COMP_DESTDIR) + install -p -m 0644 $(BASH_COMP_FILES) $(BASH_COMP_DESTDIR) +endif +endif + +uninstall: +ifeq ($(INSTALL_TARGET),y) +ifneq ($(TARGET),) + $(RM) $(TARGET_DIR)/$(TARGET) +endif +ifneq ($(BASH_COMP_FILES),) + $(RM) $(BASH_COMP_DESTDIR)/$(BASH_COMP_FILES) +endif +endif + +%.gz: % + gzip -nc ./$< > $@ + +%.txt: % + man ./$< > $@ + +$(TARGET) $(TARGET_STATIC_DEBUG) $(TARGET_STATIC_NONDEBUG): $(TMP_HEADERS) $(OBJS) $(MAKEFILE_DEPS) + +$(TARGET_STATIC_DEBUG): $(STATIC_DEBUG_LIBS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(STATIC_DEBUG_LIBS) $(LIBS) + +$(TARGET_STATIC_NONDEBUG): $(STATIC_NONDEBUG_LIBS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(STATIC_NONDEBUG_LIBS) $(LIBS) + +$(TARGET): + $(CC) $(LDFLAGS) -o $@ $(OBJS) $(DYNAMIC_LIBS) $(LIBS) + +$(PMEMLOG_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemlog + +pmemlog_priv.o: $(PMEMLOG_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMLOG_PRIV)) $< $@ + +$(PMEMOBJ_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemobj + +pmemobj_priv.o: $(PMEMOBJ_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMOBJ_PRIV)) $< $@ + +$(PMEMBLK_PRIV_OBJ): + $(MAKE) -C $(LIBSDIR) libpmemblk + +pmemblk_priv.o: $(PMEMBLK_PRIV_OBJ) + $(OBJCOPY) --localize-hidden $(addprefix -G, $(LIBPMEMBLK_PRIV)) $< $@ + +objdir=. 
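+
+# A tool Makefile that consumes this include file (see the daxio and
+# pmempool Makefiles below) only declares its target, its objects and the
+# PMDK libraries it links against -- a minimal sketch, with a hypothetical
+# tool name:
+#
+#	TOP = ../../..
+#	include $(TOP)/src/common.inc
+#	TARGET = exampletool
+#	OBJS = exampletool.o
+#	LIBPMEM = y
+#	include ../Makefile.inc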
+
+%.o: %.c $(MAKEFILE_DEPS)
+	$(call check-cstyle, $<)
+	@mkdir -p .deps
+	$(CC) -MD $(CFLAGS) $(INCS) -c -o $@ $(call coverage-path, $<)
+	$(call check-os, $@, $<)
+	$(create-deps)
+
+%.htmp: %.h
+	$(call check-cstyle, $<, $@)
+
+test check pcheck: all
+
+TESTCONFIG=$(TOP)/src/test/testconfig.sh
+DIR_SYNC=$(TOP)/src/test/.sync-dir
+
+$(TESTCONFIG):
+
+sync-remotes: all $(SYNC_FILE)
+
+$(SYNC_FILE): $(TARGET) $(TESTCONFIG)
+ifeq ($(SCP_TO_REMOTE_NODES), y)
+	cp $(TARGET) $(DIR_SYNC)
+	@touch $(SYNC_FILE)
+endif
+
+sparse:
+	$(if $(TARGET), $(sparse-c))
+
+.PHONY: all clean clobber install uninstall test check pcheck
+
+-include .deps/*.P
diff --git a/src/pmdk/src/tools/daxio/.gitignore b/src/pmdk/src/tools/daxio/.gitignore
new file mode 100644
index 000000000..ec6140778
--- /dev/null
+++ b/src/pmdk/src/tools/daxio/.gitignore
@@ -0,0 +1 @@
+daxio
diff --git a/src/pmdk/src/tools/daxio/Makefile b/src/pmdk/src/tools/daxio/Makefile
new file mode 100644
index 000000000..9fc70dc90
--- /dev/null
+++ b/src/pmdk/src/tools/daxio/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2020, Intel Corporation
+#
+# Makefile -- top Makefile for daxio
+#
+
+TOP = ../../..
+include $(TOP)/src/common.inc
+
+INCS += -I$(TOP)/src/libpmem2
+
+ifeq ($(NDCTL_ENABLE),y)
+
+SCP_TO_REMOTE_NODES = y
+
+TARGET = daxio
+OBJS = daxio.o
+
+LIBPMEM=y
+LIBPMEMCOMMON=y
+
+CFLAGS += $(LIBNDCTL_CFLAGS)
+LIBS += $(LIBNDCTL_LIBS)
+
+MANPAGES = $(TOP)/doc/daxio.1
+
+# XXX: to be done
+# BASH_COMP_FILES = daxio.sh
+
+else
+$(info NOTE: Skipping daxio because ndctl is not available)
+endif
+
+include ../Makefile.inc
+
+.PHONY: test check
diff --git a/src/pmdk/src/tools/daxio/README b/src/pmdk/src/tools/daxio/README
new file mode 100644
index 000000000..91af2803d
--- /dev/null
+++ b/src/pmdk/src/tools/daxio/README
@@ -0,0 +1,47 @@
+Persistent Memory Development Kit
+
+This is src/tools/daxio/README.
+
+This file contains the high-level description of the daxio utility.
+
+The main purpose of daxio is to perform I/O on Device DAX devices, or to zero
+a Device DAX device. Since the standard I/O APIs (read/write) cannot be used
+with Device DAX, data transfer is performed on a memory-mapped device.
+daxio may be used to dump Device DAX data to a file, restore data from
+a backup copy, or move/copy data to another device.
+
+There must be at least one Device DAX device involved, either as the input
+or the output. If the input or output is not specified, it defaults to stdin
+or stdout respectively.
+
+If no length is specified, it defaults to the length of the input file/device,
+or to the length of the output file/device if the input is a special character
+file or stdin.
+
+For a Device DAX device, daxio attempts to clear bad blocks within the write
+range before performing the I/O.
+
+3. Source code
+--------------
+
+The source code of daxio is located in the daxio directory.
+
+By default daxio is installed in the $(DESTDIR)/usr/bin directory.
+You can change it by passing the $(TOOLSDIR) variable to "make install".
+For example, the following command will install daxio in the ~/bin directory:
+ $ make install DESTDIR=~ TOOLSDIR=/bin
+
+See the top-level README file for detailed information about building and
+installation.
+
+4. Packaging
+------------
+
+The daxio utility is provided in separate packages. Both rpm and dpkg
+packages are built automatically with other packages.
+
+See the top-level README file for detailed information about building packages.
+
+5. Versioning
+-------------
+
+The versioning of the daxio utility is the same as for all PMDK libraries.
diff --git a/src/pmdk/src/tools/daxio/daxio.c b/src/pmdk/src/tools/daxio/daxio.c
new file mode 100644
index 000000000..66cd0b7ae
--- /dev/null
+++ b/src/pmdk/src/tools/daxio/daxio.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2018-2020, Intel Corporation */
+
+/*
+ * daxio.c -- simple app for reading and writing data from/to
+ * a Device DAX device, using mmap instead of the file I/O API
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+
+#include <ndctl/libndctl.h>
+#include <ndctl/libdaxctl.h>
+#include <libpmem.h>
+
+#include "util.h"
+#include "os.h"
+#include "badblocks.h"
+
+#define ALIGN_UP(size, align) (((size) + (align) - 1) & ~((align) - 1))
+#define ALIGN_DOWN(size, align) ((size) & ~((align) - 1))
+
+#define ERR(fmt, ...)\
+do {\
+	fprintf(stderr, "daxio: " fmt, ##__VA_ARGS__);\
+} while (0)
+
+#define FAIL(func)\
+do {\
+	fprintf(stderr, "daxio: %s:%d: %s: %s\n",\
+		__func__, __LINE__, func, strerror(errno));\
+} while (0)
+
+#define USAGE_MESSAGE \
+"Usage: daxio [option] ...\n"\
+"Valid options:\n"\
+"  -i, --input=FILE              - input device/file (default stdin)\n"\
+"  -o, --output=FILE             - output device/file (default stdout)\n"\
+"  -k, --skip=BYTES              - skip offset for input (default 0)\n"\
+"  -s, --seek=BYTES              - seek offset for output (default 0)\n"\
+"  -l, --len=BYTES               - total length to perform the I/O\n"\
+"  -b, --clear-bad-blocks=yes|no - clear bad blocks (default: yes)\n"\
+"  -z, --zero                    - zero the device\n"\
+"  -h, --help                    - print this help\n"\
+"  -V, --version                 - display version of daxio\n"
+
+struct daxio_device {
+	char *path;
+	int fd;
+	size_t size;	/* actual file/device size */
+	int is_devdax;
+
+	/* Device DAX only */
+	size_t align;	/* internal device alignment */
+	char *addr;	/* mapping base address */
+	size_t maplen;	/* mapping length */
+	size_t offset;	/* seek or skip */
+
+	unsigned major;
+	unsigned minor;
+	struct ndctl_ctx *ndctl_ctx;
+	struct ndctl_region *region;	/* parent region */
+};
+
+/*
+ * daxio_context -- context and arguments
+ */
+struct daxio_context {
+	size_t len;	/* total length of I/O */
+	int zero;
+	int clear_bad_blocks;
+	struct daxio_device src;
+	struct daxio_device dst;
+};
+
+/*
+ * default context
+ */
+static struct daxio_context Ctx = {
+	SIZE_MAX,	/* len */
+	0,		/* zero */
+	1,		/* clear_bad_blocks */
+	{ NULL, -1, SIZE_MAX, 0, 0, NULL, 0, 0, 0, 0, NULL, NULL },
+	{ NULL, -1, SIZE_MAX, 0, 0, NULL, 0, 0, 0, 0, NULL, NULL },
+};
+
+/*
+ * print_version -- print daxio version
+ */
+static void
+print_version(void)
+{
+	printf("%s\n", SRCVERSION);
+}
+
+/*
+ * print_usage -- print short description of usage
+ */
+static void
+print_usage(void)
+{
+	fprintf(stderr, USAGE_MESSAGE);
+}
+
+/*
+ * long_options -- command line options
+ */
+static const struct option long_options[] = {
+	{"input",		required_argument,	NULL,	'i'},
+	{"output",		required_argument,	NULL,	'o'},
+	{"skip",		required_argument,	NULL,	'k'},
+	{"seek",		required_argument,	NULL,	's'},
+	{"len",			required_argument,	NULL,	'l'},
+	{"clear-bad-blocks",	required_argument,	NULL,	'b'},
+	{"zero",		no_argument,		NULL,	'z'},
+	{"help",		no_argument,		NULL,	'h'},
+	{"version",		no_argument,		NULL,	'V'},
+	{NULL,			0,			NULL,	 0 },
+};
+
+/*
+ * parse_args -- (internal) parse command line arguments
+ */
+static int
+parse_args(struct daxio_context *ctx, int argc, char * const argv[])
+{
+	int opt;
+	size_t offset;
+	size_t len;
+
+	while ((opt =
getopt_long(argc, argv, "i:o:k:s:l:b:zhV", + long_options, NULL)) != -1) { + switch (opt) { + case 'i': + ctx->src.path = optarg; + break; + case 'o': + ctx->dst.path = optarg; + break; + case 'k': + if (util_parse_size(optarg, &offset)) { + ERR("'%s' -- invalid input offset\n", optarg); + return -1; + } + ctx->src.offset = offset; + break; + case 's': + if (util_parse_size(optarg, &offset)) { + ERR("'%s' -- invalid output offset\n", optarg); + return -1; + } + ctx->dst.offset = offset; + break; + case 'l': + if (util_parse_size(optarg, &len)) { + ERR("'%s' -- invalid length\n", optarg); + return -1; + } + ctx->len = len; + break; + case 'z': + ctx->zero = 1; + break; + case 'b': + if (strcmp(optarg, "no") == 0) { + ctx->clear_bad_blocks = 0; + } else if (strcmp(optarg, "yes") == 0) { + ctx->clear_bad_blocks = 1; + } else { + ERR( + "'%s' -- invalid argument of the '--clear-bad-blocks' option\n", + optarg); + return -1; + } + break; + case 'h': + print_usage(); + exit(EXIT_SUCCESS); + case 'V': + print_version(); + exit(EXIT_SUCCESS); + default: + print_usage(); + exit(EXIT_FAILURE); + } + } + + return 0; +} + +/* + * validate_args -- (internal) validate command line arguments + */ +static int +validate_args(struct daxio_context *ctx) +{ + if (ctx->zero && ctx->dst.path == NULL) { + ERR("zeroing flag specified but no output file provided\n"); + return -1; + } + + if (!ctx->zero && ctx->src.path == NULL && ctx->dst.path == NULL) { + ERR("an input file and/or an output file must be provided\n"); + return -1; + } + + /* if no input file provided, use stdin */ + if (ctx->src.path == NULL) { + if (ctx->src.offset != 0) { + ERR( + "skip offset specified but no input file provided\n"); + return -1; + } + ctx->src.fd = STDIN_FILENO; + ctx->src.path = "STDIN"; + } + + /* if no output file provided, use stdout */ + if (ctx->dst.path == NULL) { + if (ctx->dst.offset != 0) { + ERR( + "seek offset specified but no output file provided\n"); + return -1; + } + ctx->dst.fd = STDOUT_FILENO; + ctx->dst.path = "STDOUT"; + } + + return 0; +} + +/* + * match_dev_dax -- (internal) find Device DAX by major/minor device number + */ +static int +match_dev_dax(struct daxio_device *dev, struct daxctl_region *dax_region) +{ + struct daxctl_dev *d; + + daxctl_dev_foreach(dax_region, d) { + if (dev->major == (unsigned)daxctl_dev_get_major(d) && + dev->minor == (unsigned)daxctl_dev_get_minor(d)) { + dev->size = daxctl_dev_get_size(d); + return 1; + } + } + + return 0; +} + +/* + * find_dev_dax -- (internal) check if device is Device DAX + * + * If there is matching Device DAX, find its region, size and alignment. 
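+ *
+ * The lookup is two-stage (see the loops below): first walk the ndctl
+ * hierarchy (bus -> region -> dax device), then fall back to standalone
+ * daxctl regions for Device DAX devices that have no NVDIMM parent.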
+ */
+static int
+find_dev_dax(struct ndctl_ctx *ndctl_ctx, struct daxio_device *dev)
+{
+	struct ndctl_bus *bus = NULL;
+	struct ndctl_region *region = NULL;
+	struct ndctl_dax *dax = NULL;
+	struct daxctl_region *dax_region = NULL;
+
+	ndctl_bus_foreach(ndctl_ctx, bus) {
+		ndctl_region_foreach(bus, region) {
+			ndctl_dax_foreach(region, dax) {
+				dax_region = ndctl_dax_get_daxctl_region(dax);
+				if (match_dev_dax(dev, dax_region)) {
+					dev->is_devdax = 1;
+					dev->align = ndctl_dax_get_align(dax);
+					dev->region = region;
+					return 1;
+				}
+			}
+		}
+	}
+
+	/* try with dax regions */
+	struct daxctl_ctx *daxctl_ctx;
+	if (daxctl_new(&daxctl_ctx))
+		return 0;
+
+	int ret = 0;
+	daxctl_region_foreach(daxctl_ctx, dax_region) {
+		if (match_dev_dax(dev, dax_region)) {
+			dev->is_devdax = 1;
+			dev->align = daxctl_region_get_align(dax_region);
+			dev->region = region;
+			ret = 1;
+			goto end;
+		}
+	}
+
+end:
+	daxctl_unref(daxctl_ctx);
+	return ret;
+}
+
+/*
+ * setup_device -- (internal) open/mmap file/device
+ */
+static int
+setup_device(struct ndctl_ctx *ndctl_ctx, struct daxio_device *dev, int is_dst,
+	int clear_bad_blocks)
+{
+	int ret;
+	int flags = O_RDWR;
+	int prot = is_dst ? PROT_WRITE : PROT_READ;
+
+	if (dev->fd != -1) {
+		dev->size = SIZE_MAX;
+		return 0;	/* stdin/stdout */
+	}
+
+	/* try to open file/device (if exists) */
+	dev->fd = os_open(dev->path, flags, S_IRUSR|S_IWUSR);
+	if (dev->fd == -1) {
+		ret = errno;
+		if (ret == ENOENT && is_dst) {
+			/* file does not exist - create it */
+			flags = O_CREAT|O_WRONLY|O_TRUNC;
+			dev->size = SIZE_MAX;
+			dev->fd = os_open(dev->path, flags, S_IRUSR|S_IWUSR);
+			if (dev->fd == -1) {
+				FAIL("open");
+				return -1;
+			}
+			return 0;
+		} else {
+			ERR("failed to open '%s': %s\n", dev->path,
+				strerror(errno));
+			return -1;
+		}
+	}
+
+	struct stat stbuf;
+	ret = fstat(dev->fd, &stbuf);
+	if (ret == -1) {
+		FAIL("stat");
+		return -1;
+	}
+
+	/* check if this is regular file or device */
+	if (S_ISREG(stbuf.st_mode)) {
+		if (is_dst)
+			dev->size = SIZE_MAX;
+		else
+			dev->size = (size_t)stbuf.st_size;
+	} else if (S_ISBLK(stbuf.st_mode)) {
+		dev->size = (size_t)stbuf.st_size;
+	} else if (S_ISCHR(stbuf.st_mode)) {
+		dev->size = SIZE_MAX;
+		dev->major = major(stbuf.st_rdev);
+		dev->minor = minor(stbuf.st_rdev);
+	} else {
+		return -1;
+	}
+
+	/* check if this is Device DAX */
+	if (S_ISCHR(stbuf.st_mode))
+		find_dev_dax(ndctl_ctx, dev);
+
+	if (!dev->is_devdax)
+		return 0;
+
+	if (is_dst && clear_bad_blocks) {
+		/* XXX - clear only bad blocks in range bound by offset/len */
+		if (badblocks_clear_all(dev->path)) {
+			ERR("failed to clear bad blocks on \"%s\"\n"
+				"       You probably do not have enough permissions to do that.\n"
+				"       You can choose one of three options now:\n"
+				"       1) run 'daxio' with 'sudo' or as 'root',\n"
+				"       2) turn off clearing bad blocks using\n"
+				"          the '-b/--clear-bad-blocks=no' option or\n"
+				"       3) change permissions of some resource files -\n"
+				"          - for details see the description of the CHECK_BAD_BLOCKS\n"
+				"          compat feature in the pmempool-feature(1) man page.\n",
+				dev->path);
+			return -1;
+		}
+	}
+
+	if (dev->align == ULONG_MAX) {
+		ERR("cannot determine device alignment for \"%s\"\n",
+			dev->path);
+		return -1;
+	}
+
+	if (dev->offset > dev->size) {
+		ERR("'%zu' -- offset beyond device size (%zu)\n",
+			dev->offset, dev->size);
+		return -1;
+	}
+
+	/* align len/offset to the internal device alignment */
+	dev->maplen = ALIGN_UP(dev->size, dev->align);
+	size_t offset = ALIGN_DOWN(dev->offset, dev->align);
+	dev->offset = dev->offset - offset;
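+	/* the mapping starts at the aligned-down offset, so trim the length */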
dev->maplen = dev->maplen - offset; + + dev->addr = mmap(NULL, dev->maplen, prot, MAP_SHARED, dev->fd, + (off_t)offset); + if (dev->addr == MAP_FAILED) { + FAIL("mmap"); + return -1; + } + + return 0; +} + +/* + * setup_devices -- (internal) open/mmap input and output + */ +static int +setup_devices(struct ndctl_ctx *ndctl_ctx, struct daxio_context *ctx) +{ + if (!ctx->zero && + setup_device(ndctl_ctx, &ctx->src, 0, ctx->clear_bad_blocks)) + return -1; + return setup_device(ndctl_ctx, &ctx->dst, 1, ctx->clear_bad_blocks); +} + +/* + * adjust_io_len -- (internal) calculate I/O length if not specified + */ +static void +adjust_io_len(struct daxio_context *ctx) +{ + size_t src_len = ctx->src.maplen - ctx->src.offset; + size_t dst_len = ctx->dst.maplen - ctx->dst.offset; + size_t max_len = SIZE_MAX; + + if (ctx->zero) + assert(ctx->dst.is_devdax); + else + assert(ctx->src.is_devdax || ctx->dst.is_devdax); + + if (ctx->src.is_devdax) + max_len = src_len; + if (ctx->dst.is_devdax) + max_len = max_len < dst_len ? max_len : dst_len; + + /* if length is specified and is not bigger than mmapped region */ + if (ctx->len != SIZE_MAX && ctx->len <= max_len) + return; + + /* adjust len to device size */ + ctx->len = max_len; +} + +/* + * cleanup_device -- (internal) unmap/close file/device + */ +static void +cleanup_device(struct daxio_device *dev) +{ + if (dev->addr) + (void) munmap(dev->addr, dev->maplen); + if (dev->path && dev->fd != -1) + (void) close(dev->fd); +} + +/* + * cleanup_devices -- (internal) unmap/close input and output + */ +static void +cleanup_devices(struct daxio_context *ctx) +{ + cleanup_device(&ctx->dst); + if (!ctx->zero) + cleanup_device(&ctx->src); +} + +/* + * do_io -- (internal) write data to device/file + */ +static int +do_io(struct ndctl_ctx *ndctl_ctx, struct daxio_context *ctx) +{ + ssize_t cnt = 0; + + assert(ctx->src.is_devdax || ctx->dst.is_devdax); + + if (ctx->zero) { + if (ctx->dst.offset > ctx->dst.maplen) { + ERR("output offset larger than device size"); + return -1; + } + if (ctx->dst.offset + ctx->len > ctx->dst.maplen) { + ERR("output offset beyond device size"); + return -1; + } + + char *dst_addr = ctx->dst.addr + ctx->dst.offset; + pmem_memset_persist(dst_addr, 0, ctx->len); + cnt = (ssize_t)ctx->len; + } else if (ctx->src.is_devdax && ctx->dst.is_devdax) { + /* memcpy between src and dst */ + char *src_addr = ctx->src.addr + ctx->src.offset; + char *dst_addr = ctx->dst.addr + ctx->dst.offset; + pmem_memcpy_persist(dst_addr, src_addr, ctx->len); + cnt = (ssize_t)ctx->len; + } else if (ctx->src.is_devdax) { + /* write to file directly from mmap'ed src */ + char *src_addr = ctx->src.addr + ctx->src.offset; + if (ctx->dst.offset) { + if (lseek(ctx->dst.fd, (off_t)ctx->dst.offset, + SEEK_SET) < 0) { + FAIL("lseek"); + goto err; + } + } + do { + ssize_t wcnt = write(ctx->dst.fd, src_addr + cnt, + ctx->len - (size_t)cnt); + if (wcnt == -1) { + FAIL("write"); + goto err; + } + cnt += wcnt; + } while ((size_t)cnt < ctx->len); + } else if (ctx->dst.is_devdax) { + /* read from file directly to mmap'ed dst */ + char *dst_addr = ctx->dst.addr + ctx->dst.offset; + if (ctx->src.offset) { + if (lseek(ctx->src.fd, (off_t)ctx->src.offset, + SEEK_SET) < 0) { + FAIL("lseek"); + return -1; + } + } + do { + ssize_t rcnt = read(ctx->src.fd, dst_addr + cnt, + ctx->len - (size_t)cnt); + if (rcnt == -1) { + FAIL("read"); + goto err; + } + /* end of file */ + if (rcnt == 0) + break; + cnt = cnt + rcnt; + } while ((size_t)cnt < ctx->len); + + pmem_persist(dst_addr, (size_t)cnt); + + 
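+		/*
+		 * A short read means the input hit EOF before the requested
+		 * length was transferred; only the bytes actually read were
+		 * persisted above, and the message below reports the shortfall.
+		 */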
		if ((size_t)cnt != ctx->len)
+			ERR("requested size %zu larger than source\n",
+				ctx->len);
+	}
+
+	ERR("copied %zd bytes to device \"%s\"\n", cnt, ctx->dst.path);
+	return 0;
+
+err:
+	ERR("failed to perform I/O\n");
+	return -1;
+}
+
+int
+main(int argc, char **argv)
+{
+	struct ndctl_ctx *ndctl_ctx;
+	int ret = EXIT_SUCCESS;
+
+	if (parse_args(&Ctx, argc, argv))
+		return EXIT_FAILURE;
+
+	if (validate_args(&Ctx))
+		return EXIT_FAILURE;
+
+	if (ndctl_new(&ndctl_ctx))
+		return EXIT_FAILURE;
+
+	if (setup_devices(ndctl_ctx, &Ctx)) {
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	if (!Ctx.src.is_devdax && !Ctx.dst.is_devdax) {
+		ERR("neither input nor output is device dax\n");
+		ret = EXIT_FAILURE;
+		goto err;
+	}
+
+	adjust_io_len(&Ctx);
+
+	if (do_io(ndctl_ctx, &Ctx))
+		ret = EXIT_FAILURE;
+
+err:
+	cleanup_devices(&Ctx);
+	ndctl_unref(ndctl_ctx);
+
+	return ret;
+}
diff --git a/src/pmdk/src/tools/pmempool/.gitignore b/src/pmdk/src/tools/pmempool/.gitignore
new file mode 100644
index 000000000..5b3423cbd
--- /dev/null
+++ b/src/pmdk/src/tools/pmempool/.gitignore
@@ -0,0 +1 @@
+pmempool
diff --git a/src/pmdk/src/tools/pmempool/Makefile b/src/pmdk/src/tools/pmempool/Makefile
new file mode 100644
index 000000000..ba314a45d
--- /dev/null
+++ b/src/pmdk/src/tools/pmempool/Makefile
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2014-2020, Intel Corporation
+#
+# Makefile -- top Makefile for pmempool
+#
+SCP_TO_REMOTE_NODES = y
+
+vpath %.c ../../libpmemobj/
+vpath %.c ../../librpmem/
+vpath %.c ../../rpmem_common/
+
+TARGET = pmempool
+
+OBJS = pmempool.o\
+	info.o info_blk.o info_log.o info_obj.o ulog.o\
+	create.o dump.o check.o rm.o convert.o synchronize.o transform.o\
+	rpmem_ssh.o rpmem_cmd.o rpmem_util.o rpmem_common.o feature.o
+
+LIBPMEM=y
+LIBPMEMBLK=y
+LIBPMEMOBJ=y
+LIBPMEMLOG=y
+LIBPMEMPOOL=y
+TOOLS_COMMON=y
+
+LIBPMEMOBJ_PRIV=memblock_from_offset alloc_class_by_id\
+	memblock_rebuild_state alloc_class_by_run\
+	heap_run_foreach_object alloc_class_collection_new\
+	alloc_class_collection_delete
+
+LIBPMEMBLK_PRIV=btt_init btt_write btt_fini btt_info_convert2h\
+	btt_info_convert2le btt_flog_convert2h btt_flog_convert2le
+
+INCS += -I$(TOP)/src/common
+INCS += -I$(TOP)/src/rpmem_common
+INCS += -I$(TOP)/src/libpmem2
+INCS += -I$(TOP)/src/librpmem
+INCS += -I$(TOP)/src/libpmemlog
+INCS += -I$(TOP)/src/libpmemblk
+INCS += -I$(TOP)/src/libpmemobj
+
+CFLAGS += -DUSE_RPMEM
+
+MANPAGES = $(TOP)/doc/pmempool.1\
+	$(TOP)/doc/pmempool-info.1\
+	$(TOP)/doc/pmempool-create.1\
+	$(TOP)/doc/pmempool-check.1\
+	$(TOP)/doc/pmempool-dump.1\
+	$(TOP)/doc/pmempool-rm.1\
+	$(TOP)/doc/pmempool-convert.1\
+	$(TOP)/doc/pmempool-sync.1\
+	$(TOP)/doc/pmempool-transform.1
+
+BASH_COMP_FILES = bash_completion/pmempool
+
+include ../Makefile.inc
+
+.PHONY: test check
diff --git a/src/pmdk/src/tools/pmempool/README b/src/pmdk/src/tools/pmempool/README
new file mode 100644
index 000000000..e1c7e975a
--- /dev/null
+++ b/src/pmdk/src/tools/pmempool/README
@@ -0,0 +1,306 @@
+Persistent Memory Development Kit
+
+This is src/tools/pmempool/README.
+
+This file contains the high-level description of the pmempool utility.
+
+ 1. Introduction
+ 2. Subcommands
+  2.1. info
+  2.2. check
+  2.3. create
+  2.4. dump
+  2.5. rm
+  2.6. sync
+  2.7. transform
+  2.8. convert
+ 3. Source code
+ 4. Packaging
+ 5. Versioning
+
+1. Introduction
+---------------
+
+The main purpose of pmempool is to provide a user with a set of utilities for
+off-line analysis and manipulation of pools created by pmem libraries. The
The +pmempool is a generic command which consists of subcommands for specific +purposes. Some of commands are required to work without any impact on processed +pool, but some of them may create new or modify existing one. + +The pmempool may be useful for troubleshooting and may be used by system +administrators and by software developers work on applications based on +Persistent Memory Development Kit. The latter may find these tools useful for testing and debugging +purposes also. + +Currently there is a following set of commands available: + + * info - Prints information and statistics in human-readable + format about specified pool. + + * check - Checks pool's consistency and repairs pool if it is + not consistent. + + * create - Creates a pool of specified type with additional + properties specific for this type of pool. + + * dump - Dumps usable data from pool in hexadecimal or binary + format. + + * rm - Removes pool file or all pool files listed in poolset + configuration file. + + * sync - Synchronizes replicas within a poolset. + + * transform - Modifies internal structure of a poolset. + + * convert - Updates the pool to the latest available + layout version. + +This file contains high-level description of available commands and their +features. For details about usage and available command line arguments please +refer to specific manual pages. There is one common manual page with description +of all commands - pmempool(1) and manual pages which describe all commands +in detail: + pmempool-info(1) + pmempool-check(1) + pmempool-create(1) + pmempool-dump(1) + pmempool-rm(1) + pmempool-sync(1) + pmempool-transform(1) + pmempool-convert(1) + +Subsequent sections contain detailed description of each command, information +about the source code, packaging and versioning scheme. + +2. Subcommands +-------------- + +The pmempool application contains number of commands which perform specific +operations on pool. The following subsections contain detailed description of +existing commands. + +2.1. info +--------- + +The pmempool invoked with *info* command analyzes the existing pool created +by PMDK libraries. The main task of this command is to print all usable +information from pool headers and user data in human readable format. It +automatically recognizes pool type by parsing and analyzing pool header. The +recognition is done by checking the signature in pool header. The main +intention of *info* command is to present internal data structures as they are +stored in file - not for checking consistency. For this purpose there is *check* +command available. + +The pmempool with *info* command analyzes a pool file as long as it is possible +regarding correctness of internal meta-data (correct offsets, sizes etc.). If it +is not possible to analyze rest of file, pmempool exits with an error code and +prints an appropriate error message. + +Currently there is lack of interprocess synchronization for pool files, so the +pmempool with *info* command should be invoked off-line. Using pmempool on +pool file which may be modified by another process may lead to stopping +processing the file. + +There is a set of common features for all pool types and a set of features +specific for particular pool type. +All features are described below. + +** Common features + + * The basic function of *info* command is to print information about the + most important internal data structures from specific pool. By default this + is done by invoking pmempool with *info* command and one or more files. 
+
+ * It is possible to print basic statistics about the pool by passing an
+   appropriate command line argument.
+
+ * The type of the pool is recognized automatically. The exact list of headers
+   and internal meta-data depends on the pool's type. All information is
+   displayed in human-readable format.
+
+ * The pool header may be corrupted, in which case automatic recognition of
+   the pool's type will fail. In order to analyze a pool file as a pool of a
+   specific type, it is possible to force that by specifying the desired pool
+   type using an appropriate command line argument.
+
+ * Headers and internal meta-data are displayed in human-readable format by
+   default. However, it is possible to display them in a mixed format which
+   consists of a hexadecimal dump of the headers and the parsed data in
+   human-readable format.
+
+ * By default only non-volatile fields from internal structures are displayed.
+   In order to display volatile fields you should increase the verbosity
+   level.
+
+ * By default all sizes are displayed in bytes. It is possible to print them
+   in more human-readable formats with appropriate units
+   (e.g. 4k, 8M, 16G).
+
+** Features for *log* pool type
+
+ * By default pmempool with the *info* command displays the pool header, the
+   log pool type specific header and statistics. It is possible to print the
+   data in hexadecimal format by passing an appropriate command line option.
+
+ * It is possible to walk through the usable data using a fixed data chunk
+   size. This feature uses a similar approach to the pmemlog_walk() function.
+   For details please refer to libpmemlog(7).
+
+** Features for *blk* pool type
+
+ * By default pmempool with the *info* command displays the pool header, the
+   blk pool type specific header, the BTT Info header and statistics.
+
+ * It is possible to print more headers and internal data by passing specific
+   command line options. It is possible to print the following sections:
+   BTT Map entries, BTT FLOG, BTT Info backup and data blocks.
+
+ * It is possible to print a specific range of blocks in either an absolute or
+   a relative manner (e.g. display blocks from 10 to 1000, or display 10
+   blocks starting from block number 1000).
+
+ * By default when displaying data blocks all blocks are displayed. However,
+   it is possible to skip blocks marked with zero or error flags, or to skip
+   blocks which are not marked by any flag. Skipping blocks has an impact on
+   the block ranges (e.g. display 10 blocks marked with the error flag in the
+   range from 0 to 10000).
+
+2.2. check
+----------
+
+The pmempool invoked with the *check* command checks an existing pool's
+consistency. If the pool is consistent, pmempool exits with exit code 0.
+Otherwise a nonzero error code is returned and an appropriate message is
+displayed.
+
+In addition it may also try to fix some common known errors at the user's
+explicit request. In this case the pool file will be opened in read-write
+mode, so the user should be aware of the modifications made by the pmempool
+application.
+
+Below is the description of the available features:
+
+ * By default pmempool with the *check* command prints a brief description of
+   the encountered error(s) and a proper error value is returned. If there is
+   no error, nothing is printed and the exit code is 0.
+
+ * If an error is encountered while checking the consistency of a pool, it is
+   possible to try to fix all errors. In this case the pool file will be
+   opened in read-write mode.
+
+ * The user may request that the pool file _not_ be modified during a repair,
+   and that pmempool just report what would be done if the repair was
+   performed.
+
+ * When repairing a pool, the user may request that a backup be created before
+   any modification is made. If it is not possible to create a full backup of
+   the existing pool, the process will terminate.
+
+2.3. create
+-----------
+
+The pmempool invoked with the *create* command creates a pool file of the
+specified type and size. Depending on the pool's type it is possible to
+provide additional desired properties of the pool.
+
+Below is the description of the available features:
+
+ * The main feature is to create a pool of the specified type and size.
+   Therefore it is required to pass at least two command line arguments.
+
+ * The user may want to create a pool file with the size of the whole
+   partition. This is possible by passing a proper command line argument.
+
+ * It is possible to create a pool with the same parameters as another pool
+   passed in the command line arguments - it may be considered as cloning the
+   pool.
+
+2.4. dump
+---------
+
+The pmempool invoked with the *dump* command dumps usable data from the
+specified pool, in either hexadecimal or binary format.
+
+Below is the description of the available features:
+
+ * The main feature is to dump all data from a pool file. When dumping data to
+   a terminal, by default the data is dumped in hexadecimal format. When
+   standard output is redirected to a file, the data will be dumped in binary
+   format.
+
+ * It is possible to set the format of the dumped data to either hexadecimal
+   or binary.
+
+ * By default data is dumped to standard output. However, it is possible to
+   specify a file name to dump the data into.
+
+ * For the pmem blk pool type it is possible to set the range of blocks in
+   either an absolute or a relative manner.
+
+ * For the pmem log pool type it is possible to set the chunk size and the
+   range of chunks to dump in either an absolute or a relative manner.
+
+2.5. rm
+-------
+
+The pmempool *rm* command is a simple helper utility which removes pool files
+created using either the PMDK libraries or the pmempool *create* command.
+
+ * The main feature is to parse the poolset configuration file and remove all
+   listed pool files.
+
+ * It is possible to run the pmempool *rm* command in interactive mode, where
+   before removing each file the user must confirm the removal operation.
+
+ * The command line interface is similar to the interface provided by the
+   standard system *rm* command.
+
+2.6. sync
+---------
+
+The pmempool *sync* command synchronizes data between replicas within a
+poolset.
+
+The command has the following features:
+
+ * Metadata in a poolset are checked for consistency.
+
+ * Missing or damaged parts are recreated.
+
+2.7. transform
+--------------
+
+The pmempool *transform* command modifies the internal structure of a poolset.
+
+Available features of the command:
+
+ * Adding replicas.
+
+ * Removing replicas.
+
+2.8. convert
+------------
+
+The pmempool invoked with the *convert* command performs a conversion of the
+specified pool to the newest layout supported by this tool. Currently only
+libpmemobj pools are supported. It is advised to have a backup of the pool
+before conversion.
+
+3. Source code
+--------------
+
+The source code of pmempool is located in the pmempool directory.
+
+By default pmempool is installed in the $(DESTDIR)/usr/bin directory.
+You can change it by passing the $(TOOLSDIR) variable to "make install".
+For example, the following command will install pmempool in the ~/bin
+directory:
+ $ make install DESTDIR=~ TOOLSDIR=/bin
+
+See the top-level README file for detailed information about building and
+installation.
+
+4. Packaging
+------------
+
+The pmempool application is provided in separate packages. Both rpm and dpkg
+packages are built automatically with other packages.
+
+See the top-level README file for detailed information about building packages.
+
+5. Versioning
+-------------
+
+The versioning of the pmempool application is the same as for all PMDK
+libraries.
diff --git a/src/pmdk/src/tools/pmempool/bash_completion/pmempool b/src/pmdk/src/tools/pmempool/bash_completion/pmempool
new file mode 100644
index 000000000..ef82bb823
--- /dev/null
+++ b/src/pmdk/src/tools/pmempool/bash_completion/pmempool
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2014-2017, Intel Corporation
+
+#
+# pmempool -- bash completion script for pmempool
+#
+
+#
+# _pmempool_gen -- generates results for completion
+# Arguments:
+# $1 - values
+# $2 - current string
+# $3 - prefix for results
+_pmempool_gen()
+{
+	COMPREPLY=()
+	local values=$1
+	local cur=$2
+	local prefix=$3
+	local i=${#COMPREPLY[@]}
+	for v in $values
+	do
+		[[ "$v" == "$cur"* ]] && COMPREPLY[i++]="$prefix$v"
+	done
+}
+
+#
+# _pmempool_get_cmds -- returns available pmempool commands
+#
+_pmempool_get_cmds()
+{
+	echo -n $(pmempool --help | grep -e '^\S\+\s\+-' |\
+		grep -o '^\S\+' | sed '/help/d')
+}
+
+#
+# _pmempool_get_opts -- returns available options for specified command
+# Arguments:
+# $1 - command
+#
+_pmempool_get_opts()
+{
+	local c=$1
+	local opts=$(pmempool ${c} --help | grep -o -e "-., --\S\+" |\
+		grep -o -e "--\S\+")
+	echo "$opts"
+}
+
+#
+# _pmempool_get_values -- returns available values for specified option
+# Arguments:
+# $1 - command
+# $2 - option
+# $3 - values delimiter
+# $4 - current values, will be removed from result
+#
+_pmempool_get_values()
+{
+	local cmd=$1
+	local opt=$2
+	local delim=$3
+	local curvals=$4
+	local vals=$(pmempool ${cmd} --help |\
+		grep -o -e "${opt}\s\+\S\+${delim}\S\+" |\
+		sed "s/${opt}\s\+\(\S\+${delim}\S\+\)/\1/" |\
+		sed "s/${delim}/ /g")
+	if [ -n "$curvals" ]
+	then
+		local OLD_IFS=$IFS
+		IFS=","
+		for v in $curvals
+		do
+			vals=$(echo $vals | sed "s/$v//g")
+		done
+		IFS=$OLD_IFS
+	fi
+	echo "${vals}"
+}
+
+#
+# _pmempool_get_cmd -- returns command name if it exists in specified array
+# Arguments:
+# $1 - command name
+# $2 - list of available commands
+#
+_pmempool_get_cmd()
+{
+	local cmd=$1
+	local cmds=$2
+
+	[[ ${cmds} =~ ${cmd} ]] && echo -n ${cmd}
+}
+
+#
+# _pmempool_get_used_values -- returns already used values
+# Arguments:
+# $1 - current string
+# $2 - values delimiter
+#
+_pmempool_get_used_values()
+{
+	local cur=$1
+	local delim=$2
+	local used=$(echo $cur | rev | cut -d $delim -s -f1 --complement | rev)
+	[ -n "$used" ] && used="$used$delim"
+	echo "$used"
+}
+
+#
+# _pmempool_get_current_value -- returns current value string
+# Arguments:
+# $1 - current string
+# $2 - values delimiter
+#
+_pmempool_get_current_value()
+{
+	local cur=$1
+	local delim=$2
+	echo $cur | rev | cut -d $delim -f1 | rev
+}
+
+_pmempool()
+{
+	local cur prev opts
+	cur="${COMP_WORDS[COMP_CWORD]}"
+	prev="${COMP_WORDS[COMP_CWORD-1]}"
+	cmds=$(_pmempool_get_cmds)
+	cmds_all="$cmds help"
+	opts_pool_types="blk log obj"
+	cmd=$(_pmempool_get_cmd "${COMP_WORDS[1]}" "$cmds_all")
+
+	if [[ ${cur} == -* ]]
+	then
+		local opts=$(_pmempool_get_opts $cmd)
+		_pmempool_gen "$opts" "$cur"
+	elif [[ ${prev} == --* ]]
+	then
+		local used=$(_pmempool_get_used_values "$cur" ",")
+		local _cur=$(_pmempool_get_current_value "$cur" ",")
+		local values=$(_pmempool_get_values ${cmd} ${prev} "," $used)
-n "${values}" ] + then + # values separated by ',' may contain multiple values + _pmempool_gen "$values" "$_cur" "$used" + else + # values separated by '|' may contain only one value + values=$(_pmempool_get_values $cmd $prev "|") + _pmempool_gen "$values" "$cur" + fi + elif [[ $cmd == create ]] + then + case "${COMP_WORDS[@]}" in + *blk*|*log*|*obj*|*--inherit*) + ;; + *) + _pmempool_gen "$opts_pool_types" "$cur" + ;; + esac + elif [[ ${prev} == help ]] + then + _pmempool_gen "$cmds" "$cur" + elif [[ ${prev} == pmempool ]] + then + _pmempool_gen "$cmds_all" "$cur" + fi +} + +complete -o default -F _pmempool pmempool diff --git a/src/pmdk/src/tools/pmempool/check.c b/src/pmdk/src/tools/pmempool/check.c new file mode 100644 index 000000000..4523c93e8 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/check.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * check.c -- pmempool check command source file + */ +#include +#include + +#include "common.h" +#include "check.h" +#include "output.h" +#include "set.h" +#include "file.h" + +#include "libpmempool.h" + +typedef enum +{ + CHECK_RESULT_CONSISTENT, + CHECK_RESULT_NOT_CONSISTENT, + CHECK_RESULT_REPAIRED, + CHECK_RESULT_CANNOT_REPAIR, + CHECK_RESULT_SYNC_REQ, + CHECK_RESULT_ERROR +} check_result_t; + +/* + * pmempool_check_context -- context and arguments for check command + */ +struct pmempool_check_context { + int verbose; /* verbosity level */ + char *fname; /* file name */ + struct pool_set_file *pfile; + bool repair; /* do repair */ + bool backup; /* do backup */ + bool advanced; /* do advanced repairs */ + char *backup_fname; /* backup file name */ + bool exec; /* do execute */ + char ans; /* default answer on all questions or '?' */ +}; + +/* + * pmempool_check_default -- default arguments for check command + */ +static const struct pmempool_check_context pmempool_check_default = { + .verbose = 1, + .fname = NULL, + .repair = false, + .backup = false, + .backup_fname = NULL, + .advanced = false, + .exec = true, + .ans = '?', +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Check consistency of a pool\n" +"\n" +"Common options:\n" +" -r, --repair try to repair a pool file if possible\n" +" -y, --yes answer yes to all questions\n" +" -d, --dry-run don't execute, just show what would be done\n" +" -b, --backup create backup of a pool file before executing\n" +" -a, --advanced perform advanced repairs\n" +" -q, --quiet be quiet and don't print any messages\n" +" -v, --verbose increase verbosity level\n" +" -h, --help display this help and exit\n" +"\n" +"For complete documentation see %s-check(1) manual page.\n" +; + +/* + * long_options -- command line options + */ +static const struct option long_options[] = { + {"repair", no_argument, NULL, 'r'}, + {"yes", no_argument, NULL, 'y'}, + {"dry-run", no_argument, NULL, 'd'}, + {"no-exec", no_argument, NULL, 'N'}, /* deprecated */ + {"backup", required_argument, NULL, 'b'}, + {"advanced", no_argument, NULL, 'a'}, + {"quiet", no_argument, NULL, 'q'}, + {"verbose", no_argument, NULL, 'v'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0 }, +}; + +/* + * print_usage -- print short description of application's usage + */ +static void +print_usage(const char *appname) +{ + printf("Usage: %s check [] \n", appname); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * 
pmempool_check_help -- print help message for check command + */ +void +pmempool_check_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * pmempool_check_parse_args -- parse command line arguments + */ +static int +pmempool_check_parse_args(struct pmempool_check_context *pcp, + const char *appname, int argc, char *argv[]) +{ + int opt; + while ((opt = getopt_long(argc, argv, "ahvrdNb:qy", + long_options, NULL)) != -1) { + switch (opt) { + case 'r': + pcp->repair = true; + break; + case 'y': + pcp->ans = 'y'; + break; + case 'd': + case 'N': + pcp->exec = false; + break; + case 'b': + pcp->backup = true; + pcp->backup_fname = optarg; + break; + case 'a': + pcp->advanced = true; + break; + case 'q': + pcp->verbose = 0; + break; + case 'v': + pcp->verbose = 2; + break; + case 'h': + pmempool_check_help(appname); + exit(EXIT_SUCCESS); + default: + print_usage(appname); + exit(EXIT_FAILURE); + } + } + + if (optind < argc) { + pcp->fname = argv[optind]; + } else { + print_usage(appname); + exit(EXIT_FAILURE); + } + + if (!pcp->repair && !pcp->exec) { + outv_err("'-N' option requires '-r'\n"); + exit(EXIT_FAILURE); + } + + if (!pcp->repair && pcp->backup) { + outv_err("'-b' option requires '-r'\n"); + exit(EXIT_FAILURE); + } + + return 0; +} + +static check_result_t pmempool_check_2_check_res_t[] = +{ + [PMEMPOOL_CHECK_RESULT_CONSISTENT] = CHECK_RESULT_CONSISTENT, + [PMEMPOOL_CHECK_RESULT_NOT_CONSISTENT] = CHECK_RESULT_NOT_CONSISTENT, + [PMEMPOOL_CHECK_RESULT_REPAIRED] = CHECK_RESULT_REPAIRED, + [PMEMPOOL_CHECK_RESULT_CANNOT_REPAIR] = CHECK_RESULT_CANNOT_REPAIR, + [PMEMPOOL_CHECK_RESULT_SYNC_REQ] = CHECK_RESULT_SYNC_REQ, + [PMEMPOOL_CHECK_RESULT_ERROR] = CHECK_RESULT_ERROR, +}; + +static const char * +check_ask(const char *msg) +{ + char answer = ask_Yn('?', "%s", msg); + + switch (answer) { + case 'y': + return "yes"; + case 'n': + return "no"; + default: + return "?"; + } +} + +static check_result_t +pmempool_check_perform(struct pmempool_check_context *pc) +{ + struct pmempool_check_args args = { + .path = pc->fname, + .backup_path = pc->backup_fname, + .pool_type = PMEMPOOL_POOL_TYPE_DETECT, + .flags = PMEMPOOL_CHECK_FORMAT_STR + }; + + if (pc->repair) + args.flags |= PMEMPOOL_CHECK_REPAIR; + if (!pc->exec) + args.flags |= PMEMPOOL_CHECK_DRY_RUN; + if (pc->advanced) + args.flags |= PMEMPOOL_CHECK_ADVANCED; + if (pc->ans == 'y') + args.flags |= PMEMPOOL_CHECK_ALWAYS_YES; + if (pc->verbose == 2) + args.flags |= PMEMPOOL_CHECK_VERBOSE; + + PMEMpoolcheck *ppc = pmempool_check_init(&args, sizeof(args)); + + if (ppc == NULL) + return CHECK_RESULT_ERROR; + + struct pmempool_check_status *status = NULL; + while ((status = pmempool_check(ppc)) != NULL) { + switch (status->type) { + case PMEMPOOL_CHECK_MSG_TYPE_ERROR: + outv(1, "%s\n", status->str.msg); + break; + case PMEMPOOL_CHECK_MSG_TYPE_INFO: + outv(2, "%s\n", status->str.msg); + break; + case PMEMPOOL_CHECK_MSG_TYPE_QUESTION: + status->str.answer = check_ask(status->str.msg); + break; + default: + pmempool_check_end(ppc); + exit(EXIT_FAILURE); + } + } + + enum pmempool_check_result ret = pmempool_check_end(ppc); + + return pmempool_check_2_check_res_t[ret]; +} + +/* + * pmempool_check_func -- main function for check command + */ +int +pmempool_check_func(const char *appname, int argc, char *argv[]) +{ + int ret = 0; + check_result_t res = CHECK_RESULT_CONSISTENT; + struct pmempool_check_context pc = pmempool_check_default; + + /* parse command line arguments */ + ret = 
pmempool_check_parse_args(&pc, appname, argc, argv); + if (ret) + return ret; + + /* set verbosity level */ + out_set_vlevel(pc.verbose); + + res = pmempool_check_perform(&pc); + + switch (res) { + case CHECK_RESULT_CONSISTENT: + outv(2, "%s: consistent\n", pc.fname); + ret = 0; + break; + case CHECK_RESULT_NOT_CONSISTENT: + outv(1, "%s: not consistent\n", pc.fname); + ret = -1; + break; + case CHECK_RESULT_REPAIRED: + outv(1, "%s: repaired\n", pc.fname); + ret = 0; + break; + case CHECK_RESULT_CANNOT_REPAIR: + outv(1, "%s: cannot repair\n", pc.fname); + ret = -1; + break; + case CHECK_RESULT_SYNC_REQ: + outv(1, "%s: sync required\n", pc.fname); + ret = 0; + break; + case CHECK_RESULT_ERROR: + if (errno) + outv_err("%s\n", strerror(errno)); + if (pc.repair) + outv_err("repairing failed\n"); + else + outv_err("checking consistency failed\n"); + ret = -1; + break; + default: + outv_err("status unknown\n"); + ret = -1; + break; + } + + return ret; +} diff --git a/src/pmdk/src/tools/pmempool/check.h b/src/pmdk/src/tools/pmempool/check.h new file mode 100644 index 000000000..93bd2a80c --- /dev/null +++ b/src/pmdk/src/tools/pmempool/check.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * check.h -- pmempool check command header file + */ + +int pmempool_check_func(const char *appname, int argc, char *argv[]); +void pmempool_check_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/common.c b/src/pmdk/src/tools/pmempool/common.c new file mode 100644 index 000000000..6d5bf2d3f --- /dev/null +++ b/src/pmdk/src/tools/pmempool/common.c @@ -0,0 +1,1382 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * common.c -- definitions of common functions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +#include "output.h" +#include "libpmem.h" +#include "libpmemblk.h" +#include "libpmemlog.h" +#include "libpmemobj.h" +#include "btt.h" +#include "file.h" +#include "os.h" +#include "set.h" +#include "out.h" +#include "mmap.h" +#include "util_pmem.h" +#include "set_badblocks.h" +#include "util.h" + +#define REQ_BUFF_SIZE 2048U +#define Q_BUFF_SIZE 8192 +typedef const char *(*enum_to_str_fn)(int); + +/* + * pmem_pool_type -- return pool type based on first two pages. + * If pool header's content suggests that pool may be BTT device + * (first page zeroed and no correct signature for pool header), + * signature from second page is checked to prove that it's BTT device layout. 
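+ */
+
+/*
+ * pool_type_of_fd_sketch -- editorial sketch, not part of the original
+ * change: classifying a pool straight from a file descriptor using the
+ * two-page layout described above; assumes <unistd.h> is among this
+ * file's includes and that the file is at least two pages long
+ */
+static pmem_pool_type_t
+pool_type_of_fd_sketch(int fd)
+{
+	/* pool header page plus the (potential) BTT info page */
+	char buf[2 * DEFAULT_HDR_SIZE];
+
+	if (pread(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
+		return PMEM_POOL_TYPE_UNKNOWN;
+
+	return pmem_pool_type(buf); /* defined just below */
+}
+
+/*
+ * pmem_pool_type -- return pool type based on the first two pages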
+ */ +pmem_pool_type_t +pmem_pool_type(const void *base_pool_addr) +{ + struct pool_hdr *hdrp = (struct pool_hdr *)base_pool_addr; + + if (util_is_zeroed(hdrp, DEFAULT_HDR_SIZE)) { + return util_get_pool_type_second_page(base_pool_addr); + } + + pmem_pool_type_t type = pmem_pool_type_parse_hdr(hdrp); + if (type != PMEM_POOL_TYPE_UNKNOWN) + return type; + else + return util_get_pool_type_second_page(base_pool_addr); +} + +/* + * pmem_pool_checksum -- return true if checksum is correct + * based on first two pages + */ +int +pmem_pool_checksum(const void *base_pool_addr) +{ + /* check whether it's btt device -> first page zeroed */ + if (util_is_zeroed(base_pool_addr, DEFAULT_HDR_SIZE)) { + struct btt_info bttinfo; + void *sec_page_addr = (char *)base_pool_addr + DEFAULT_HDR_SIZE; + memcpy(&bttinfo, sec_page_addr, sizeof(bttinfo)); + btt_info_convert2h(&bttinfo); + return util_checksum(&bttinfo, sizeof(bttinfo), + &bttinfo.checksum, 0, 0); + } else { + /* it's not btt device - first page contains header */ + struct pool_hdr hdrp; + memcpy(&hdrp, base_pool_addr, sizeof(hdrp)); + return util_checksum(&hdrp, sizeof(hdrp), + &hdrp.checksum, 0, POOL_HDR_CSUM_END_OFF(&hdrp)); + } +} + +/* + * pmem_pool_type_parse_hdr -- return pool type based only on signature + */ +pmem_pool_type_t +pmem_pool_type_parse_hdr(const struct pool_hdr *hdrp) +{ + if (memcmp(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return PMEM_POOL_TYPE_LOG; + else if (memcmp(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return PMEM_POOL_TYPE_BLK; + else if (memcmp(hdrp->signature, OBJ_HDR_SIG, POOL_HDR_SIG_LEN) == 0) + return PMEM_POOL_TYPE_OBJ; + else + return PMEM_POOL_TYPE_UNKNOWN; +} + +/* + * pmem_pool_type_parse_str -- returns pool type from command line arg + */ +pmem_pool_type_t +pmem_pool_type_parse_str(const char *str) +{ + if (strcmp(str, "blk") == 0) { + return PMEM_POOL_TYPE_BLK; + } else if (strcmp(str, "log") == 0) { + return PMEM_POOL_TYPE_LOG; + } else if (strcmp(str, "obj") == 0) { + return PMEM_POOL_TYPE_OBJ; + } else if (strcmp(str, "btt") == 0) { + return PMEM_POOL_TYPE_BTT; + } else { + return PMEM_POOL_TYPE_UNKNOWN; + } +} + +/* + * util_get_pool_type_second_page -- return type based on second page content + */ +pmem_pool_type_t +util_get_pool_type_second_page(const void *pool_base_addr) +{ + struct btt_info bttinfo; + + void *sec_page_addr = (char *)pool_base_addr + DEFAULT_HDR_SIZE; + memcpy(&bttinfo, sec_page_addr, sizeof(bttinfo)); + btt_info_convert2h(&bttinfo); + + if (util_is_zeroed(&bttinfo, sizeof(bttinfo))) + return PMEM_POOL_TYPE_UNKNOWN; + + if (memcmp(bttinfo.sig, BTTINFO_SIG, BTTINFO_SIG_LEN) == 0) + return PMEM_POOL_TYPE_BTT; + + return PMEM_POOL_TYPE_UNKNOWN; +} + +/* + * util_parse_mode -- parse file mode from octal string + */ +int +util_parse_mode(const char *str, mode_t *mode) +{ + mode_t m = 0; + int digits = 0; + + /* skip leading zeros */ + while (*str == '0') + str++; + + /* parse at most 3 octal digits */ + while (digits < 3 && *str != '\0') { + if (*str < '0' || *str > '7') + return -1; + m = (mode_t)(m << 3) | (mode_t)(*str - '0'); + digits++; + str++; + } + + /* more than 3 octal digits */ + if (digits == 3 && *str != '\0') + return -1; + + if (mode) + *mode = m; + + return 0; +} + +static void +util_range_limit(struct range *rangep, struct range limit) +{ + if (rangep->first < limit.first) + rangep->first = limit.first; + if (rangep->last > limit.last) + rangep->last = limit.last; +} + +/* + * util_parse_range_from -- parse range string as interval from 
specified number + */ +static int +util_parse_range_from(char *str, struct range *rangep, struct range entire) +{ + size_t str_len = strlen(str); + if (str[str_len - 1] != '-') + return -1; + + str[str_len - 1] = '\0'; + + if (util_parse_size(str, (size_t *)&rangep->first)) + return -1; + + rangep->last = entire.last; + util_range_limit(rangep, entire); + + return 0; +} + +/* + * util_parse_range_to -- parse range string as interval to specified number + */ +static int +util_parse_range_to(char *str, struct range *rangep, struct range entire) +{ + + if (str[0] != '-' || str[1] == '\0') + return -1; + + if (util_parse_size(str + 1, (size_t *)&rangep->last)) + return -1; + + rangep->first = entire.first; + util_range_limit(rangep, entire); + + return 0; +} + +/* + * util_parse_range_number -- parse range string as a single number + */ +static int +util_parse_range_number(char *str, struct range *rangep, struct range entire) +{ + if (util_parse_size(str, (size_t *)&rangep->first) != 0) + return -1; + rangep->last = rangep->first; + if (rangep->first > entire.last || + rangep->last < entire.first) + return -1; + util_range_limit(rangep, entire); + return 0; +} + +/* + * util_parse_range -- parse single range string + */ +static int +util_parse_range(char *str, struct range *rangep, struct range entire) +{ + char *dash = strchr(str, '-'); + if (!dash) + return util_parse_range_number(str, rangep, entire); + + /* '-' at the beginning */ + if (dash == str) + return util_parse_range_to(str, rangep, entire); + + /* '-' at the end */ + if (dash[1] == '\0') + return util_parse_range_from(str, rangep, entire); + + *dash = '\0'; + dash++; + + if (util_parse_size(str, (size_t *)&rangep->first)) + return -1; + + if (util_parse_size(dash, (size_t *)&rangep->last)) + return -1; + + if (rangep->first > rangep->last) { + uint64_t tmp = rangep->first; + rangep->first = rangep->last; + rangep->last = tmp; + } + + util_range_limit(rangep, entire); + + return 0; +} + +/* + * util_ranges_overlap -- return 1 if two ranges are overlapped + */ +static int +util_ranges_overlap(struct range *rangep1, struct range *rangep2) +{ + if (rangep1->last + 1 < rangep2->first || + rangep2->last + 1 < rangep1->first) + return 0; + else + return 1; +} + +/* + * util_ranges_add -- create and add range + */ +int +util_ranges_add(struct ranges *rangesp, struct range range) +{ + struct range *rangep = malloc(sizeof(struct range)); + if (!rangep) + err(1, "Cannot allocate memory for range\n"); + memcpy(rangep, &range, sizeof(*rangep)); + + struct range *curp, *next; + uint64_t first = rangep->first; + uint64_t last = rangep->last; + + curp = PMDK_LIST_FIRST(&rangesp->head); + while (curp) { + next = PMDK_LIST_NEXT(curp, next); + if (util_ranges_overlap(curp, rangep)) { + PMDK_LIST_REMOVE(curp, next); + if (curp->first < first) + first = curp->first; + if (curp->last > last) + last = curp->last; + free(curp); + } + curp = next; + } + + rangep->first = first; + rangep->last = last; + + PMDK_LIST_FOREACH(curp, &rangesp->head, next) { + if (curp->first < rangep->first) { + PMDK_LIST_INSERT_AFTER(curp, rangep, next); + return 0; + } + } + + PMDK_LIST_INSERT_HEAD(&rangesp->head, rangep, next); + + return 0; +} + +/* + * util_ranges_contain -- return 1 if ranges contain the number n + */ +int +util_ranges_contain(const struct ranges *rangesp, uint64_t n) +{ + struct range *curp = NULL; + PMDK_LIST_FOREACH(curp, &rangesp->head, next) { + if (curp->first <= n && n <= curp->last) + return 1; + } + + return 0; +} + +/* + * util_ranges_empty -- 
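+ * (defined below, after this editorial aside)
+ */
+
+/*
+ * ranges_usage_sketch -- editorial sketch, not part of the original
+ * change: how the range helpers above compose; util_parse_ranges() is
+ * defined further down in this file, and assert()/memset() are assumed
+ * available from this file's includes
+ */
+static void
+ranges_usage_sketch(void)
+{
+	struct ranges rs;
+	memset(&rs, 0, sizeof(rs)); /* zeroed list head == empty list */
+
+	struct range entire = { .first = 0, .last = 127 };
+
+	if (util_parse_ranges("0-4,10,120-", &rs, entire) == 0) {
+		assert(util_ranges_contain(&rs, 3)); /* inside "0-4" */
+		assert(!util_ranges_contain(&rs, 5)); /* gap before 10 */
+		assert(util_ranges_contain(&rs, 127)); /* "120-" runs to last */
+	}
+
+	util_ranges_clear(&rs);
+}
+
+/*
+ * util_ranges_empty -- 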
return 1 if ranges are empty + */ +int +util_ranges_empty(const struct ranges *rangesp) +{ + return PMDK_LIST_EMPTY(&rangesp->head); +} + +/* + * util_ranges_clear -- clear list of ranges + */ +void +util_ranges_clear(struct ranges *rangesp) +{ + while (!PMDK_LIST_EMPTY(&rangesp->head)) { + struct range *rangep = PMDK_LIST_FIRST(&rangesp->head); + PMDK_LIST_REMOVE(rangep, next); + free(rangep); + } +} + +/* + * util_parse_ranges -- parser ranges from string + * + * The valid formats of range are: + * - 'n-m' -- from n to m + * - '-m' -- from minimum passed in entirep->first to m + * - 'n-' -- from n to maximum passed in entirep->last + * - 'n' -- n'th byte/block + * Multiple ranges may be separated by comma: + * 'n1-m1,n2-,-m3,n4' + */ +int +util_parse_ranges(const char *ptr, struct ranges *rangesp, struct range entire) +{ + if (ptr == NULL) + return util_ranges_add(rangesp, entire); + + char *dup = strdup(ptr); + if (!dup) + err(1, "Cannot allocate memory for ranges"); + char *str = dup; + int ret = 0; + char *next = str; + do { + str = next; + next = strchr(str, ','); + if (next != NULL) { + *next = '\0'; + next++; + } + struct range range; + if (util_parse_range(str, &range, entire)) { + ret = -1; + goto out; + } else if (util_ranges_add(rangesp, range)) { + ret = -1; + goto out; + } + } while (next != NULL); +out: + free(dup); + return ret; +} + +/* + * pmem_pool_get_min_size -- return minimum size of pool for specified type + */ +uint64_t +pmem_pool_get_min_size(pmem_pool_type_t type) +{ + switch (type) { + case PMEM_POOL_TYPE_LOG: + return PMEMLOG_MIN_POOL; + case PMEM_POOL_TYPE_BLK: + return PMEMBLK_MIN_POOL; + case PMEM_POOL_TYPE_OBJ: + return PMEMOBJ_MIN_POOL; + default: + break; + } + + return 0; +} + +/* + * util_poolset_map -- map poolset + */ +int +util_poolset_map(const char *fname, struct pool_set **poolset, int rdonly) +{ + if (util_is_poolset_file(fname) != 1) { + int ret = util_poolset_create_set(poolset, fname, 0, 0, true); + if (ret < 0) { + outv_err("cannot open pool set -- '%s'", fname); + return -1; + } + unsigned flags = (rdonly ? POOL_OPEN_COW : 0) | + POOL_OPEN_IGNORE_BAD_BLOCKS; + return util_pool_open_nocheck(*poolset, flags); + } + + /* open poolset file */ + int fd = util_file_open(fname, NULL, 0, O_RDONLY); + if (fd < 0) + return -1; + + struct pool_set *set; + + /* parse poolset file */ + if (util_poolset_parse(&set, fname, fd)) { + outv_err("parsing poolset file failed\n"); + os_close(fd); + return -1; + } + set->ignore_sds = true; + os_close(fd); + + /* read the pool header from first pool set file */ + const char *part0_path = PART(REP(set, 0), 0)->path; + struct pool_hdr hdr; + if (util_file_pread(part0_path, &hdr, sizeof(hdr), 0) != + sizeof(hdr)) { + outv_err("cannot read pool header from poolset\n"); + goto err_pool_set; + } + + util_poolset_free(set); + + util_convert2h_hdr_nocheck(&hdr); + + /* parse pool type from first pool set file */ + pmem_pool_type_t type = pmem_pool_type_parse_hdr(&hdr); + if (type == PMEM_POOL_TYPE_UNKNOWN) { + outv_err("cannot determine pool type from poolset\n"); + return -1; + } + + /* + * Just use one thread - there is no need for multi-threaded access + * to remote pool. + */ + unsigned nlanes = 1; + + /* + * Open the poolset, the values passed to util_pool_open are read + * from the first poolset file, these values are then compared with + * the values from all headers of poolset files. + */ + struct pool_attr attr; + util_pool_hdr2attr(&attr, &hdr); + unsigned flags = (rdonly ? 
POOL_OPEN_COW : 0) | POOL_OPEN_IGNORE_SDS | + POOL_OPEN_IGNORE_BAD_BLOCKS; + if (util_pool_open(poolset, fname, 0 /* minpartsize */, + &attr, &nlanes, NULL, flags)) { + outv_err("opening poolset failed\n"); + return -1; + } + + return 0; + +err_pool_set: + util_poolset_free(set); + return -1; +} + +/* + * pmem_pool_parse_params -- parse pool type, file size and block size + */ +int +pmem_pool_parse_params(const char *fname, struct pmem_pool_params *paramsp, + int check) +{ + paramsp->type = PMEM_POOL_TYPE_UNKNOWN; + char pool_str_addr[POOL_HDR_DESC_SIZE]; + + enum file_type type = util_file_get_type(fname); + if (type < 0) + return -1; + + int is_poolset = util_is_poolset_file(fname); + if (is_poolset < 0) + return -1; + + paramsp->is_poolset = is_poolset; + int fd = util_file_open(fname, NULL, 0, O_RDONLY); + if (fd < 0) + return -1; + + /* get file size and mode */ + os_stat_t stat_buf; + if (os_fstat(fd, &stat_buf)) { + os_close(fd); + return -1; + } + + int ret = 0; + + assert(stat_buf.st_size >= 0); + paramsp->size = (uint64_t)stat_buf.st_size; + paramsp->mode = stat_buf.st_mode; + + void *addr = NULL; + struct pool_set *set = NULL; + if (paramsp->is_poolset) { + /* close the file */ + os_close(fd); + fd = -1; + + if (check) { + if (util_poolset_map(fname, &set, 0)) { + ret = -1; + goto out_close; + } + } else { + ret = util_poolset_create_set(&set, fname, 0, 0, true); + if (ret < 0) { + outv_err("cannot open pool set -- '%s'", fname); + ret = -1; + goto out_close; + } + if (util_pool_open_nocheck(set, + POOL_OPEN_IGNORE_BAD_BLOCKS)) { + ret = -1; + goto out_close; + } + } + + paramsp->size = set->poolsize; + addr = set->replica[0]->part[0].addr; + + /* + * XXX mprotect for device dax with length not aligned to its + * page granularity causes SIGBUS on the next page fault. + * The length argument of this call should be changed to + * set->poolsize once the kernel issue is solved. + */ + if (mprotect(addr, set->replica[0]->repsize, + PROT_READ) < 0) { + outv_err("!mprotect"); + goto out_close; + } + } else { + /* read first two pages */ + if (type == TYPE_DEVDAX) { + addr = util_file_map_whole(fname); + if (addr == NULL) { + ret = -1; + goto out_close; + } + } else { + ssize_t num = read(fd, pool_str_addr, + POOL_HDR_DESC_SIZE); + if (num < (ssize_t)POOL_HDR_DESC_SIZE) { + outv_err("!read"); + ret = -1; + goto out_close; + } + addr = pool_str_addr; + } + } + + struct pool_hdr hdr; + memcpy(&hdr, addr, sizeof(hdr)); + + util_convert2h_hdr_nocheck(&hdr); + + memcpy(paramsp->signature, hdr.signature, sizeof(paramsp->signature)); + + /* + * Check if file is a part of pool set by comparing + * the UUID with the next part UUID. If it is the same + * it means the pool consist of a single file. 
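+	 *
+	 * (editorial note) as a truth table:
+	 *   single-file pool  : uuid equals all four link UUIDs
+	 *   part of a poolset : at least one link UUID differs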
+ */ + paramsp->is_part = !paramsp->is_poolset && + (memcmp(hdr.uuid, hdr.next_part_uuid, POOL_HDR_UUID_LEN) || + memcmp(hdr.uuid, hdr.prev_part_uuid, POOL_HDR_UUID_LEN) || + memcmp(hdr.uuid, hdr.next_repl_uuid, POOL_HDR_UUID_LEN) || + memcmp(hdr.uuid, hdr.prev_repl_uuid, POOL_HDR_UUID_LEN)); + + if (check) + paramsp->type = pmem_pool_type(addr); + else + paramsp->type = pmem_pool_type_parse_hdr(addr); + + paramsp->is_checksum_ok = pmem_pool_checksum(addr); + + if (paramsp->type == PMEM_POOL_TYPE_BLK) { + struct pmemblk *pbp = addr; + paramsp->blk.bsize = le32toh(pbp->bsize); + } else if (paramsp->type == PMEM_POOL_TYPE_OBJ) { + struct pmemobjpool *pop = addr; + memcpy(paramsp->obj.layout, pop->layout, PMEMOBJ_MAX_LAYOUT); + } + + if (paramsp->is_poolset) + util_poolset_close(set, DO_NOT_DELETE_PARTS); + +out_close: + if (fd >= 0) + (void) os_close(fd); + return ret; +} + +/* + * util_check_memory -- check if memory contains single value + */ +int +util_check_memory(const uint8_t *buff, size_t len, uint8_t val) +{ + size_t i; + for (i = 0; i < len; i++) { + if (buff[i] != val) + return -1; + } + + return 0; +} + +/* + * pmempool_ask_yes_no -- prints the question, + * takes user answer and returns validated value + */ +static char +pmempool_ask_yes_no(char def_ans, const char *answers, const char *qbuff) +{ + char ret = INV_ANS; + printf("%s", qbuff); + size_t len = strlen(answers); + size_t i; + + char def_anslo = (char)tolower(def_ans); + printf(" ["); + for (i = 0; i < len; i++) { + char anslo = (char)tolower(answers[i]); + printf("%c", anslo == def_anslo ? + toupper(anslo) : anslo); + if (i != len - 1) + printf("/"); + } + printf("] "); + + char *line_of_answer = util_readline(stdin); + + if (line_of_answer == NULL) { + outv_err("input is empty"); + return '?'; + } + + char first_letter = line_of_answer[0]; + line_of_answer[0] = (char)tolower(first_letter); + + if (strcmp(line_of_answer, "yes\n") == 0) { + if (strchr(answers, 'y') != NULL) + ret = 'y'; + } + + if (strcmp(line_of_answer, "no\n") == 0) { + if (strchr(answers, 'n') != NULL) + ret = 'n'; + } + + if (strlen(line_of_answer) == 2 && + line_of_answer[1] == '\n') { + if (strchr(answers, line_of_answer[0]) != NULL) + ret = line_of_answer[0]; + } + + if (strlen(line_of_answer) == 1 && + line_of_answer[0] == '\n') { + ret = def_ans; + } + + Free(line_of_answer); + return ret; +} + +/* + * ask -- keep asking for answer until it gets valid input + */ +char +ask(char op, char *answers, char def_ans, const char *fmt, va_list ap) +{ + char qbuff[Q_BUFF_SIZE]; + char ret = INV_ANS; + int is_tty = 0; + if (op != '?') + return op; + + int p = vsnprintf(qbuff, Q_BUFF_SIZE, fmt, ap); + if (p < 0) { + outv_err("vsnprintf"); + exit(EXIT_FAILURE); + } + if (p >= Q_BUFF_SIZE) { + outv_err("vsnprintf: output was truncated"); + exit(EXIT_FAILURE); + } + + is_tty = isatty(fileno(stdin)); + + while ((ret = pmempool_ask_yes_no(def_ans, answers, qbuff)) == INV_ANS) + ; + + if (!is_tty) + printf("%c\n", ret); + + return ret; +} + +char +ask_Yn(char op, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + char ret = ask(op, "yn", 'y', fmt, ap); + va_end(ap); + return ret; +} + +char +ask_yN(char op, const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + char ret = ask(op, "yn", 'n', fmt, ap); + va_end(ap); + return ret; +} + +/* + * util_parse_enum -- parse single enum and store to bitmap + */ +static int +util_parse_enum(const char *str, int first, int max, uint64_t *bitmap, + enum_to_str_fn enum_to_str) +{ + for (int i = first; i < max; i++) { + if (strcmp(str, enum_to_str(i)) == 0) { + *bitmap |= (uint64_t)1<opts = options; + opts->noptions = nopts; + opts->req = req; + size_t bitmap_size = howmany(nopts, 8); + opts->bitmap = calloc(bitmap_size, 1); + if (!opts->bitmap) + err(1, "Cannot allocate memory for options bitmap"); + + return opts; +} + +/* + * util_options_free -- free options structure + */ +void +util_options_free(struct options *opts) +{ + free(opts->bitmap); + free(opts); +} + +/* + * util_opt_get_index -- return index of specified option in global + * array of options + */ +static int +util_opt_get_index(const struct options *opts, int opt) +{ + const struct option *lopt = &opts->opts[0]; + int ret = 0; + while (lopt->name) { + if ((lopt->val & ~OPT_MASK) == opt) + return ret; + lopt++; + ret++; + } + return -1; +} + +/* + * util_opt_get_req -- get required option for specified option + */ +static struct option_requirement * +util_opt_get_req(const struct options *opts, int opt, pmem_pool_type_t type) +{ + size_t n = 0; + struct option_requirement *ret = NULL; + struct option_requirement *tmp = NULL; + const struct option_requirement *req = &opts->req[0]; + while (req->opt) { + if (req->opt == opt && (req->type & type)) { + n++; + tmp = realloc(ret, n * sizeof(*ret)); + if (!tmp) + err(1, "Cannot allocate memory for" + " option requirements"); + ret = tmp; + ret[n - 1] = *req; + } + req++; + } + + if (ret) { + tmp = realloc(ret, (n + 1) * sizeof(*ret)); + if (!tmp) + err(1, "Cannot allocate memory for" + " option requirements"); + ret = tmp; + memset(&ret[n], 0, sizeof(*ret)); + } + + return ret; +} + +/* + * util_opt_check_requirements -- check if requirements has been fulfilled + */ +static int +util_opt_check_requirements(const struct options *opts, + const struct option_requirement *req) +{ + int count = 0; + int set = 0; + uint64_t tmp; + while ((tmp = req->req) != 0) { + while (tmp) { + int req_idx = + util_opt_get_index(opts, tmp & OPT_REQ_MASK); + + if (req_idx >= 0 && util_isset(opts->bitmap, req_idx)) { + set++; + break; + } + + tmp >>= OPT_REQ_SHIFT; + } + req++; + count++; + } + + return count != set; +} + +/* + * util_opt_print_requirements -- print requirements for specified option + */ +static void +util_opt_print_requirements(const struct options *opts, + const struct option_requirement *req) +{ + char buff[REQ_BUFF_SIZE]; + unsigned n = 0; + uint64_t tmp; + const struct option *opt = + &opts->opts[util_opt_get_index(opts, req->opt)]; + int sn; + + sn = util_snprintf(&buff[n], REQ_BUFF_SIZE - n, + "option [-%c|--%s] requires: ", opt->val, opt->name); + assert(sn >= 0); + if (sn >= 0) + n += (unsigned)sn; + + size_t rc = 0; + while ((tmp = req->req) != 0) { + if (rc != 0) { + sn = util_snprintf(&buff[n], REQ_BUFF_SIZE - n, + " and "); + assert(sn >= 0); + if (sn >= 0) + n += (unsigned)sn; + } + + size_t c = 0; + while (tmp) { + sn = util_snprintf(&buff[n], REQ_BUFF_SIZE - n, + c == 0 ? 
"[" : "|"); + assert(sn >= 0); + if (sn >= 0) + n += (unsigned)sn; + + int req_opt_ind = + util_opt_get_index(opts, tmp & OPT_REQ_MASK); + const struct option *req_option = + &opts->opts[req_opt_ind]; + + sn = util_snprintf(&buff[n], REQ_BUFF_SIZE - n, + "-%c|--%s", req_option->val, req_option->name); + assert(sn >= 0); + if (sn >= 0) + n += (unsigned)sn; + + tmp >>= OPT_REQ_SHIFT; + c++; + } + sn = util_snprintf(&buff[n], REQ_BUFF_SIZE - n, "]"); + assert(sn >= 0); + if (sn >= 0) + n += (unsigned)sn; + + req++; + rc++; + } + + outv_err("%s\n", buff); +} + +/* + * util_opt_verify_requirements -- verify specified requirements for options + */ +static int +util_opt_verify_requirements(const struct options *opts, size_t index, + pmem_pool_type_t type) +{ + const struct option *opt = &opts->opts[index]; + int val = opt->val & ~OPT_MASK; + struct option_requirement *req; + + if ((req = util_opt_get_req(opts, val, type)) == NULL) + return 0; + + int ret = 0; + + if (util_opt_check_requirements(opts, req)) { + ret = -1; + util_opt_print_requirements(opts, req); + } + + free(req); + return ret; +} + +/* + * util_opt_verify_type -- check if used option matches pool type + */ +static int +util_opt_verify_type(const struct options *opts, pmem_pool_type_t type, + size_t index) +{ + const struct option *opt = &opts->opts[index]; + int val = opt->val & ~OPT_MASK; + int opt_type = opt->val; + opt_type >>= OPT_SHIFT; + if (!(opt_type & (1<name, val, + out_get_pool_type_str(type)); + return -1; + } + + return 0; +} + +/* + * util_options_getopt -- wrapper for getopt_long which sets bitmap + */ +int +util_options_getopt(int argc, char *argv[], const char *optstr, + const struct options *opts) +{ + int opt = getopt_long(argc, argv, optstr, opts->opts, NULL); + if (opt == -1 || opt == '?') + return opt; + + opt &= ~OPT_MASK; + int option_index = util_opt_get_index(opts, opt); + assert(option_index >= 0); + + util_setbit((uint8_t *)opts->bitmap, (unsigned)option_index); + + return opt; +} + +/* + * util_options_verify -- verify options + */ +int +util_options_verify(const struct options *opts, pmem_pool_type_t type) +{ + for (size_t i = 0; i < opts->noptions; i++) { + if (util_isset(opts->bitmap, i)) { + if (util_opt_verify_type(opts, type, i)) + return -1; + + if (opts->req) + if (util_opt_verify_requirements(opts, i, type)) + return -1; + } + } + + return 0; +} + +/* + * util_heap_max_zone -- get number of zones + */ +unsigned +util_heap_max_zone(size_t size) +{ + unsigned max_zone = 0; + size -= sizeof(struct heap_header); + + while (size >= ZONE_MIN_SIZE) { + max_zone++; + size -= size <= ZONE_MAX_SIZE ? 
size : ZONE_MAX_SIZE; + } + + return max_zone; +} + +/* + * pool_set_file_open -- opens pool set file or regular file + */ +struct pool_set_file * +pool_set_file_open(const char *fname, + int rdonly, int check) +{ + struct pool_set_file *file = calloc(1, sizeof(*file)); + if (!file) + return NULL; + + file->replica = 0; + file->fname = strdup(fname); + if (!file->fname) + goto err; + + os_stat_t buf; + if (os_stat(fname, &buf)) { + warn("%s", fname); + goto err_free_fname; + } + + file->mtime = buf.st_mtime; + file->mode = buf.st_mode; + if (S_ISBLK(file->mode)) + file->fileio = true; + + if (file->fileio) { + /* Simple file open for BTT device */ + int fd = util_file_open(fname, NULL, 0, O_RDONLY); + if (fd < 0) { + outv_err("util_file_open failed\n"); + goto err_free_fname; + } + + os_off_t seek_size = os_lseek(fd, 0, SEEK_END); + if (seek_size == -1) { + outv_err("lseek SEEK_END failed\n"); + os_close(fd); + goto err_free_fname; + } + + file->size = (size_t)seek_size; + file->fd = fd; + } else { + /* + * The check flag indicates whether the headers from each pool + * set file part should be checked for valid values. + */ + if (check) { + if (util_poolset_map(file->fname, + &file->poolset, rdonly)) + goto err_free_fname; + } else { + int ret = util_poolset_create_set(&file->poolset, + file->fname, 0, 0, true); + + if (ret < 0) { + outv_err("cannot open pool set -- '%s'", + file->fname); + goto err_free_fname; + } + unsigned flags = (rdonly ? POOL_OPEN_COW : 0) | + POOL_OPEN_IGNORE_BAD_BLOCKS; + if (util_pool_open_nocheck(file->poolset, flags)) + goto err_free_fname; + } + + /* get modification time from the first part of first replica */ + const char *path = file->poolset->replica[0]->part[0].path; + if (os_stat(path, &buf)) { + warn("%s", path); + goto err_close_poolset; + } + file->size = file->poolset->poolsize; + file->addr = file->poolset->replica[0]->part[0].addr; + } + return file; + +err_close_poolset: + util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS); +err_free_fname: + free(file->fname); +err: + free(file); + return NULL; +} + +/* + * pool_set_file_close -- closes pool set file or regular file + */ +void +pool_set_file_close(struct pool_set_file *file) +{ + if (!file->fileio) { + if (file->poolset) + util_poolset_close(file->poolset, DO_NOT_DELETE_PARTS); + else if (file->addr) { + munmap(file->addr, file->size); + os_close(file->fd); + } + } + free(file->fname); + free(file); +} + +/* + * pool_set_file_read -- read from pool set file or regular file + * + * 'buff' has to be a buffer at least 'nbytes' long + * 'off' is an offset from the beginning of the file + */ +int +pool_set_file_read(struct pool_set_file *file, void *buff, + size_t nbytes, uint64_t off) +{ + if (off + nbytes > file->size) + return -1; + + if (file->fileio) { + ssize_t num = pread(file->fd, buff, nbytes, (os_off_t)off); + if (num < (ssize_t)nbytes) + return -1; + } else { + memcpy(buff, (char *)file->addr + off, nbytes); + } + return 0; +} + +/* + * pool_set_file_write -- write to pool set file or regular file + * + * 'buff' has to be a buffer at least 'nbytes' long + * 'off' is an offset from the beginning of the file + */ +int +pool_set_file_write(struct pool_set_file *file, void *buff, + size_t nbytes, uint64_t off) +{ + enum file_type type = util_file_get_type(file->fname); + if (type < 0) + return -1; + + if (off + nbytes > file->size) + return -1; + + if (file->fileio) { + ssize_t num = pwrite(file->fd, buff, nbytes, (os_off_t)off); + if (num < (ssize_t)nbytes) + return -1; + } else { + 
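+		/*
+		 * (editorial note) mapped pool: the write goes through the
+		 * mapping and is then made durable -- pmem_persist() on
+		 * device DAX, msync() otherwise (see util_persist_auto())
+		 */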
memcpy((char *)file->addr + off, buff, nbytes); + util_persist_auto(type == TYPE_DEVDAX, (char *)file->addr + off, + nbytes); + } + return 0; +} + +/* + * pool_set_file_set_replica -- change replica for pool set file + */ +int +pool_set_file_set_replica(struct pool_set_file *file, size_t replica) +{ + if (!replica) + return 0; + + if (!file->poolset) + return -1; + + if (replica >= file->poolset->nreplicas) + return -1; + + if (file->poolset->replica[replica]->remote) { + outv_err("reading from remote replica not supported"); + return -1; + } + + file->replica = replica; + file->addr = file->poolset->replica[replica]->part[0].addr; + + return 0; +} + +/* + * pool_set_file_nreplicas -- return number of replicas + */ +size_t +pool_set_file_nreplicas(struct pool_set_file *file) +{ + return file->poolset->nreplicas; +} + +/* + * pool_set_file_map -- return mapped address at given offset + */ +void * +pool_set_file_map(struct pool_set_file *file, uint64_t offset) +{ + if (file->addr == MAP_FAILED) + return NULL; + return (char *)file->addr + offset; +} + +/* + * pool_set_file_persist -- propagates and persists changes to a memory range + * + * 'addr' points to the beginning of data in the master replica that has to be + * propagated + * 'len' is the number of bytes to be propagated to other replicas + */ +void +pool_set_file_persist(struct pool_set_file *file, const void *addr, size_t len) +{ + uintptr_t offset = (uintptr_t)((char *)addr - + (char *)file->poolset->replica[0]->part[0].addr); + + for (unsigned r = 1; r < file->poolset->nreplicas; ++r) { + struct pool_replica *rep = file->poolset->replica[r]; + void *dst = (char *)rep->part[0].addr + offset; + memcpy(dst, addr, len); + util_persist(rep->is_pmem, dst, len); + } + struct pool_replica *rep = file->poolset->replica[0]; + util_persist(rep->is_pmem, (void *)addr, len); +} + +/* + * util_pool_clear_badblocks -- clear badblocks in a pool (set or a single file) + */ +int +util_pool_clear_badblocks(const char *path, int create) +{ + LOG(3, "path %s create %i", path, create); + + struct pool_set *setp; + + /* do not check minsize */ + int ret = util_poolset_create_set(&setp, path, 0, 0, + POOL_OPEN_IGNORE_SDS); + if (ret < 0) { + LOG(2, "cannot open pool set -- '%s'", path); + return -1; + } + + if (badblocks_clear_poolset(setp, create)) { + outv_err("clearing bad blocks in the pool set failed -- '%s'", + path); + errno = EIO; + return -1; + } + + return 0; +} diff --git a/src/pmdk/src/tools/pmempool/common.h b/src/pmdk/src/tools/pmempool/common.h new file mode 100644 index 000000000..3c792a5ba --- /dev/null +++ b/src/pmdk/src/tools/pmempool/common.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * common.h -- declarations of common functions + */ + +#include +#include +#include +#include + +#include "queue.h" +#include "log.h" +#include "blk.h" +#include "libpmemobj.h" +#include "lane.h" +#include "ulog.h" +#include "memops.h" +#include "pmalloc.h" +#include "list.h" +#include "obj.h" +#include "memblock.h" +#include "heap_layout.h" +#include "tx.h" +#include "heap.h" +#include "btt_layout.h" +#include "page_size.h" + +/* XXX - modify Linux makefiles to generate srcversion.h and remove #ifdef */ +#ifdef _WIN32 +#include "srcversion.h" +#endif + +#define COUNT_OF(x) (sizeof(x) / sizeof(0[x])) + +#define OPT_SHIFT 12 +#define OPT_MASK (~((1 << OPT_SHIFT) - 1)) +#define OPT_LOG (1 << (PMEM_POOL_TYPE_LOG + OPT_SHIFT)) +#define OPT_BLK (1 << (PMEM_POOL_TYPE_BLK + OPT_SHIFT)) 
+#define OPT_OBJ (1 << (PMEM_POOL_TYPE_OBJ + OPT_SHIFT)) +#define OPT_BTT (1 << (PMEM_POOL_TYPE_BTT + OPT_SHIFT)) +#define OPT_ALL (OPT_LOG | OPT_BLK | OPT_OBJ | OPT_BTT) + +#define OPT_REQ_SHIFT 8 +#define OPT_REQ_MASK ((1 << OPT_REQ_SHIFT) - 1) +#define _OPT_REQ(c, n) ((c) << (OPT_REQ_SHIFT * (n))) +#define OPT_REQ0(c) _OPT_REQ(c, 0) +#define OPT_REQ1(c) _OPT_REQ(c, 1) +#define OPT_REQ2(c) _OPT_REQ(c, 2) +#define OPT_REQ3(c) _OPT_REQ(c, 3) +#define OPT_REQ4(c) _OPT_REQ(c, 4) +#define OPT_REQ5(c) _OPT_REQ(c, 5) +#define OPT_REQ6(c) _OPT_REQ(c, 6) +#define OPT_REQ7(c) _OPT_REQ(c, 7) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#define FOREACH_RANGE(range, ranges)\ + PMDK_LIST_FOREACH(range, &(ranges)->head, next) + +#define PLIST_OFF_TO_PTR(pop, off)\ +((off) == 0 ? NULL : (void *)((uintptr_t)(pop) + (off) - OBJ_OOB_SIZE)) + +#define ENTRY_TO_ALLOC_HDR(entry)\ +((void *)((uintptr_t)(entry) - sizeof(struct allocation_header))) + +#define OBJH_FROM_PTR(ptr)\ +((void *)((uintptr_t)(ptr) - sizeof(struct legacy_object_header))) + +#define DEFAULT_HDR_SIZE PMEM_PAGESIZE +#define DEFAULT_DESC_SIZE PMEM_PAGESIZE +#define POOL_HDR_DESC_SIZE (DEFAULT_HDR_SIZE + DEFAULT_DESC_SIZE) + +#define PTR_TO_ALLOC_HDR(ptr)\ +((void *)((uintptr_t)(ptr) -\ + sizeof(struct legacy_object_header))) + +#define OBJH_TO_PTR(objh)\ +((void *)((uintptr_t)(objh) + sizeof(struct legacy_object_header))) + +/* invalid answer for ask_* functions */ +#define INV_ANS '\0' + +#define FORMAT_PRINTF(a, b) __attribute__((__format__(__printf__, (a), (b)))) + +/* + * pmem_pool_type_t -- pool types + */ +typedef enum { + PMEM_POOL_TYPE_LOG = 0x01, + PMEM_POOL_TYPE_BLK = 0x02, + PMEM_POOL_TYPE_OBJ = 0x04, + PMEM_POOL_TYPE_BTT = 0x08, + PMEM_POOL_TYPE_ALL = 0x0f, + PMEM_POOL_TYPE_UNKNOWN = 0x80, +} pmem_pool_type_t; + +struct option_requirement { + int opt; + pmem_pool_type_t type; + uint64_t req; +}; + +struct options { + const struct option *opts; + size_t noptions; + char *bitmap; + const struct option_requirement *req; +}; + +struct pmem_pool_params { + pmem_pool_type_t type; + char signature[POOL_HDR_SIG_LEN]; + uint64_t size; + mode_t mode; + int is_poolset; + int is_part; + int is_checksum_ok; + union { + struct { + uint64_t bsize; + } blk; + struct { + char layout[PMEMOBJ_MAX_LAYOUT]; + } obj; + }; +}; + +struct pool_set_file { + int fd; + char *fname; + void *addr; + size_t size; + struct pool_set *poolset; + size_t replica; + time_t mtime; + mode_t mode; + bool fileio; +}; + +struct pool_set_file *pool_set_file_open(const char *fname, + int rdonly, int check); +void pool_set_file_close(struct pool_set_file *file); +int pool_set_file_read(struct pool_set_file *file, void *buff, + size_t nbytes, uint64_t off); +int pool_set_file_write(struct pool_set_file *file, void *buff, + size_t nbytes, uint64_t off); +int pool_set_file_set_replica(struct pool_set_file *file, size_t replica); +size_t pool_set_file_nreplicas(struct pool_set_file *file); +void *pool_set_file_map(struct pool_set_file *file, uint64_t offset); +void pool_set_file_persist(struct pool_set_file *file, + const void *addr, size_t len); + +struct range { + PMDK_LIST_ENTRY(range) next; + uint64_t first; + uint64_t last; +}; + +struct ranges { + PMDK_LIST_HEAD(rangeshead, range) head; +}; + +pmem_pool_type_t pmem_pool_type_parse_hdr(const struct pool_hdr *hdrp); +pmem_pool_type_t pmem_pool_type(const void *base_pool_addr); +int pmem_pool_checksum(const void *base_pool_addr); +pmem_pool_type_t pmem_pool_type_parse_str(const char *str); +uint64_t 
pmem_pool_get_min_size(pmem_pool_type_t type);
+int pmem_pool_parse_params(const char *fname, struct pmem_pool_params *paramsp,
+	int check);
+int util_poolset_map(const char *fname, struct pool_set **poolset, int rdonly);
+struct options *util_options_alloc(const struct option *options,
+	size_t nopts, const struct option_requirement *req);
+void util_options_free(struct options *opts);
+int util_options_verify(const struct options *opts, pmem_pool_type_t type);
+int util_options_getopt(int argc, char *argv[], const char *optstr,
+	const struct options *opts);
+pmem_pool_type_t util_get_pool_type_second_page(const void *pool_base_addr);
+int util_parse_mode(const char *str, mode_t *mode);
+int util_parse_ranges(const char *str, struct ranges *rangesp,
+	struct range entire);
+int util_ranges_add(struct ranges *rangesp, struct range range);
+void util_ranges_clear(struct ranges *rangesp);
+int util_ranges_contain(const struct ranges *rangesp, uint64_t n);
+int util_ranges_empty(const struct ranges *rangesp);
+int util_check_memory(const uint8_t *buff, size_t len, uint8_t val);
+int util_parse_chunk_types(const char *str, uint64_t *types);
+int util_parse_lane_sections(const char *str, uint64_t *types);
+char ask(char op, char *answers, char def_ans, const char *fmt, va_list ap);
+char ask_Yn(char op, const char *fmt, ...) FORMAT_PRINTF(2, 3);
+char ask_yN(char op, const char *fmt, ...) FORMAT_PRINTF(2, 3);
+unsigned util_heap_max_zone(size_t size);
+
+int util_pool_clear_badblocks(const char *path, int create);
+
+static const struct range ENTIRE_UINT64 = {
+	{ NULL, NULL }, /* range */
+	0, /* first */
+	UINT64_MAX /* last */
+};
diff --git a/src/pmdk/src/tools/pmempool/convert.c b/src/pmdk/src/tools/pmempool/convert.c
new file mode 100644
index 000000000..db59030e5
--- /dev/null
+++ b/src/pmdk/src/tools/pmempool/convert.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2014-2018, Intel Corporation */
+
+/*
+ * convert.c -- pmempool convert command source file
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "convert.h"
+#include "os.h"
+
+#ifdef _WIN32
+static const char *delimiter = ";";
+static const char *convert_bin = "\\pmdk-convert.exe";
+#else
+static const char *delimiter = ":";
+static const char *convert_bin = "/pmdk-convert";
+#endif // _WIN32
+
+static int
+pmempool_convert_get_path(char *p, size_t max_len)
+{
+	char *path = strdup(os_getenv("PATH"));
+	if (!path) {
+		perror("strdup");
+		return -1;
+	}
+
+	char *dir = strtok(path, delimiter);
+
+	while (dir) {
+		size_t length = strlen(dir) + strlen(convert_bin) + 1;
+		if (length > max_len) {
+			fprintf(stderr, "very long dir in PATH, ignoring\n");
+			/*
+			 * advance to the next PATH entry -- a bare
+			 * "continue" here would loop forever on this entry
+			 */
+			dir = strtok(NULL, delimiter);
+			continue;
+		}
+
+		strcpy(p, dir);
+		strcat(p, convert_bin);
+
+		if (os_access(p, F_OK) == 0) {
+			free(path);
+			return 0;
+		}
+
+		dir = strtok(NULL, delimiter);
+	}
+
+	free(path);
+	return -1;
+}
+
+/*
+ * pmempool_convert_help -- print help message for the convert command;
+ * this is the help message printed by the pmdk-convert tool itself
+ */
+void
+pmempool_convert_help(const char *appname)
+{
+	char path[4096];
+	if (pmempool_convert_get_path(path, sizeof(path))) {
+		fprintf(stderr,
+			"pmdk-convert is not installed. Please install it.\n");
+		exit(1);
+	}
+
+	char *args[] = { path, "-h", NULL };
+
+	os_execv(path, args);
+
+	perror("execv");
+	exit(1);
+}
+
+/*
+ * pmempool_convert_func -- main function for convert command.
+ * It invokes the pmdk-convert tool.
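+ * (editorial note) in effect "pmempool convert ARGS..." becomes
+ * "DIR-from-PATH/pmdk-convert ARGS...": argv[0] is swapped for the
+ * resolved binary path, the remaining arguments pass through unchanged,
+ * and on success os_execv() does not return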
+ */ +int +pmempool_convert_func(const char *appname, int argc, char *argv[]) +{ + char path[4096]; + if (pmempool_convert_get_path(path, sizeof(path))) { + fprintf(stderr, + "pmdk-convert is not installed. Please install it.\n"); + exit(1); + } + + char **args = malloc(((size_t)argc + 1) * sizeof(*args)); + if (!args) { + perror("malloc"); + exit(1); + } + + args[0] = path; + for (int i = 1; i < argc; ++i) + args[i] = argv[i]; + args[argc] = NULL; + + os_execv(args[0], args); + + perror("execv"); + free(args); + exit(1); +} diff --git a/src/pmdk/src/tools/pmempool/convert.h b/src/pmdk/src/tools/pmempool/convert.h new file mode 100644 index 000000000..f52060390 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/convert.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * convert.h -- pmempool convert command header file + */ + +#include + +int pmempool_convert_func(const char *appname, int argc, char *argv[]); +void pmempool_convert_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/create.c b/src/pmdk/src/tools/pmempool/create.c new file mode 100644 index 000000000..30f8f3e80 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/create.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * create.c -- pmempool create command source file + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" +#include "file.h" +#include "create.h" +#include "os.h" + +#include "set.h" +#include "output.h" +#include "libpmemblk.h" +#include "libpmemlog.h" +#include "libpmempool.h" + +#define DEFAULT_MODE 0664 +/* + * pmempool_create -- context and args for create command + */ +struct pmempool_create { + int verbose; + char *fname; + int fexists; + char *inherit_fname; + int max_size; + char *str_type; + struct pmem_pool_params params; + struct pmem_pool_params inherit_params; + char *str_size; + char *str_mode; + char *str_bsize; + uint64_t csize; + int write_btt_layout; + int force; + char *layout; + struct options *opts; + int clearbadblocks; +}; + +/* + * pmempool_create_default -- default args for create command + */ +static const struct pmempool_create pmempool_create_default = { + .verbose = 0, + .fname = NULL, + .fexists = 0, + .inherit_fname = NULL, + .max_size = 0, + .str_type = NULL, + .str_bsize = NULL, + .csize = 0, + .write_btt_layout = 0, + .force = 0, + .layout = NULL, + .clearbadblocks = 0, + .params = { + .type = PMEM_POOL_TYPE_UNKNOWN, + .size = 0, + .mode = DEFAULT_MODE, + } +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Create pmem pool of specified size, type and name\n" +"\n" +"Common options:\n" +" -s, --size size of pool\n" +" -M, --max-size use maximum available space on file system\n" +" -m, --mode set permissions to (the default is 0664)\n" +" -i, --inherit take required parameters from specified pool file\n" +" -b, --clear-bad-blocks clear bad blocks in existing files\n" +" -f, --force remove the pool first\n" +" -v, --verbose increase verbosity level\n" +" -h, --help display this help and exit\n" +"\n" +"Options for PMEMBLK:\n" +" -w, --write-layout force writing the BTT layout\n" +"\n" +"Options for PMEMOBJ:\n" +" -l, --layout layout name stored in pool's header\n" +"\n" +"For complete documentation see %s-create(1) manual page.\n" +; + +/* + * long_options -- command line options + */ +static const struct 
option long_options[] = { + {"size", required_argument, NULL, 's' | OPT_ALL}, + {"verbose", no_argument, NULL, 'v' | OPT_ALL}, + {"help", no_argument, NULL, 'h' | OPT_ALL}, + {"max-size", no_argument, NULL, 'M' | OPT_ALL}, + {"inherit", required_argument, NULL, 'i' | OPT_ALL}, + {"mode", required_argument, NULL, 'm' | OPT_ALL}, + {"write-layout", no_argument, NULL, 'w' | OPT_BLK}, + {"layout", required_argument, NULL, 'l' | OPT_OBJ}, + {"force", no_argument, NULL, 'f' | OPT_ALL}, + {"clear-bad-blocks", no_argument, NULL, 'b' | OPT_ALL}, + {NULL, 0, NULL, 0 }, +}; + +/* + * print_usage -- print application usage short description + */ +static void +print_usage(const char *appname) +{ + printf("Usage: %s create [] [] \n", + appname); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_create_help -- print help message for create command + */ +void +pmempool_create_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * pmempool_create_obj -- create pmem obj pool + */ +static int +pmempool_create_obj(struct pmempool_create *pcp) +{ + PMEMobjpool *pop = pmemobj_create(pcp->fname, pcp->layout, + pcp->params.size, pcp->params.mode); + if (!pop) { + outv_err("'%s' -- %s\n", pcp->fname, pmemobj_errormsg()); + return -1; + } + + pmemobj_close(pop); + + return 0; +} + +/* + * pmempool_create_blk -- create pmem blk pool + */ +static int +pmempool_create_blk(struct pmempool_create *pcp) +{ + ASSERTne(pcp->params.blk.bsize, 0); + + int ret = 0; + + PMEMblkpool *pbp = pmemblk_create(pcp->fname, pcp->params.blk.bsize, + pcp->params.size, pcp->params.mode); + if (!pbp) { + outv_err("'%s' -- %s\n", pcp->fname, pmemblk_errormsg()); + return -1; + } + + if (pcp->write_btt_layout) { + outv(1, "Writing BTT layout using block %d.\n", + pcp->write_btt_layout); + + if (pmemblk_set_error(pbp, 0) || pmemblk_set_zero(pbp, 0)) { + outv_err("writing BTT layout to block 0 failed\n"); + ret = -1; + } + } + + pmemblk_close(pbp); + + return ret; +} + +/* + * pmempool_create_log -- create pmem log pool + */ +static int +pmempool_create_log(struct pmempool_create *pcp) +{ + PMEMlogpool *plp = pmemlog_create(pcp->fname, + pcp->params.size, pcp->params.mode); + + if (!plp) { + outv_err("'%s' -- %s\n", pcp->fname, pmemlog_errormsg()); + return -1; + } + + pmemlog_close(plp); + + return 0; +} + +/* + * pmempool_get_max_size -- return maximum allowed size of file + */ +#ifndef _WIN32 +static int +pmempool_get_max_size(const char *fname, uint64_t *sizep) +{ + struct statvfs buf; + int ret = 0; + char *name = strdup(fname); + if (name == NULL) { + return -1; + } + + char *dir = dirname(name); + + if (statvfs(dir, &buf)) + ret = -1; + else + *sizep = buf.f_bsize * buf.f_bavail; + + free(name); + + return ret; +} +#else +static int +pmempool_get_max_size(const char *fname, uint64_t *sizep) +{ + int ret = 0; + ULARGE_INTEGER freespace; + char *name = strdup(fname); + if (name == NULL) { + return -1; + } + + char *dir = dirname(name); + wchar_t *str = util_toUTF16(dir); + if (str == NULL) { + free(name); + return -1; + } + if (GetDiskFreeSpaceExW(str, &freespace, NULL, NULL) == 0) + ret = -1; + else + *sizep = freespace.QuadPart; + + free(str); + free(name); + + return ret; +} +#endif + +/* + * print_pool_params -- print some parameters of a pool + */ +static void +print_pool_params(struct pmem_pool_params *params) +{ + outv(1, "\ttype : %s\n", 
out_get_pool_type_str(params->type)); + outv(1, "\tsize : %s\n", out_get_size_str(params->size, 2)); + outv(1, "\tmode : 0%o\n", params->mode); + switch (params->type) { + case PMEM_POOL_TYPE_BLK: + outv(1, "\tbsize : %s\n", + out_get_size_str(params->blk.bsize, 0)); + break; + case PMEM_POOL_TYPE_OBJ: + outv(1, "\tlayout: '%s'\n", params->obj.layout); + break; + default: + break; + } +} + +/* + * inherit_pool_params -- inherit pool parameters from specified file + */ +static int +inherit_pool_params(struct pmempool_create *pcp) +{ + outv(1, "Parsing pool: '%s'\n", pcp->inherit_fname); + + /* + * If no type string passed, --inherit option must be passed + * so parse file and get required parameters. + */ + if (pmem_pool_parse_params(pcp->inherit_fname, + &pcp->inherit_params, 1)) { + if (errno) + perror(pcp->inherit_fname); + else + outv_err("%s: cannot determine type of pool\n", + pcp->inherit_fname); + return -1; + } + + if (PMEM_POOL_TYPE_UNKNOWN == pcp->inherit_params.type) { + outv_err("'%s' -- unknown pool type\n", + pcp->inherit_fname); + return -1; + } + + print_pool_params(&pcp->inherit_params); + + return 0; +} + +/* + * pmempool_create_parse_args -- parse command line args + */ +static int +pmempool_create_parse_args(struct pmempool_create *pcp, const char *appname, + int argc, char *argv[], struct options *opts) +{ + int opt, ret; + while ((opt = util_options_getopt(argc, argv, "vhi:s:Mm:l:wfb", + opts)) != -1) { + switch (opt) { + case 'v': + pcp->verbose = 1; + break; + case 'h': + pmempool_create_help(appname); + exit(EXIT_SUCCESS); + case 's': + pcp->str_size = optarg; + ret = util_parse_size(optarg, + (size_t *)&pcp->params.size); + if (ret || pcp->params.size == 0) { + outv_err("invalid size value specified '%s'\n", + optarg); + return -1; + } + break; + case 'M': + pcp->max_size = 1; + break; + case 'm': + pcp->str_mode = optarg; + if (util_parse_mode(optarg, &pcp->params.mode)) { + outv_err("invalid mode value specified '%s'\n", + optarg); + return -1; + } + break; + case 'i': + pcp->inherit_fname = optarg; + break; + case 'w': + pcp->write_btt_layout = 1; + break; + case 'l': + pcp->layout = optarg; + break; + case 'f': + pcp->force = 1; + break; + case 'b': + pcp->clearbadblocks = 1; + break; + default: + print_usage(appname); + return -1; + } + } + + /* check for , and strings */ + if (optind + 2 < argc) { + pcp->str_type = argv[optind]; + pcp->str_bsize = argv[optind + 1]; + pcp->fname = argv[optind + 2]; + } else if (optind + 1 < argc) { + pcp->str_type = argv[optind]; + pcp->fname = argv[optind + 1]; + } else if (optind < argc) { + pcp->fname = argv[optind]; + pcp->str_type = NULL; + } else { + print_usage(appname); + return -1; + } + + return 0; +} + +static int +allocate_max_size_available_file(const char *name_of_file, mode_t mode, + os_off_t max_size) +{ + int fd = os_open(name_of_file, O_CREAT | O_EXCL | O_RDWR, mode); + if (fd == -1) { + outv_err("!open '%s' failed", name_of_file); + return -1; + } + + os_off_t offset = 0; + os_off_t length = max_size - (max_size % (os_off_t)Pagesize); + int ret; + do { + ret = os_posix_fallocate(fd, offset, length); + if (ret == 0) + offset += length; + else if (ret != ENOSPC) { + os_close(fd); + if (os_unlink(name_of_file) == -1) + outv_err("!unlink '%s' failed", name_of_file); + errno = ret; + outv_err("!space allocation for '%s' failed", + name_of_file); + return -1; + } + + length /= 2; + length -= (length % (os_off_t)Pagesize); + } while (length > (os_off_t)Pagesize); + + os_close(fd); + + return 0; +} + +/* + * 
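+ * create_log_sketch -- editorial sketch, not part of the original
+ * change: the programmatic equivalent of "pmempool create log <file>",
+ * using the same libpmemlog call that pmempool_create_log() above
+ * wraps; the pool path is hypothetical
+ */
+static int
+create_log_sketch(void)
+{
+	PMEMlogpool *plp = pmemlog_create("/mnt/pmem/log.pool",
+			PMEMLOG_MIN_POOL, DEFAULT_MODE);
+	if (plp == NULL)
+		return -1; /* pmemlog_errormsg() has the details */
+
+	pmemlog_close(plp);
+	return 0;
+}
+
+/*
+ * 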
pmempool_create_func -- main function for create command + */ +int +pmempool_create_func(const char *appname, int argc, char *argv[]) +{ + int ret = 0; + struct pmempool_create pc = pmempool_create_default; + pc.opts = util_options_alloc(long_options, sizeof(long_options) / + sizeof(long_options[0]), NULL); + + /* parse command line arguments */ + ret = pmempool_create_parse_args(&pc, appname, argc, argv, pc.opts); + if (ret) + exit(EXIT_FAILURE); + + /* set verbosity level */ + out_set_vlevel(pc.verbose); + + umask(0); + + int exists = util_file_exists(pc.fname); + if (exists < 0) + return -1; + + pc.fexists = exists; + int is_poolset = util_is_poolset_file(pc.fname) == 1; + + if (pc.inherit_fname) { + if (inherit_pool_params(&pc)) { + outv_err("parsing pool '%s' failed\n", + pc.inherit_fname); + return -1; + } + } + + /* + * Parse pool type and other parameters if --inherit option + * passed. It is possible to either pass --inherit option + * or pool type string in command line arguments. This is + * validated here. + */ + if (pc.str_type) { + /* parse pool type string if passed in command line arguments */ + pc.params.type = pmem_pool_type_parse_str(pc.str_type); + if (PMEM_POOL_TYPE_UNKNOWN == pc.params.type) { + outv_err("'%s' -- unknown pool type\n", pc.str_type); + return -1; + } + + if (PMEM_POOL_TYPE_BLK == pc.params.type) { + if (pc.str_bsize == NULL) { + outv_err("blk pool requires " + "argument\n"); + return -1; + } + if (util_parse_size(pc.str_bsize, + (size_t *)&pc.params.blk.bsize)) { + outv_err("cannot parse '%s' as block size\n", + pc.str_bsize); + return -1; + } + } + + if (PMEM_POOL_TYPE_OBJ == pc.params.type && pc.layout != NULL) { + size_t max_layout = PMEMOBJ_MAX_LAYOUT; + + if (strlen(pc.layout) >= max_layout) { + outv_err( + "Layout name is too long, maximum number of characters (including the terminating null byte) is %zu\n", + max_layout); + return -1; + } + + size_t len = sizeof(pc.params.obj.layout); + strncpy(pc.params.obj.layout, pc.layout, len); + pc.params.obj.layout[len - 1] = '\0'; + } + } else if (pc.inherit_fname) { + pc.params.type = pc.inherit_params.type; + } else { + /* neither pool type string nor --inherit options passed */ + print_usage(appname); + return -1; + } + + if (util_options_verify(pc.opts, pc.params.type)) + return -1; + + if (pc.params.type != PMEM_POOL_TYPE_BLK && pc.str_bsize != NULL) { + outv_err("invalid option specified for %s pool type" + " -- block size\n", + out_get_pool_type_str(pc.params.type)); + return -1; + } + + if (is_poolset) { + if (pc.params.size) { + outv_err("-s|--size cannot be used with " + "poolset file\n"); + return -1; + } + + if (pc.max_size) { + outv_err("-M|--max-size cannot be used with " + "poolset file\n"); + return -1; + } + } + + if (pc.params.size && pc.max_size) { + outv_err("-M|--max-size option cannot be used with -s|--size" + " option\n"); + return -1; + } + + if (pc.inherit_fname) { + if (!pc.str_size && !pc.max_size) + pc.params.size = pc.inherit_params.size; + if (!pc.str_mode) + pc.params.mode = pc.inherit_params.mode; + switch (pc.params.type) { + case PMEM_POOL_TYPE_BLK: + if (!pc.str_bsize) + pc.params.blk.bsize = + pc.inherit_params.blk.bsize; + break; + case PMEM_POOL_TYPE_OBJ: + if (!pc.layout) { + memcpy(pc.params.obj.layout, + pc.inherit_params.obj.layout, + sizeof(pc.params.obj.layout)); + } else { + size_t len = sizeof(pc.params.obj.layout); + strncpy(pc.params.obj.layout, pc.layout, + len - 1); + pc.params.obj.layout[len - 1] = '\0'; + } + break; + default: + break; + } + } + + /* + * 
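+	 * (editorial note) effective size precedence at this point:
+	 * 1. -s/--size  2. -i/--inherit  3. -M/--max-size
+	 * 4. an existing file's size  5. the pool type's minimum
+	 *
+	 * 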
If neither --size nor --inherit options passed, check + * for --max-size option - if not passed use minimum pool size. + */ + uint64_t min_size = pmem_pool_get_min_size(pc.params.type); + if (pc.params.size == 0) { + if (pc.max_size) { + outv(1, "Maximum size option passed " + "- getting available space of file system.\n"); + ret = pmempool_get_max_size(pc.fname, + &pc.params.size); + if (ret) { + outv_err("cannot get available space of fs\n"); + return -1; + } + if (pc.params.size == 0) { + outv_err("No space left on device\n"); + return -1; + } + outv(1, "Available space is %s\n", + out_get_size_str(pc.params.size, 2)); + if (allocate_max_size_available_file(pc.fname, + pc.params.mode, + (os_off_t)pc.params.size)) + return -1; + /* + * We are going to create pool based + * on file size instead of the pc.params.size. + */ + pc.params.size = 0; + } else { + if (!pc.fexists) { + outv(1, "No size option passed " + "- picking minimum pool size.\n"); + pc.params.size = min_size; + } + } + } else { + if (pc.params.size < min_size) { + outv_err("size must be >= %lu bytes\n", min_size); + return -1; + } + } + + if (pc.force) + pmempool_rm(pc.fname, PMEMPOOL_RM_FORCE); + + outv(1, "Creating pool: %s\n", pc.fname); + print_pool_params(&pc.params); + + if (pc.clearbadblocks) { + int ret = util_pool_clear_badblocks(pc.fname, + 1 /* ignore non-existing */); + if (ret) { + outv_err("'%s' -- clearing bad blocks failed\n", + pc.fname); + return -1; + } + } + + switch (pc.params.type) { + case PMEM_POOL_TYPE_BLK: + ret = pmempool_create_blk(&pc); + break; + case PMEM_POOL_TYPE_LOG: + ret = pmempool_create_log(&pc); + break; + case PMEM_POOL_TYPE_OBJ: + ret = pmempool_create_obj(&pc); + break; + default: + ret = -1; + break; + } + + if (ret) { + outv_err("creating pool file failed\n"); + if (!pc.fexists) + util_unlink(pc.fname); + } + + util_options_free(pc.opts); + return ret; +} diff --git a/src/pmdk/src/tools/pmempool/create.h b/src/pmdk/src/tools/pmempool/create.h new file mode 100644 index 000000000..21d083aa7 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/create.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * create.h -- pmempool create command header file + */ + +int pmempool_create_func(const char *appname, int argc, char *argv[]); +void pmempool_create_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/dump.c b/src/pmdk/src/tools/pmempool/dump.c new file mode 100644 index 000000000..3c7b07441 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/dump.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * create.c -- pmempool create command source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" +#include "dump.h" +#include "output.h" +#include "os.h" +#include "libpmemblk.h" +#include "libpmemlog.h" + +#define VERBOSE_DEFAULT 1 + +/* + * pmempool_dump -- context and arguments for dump command + */ +struct pmempool_dump { + char *fname; + char *ofname; + char *range; + FILE *ofh; + int hex; + uint64_t bsize; + struct ranges ranges; + size_t chunksize; + uint64_t chunkcnt; +}; + +/* + * pmempool_dump_default -- default arguments and context values + */ +static const struct pmempool_dump pmempool_dump_default = { + .fname = NULL, + .ofname = NULL, + .range = NULL, + .ofh = NULL, + .hex = 1, + .bsize = 0, + .chunksize = 0, + .chunkcnt = 0, +}; + +/* + * long_options -- command line options + */ 
+static const struct option long_options[] = { + {"output", required_argument, NULL, 'o' | OPT_ALL}, + {"binary", no_argument, NULL, 'b' | OPT_ALL}, + {"range", required_argument, NULL, 'r' | OPT_ALL}, + {"chunk", required_argument, NULL, 'c' | OPT_LOG}, + {"help", no_argument, NULL, 'h' | OPT_ALL}, + {NULL, 0, NULL, 0 }, +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Dump user data from pool\n" +"\n" +"Available options:\n" +" -o, --output output file name\n" +" -b, --binary dump data in binary format\n" +" -r, --range range of bytes/blocks/data chunks\n" +" -c, --chunk size of chunk for PMEMLOG pool\n" +" -h, --help display this help and exit\n" +"\n" +"For complete documentation see %s-dump(1) manual page.\n" +; + +/* + * print_usage -- print application usage short description + */ +static void +print_usage(const char *appname) +{ + printf("Usage: %s dump [] \n", appname); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_dump_help -- print help message for dump command + */ +void +pmempool_dump_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * pmempool_dump_log_process_chunk -- callback for pmemlog_walk + */ +static int +pmempool_dump_log_process_chunk(const void *buf, size_t len, void *arg) +{ + struct pmempool_dump *pdp = (struct pmempool_dump *)arg; + + if (len == 0) + return 0; + + struct range *curp = NULL; + if (pdp->chunksize) { + PMDK_LIST_FOREACH(curp, &pdp->ranges.head, next) { + if (pdp->chunkcnt >= curp->first && + pdp->chunkcnt <= curp->last && + pdp->chunksize <= len) { + if (pdp->hex) { + outv_hexdump(VERBOSE_DEFAULT, + buf, pdp->chunksize, + pdp->chunksize * pdp->chunkcnt, + 0); + } else { + if (fwrite(buf, pdp->chunksize, + 1, pdp->ofh) != 1) + err(1, "%s", pdp->ofname); + } + } + } + pdp->chunkcnt++; + } else { + PMDK_LIST_FOREACH(curp, &pdp->ranges.head, next) { + if (curp->first >= len) + continue; + uint8_t *ptr = (uint8_t *)buf + curp->first; + if (curp->last >= len) + curp->last = len - 1; + uint64_t count = curp->last - curp->first + 1; + if (pdp->hex) { + outv_hexdump(VERBOSE_DEFAULT, ptr, + count, curp->first, 0); + } else { + if (fwrite(ptr, count, 1, pdp->ofh) != 1) + err(1, "%s", pdp->ofname); + } + } + } + + return 1; +} + +/* + * pmempool_dump_parse_range -- parse range passed by arguments + */ +static int +pmempool_dump_parse_range(struct pmempool_dump *pdp, size_t max) +{ + struct range entire; + memset(&entire, 0, sizeof(entire)); + + entire.last = max; + + if (util_parse_ranges(pdp->range, &pdp->ranges, entire)) { + outv_err("invalid range value specified" + " -- '%s'\n", pdp->range); + return -1; + } + + if (PMDK_LIST_EMPTY(&pdp->ranges.head)) + util_ranges_add(&pdp->ranges, entire); + + return 0; +} + +/* + * pmempool_dump_log -- dump data from pmem log pool + */ +static int +pmempool_dump_log(struct pmempool_dump *pdp) +{ + PMEMlogpool *plp = pmemlog_open(pdp->fname); + if (!plp) { + warn("%s", pdp->fname); + return -1; + } + + os_off_t off = pmemlog_tell(plp); + if (off < 0) { + warn("%s", pdp->fname); + pmemlog_close(plp); + return -1; + } + + if (off == 0) + goto end; + + size_t max = (size_t)off - 1; + if (pdp->chunksize) + max /= pdp->chunksize; + + if (pmempool_dump_parse_range(pdp, max)) + return -1; + + pdp->chunkcnt = 0; + pmemlog_walk(plp, pdp->chunksize, pmempool_dump_log_process_chunk, pdp); + +end: 
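+	/*
+	 * Common exit path -- reached directly when pmemlog_tell()
+	 * reports an empty log; the pool is closed either way.
+	 */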
+ pmemlog_close(plp); + + return 0; +} + +/* + * pmempool_dump_blk -- dump data from pmem blk pool + */ +static int +pmempool_dump_blk(struct pmempool_dump *pdp) +{ + PMEMblkpool *pbp = pmemblk_open(pdp->fname, pdp->bsize); + if (!pbp) { + warn("%s", pdp->fname); + return -1; + } + + if (pmempool_dump_parse_range(pdp, pmemblk_nblock(pbp) - 1)) + return -1; + + uint8_t *buff = malloc(pdp->bsize); + if (!buff) + err(1, "Cannot allocate memory for pmemblk block buffer"); + + int ret = 0; + + uint64_t i; + struct range *curp = NULL; + PMDK_LIST_FOREACH(curp, &pdp->ranges.head, next) { + assert((os_off_t)curp->last >= 0); + for (i = curp->first; i <= curp->last; i++) { + if (pmemblk_read(pbp, buff, (os_off_t)i)) { + ret = -1; + outv_err("reading block number %lu " + "failed\n", i); + break; + } + + if (pdp->hex) { + uint64_t offset = i * pdp->bsize; + outv_hexdump(VERBOSE_DEFAULT, buff, + pdp->bsize, offset, 0); + } else { + if (fwrite(buff, pdp->bsize, 1, + pdp->ofh) != 1) { + warn("write"); + ret = -1; + break; + } + } + } + } + + free(buff); + pmemblk_close(pbp); + + return ret; +} + +static const struct option_requirement option_requirements[] = { + { 0, 0, 0} +}; + +/* + * pmempool_dump_func -- dump command main function + */ +int +pmempool_dump_func(const char *appname, int argc, char *argv[]) +{ + struct pmempool_dump pd = pmempool_dump_default; + PMDK_LIST_INIT(&pd.ranges.head); + out_set_vlevel(VERBOSE_DEFAULT); + + struct options *opts = util_options_alloc(long_options, + sizeof(long_options) / sizeof(long_options[0]), + option_requirements); + int ret = 0; + long long chunksize; + int opt; + while ((opt = util_options_getopt(argc, argv, + "ho:br:c:", opts)) != -1) { + switch (opt) { + case 'o': + pd.ofname = optarg; + break; + case 'b': + pd.hex = 0; + break; + case 'r': + pd.range = optarg; + break; + case 'c': + chunksize = atoll(optarg); + if (chunksize <= 0) { + outv_err("invalid chunk size specified '%s'\n", + optarg); + exit(EXIT_FAILURE); + } + pd.chunksize = (size_t)chunksize; + break; + case 'h': + pmempool_dump_help(appname); + exit(EXIT_SUCCESS); + default: + print_usage(appname); + exit(EXIT_FAILURE); + } + } + + if (optind < argc) { + pd.fname = argv[optind]; + } else { + print_usage(appname); + exit(EXIT_FAILURE); + } + + if (pd.ofname == NULL) { + /* use standard output by default */ + pd.ofh = stdout; + } else { + pd.ofh = os_fopen(pd.ofname, "wb"); + if (!pd.ofh) { + warn("%s", pd.ofname); + exit(EXIT_FAILURE); + } + } + + /* set output stream - stdout or file passed by -o option */ + out_set_stream(pd.ofh); + + struct pmem_pool_params params; + /* parse pool type and block size for pmem blk pool */ + pmem_pool_parse_params(pd.fname, ¶ms, 1); + + ret = util_options_verify(opts, params.type); + if (ret) + goto out; + + switch (params.type) { + case PMEM_POOL_TYPE_LOG: + ret = pmempool_dump_log(&pd); + break; + case PMEM_POOL_TYPE_BLK: + pd.bsize = params.blk.bsize; + ret = pmempool_dump_blk(&pd); + break; + case PMEM_POOL_TYPE_OBJ: + outv_err("%s: PMEMOBJ pool not supported\n", pd.fname); + ret = -1; + goto out; + case PMEM_POOL_TYPE_UNKNOWN: + outv_err("%s: unknown pool type -- '%s'\n", pd.fname, + params.signature); + ret = -1; + goto out; + default: + outv_err("%s: cannot determine type of pool\n", pd.fname); + ret = -1; + goto out; + } + + if (ret) + outv_err("%s: dumping pool file failed\n", pd.fname); + +out: + if (pd.ofh != stdout) + fclose(pd.ofh); + + util_ranges_clear(&pd.ranges); + + util_options_free(opts); + + return ret; +} diff --git 
a/src/pmdk/src/tools/pmempool/dump.h b/src/pmdk/src/tools/pmempool/dump.h new file mode 100644 index 000000000..1b0e7040b --- /dev/null +++ b/src/pmdk/src/tools/pmempool/dump.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * dump.h -- pmempool dump command header file + */ + +int pmempool_dump_func(const char *appname, int argc, char *argv[]); +void pmempool_dump_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/feature.c b/src/pmdk/src/tools/pmempool/feature.c new file mode 100644 index 000000000..8f199deaa --- /dev/null +++ b/src/pmdk/src/tools/pmempool/feature.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2018-2019, Intel Corporation */ + +/* + * feature.c -- pmempool feature command source file + */ +#include +#include + +#include "common.h" +#include "feature.h" +#include "output.h" +#include "libpmempool.h" + +/* operations over features */ +enum feature_op { + undefined, + enable, + disable, + query +}; + +/* + * feature_ctx -- context and arguments for feature command + */ +struct feature_ctx { + int verbose; + const char *fname; + enum feature_op op; + enum pmempool_feature feature; + unsigned flags; +}; + +/* + * pmempool_feature_default -- default arguments for feature command + */ +static const struct feature_ctx pmempool_feature_default = { + .verbose = 0, + .fname = NULL, + .op = undefined, + .feature = UINT32_MAX, + .flags = 0 +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Toggle or query a pool feature\n" +"\n" +"For complete documentation see %s-feature(1) manual page.\n" +; + +/* + * long_options -- command line options + */ +static const struct option long_options[] = { + {"enable", required_argument, NULL, 'e'}, + {"disable", required_argument, NULL, 'd'}, + {"query", required_argument, NULL, 'q'}, + {"verbose", no_argument, NULL, 'v'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0 }, +}; + +/* + * print_usage -- print short description of application's usage + */ +static void +print_usage(const char *appname) +{ + printf("Usage: %s feature [] \n", appname); + printf( + "feature: SINGLEHDR, CKSUM_2K, SHUTDOWN_STATE, CHECK_BAD_BLOCKS\n"); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_feature_help -- print help message for feature command + */ +void +pmempool_feature_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * feature_perform -- perform operation over function + */ +static int +feature_perform(struct feature_ctx *pfp) +{ + int ret; + + switch (pfp->op) { + case enable: + return pmempool_feature_enable(pfp->fname, pfp->feature, + pfp->flags); + case disable: + return pmempool_feature_disable(pfp->fname, pfp->feature, + pfp->flags); + case query: + ret = pmempool_feature_query(pfp->fname, pfp->feature, + pfp->flags); + if (ret < 0) + return 1; + printf("%d", ret); + return 0; + default: + outv_err("Invalid option."); + return -1; + } +} + +/* + * set_op -- set operation + */ +static void +set_op(const char *appname, struct feature_ctx *pfp, enum feature_op op, + const char *feature) +{ + /* only one operation allowed */ + if (pfp->op != undefined) + goto misuse; + pfp->op = op; + + /* parse feature name */ + uint32_t fval = util_str2pmempool_feature(feature); + if (fval == UINT32_MAX) + goto misuse; + 
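+	/* the feature name parsed into a valid libpmempool feature value */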
pfp->feature = (enum pmempool_feature)fval; + return; + +misuse: + print_usage(appname); + exit(EXIT_FAILURE); +} + +/* + * parse_args -- parse command line arguments + */ +static int +parse_args(struct feature_ctx *pfp, const char *appname, + int argc, char *argv[]) +{ + int opt; + while ((opt = getopt_long(argc, argv, "vhe:d:q:h", + long_options, NULL)) != -1) { + switch (opt) { + case 'e': + set_op(appname, pfp, enable, optarg); + break; + case 'd': + set_op(appname, pfp, disable, optarg); + break; + case 'q': + set_op(appname, pfp, query, optarg); + break; + case 'v': + pfp->verbose = 2; + break; + case 'h': + pmempool_feature_help(appname); + exit(EXIT_SUCCESS); + default: + print_usage(appname); + exit(EXIT_FAILURE); + } + } + + if (optind >= argc) { + print_usage(appname); + exit(EXIT_FAILURE); + } + pfp->fname = argv[optind]; + return 0; +} + +/* + * pmempool_feature_func -- main function for feature command + */ +int +pmempool_feature_func(const char *appname, int argc, char *argv[]) +{ + struct feature_ctx pf = pmempool_feature_default; + int ret = 0; + + /* parse command line arguments */ + ret = parse_args(&pf, appname, argc, argv); + if (ret) + return ret; + + /* set verbosity level */ + out_set_vlevel(pf.verbose); + + return feature_perform(&pf); +} diff --git a/src/pmdk/src/tools/pmempool/feature.h b/src/pmdk/src/tools/pmempool/feature.h new file mode 100644 index 000000000..216f61d0f --- /dev/null +++ b/src/pmdk/src/tools/pmempool/feature.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * feature.h -- pmempool feature command header file + */ + +int pmempool_feature_func(const char *appname, int argc, char *argv[]); +void pmempool_feature_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/info.c b/src/pmdk/src/tools/pmempool/info.c new file mode 100644 index 000000000..58dac13e0 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/info.c @@ -0,0 +1,1034 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * info.c -- pmempool info command main source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "output.h" +#include "out.h" +#include "info.h" +#include "set.h" +#include "file.h" +#include "badblocks.h" +#include "set_badblocks.h" + +#define DEFAULT_CHUNK_TYPES\ + ((1<<CHUNK_TYPE_FREE)|\ + (1<<CHUNK_TYPE_USED)|\ + (1<<CHUNK_TYPE_RUN)) + +#define GET_ALIGNMENT(ad, x)\ +(1 + (((ad) >> (ALIGNMENT_DESC_BITS * (x))) & ((1 << ALIGNMENT_DESC_BITS) - 1))) + +#define UNDEF_REPLICA UINT_MAX +#define UNDEF_PART UINT_MAX + +/* + * Default arguments + */ +static const struct pmempool_info_args pmempool_info_args_default = { + /* + * Picked experimentally based on the field names used. + * This should be at least the number of characters of + * the longest field name.
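+	 * ("Info block backup offset", printed for BTT arenas, is
+	 * exactly 24 characters long.)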
+ */ + .col_width = 24, + .human = false, + .force = false, + .badblocks = PRINT_BAD_BLOCKS_NOT_SET, + .type = PMEM_POOL_TYPE_UNKNOWN, + .vlevel = VERBOSE_DEFAULT, + .vdata = VERBOSE_SILENT, + .vhdrdump = VERBOSE_SILENT, + .vstats = VERBOSE_SILENT, + .log = { + .walk = 0, + }, + .blk = { + .vmap = VERBOSE_SILENT, + .vflog = VERBOSE_SILENT, + .vbackup = VERBOSE_SILENT, + .skip_zeros = false, + .skip_error = false, + .skip_no_flag = false, + }, + .obj = { + .vlanes = VERBOSE_SILENT, + .vroot = VERBOSE_SILENT, + .vobjects = VERBOSE_SILENT, + .valloc = VERBOSE_SILENT, + .voobhdr = VERBOSE_SILENT, + .vheap = VERBOSE_SILENT, + .vzonehdr = VERBOSE_SILENT, + .vchunkhdr = VERBOSE_SILENT, + .vbitmap = VERBOSE_SILENT, + .lanes_recovery = false, + .ignore_empty_obj = false, + .chunk_types = DEFAULT_CHUNK_TYPES, + .replica = 0, + }, +}; + +/* + * long-options -- structure holding long options. + */ +static const struct option long_options[] = { + {"version", no_argument, NULL, 'V' | OPT_ALL}, + {"verbose", no_argument, NULL, 'v' | OPT_ALL}, + {"help", no_argument, NULL, 'h' | OPT_ALL}, + {"human", no_argument, NULL, 'n' | OPT_ALL}, + {"force", required_argument, NULL, 'f' | OPT_ALL}, + {"data", no_argument, NULL, 'd' | OPT_ALL}, + {"headers-hex", no_argument, NULL, 'x' | OPT_ALL}, + {"stats", no_argument, NULL, 's' | OPT_ALL}, + {"range", required_argument, NULL, 'r' | OPT_ALL}, + {"bad-blocks", required_argument, NULL, 'k' | OPT_ALL}, + {"walk", required_argument, NULL, 'w' | OPT_LOG}, + {"skip-zeros", no_argument, NULL, 'z' | OPT_BLK | OPT_BTT}, + {"skip-error", no_argument, NULL, 'e' | OPT_BLK | OPT_BTT}, + {"skip-no-flag", no_argument, NULL, 'u' | OPT_BLK | OPT_BTT}, + {"map", no_argument, NULL, 'm' | OPT_BLK | OPT_BTT}, + {"flog", no_argument, NULL, 'g' | OPT_BLK | OPT_BTT}, + {"backup", no_argument, NULL, 'B' | OPT_BLK | OPT_BTT}, + {"lanes", no_argument, NULL, 'l' | OPT_OBJ}, + {"recovery", no_argument, NULL, 'R' | OPT_OBJ}, + {"section", required_argument, NULL, 'S' | OPT_OBJ}, + {"object-store", no_argument, NULL, 'O' | OPT_OBJ}, + {"types", required_argument, NULL, 't' | OPT_OBJ}, + {"no-empty", no_argument, NULL, 'E' | OPT_OBJ}, + {"alloc-header", no_argument, NULL, 'A' | OPT_OBJ}, + {"oob-header", no_argument, NULL, 'a' | OPT_OBJ}, + {"root", no_argument, NULL, 'o' | OPT_OBJ}, + {"heap", no_argument, NULL, 'H' | OPT_OBJ}, + {"zones", no_argument, NULL, 'Z' | OPT_OBJ}, + {"chunks", no_argument, NULL, 'C' | OPT_OBJ}, + {"chunk-type", required_argument, NULL, 'T' | OPT_OBJ}, + {"bitmap", no_argument, NULL, 'b' | OPT_OBJ}, + {"replica", required_argument, NULL, 'p' | OPT_OBJ}, + {NULL, 0, NULL, 0 }, +}; + +static const struct option_requirement option_requirements[] = { + { + .opt = 'r', + .type = PMEM_POOL_TYPE_LOG, + .req = OPT_REQ0('d') + }, + { + .opt = 'r', + .type = PMEM_POOL_TYPE_BLK | PMEM_POOL_TYPE_BTT, + .req = OPT_REQ0('d') | OPT_REQ1('m') + }, + { + .opt = 'z', + .type = PMEM_POOL_TYPE_BLK | PMEM_POOL_TYPE_BTT, + .req = OPT_REQ0('d') | OPT_REQ1('m') + }, + { + .opt = 'e', + .type = PMEM_POOL_TYPE_BLK | PMEM_POOL_TYPE_BTT, + .req = OPT_REQ0('d') | OPT_REQ1('m') + }, + { + .opt = 'u', + .type = PMEM_POOL_TYPE_BLK | PMEM_POOL_TYPE_BTT, + .req = OPT_REQ0('d') | OPT_REQ1('m') + }, + { + .opt = 'r', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('Z') | + OPT_REQ2('C') | OPT_REQ3('l'), + }, + { + .opt = 'R', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('l') + }, + { + .opt = 'S', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('l') + }, + { + .opt = 'E', + .type = 
PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') + }, + { + .opt = 'T', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('C') + }, + { + .opt = 'b', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('H') + }, + { + .opt = 'b', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('C') + }, + { + .opt = 'A', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('l') | OPT_REQ2('o') + }, + { + .opt = 'a', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('l') | OPT_REQ2('o') + }, + { + .opt = 't', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('s'), + }, + { + .opt = 'C', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('H') | OPT_REQ2('s'), + }, + { + .opt = 'Z', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('H') | OPT_REQ2('s'), + }, + { + .opt = 'd', + .type = PMEM_POOL_TYPE_OBJ, + .req = OPT_REQ0('O') | OPT_REQ1('o'), + }, + { 0, 0, 0} +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Show information about pmem pool from specified file.\n" +"\n" +"Common options:\n" +" -h, --help Print this help and exit.\n" +" -V, --version Print version and exit.\n" +" -v, --verbose Increase verbosity level.\n" +" -f, --force blk|log|obj|btt Force parsing a pool of specified type.\n" +" -n, --human Print sizes in human readable format.\n" +" -x, --headers-hex Hexdump all headers.\n" +" -d, --data Dump log data and blocks.\n" +" -s, --stats Print statistics.\n" +" -r, --range Range of blocks/chunks/objects.\n" +" -k, --bad-blocks= Print bad blocks.\n" +"\n" +"Options for PMEMLOG:\n" +" -w, --walk Chunk size.\n" +"\n" +"Options for PMEMBLK:\n" +" -m, --map Print BTT Map entries.\n" +" -g, --flog Print BTT FLOG entries.\n" +" -B, --backup Print BTT Info header backup.\n" +" -z, --skip-zeros Skip blocks marked with zero flag.\n" +" -e, --skip-error Skip blocks marked with error flag.\n" +" -u, --skip-no-flag Skip blocks not marked with any flag.\n" +"\n" +"Options for PMEMOBJ:\n" +" -l, --lanes [] Print lanes from specified range.\n" +" -R, --recovery Print only lanes which need recovery.\n" +" -S, --section tx,allocator,list Print only specified sections.\n" +" -O, --object-store Print object store.\n" +" -t, --types Specify objects' type numbers range.\n" +" -E, --no-empty Print only non-empty object store lists.\n" +" -o, --root Print root object information.\n" +" -A, --alloc-header Print allocation header for objects in\n" +" object store.\n" +" -a, --oob-header Print OOB header.\n" +" -H, --heap Print heap header.\n" +" -Z, --zones [] Print zones header. If range is specified\n" +" and --object|-O option is specified prints\n" +" objects from specified zones only.\n" +" -C, --chunks [] Print chunks header. If range is specified\n" +" and --object|-O option is specified prints\n" +" objects from specified chunks only.\n" +" -T, --chunk-type used,free,run,footer\n" +" Print only specified type(s) of chunk.\n" +" [requires --chunks|-C]\n" +" -b, --bitmap Print chunk run's bitmap in graphical\n" +" format.
[requires --chunks|-C]\n" +" -p, --replica Print info from specified replica\n" +"For complete documentation see %s-info(1) manual page.\n" +; + +/* + * print_usage -- print application usage short description + */ +static void +print_usage(const char *appname) +{ + printf("Usage: %s info [] \n", appname); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_info_help -- print application usage detailed description + */ +void +pmempool_info_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * parse_args -- parse command line arguments + * + * Parse command line arguments and store them in pmempool_info_args + * structure. + * Terminates process if invalid arguments passed. + */ +static int +parse_args(const char *appname, int argc, char *argv[], + struct pmempool_info_args *argsp, + struct options *opts) +{ + int opt; + + if (argc == 1) { + print_usage(appname); + + return -1; + } + + struct ranges *rangesp = &argsp->ranges; + while ((opt = util_options_getopt(argc, argv, + "vhnf:ezuF:L:c:dmxVw:gBsr:lRS:OECZHT:bot:aAp:k:", + opts)) != -1) { + + switch (opt) { + case 'v': + argsp->vlevel = VERBOSE_MAX; + break; + case 'V': + print_version(appname); + exit(EXIT_SUCCESS); + case 'h': + pmempool_info_help(appname); + exit(EXIT_SUCCESS); + case 'n': + argsp->human = true; + break; + case 'f': + argsp->type = pmem_pool_type_parse_str(optarg); + if (argsp->type == PMEM_POOL_TYPE_UNKNOWN) { + outv_err("'%s' -- unknown pool type\n", optarg); + return -1; + } + argsp->force = true; + break; + case 'k': + if (strcmp(optarg, "no") == 0) { + argsp->badblocks = PRINT_BAD_BLOCKS_NO; + } else if (strcmp(optarg, "yes") == 0) { + argsp->badblocks = PRINT_BAD_BLOCKS_YES; + } else { + outv_err( + "'%s' -- invalid argument of the '-k/--bad-blocks' option\n", + optarg); + return -1; + } + break; + case 'e': + argsp->blk.skip_error = true; + break; + case 'z': + argsp->blk.skip_zeros = true; + break; + case 'u': + argsp->blk.skip_no_flag = true; + break; + case 'r': + if (util_parse_ranges(optarg, rangesp, + ENTIRE_UINT64)) { + outv_err("'%s' -- cannot parse range(s)\n", + optarg); + return -1; + } + + if (rangesp == &argsp->ranges) + argsp->use_range = 1; + + break; + case 'd': + argsp->vdata = VERBOSE_DEFAULT; + break; + case 'm': + argsp->blk.vmap = VERBOSE_DEFAULT; + break; + case 'g': + argsp->blk.vflog = VERBOSE_DEFAULT; + break; + case 'B': + argsp->blk.vbackup = VERBOSE_DEFAULT; + break; + case 'x': + argsp->vhdrdump = VERBOSE_DEFAULT; + break; + case 's': + argsp->vstats = VERBOSE_DEFAULT; + break; + case 'w': + argsp->log.walk = (size_t)atoll(optarg); + if (argsp->log.walk == 0) { + outv_err("'%s' -- invalid chunk size\n", + optarg); + return -1; + } + break; + case 'l': + argsp->obj.vlanes = VERBOSE_DEFAULT; + rangesp = &argsp->obj.lane_ranges; + break; + case 'R': + argsp->obj.lanes_recovery = true; + break; + case 'O': + argsp->obj.vobjects = VERBOSE_DEFAULT; + rangesp = &argsp->ranges; + break; + case 'a': + argsp->obj.voobhdr = VERBOSE_DEFAULT; + break; + case 'A': + argsp->obj.valloc = VERBOSE_DEFAULT; + break; + case 'E': + argsp->obj.ignore_empty_obj = true; + break; + case 'Z': + argsp->obj.vzonehdr = VERBOSE_DEFAULT; + rangesp = &argsp->obj.zone_ranges; + break; + case 'C': + argsp->obj.vchunkhdr = VERBOSE_DEFAULT; + rangesp = &argsp->obj.chunk_ranges; + break; + case 'H': + argsp->obj.vheap = 
VERBOSE_DEFAULT; + break; + case 'T': + argsp->obj.chunk_types = 0; + if (util_parse_chunk_types(optarg, + &argsp->obj.chunk_types) || + (argsp->obj.chunk_types & + (1 << CHUNK_TYPE_UNKNOWN))) { + outv_err("'%s' -- cannot parse chunk type(s)\n", + optarg); + return -1; + } + break; + case 'o': + argsp->obj.vroot = VERBOSE_DEFAULT; + break; + case 't': + if (util_parse_ranges(optarg, + &argsp->obj.type_ranges, ENTIRE_UINT64)) { + outv_err("'%s' -- cannot parse range(s)\n", + optarg); + return -1; + } + break; + case 'b': + argsp->obj.vbitmap = VERBOSE_DEFAULT; + break; + case 'p': + { + char *endptr; + int olderrno = errno; + errno = 0; + long long ll = strtoll(optarg, &endptr, 10); + if ((endptr && *endptr != '\0') || errno) { + outv_err("'%s' -- invalid replica number", + optarg); + return -1; + } + errno = olderrno; + argsp->obj.replica = (size_t)ll; + break; + } + default: + print_usage(appname); + return -1; + } + } + + if (optind < argc) { + argsp->file = argv[optind]; + } else { + print_usage(appname); + return -1; + } + + if (!argsp->use_range) + util_ranges_add(&argsp->ranges, ENTIRE_UINT64); + + if (util_ranges_empty(&argsp->obj.type_ranges)) + util_ranges_add(&argsp->obj.type_ranges, ENTIRE_UINT64); + + if (util_ranges_empty(&argsp->obj.lane_ranges)) + util_ranges_add(&argsp->obj.lane_ranges, ENTIRE_UINT64); + + if (util_ranges_empty(&argsp->obj.zone_ranges)) + util_ranges_add(&argsp->obj.zone_ranges, ENTIRE_UINT64); + + if (util_ranges_empty(&argsp->obj.chunk_ranges)) + util_ranges_add(&argsp->obj.chunk_ranges, ENTIRE_UINT64); + + return 0; +} + +/* + * pmempool_info_read -- read data from file + */ +int +pmempool_info_read(struct pmem_info *pip, void *buff, size_t nbytes, + uint64_t off) +{ + return pool_set_file_read(pip->pfile, buff, nbytes, off); +} + +/* + * pmempool_info_badblocks -- (internal) prints info about file badblocks + */ +static int +pmempool_info_badblocks(struct pmem_info *pip, const char *file_name, int v) +{ + int ret; + + if (pip->args.badblocks != PRINT_BAD_BLOCKS_YES) + return 0; + + struct badblocks *bbs = badblocks_new(); + if (bbs == NULL) + return -1; + + ret = badblocks_get(file_name, bbs); + if (ret) { + if (errno == ENOTSUP) { + outv(v, BB_NOT_SUPP "\n"); + ret = -1; + goto exit_free; + } + + outv_err("checking bad blocks failed -- '%s'", file_name); + goto exit_free; + } + + if (bbs->bb_cnt == 0 || bbs->bbv == NULL) + goto exit_free; + + outv(v, "bad blocks:\n"); + outv(v, "\toffset\t\tlength\n"); + + unsigned b; + for (b = 0; b < bbs->bb_cnt; b++) { + outv(v, "\t%zu\t\t%zu\n", + B2SEC(bbs->bbv[b].offset), + B2SEC(bbs->bbv[b].length)); + } + +exit_free: + badblocks_delete(bbs); + + return ret; +} + +/* + * pmempool_info_part -- (internal) print info about poolset part + */ +static int +pmempool_info_part(struct pmem_info *pip, unsigned repn, unsigned partn, int v) +{ + /* get path of the part file */ + const char *path = NULL; + if (repn != UNDEF_REPLICA && partn != UNDEF_PART) { + outv(v, "part %u:\n", partn); + struct pool_set_part *part = + &pip->pfile->poolset->replica[repn]->part[partn]; + path = part->path; + } else { + outv(v, "Part file:\n"); + path = pip->file_name; + } + outv_field(v, "path", "%s", path); + + enum file_type type = util_file_get_type(path); + if (type < 0) + return -1; + + const char *type_str = type == TYPE_DEVDAX ? 
"device dax" : + "regular file"; + outv_field(v, "type", "%s", type_str); + + /* get size of the part file */ + ssize_t size = util_file_get_size(path); + if (size < 0) { + outv_err("couldn't get size of %s", path); + return -1; + } + outv_field(v, "size", "%s", out_get_size_str((size_t)size, + pip->args.human)); + + /* get alignment of device dax */ + if (type == TYPE_DEVDAX) { + size_t alignment = util_file_device_dax_alignment(path); + outv_field(v, "alignment", "%s", out_get_size_str(alignment, + pip->args.human)); + } + + /* look for bad blocks */ + if (pmempool_info_badblocks(pip, path, VERBOSE_DEFAULT)) { + outv_err("Unable to retrieve badblock info"); + return -1; + } + + return 0; +} + +/* + * pmempool_info_directory -- (internal) print information about directory + */ +static void +pmempool_info_directory(struct pool_set_directory *d, + int v) +{ + outv(v, "Directory %s:\n", d->path); + outv_field(v, "reservation size", "%lu", d->resvsize); +} + +/* + * pmempool_info_replica -- (internal) print info about replica + */ +static int +pmempool_info_replica(struct pmem_info *pip, unsigned repn, int v) +{ + struct pool_replica *rep = pip->pfile->poolset->replica[repn]; + outv(v, "Replica %u%s - %s", repn, + repn == 0 ? " (master)" : "", + rep->remote == NULL ? "local" : "remote"); + + if (rep->remote) { + outv(v, ":\n"); + outv_field(v, "node", "%s", rep->remote->node_addr); + outv_field(v, "pool set", "%s", rep->remote->pool_desc); + + return 0; + } + + outv(v, ", %u part(s):\n", rep->nparts); + for (unsigned p = 0; p < rep->nparts; ++p) { + if (pmempool_info_part(pip, repn, p, v)) + return -1; + } + + if (pip->pfile->poolset->directory_based) { + size_t nd = VEC_SIZE(&rep->directory); + outv(v, "%lu %s:\n", nd, nd == 1 ? "Directory" : "Directories"); + struct pool_set_directory *d; + VEC_FOREACH_BY_PTR(d, &rep->directory) { + pmempool_info_directory(d, v); + } + } + + return 0; +} + +/* + * pmempool_info_poolset -- (internal) print info about poolset structure + */ +static int +pmempool_info_poolset(struct pmem_info *pip, int v) +{ + ASSERTeq(pip->params.is_poolset, 1); + if (pip->pfile->poolset->directory_based) + outv(v, "Directory-based Poolset structure:\n"); + else + outv(v, "Poolset structure:\n"); + + outv_field(v, "Number of replicas", "%u", + pip->pfile->poolset->nreplicas); + for (unsigned r = 0; r < pip->pfile->poolset->nreplicas; ++r) { + if (pmempool_info_replica(pip, r, v)) + return -1; + } + + if (pip->pfile->poolset->options > 0) { + outv_title(v, "Poolset options"); + if (pip->pfile->poolset->options & OPTION_SINGLEHDR) + outv(v, "%s", "SINGLEHDR\n"); + } + + return 0; +} + +/* + * pmempool_info_pool_hdr -- (internal) print pool header information + */ +static int +pmempool_info_pool_hdr(struct pmem_info *pip, int v) +{ + static const char *alignment_desc_str[] = { + " char", + " short", + " int", + " long", + " long long", + " size_t", + " os_off_t", + " float", + " double", + " long double", + " void *", + }; + static const size_t alignment_desc_n = + sizeof(alignment_desc_str) / sizeof(alignment_desc_str[0]); + + int ret = 0; + struct pool_hdr *hdr = malloc(sizeof(struct pool_hdr)); + if (!hdr) + err(1, "Cannot allocate memory for pool_hdr"); + + if (pmempool_info_read(pip, hdr, sizeof(*hdr), 0)) { + outv_err("cannot read pool header\n"); + free(hdr); + return -1; + } + + struct arch_flags arch_flags; + util_get_arch_flags(&arch_flags); + + outv_title(v, "POOL Header"); + outv_hexdump(pip->args.vhdrdump, hdr, sizeof(*hdr), 0, 1); + + 
util_convert2h_hdr_nocheck(hdr); + + outv_field(v, "Signature", "%.*s%s", POOL_HDR_SIG_LEN, + hdr->signature, + pip->params.is_part ? + " [part file]" : ""); + outv_field(v, "Major", "%d", hdr->major); + outv_field(v, "Mandatory features", "%s", + out_get_incompat_features_str(hdr->features.incompat)); + outv_field(v, "Not mandatory features", "0x%x", hdr->features.compat); + outv_field(v, "Forced RO", "0x%x", hdr->features.ro_compat); + outv_field(v, "Pool set UUID", "%s", + out_get_uuid_str(hdr->poolset_uuid)); + outv_field(v, "UUID", "%s", out_get_uuid_str(hdr->uuid)); + outv_field(v, "Previous part UUID", "%s", + out_get_uuid_str(hdr->prev_part_uuid)); + outv_field(v, "Next part UUID", "%s", + out_get_uuid_str(hdr->next_part_uuid)); + outv_field(v, "Previous replica UUID", "%s", + out_get_uuid_str(hdr->prev_repl_uuid)); + outv_field(v, "Next replica UUID", "%s", + out_get_uuid_str(hdr->next_repl_uuid)); + outv_field(v, "Creation Time", "%s", + out_get_time_str((time_t)hdr->crtime)); + + uint64_t ad = hdr->arch_flags.alignment_desc; + uint64_t cur_ad = arch_flags.alignment_desc; + + outv_field(v, "Alignment Descriptor", "%s", + out_get_alignment_desc_str(ad, cur_ad)); + + for (size_t i = 0; i < alignment_desc_n; i++) { + uint64_t a = GET_ALIGNMENT(ad, i); + if (ad == cur_ad) { + outv_field(v + 1, alignment_desc_str[i], + "%2lu", a); + } else { + uint64_t av = GET_ALIGNMENT(cur_ad, i); + if (a == av) { + outv_field(v + 1, alignment_desc_str[i], + "%2lu [OK]", a); + } else { + outv_field(v + 1, alignment_desc_str[i], + "%2lu [wrong! should be %2lu]", a, av); + } + } + } + + outv_field(v, "Class", "%s", + out_get_arch_machine_class_str( + hdr->arch_flags.machine_class)); + outv_field(v, "Data", "%s", + out_get_arch_data_str(hdr->arch_flags.data)); + outv_field(v, "Machine", "%s", + out_get_arch_machine_str(hdr->arch_flags.machine)); + outv_field(v, "Last shutdown", "%s", + out_get_last_shutdown_str(hdr->sds.dirty)); + outv_field(v, "Checksum", "%s", out_get_checksum(hdr, sizeof(*hdr), + &hdr->checksum, POOL_HDR_CSUM_END_OFF(hdr))); + + free(hdr); + + return ret; +} + +/* + * pmempool_info_file -- print info about single file + */ +static int +pmempool_info_file(struct pmem_info *pip, const char *file_name) +{ + int ret = 0; + + pip->file_name = file_name; + + /* + * If force flag is set 'types' fields _must_ hold + * single pool type - this is validated when processing + * command line arguments. 
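+	 * (parse_args() rejects -f/--force values other than one known
+	 * pool type.)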
+ */ + if (pip->args.force) { + pip->type = pip->args.type; + } else { + if (pmem_pool_parse_params(file_name, &pip->params, 1)) { + if (errno) + perror(file_name); + else + outv_err("%s: cannot determine type of pool\n", + file_name); + return -1; + } + + pip->type = pip->params.type; + } + + if (PMEM_POOL_TYPE_UNKNOWN == pip->type) { + outv_err("%s: unknown pool type -- '%s'\n", file_name, + pip->params.signature); + return -1; + } else if (!pip->args.force && !pip->params.is_checksum_ok) { + outv_err("%s: invalid checksum\n", file_name); + return -1; + } else { + if (util_options_verify(pip->opts, pip->type)) + return -1; + + pip->pfile = pool_set_file_open(file_name, 0, !pip->args.force); + if (!pip->pfile) { + perror(file_name); + return -1; + } + + /* check if we should check and print bad blocks */ + if (pip->args.badblocks == PRINT_BAD_BLOCKS_NOT_SET) { + struct pool_hdr hdr; + if (pmempool_info_read(pip, &hdr, sizeof(hdr), 0)) { + outv_err("cannot read pool header\n"); + goto out_close; + } + util_convert2h_hdr_nocheck(&hdr); + if (hdr.features.compat & POOL_FEAT_CHECK_BAD_BLOCKS) + pip->args.badblocks = PRINT_BAD_BLOCKS_YES; + else + pip->args.badblocks = PRINT_BAD_BLOCKS_NO; + } + + if (pip->type != PMEM_POOL_TYPE_BTT) { + struct pool_set *ps = pip->pfile->poolset; + for (unsigned r = 0; r < ps->nreplicas; ++r) { + if (ps->replica[r]->remote == NULL && + mprotect(ps->replica[r]->part[0].addr, + ps->replica[r]->repsize, + PROT_READ) < 0) { + outv_err( + "%s: failed to change pool protection", + pip->pfile->fname); + + ret = -1; + goto out_close; + } + } + } + + if (pip->args.obj.replica) { + size_t nreplicas = pool_set_file_nreplicas(pip->pfile); + if (nreplicas == 1) { + outv_err("only master replica available"); + ret = -1; + goto out_close; + } + + if (pip->args.obj.replica >= nreplicas) { + outv_err("replica number out of range" + " (valid range is: 0-%" PRIu64 ")", + nreplicas - 1); + ret = -1; + goto out_close; + } + + if (pool_set_file_set_replica(pip->pfile, + pip->args.obj.replica)) { + outv_err("setting replica number failed"); + ret = -1; + goto out_close; + } + } + + /* hdr info is not present in btt device */ + if (pip->type != PMEM_POOL_TYPE_BTT) { + if (pip->params.is_poolset && + pmempool_info_poolset(pip, + VERBOSE_DEFAULT)) { + ret = -1; + goto out_close; + } + if (!pip->params.is_poolset && + pmempool_info_part(pip, UNDEF_REPLICA, + UNDEF_PART, VERBOSE_DEFAULT)) { + ret = -1; + goto out_close; + } + if (pmempool_info_pool_hdr(pip, VERBOSE_DEFAULT)) { + ret = -1; + goto out_close; + } + } + + if (pip->params.is_part) { + ret = 0; + goto out_close; + } + + switch (pip->type) { + case PMEM_POOL_TYPE_LOG: + ret = pmempool_info_log(pip); + break; + case PMEM_POOL_TYPE_BLK: + ret = pmempool_info_blk(pip); + break; + case PMEM_POOL_TYPE_OBJ: + ret = pmempool_info_obj(pip); + break; + case PMEM_POOL_TYPE_BTT: + ret = pmempool_info_btt(pip); + break; + case PMEM_POOL_TYPE_UNKNOWN: + default: + ret = -1; + break; + } +out_close: + pool_set_file_close(pip->pfile); + } + + return ret; +} + +/* + * pmempool_info_alloc -- allocate pmem info context + */ +static struct pmem_info * +pmempool_info_alloc(void) +{ + struct pmem_info *pip = malloc(sizeof(struct pmem_info)); + if (!pip) + err(1, "Cannot allocate memory for pmempool info context"); + + if (pip) { + memset(pip, 0, sizeof(*pip)); + + /* set default command line parameters */ + memcpy(&pip->args, &pmempool_info_args_default, + sizeof(pip->args)); + pip->opts = util_options_alloc(long_options, + sizeof(long_options) / + 
sizeof(long_options[0]), + option_requirements); + + PMDK_LIST_INIT(&pip->args.ranges.head); + PMDK_LIST_INIT(&pip->args.obj.type_ranges.head); + PMDK_LIST_INIT(&pip->args.obj.lane_ranges.head); + PMDK_LIST_INIT(&pip->args.obj.zone_ranges.head); + PMDK_LIST_INIT(&pip->args.obj.chunk_ranges.head); + PMDK_TAILQ_INIT(&pip->obj.stats.type_stats); + } + + return pip; +} + +/* + * pmempool_info_free -- free pmem info context + */ +static void +pmempool_info_free(struct pmem_info *pip) +{ + if (pip->obj.stats.zone_stats) { + for (uint64_t i = 0; i < pip->obj.stats.n_zones; ++i) + VEC_DELETE(&pip->obj.stats.zone_stats[i].class_stats); + + free(pip->obj.stats.zone_stats); + } + util_options_free(pip->opts); + util_ranges_clear(&pip->args.ranges); + util_ranges_clear(&pip->args.obj.type_ranges); + util_ranges_clear(&pip->args.obj.zone_ranges); + util_ranges_clear(&pip->args.obj.chunk_ranges); + util_ranges_clear(&pip->args.obj.lane_ranges); + + while (!PMDK_TAILQ_EMPTY(&pip->obj.stats.type_stats)) { + struct pmem_obj_type_stats *type = + PMDK_TAILQ_FIRST(&pip->obj.stats.type_stats); + PMDK_TAILQ_REMOVE(&pip->obj.stats.type_stats, type, next); + free(type); + } + + free(pip); +} + +int +pmempool_info_func(const char *appname, int argc, char *argv[]) +{ + int ret = 0; + struct pmem_info *pip = pmempool_info_alloc(); + + /* read command line arguments */ + if ((ret = parse_args(appname, argc, argv, &pip->args, + pip->opts)) == 0) { + /* set some output format values */ + out_set_vlevel(pip->args.vlevel); + out_set_col_width(pip->args.col_width); + + ret = pmempool_info_file(pip, pip->args.file); + } + + pmempool_info_free(pip); + + return ret; +} diff --git a/src/pmdk/src/tools/pmempool/info.h b/src/pmdk/src/tools/pmempool/info.h new file mode 100644 index 000000000..62c2e8307 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/info.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * info.h -- pmempool info command header file + */ + +#include "vec.h" + +/* + * Verbose levels used in application: + * + * VERBOSE_DEFAULT: + * Default value for application's verbosity level. + * This is also set for data structures which should be + * printed without any command line argument. + * + * VERBOSE_MAX: + * Maximum value for application's verbosity level. + * This value is used when the -v command line argument is passed. + * + * VERBOSE_SILENT: + * This value is defined as 0, a level at which nothing is printed, + * and it is used only for verbosity levels of data structures which + * should _not_ be printed unless an appropriate command line argument + * raises their level.
+ */ +#define VERBOSE_SILENT 0 +#define VERBOSE_DEFAULT 1 +#define VERBOSE_MAX 2 + +/* + * print_bb_e -- printing bad blocks options + */ +enum print_bb_e { + PRINT_BAD_BLOCKS_NOT_SET, + PRINT_BAD_BLOCKS_NO, + PRINT_BAD_BLOCKS_YES, + + PRINT_BAD_BLOCKS_MAX +}; + +/* + * pmempool_info_args -- structure for storing command line arguments + */ +struct pmempool_info_args { + char *file; /* input file */ + unsigned col_width; /* column width for printing fields */ + bool human; /* sizes in human-readable formats */ + bool force; /* force parsing pool */ + enum print_bb_e badblocks; /* print bad blocks */ + pmem_pool_type_t type; /* forced pool type */ + bool use_range; /* use range for blocks */ + struct ranges ranges; /* range of block/chunks to dump */ + int vlevel; /* verbosity level */ + int vdata; /* verbosity level for data dump */ + int vhdrdump; /* verbosity level for headers hexdump */ + int vstats; /* verbosity level for statistics */ + struct { + size_t walk; /* data chunk size */ + } log; + struct { + int vmap; /* verbosity level for BTT Map */ + int vflog; /* verbosity level for BTT FLOG */ + int vbackup; /* verbosity level for BTT Info backup */ + bool skip_zeros; /* skip blocks marked with zero flag */ + bool skip_error; /* skip blocks marked with error flag */ + bool skip_no_flag; /* skip blocks not marked with any flag */ + } blk; + struct { + int vlanes; /* verbosity level for lanes */ + int vroot; + int vobjects; + int valloc; + int voobhdr; + int vheap; + int vzonehdr; + int vchunkhdr; + int vbitmap; + bool lanes_recovery; + bool ignore_empty_obj; + uint64_t chunk_types; + size_t replica; + struct ranges lane_ranges; + struct ranges type_ranges; + struct ranges zone_ranges; + struct ranges chunk_ranges; + } obj; +}; + +/* + * pmem_blk_stats -- structure with statistics for pmemblk + */ +struct pmem_blk_stats { + uint32_t total; /* number of processed blocks */ + uint32_t zeros; /* number of blocks marked by zero flag */ + uint32_t errors; /* number of blocks marked by error flag */ + uint32_t noflag; /* number of blocks not marked with any flag */ +}; + +struct pmem_obj_class_stats { + uint64_t n_units; + uint64_t n_used; + uint64_t unit_size; + uint64_t alignment; + uint32_t nallocs; + uint16_t flags; +}; + +struct pmem_obj_zone_stats { + uint64_t n_chunks; + uint64_t n_chunks_type[MAX_CHUNK_TYPE]; + uint64_t size_chunks; + uint64_t size_chunks_type[MAX_CHUNK_TYPE]; + VEC(, struct pmem_obj_class_stats) class_stats; +}; + +struct pmem_obj_type_stats { + PMDK_TAILQ_ENTRY(pmem_obj_type_stats) next; + uint64_t type_num; + uint64_t n_objects; + uint64_t n_bytes; +}; + +struct pmem_obj_stats { + uint64_t n_total_objects; + uint64_t n_total_bytes; + uint64_t n_zones; + uint64_t n_zones_used; + struct pmem_obj_zone_stats *zone_stats; + PMDK_TAILQ_HEAD(obj_type_stats_head, pmem_obj_type_stats) type_stats; +}; + +/* + * pmem_info -- context for pmeminfo application + */ +struct pmem_info { + const char *file_name; /* current file name */ + struct pool_set_file *pfile; + struct pmempool_info_args args; /* arguments parsed from command line */ + struct options *opts; + struct pool_set *poolset; + pmem_pool_type_t type; + struct pmem_pool_params params; + struct { + struct pmem_blk_stats stats; + } blk; + struct { + struct pmemobjpool *pop; + struct palloc_heap *heap; + struct alloc_class_collection *alloc_classes; + size_t size; + struct pmem_obj_stats stats; + uint64_t uuid_lo; + uint64_t objid; + } obj; +}; + +int pmempool_info_func(const char *appname, int argc, char *argv[]); +void 
pmempool_info_help(const char *appname); + +int pmempool_info_read(struct pmem_info *pip, void *buff, + size_t nbytes, uint64_t off); +int pmempool_info_blk(struct pmem_info *pip); +int pmempool_info_log(struct pmem_info *pip); +int pmempool_info_obj(struct pmem_info *pip); +int pmempool_info_btt(struct pmem_info *pip); diff --git a/src/pmdk/src/tools/pmempool/info_blk.c b/src/pmdk/src/tools/pmempool/info_blk.c new file mode 100644 index 000000000..61d64ccfe --- /dev/null +++ b/src/pmdk/src/tools/pmempool/info_blk.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2018, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * info_blk.c -- pmempool info command source file for blk pool + */ +#include +#include +#include +#include +#include +#include + +#include "os.h" +#include "common.h" +#include "output.h" +#include "info.h" +#include "btt.h" + +/* + * pmempool_info_get_range -- get blocks/data chunk range + * + * Get range based on command line arguments and maximum value. + * Return value: + * 0 - range is empty + * 1 - range is not empty + */ +static int +pmempool_info_get_range(struct pmem_info *pip, struct range *rangep, + struct range *curp, uint32_t max, uint64_t offset) +{ + /* not using range */ + if (!pip->args.use_range) { + rangep->first = 0; + rangep->last = max; + + return 1; + } + + if (curp->first > offset + max) + return 0; + + if (curp->first >= offset) + rangep->first = curp->first - offset; + else + rangep->first = 0; + + if (curp->last < offset) + return 0; + + if (curp->last <= offset + max) + rangep->last = curp->last - offset; + else + rangep->last = max; + + return 1; +} + +/* + * info_blk_skip_block -- get action type for block/data chunk + * + * Return value indicating whether processing block/data chunk + * should be skipped. 
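+ * The decision follows the -z/--skip-zeros, -e/--skip-error and
+ * -u/--skip-no-flag command line options.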
+ * + * Return values: + * 0 - continue processing + * 1 - skip current block + */ +static int +info_blk_skip_block(struct pmem_info *pip, int is_zero, + int is_error) +{ + if (pip->args.blk.skip_no_flag && !is_zero && !is_error) + return 1; + + if (is_zero && pip->args.blk.skip_zeros) + return 1; + + if (is_error && pip->args.blk.skip_error) + return 1; + + return 0; +} + +/* + * info_btt_data -- print block data and corresponding flags from map + */ +static int +info_btt_data(struct pmem_info *pip, int v, struct btt_info *infop, + uint64_t arena_off, uint64_t offset, uint64_t *countp) +{ + if (!outv_check(v)) + return 0; + + int ret = 0; + + size_t mapsize = infop->external_nlba * BTT_MAP_ENTRY_SIZE; + uint32_t *map = malloc(mapsize); + if (!map) + err(1, "Cannot allocate memory for BTT map"); + + uint8_t *block_buff = malloc(infop->external_lbasize); + if (!block_buff) + err(1, "Cannot allocate memory for pmemblk block buffer"); + + /* read btt map area */ + if (pmempool_info_read(pip, (uint8_t *)map, mapsize, + arena_off + infop->mapoff)) { + outv_err("wrong BTT Map size or offset\n"); + ret = -1; + goto error; + } + + uint64_t i; + struct range *curp = NULL; + struct range range; + FOREACH_RANGE(curp, &pip->args.ranges) { + if (pmempool_info_get_range(pip, &range, curp, + infop->external_nlba - 1, offset) == 0) + continue; + for (i = range.first; i <= range.last; i++) { + uint32_t map_entry = le32toh(map[i]); + int is_init = (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) + == 0; + int is_zero = (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) + == BTT_MAP_ENTRY_ZERO || is_init; + int is_error = (map_entry & ~BTT_MAP_ENTRY_LBA_MASK) + == BTT_MAP_ENTRY_ERROR; + + uint64_t blockno = is_init ? i : + map_entry & BTT_MAP_ENTRY_LBA_MASK; + + if (info_blk_skip_block(pip, + is_zero, is_error)) + continue; + + /* compute block's data address */ + uint64_t block_off = arena_off + infop->dataoff + + blockno * infop->internal_lbasize; + + if (pmempool_info_read(pip, block_buff, + infop->external_lbasize, block_off)) { + outv_err("cannot read %lu block\n", i); + ret = -1; + goto error; + } + + if (*countp == 0) + outv_title(v, "PMEM BLK blocks data"); + + /* + * Print block number, offset and flags + * from map entry. 
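+			 * The two most significant bits of an entry carry the
+			 * zero/error flags; an entry with both bits clear is
+			 * the initial identity mapping and is reported as a
+			 * zeroed block.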
+ */ + outv(v, "Block %10lu: offset: %s\n", + offset + i, + out_get_btt_map_entry(map_entry)); + + /* dump block's data */ + outv_hexdump(v, block_buff, infop->external_lbasize, + block_off, 1); + + *countp = *countp + 1; + } + } +error: + free(map); + free(block_buff); + return ret; +} + +/* + * info_btt_map -- print all map entries + */ +static int +info_btt_map(struct pmem_info *pip, int v, + struct btt_info *infop, uint64_t arena_off, + uint64_t offset, uint64_t *count) +{ + if (!outv_check(v) && !outv_check(pip->args.vstats)) + return 0; + + int ret = 0; + size_t mapsize = infop->external_nlba * BTT_MAP_ENTRY_SIZE; + + uint32_t *map = malloc(mapsize); + if (!map) + err(1, "Cannot allocate memory for BTT map"); + + /* read btt map area */ + if (pmempool_info_read(pip, (uint8_t *)map, mapsize, + arena_off + infop->mapoff)) { + outv_err("wrong BTT Map size or offset\n"); + ret = -1; + goto error; + } + + uint32_t arena_count = 0; + + uint64_t i; + struct range *curp = NULL; + struct range range; + FOREACH_RANGE(curp, &pip->args.ranges) { + if (pmempool_info_get_range(pip, &range, curp, + infop->external_nlba - 1, offset) == 0) + continue; + for (i = range.first; i <= range.last; i++) { + uint32_t entry = le32toh(map[i]); + int is_zero = (entry & ~BTT_MAP_ENTRY_LBA_MASK) == + BTT_MAP_ENTRY_ZERO || + (entry & ~BTT_MAP_ENTRY_LBA_MASK) == 0; + int is_error = (entry & ~BTT_MAP_ENTRY_LBA_MASK) == + BTT_MAP_ENTRY_ERROR; + + if (info_blk_skip_block(pip, + is_zero, is_error) == 0) { + if (arena_count == 0) + outv_title(v, "PMEM BLK BTT Map"); + + if (is_zero) + pip->blk.stats.zeros++; + if (is_error) + pip->blk.stats.errors++; + if (!is_zero && !is_error) + pip->blk.stats.noflag++; + + pip->blk.stats.total++; + + arena_count++; + (*count)++; + + outv(v, "%010lu: %s\n", offset + i, + out_get_btt_map_entry(entry)); + } + } + } +error: + free(map); + return ret; +} + +/* + * info_btt_flog -- print all flog entries + */ +static int +info_btt_flog(struct pmem_info *pip, int v, + struct btt_info *infop, uint64_t arena_off) +{ + if (!outv_check(v)) + return 0; + + int ret = 0; + struct btt_flog *flogp = NULL; + struct btt_flog *flogpp = NULL; + uint64_t flog_size = infop->nfree * + roundup(2 * sizeof(struct btt_flog), BTT_FLOG_PAIR_ALIGN); + flog_size = roundup(flog_size, BTT_ALIGNMENT); + uint8_t *buff = malloc(flog_size); + if (!buff) + err(1, "Cannot allocate memory for FLOG entries"); + + if (pmempool_info_read(pip, buff, flog_size, + arena_off + infop->flogoff)) { + outv_err("cannot read BTT FLOG"); + ret = -1; + goto error; + } + + outv_title(v, "PMEM BLK BTT FLOG"); + + uint8_t *ptr = buff; + uint32_t i; + for (i = 0; i < infop->nfree; i++) { + flogp = (struct btt_flog *)ptr; + flogpp = flogp + 1; + + btt_flog_convert2h(flogp); + btt_flog_convert2h(flogpp); + + outv(v, "%010d:\n", i); + outv_field(v, "LBA", "0x%08x", flogp->lba); + outv_field(v, "Old map", "0x%08x: %s", flogp->old_map, + out_get_btt_map_entry(flogp->old_map)); + outv_field(v, "New map", "0x%08x: %s", flogp->new_map, + out_get_btt_map_entry(flogp->new_map)); + outv_field(v, "Seq", "0x%x", flogp->seq); + + outv_field(v, "LBA'", "0x%08x", flogpp->lba); + outv_field(v, "Old map'", "0x%08x: %s", flogpp->old_map, + out_get_btt_map_entry(flogpp->old_map)); + outv_field(v, "New map'", "0x%08x: %s", flogpp->new_map, + out_get_btt_map_entry(flogpp->new_map)); + outv_field(v, "Seq'", "0x%x", flogpp->seq); + + ptr += BTT_FLOG_PAIR_ALIGN; + } +error: + free(buff); + return ret; +} + +/* + * info_btt_stats -- print btt related statistics + */ 
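+/*
+ * Note that these counters are accumulated in info_btt_map(), which is
+ * why that function also walks the map when only --stats was requested
+ * (it checks pip->args.vstats in addition to its own verbosity level).
+ */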
+static void +info_btt_stats(struct pmem_info *pip, int v) +{ + if (pip->blk.stats.total > 0) { + outv_title(v, "PMEM BLK Statistics"); + double perc_zeros = (double)pip->blk.stats.zeros / + (double)pip->blk.stats.total * 100.0; + double perc_errors = (double)pip->blk.stats.errors / + (double)pip->blk.stats.total * 100.0; + double perc_noflag = (double)pip->blk.stats.noflag / + (double)pip->blk.stats.total * 100.0; + + outv_field(v, "Total blocks", "%u", pip->blk.stats.total); + outv_field(v, "Zeroed blocks", "%u [%s]", pip->blk.stats.zeros, + out_get_percentage(perc_zeros)); + outv_field(v, "Error blocks", "%u [%s]", pip->blk.stats.errors, + out_get_percentage(perc_errors)); + outv_field(v, "Blocks without flag", "%u [%s]", + pip->blk.stats.noflag, + out_get_percentage(perc_noflag)); + } +} + +/* + * info_btt_info -- print btt_info structure fields + */ +static int +info_btt_info(struct pmem_info *pip, int v, struct btt_info *infop) +{ + outv_field(v, "Signature", "%.*s", BTTINFO_SIG_LEN, infop->sig); + + outv_field(v, "UUID of container", "%s", + out_get_uuid_str(infop->parent_uuid)); + + outv_field(v, "Flags", "0x%x", infop->flags); + outv_field(v, "Major", "%d", infop->major); + outv_field(v, "Minor", "%d", infop->minor); + outv_field(v, "External LBA size", "%s", + out_get_size_str(infop->external_lbasize, + pip->args.human)); + outv_field(v, "External LBA count", "%u", infop->external_nlba); + outv_field(v, "Internal LBA size", "%s", + out_get_size_str(infop->internal_lbasize, + pip->args.human)); + outv_field(v, "Internal LBA count", "%u", infop->internal_nlba); + outv_field(v, "Free blocks", "%u", infop->nfree); + outv_field(v, "Info block size", "%s", + out_get_size_str(infop->infosize, pip->args.human)); + outv_field(v, "Next arena offset", "0x%lx", infop->nextoff); + outv_field(v, "Arena data offset", "0x%lx", infop->dataoff); + outv_field(v, "Area map offset", "0x%lx", infop->mapoff); + outv_field(v, "Area flog offset", "0x%lx", infop->flogoff); + outv_field(v, "Info block backup offset", "0x%lx", infop->infooff); + outv_field(v, "Checksum", "%s", out_get_checksum(infop, + sizeof(*infop), &infop->checksum, 0)); + + return 0; +} + +/* + * info_btt_layout -- print information about BTT layout + */ +static int +info_btt_layout(struct pmem_info *pip, os_off_t btt_off) +{ + int ret = 0; + + if (btt_off <= 0) { + outv_err("wrong BTT layout offset\n"); + return -1; + } + + struct btt_info *infop = NULL; + + infop = malloc(sizeof(struct btt_info)); + if (!infop) + err(1, "Cannot allocate memory for BTT Info structure"); + + int narena = 0; + uint64_t cur_lba = 0; + uint64_t count_data = 0; + uint64_t count_map = 0; + uint64_t offset = (uint64_t)btt_off; + uint64_t nextoff = 0; + + do { + /* read btt info area */ + if (pmempool_info_read(pip, infop, sizeof(*infop), offset)) { + ret = -1; + outv_err("cannot read BTT Info header\n"); + goto err; + } + + if (util_check_memory((uint8_t *)infop, + sizeof(*infop), 0) == 0) { + outv(1, "\n\n"); + break; + } + + outv(1, "\n[ARENA %d]", narena); + outv_title(1, "PMEM BLK BTT Info Header"); + outv_hexdump(pip->args.vhdrdump, infop, + sizeof(*infop), offset, 1); + + btt_info_convert2h(infop); + + nextoff = infop->nextoff; + + /* print btt info fields */ + if (info_btt_info(pip, 1, infop)) { + ret = -1; + goto err; + } + + /* dump blocks data */ + if (info_btt_data(pip, pip->args.vdata, + infop, offset, cur_lba, &count_data)) { + ret = -1; + goto err; + } + + /* print btt map entries and get statistics */ + if (info_btt_map(pip, pip->args.blk.vmap, 
infop, + offset, cur_lba, &count_map)) { + ret = -1; + goto err; + } + + /* print flog entries */ + if (info_btt_flog(pip, pip->args.blk.vflog, infop, + offset)) { + ret = -1; + goto err; + } + + /* increment LBA's counter before reading info backup */ + cur_lba += infop->external_nlba; + + /* read btt info backup area */ + if (pmempool_info_read(pip, infop, sizeof(*infop), + offset + infop->infooff)) { + outv_err("wrong BTT Info Backup size or offset\n"); + ret = -1; + goto err; + } + + outv_title(pip->args.blk.vbackup, + "PMEM BLK BTT Info Header Backup"); + if (outv_check(pip->args.blk.vbackup)) + outv_hexdump(pip->args.vhdrdump, infop, + sizeof(*infop), + offset + infop->infooff, 1); + + btt_info_convert2h(infop); + info_btt_info(pip, pip->args.blk.vbackup, infop); + + offset += nextoff; + narena++; + + } while (nextoff > 0); + + info_btt_stats(pip, pip->args.vstats); + +err: + if (infop) + free(infop); + + return ret; +} + +/* + * info_blk_descriptor -- print pmemblk descriptor + */ +static void +info_blk_descriptor(struct pmem_info *pip, int v, struct pmemblk *pbp) +{ + size_t pmemblk_size; + +#ifdef DEBUG + pmemblk_size = offsetof(struct pmemblk, write_lock); +#else + pmemblk_size = sizeof(*pbp); +#endif + + outv_title(v, "PMEM BLK Header"); + /* dump pmemblk header without pool_hdr */ + outv_hexdump(pip->args.vhdrdump, (uint8_t *)pbp + sizeof(pbp->hdr), + pmemblk_size - sizeof(pbp->hdr), sizeof(pbp->hdr), 1); + outv_field(v, "Block size", "%s", + out_get_size_str(pbp->bsize, pip->args.human)); + outv_field(v, "Is zeroed", pbp->is_zeroed ? "true" : "false"); +} + +/* + * pmempool_info_blk -- print information about block type pool + */ +int +pmempool_info_blk(struct pmem_info *pip) +{ + int ret; + struct pmemblk *pbp = malloc(sizeof(struct pmemblk)); + if (!pbp) + err(1, "Cannot allocate memory for pmemblk structure"); + + if (pmempool_info_read(pip, pbp, sizeof(struct pmemblk), 0)) { + outv_err("cannot read pmemblk header\n"); + free(pbp); + return -1; + } + + info_blk_descriptor(pip, VERBOSE_DEFAULT, pbp); + + ssize_t btt_off = (char *)pbp->data - (char *)pbp->addr; + ret = info_btt_layout(pip, btt_off); + + free(pbp); + + return ret; +} + +/* + * pmempool_info_btt -- print information about btt device + */ +int +pmempool_info_btt(struct pmem_info *pip) +{ + int ret; + outv(1, "\nBTT Device"); + ret = info_btt_layout(pip, DEFAULT_HDR_SIZE); + + return ret; +} diff --git a/src/pmdk/src/tools/pmempool/info_log.c b/src/pmdk/src/tools/pmempool/info_log.c new file mode 100644 index 000000000..8345e4a35 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/info_log.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * info_log.c -- pmempool info command source file for log pool + */ +#include +#include +#include +#include + +#include "common.h" +#include "output.h" +#include "info.h" + +/* + * info_log_data -- print used data from log pool + */ +static int +info_log_data(struct pmem_info *pip, int v, struct pmemlog *plp) +{ + if (!outv_check(v)) + return 0; + + uint64_t size_used = plp->write_offset - plp->start_offset; + + if (size_used == 0) + return 0; + + uint8_t *addr = pool_set_file_map(pip->pfile, plp->start_offset); + if (addr == MAP_FAILED) { + warn("%s", pip->file_name); + outv_err("cannot read pmem log data\n"); + return -1; + } + + if (pip->args.log.walk == 0) { + outv_title(v, "PMEMLOG data"); + struct range *curp = NULL; + PMDK_LIST_FOREACH(curp, &pip->args.ranges.head, next) { + uint8_t *ptr = addr + curp->first; + 
if (curp->last >= size_used) + curp->last = size_used - 1; + uint64_t count = curp->last - curp->first + 1; + outv_hexdump(v, ptr, count, curp->first + + plp->start_offset, 1); + size_used -= count; + if (!size_used) + break; + } + } else { + + /* + * Walk through used data with fixed chunk size + * passed by user. + */ + uint64_t nchunks = size_used / pip->args.log.walk; + + outv_title(v, "PMEMLOG data [chunks: total = %lu size = %ld]", + nchunks, pip->args.log.walk); + + struct range *curp = NULL; + PMDK_LIST_FOREACH(curp, &pip->args.ranges.head, next) { + uint64_t i; + for (i = curp->first; i <= curp->last && + i < nchunks; i++) { + outv(v, "Chunk %10lu:\n", i); + outv_hexdump(v, addr + i * pip->args.log.walk, + pip->args.log.walk, + plp->start_offset + + i * pip->args.log.walk, + 1); + } + } + } + + return 0; +} + +/* + * info_logs_stats -- print log type pool statistics + */ +static void +info_log_stats(struct pmem_info *pip, int v, struct pmemlog *plp) +{ + uint64_t size_total = plp->end_offset - plp->start_offset; + uint64_t size_used = plp->write_offset - plp->start_offset; + uint64_t size_avail = size_total - size_used; + + if (size_total == 0) + return; + + double perc_used = (double)size_used / (double)size_total * 100.0; + double perc_avail = 100.0 - perc_used; + + outv_title(v, "PMEM LOG Statistics"); + outv_field(v, "Total", "%s", + out_get_size_str(size_total, pip->args.human)); + outv_field(v, "Available", "%s [%s]", + out_get_size_str(size_avail, pip->args.human), + out_get_percentage(perc_avail)); + outv_field(v, "Used", "%s [%s]", + out_get_size_str(size_used, pip->args.human), + out_get_percentage(perc_used)); + +} + +/* + * info_log_descriptor -- print pmemlog descriptor and return 1 if + * write offset is valid + */ +static int +info_log_descriptor(struct pmem_info *pip, int v, struct pmemlog *plp) +{ + outv_title(v, "PMEM LOG Header"); + + /* dump pmemlog header without pool_hdr */ + outv_hexdump(pip->args.vhdrdump, (uint8_t *)plp + sizeof(plp->hdr), + sizeof(*plp) - sizeof(plp->hdr), + sizeof(plp->hdr), 1); + + log_convert2h(plp); + + int write_offset_valid = plp->write_offset >= plp->start_offset && + plp->write_offset <= plp->end_offset; + outv_field(v, "Start offset", "0x%lx", plp->start_offset); + outv_field(v, "Write offset", "0x%lx [%s]", plp->write_offset, + write_offset_valid ? 
"OK":"ERROR"); + outv_field(v, "End offset", "0x%lx", plp->end_offset); + + return write_offset_valid; +} + +/* + * pmempool_info_log -- print information about log type pool + */ +int +pmempool_info_log(struct pmem_info *pip) +{ + int ret = 0; + + struct pmemlog *plp = malloc(sizeof(struct pmemlog)); + if (!plp) + err(1, "Cannot allocate memory for pmemlog structure"); + + if (pmempool_info_read(pip, plp, sizeof(struct pmemlog), 0)) { + outv_err("cannot read pmemlog header\n"); + free(plp); + return -1; + } + + if (info_log_descriptor(pip, VERBOSE_DEFAULT, plp)) { + info_log_stats(pip, pip->args.vstats, plp); + ret = info_log_data(pip, pip->args.vdata, plp); + } + + free(plp); + + return ret; +} diff --git a/src/pmdk/src/tools/pmempool/info_obj.c b/src/pmdk/src/tools/pmempool/info_obj.c new file mode 100644 index 000000000..6dc13d3bf --- /dev/null +++ b/src/pmdk/src/tools/pmempool/info_obj.c @@ -0,0 +1,962 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2019, Intel Corporation */ + +/* + * info_obj.c -- pmempool info command source file for obj pool + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc_class.h" + +#include "set.h" +#include "common.h" +#include "output.h" +#include "info.h" +#include "util.h" + +#define BITMAP_BUFF_SIZE 1024 + +#define OFF_TO_PTR(pop, off) ((void *)((uintptr_t)(pop) + (off))) + +#define PTR_TO_OFF(pop, ptr) ((uintptr_t)(ptr) - (uintptr_t)(pop)) + +/* + * lane_need_recovery -- return 1 if lane section needs recovery + */ +static int +lane_need_recovery(struct pmem_info *pip, struct lane_layout *lane) +{ + return ulog_recovery_needed((struct ulog *)&lane->external, 1) || + ulog_recovery_needed((struct ulog *)&lane->internal, 1) || + ulog_recovery_needed((struct ulog *)&lane->undo, 0); +} + +#define RUN_BITMAP_SEPARATOR_DISTANCE 8 + +/* + * get_bitmap_str -- get bitmap single value string + */ +static const char * +get_bitmap_str(uint64_t val, unsigned values) +{ + static char buff[BITMAP_BUFF_SIZE]; + + unsigned j = 0; + for (unsigned i = 0; i < values && j < BITMAP_BUFF_SIZE - 3; i++) { + buff[j++] = ((val & ((uint64_t)1 << i)) ? 
'x' : '.'); + if ((i + 1) % RUN_BITMAP_SEPARATOR_DISTANCE == 0) + buff[j++] = ' '; + } + + buff[j] = '\0'; + + return buff; +} + +/* + * pmem_obj_stats_get_type -- get stats for specified type number + */ +static struct pmem_obj_type_stats * +pmem_obj_stats_get_type(struct pmem_obj_stats *stats, uint64_t type_num) +{ + struct pmem_obj_type_stats *type; + struct pmem_obj_type_stats *type_dest = NULL; + PMDK_TAILQ_FOREACH(type, &stats->type_stats, next) { + if (type->type_num == type_num) + return type; + + if (!type_dest && type->type_num > type_num) + type_dest = type; + } + + type = calloc(1, sizeof(*type)); + if (!type) { + outv_err("cannot allocate memory for type stats\n"); + exit(EXIT_FAILURE); + } + + type->type_num = type_num; + if (type_dest) + PMDK_TAILQ_INSERT_BEFORE(type_dest, type, next); + else + PMDK_TAILQ_INSERT_TAIL(&stats->type_stats, type, next); + + return type; +} + +struct info_obj_redo_args { + int v; + size_t i; + struct pmem_info *pip; +}; + +/* + * info_obj_redo_entry - print redo log entry info + */ +static int +info_obj_redo_entry(struct ulog_entry_base *e, void *arg, + const struct pmem_ops *p_ops) +{ + struct info_obj_redo_args *a = arg; + struct ulog_entry_val *ev; + struct ulog_entry_buf *eb; + + switch (ulog_entry_type(e)) { + case ULOG_OPERATION_AND: + case ULOG_OPERATION_OR: + case ULOG_OPERATION_SET: + ev = (struct ulog_entry_val *)e; + + outv(a->v, "%010zu: " + "Offset: 0x%016jx " + "Value: 0x%016jx ", + a->i++, + ulog_entry_offset(e), + ev->value); + break; + case ULOG_OPERATION_BUF_CPY: + case ULOG_OPERATION_BUF_SET: + eb = (struct ulog_entry_buf *)e; + + outv(a->v, "%010zu: " + "Offset: 0x%016jx " + "Size: %s ", + a->i++, + ulog_entry_offset(e), + out_get_size_str(eb->size, + a->pip->args.human)); + break; + default: + ASSERT(0); /* unreachable */ + } + + return 0; +} + +/* + * info_obj_redo -- print ulog log entries + */ +static void +info_obj_ulog(struct pmem_info *pip, int v, struct ulog *ulog, + const struct pmem_ops *ops) +{ + outv_title(v, "Log entries"); + + struct info_obj_redo_args args = {v, 0, pip}; + ulog_foreach_entry(ulog, info_obj_redo_entry, &args, ops); +} + +/* + * info_obj_alloc_hdr -- print allocation header + */ +static void +info_obj_alloc_hdr(struct pmem_info *pip, int v, + const struct memory_block *m) +{ + outv_title(v, "Allocation Header"); + + outv_field(v, "Size", "%s", out_get_size_str(m->m_ops->get_user_size(m), + pip->args.human)); + outv_field(v, "Extra", "%lu", m->m_ops->get_extra(m)); + outv_field(v, "Flags", "0x%x", m->m_ops->get_flags(m)); +} + +/* + * info_obj_object_hdr -- print object headers and data + */ +static void +info_obj_object_hdr(struct pmem_info *pip, int v, int vid, + const struct memory_block *m, uint64_t id) +{ + struct pmemobjpool *pop = pip->obj.pop; + + void *data = m->m_ops->get_user_data(m); + + outv_nl(vid); + outv_field(vid, "Object", "%lu", id); + outv_field(vid, "Offset", "0x%016lx", PTR_TO_OFF(pop, data)); + + int vahdr = v && pip->args.obj.valloc; + int voobh = v && pip->args.obj.voobhdr; + + outv_indent(vahdr || voobh, 1); + + info_obj_alloc_hdr(pip, vahdr, m); + + outv_hexdump(v && pip->args.vdata, data, + m->m_ops->get_real_size(m), + PTR_TO_OFF(pip->obj.pop, data), 1); + + outv_indent(vahdr || voobh, -1); + +} + +/* + * info_obj_lane_section -- print lane's section + */ +static void +info_obj_lane(struct pmem_info *pip, int v, struct lane_layout *lane) +{ + struct pmem_ops p_ops; + p_ops.base = pip->obj.pop; + + outv_title(v, "Undo Log"); + outv_indent(v, 1); + info_obj_ulog(pip, v, 
(struct ulog *)&lane->undo, &p_ops); + outv_indent(v, -1); + + outv_nl(v); + outv_title(v, "Internal Undo Log"); + outv_indent(v, 1); + info_obj_ulog(pip, v, (struct ulog *)&lane->internal, &p_ops); + outv_indent(v, -1); + + outv_title(v, "External Undo Log"); + outv_indent(v, 1); + info_obj_ulog(pip, v, (struct ulog *)&lane->external, &p_ops); + outv_indent(v, -1); +} + +/* + * info_obj_lanes -- print lanes structures + */ +static void +info_obj_lanes(struct pmem_info *pip) +{ + int v = pip->args.obj.vlanes; + + if (!outv_check(v)) + return; + + struct pmemobjpool *pop = pip->obj.pop; + /* + * Iterate through all lanes from specified range and print + * specified sections. + */ + struct lane_layout *lanes = (void *)((char *)pip->obj.pop + + pop->lanes_offset); + struct range *curp = NULL; + FOREACH_RANGE(curp, &pip->args.obj.lane_ranges) { + for (uint64_t i = curp->first; + i <= curp->last && i < pop->nlanes; i++) { + + /* For -R check print lane only if needs recovery */ + if (pip->args.obj.lanes_recovery && + !lane_need_recovery(pip, &lanes[i])) + continue; + + outv_title(v, "Lane %" PRIu64, i); + + outv_indent(v, 1); + + info_obj_lane(pip, v, &lanes[i]); + + outv_indent(v, -1); + } + } +} + +/* + * info_obj_heap -- print pmemobj heap headers + */ +static void +info_obj_heap(struct pmem_info *pip) +{ + int v = pip->args.obj.vheap; + struct pmemobjpool *pop = pip->obj.pop; + struct heap_layout *layout = OFF_TO_PTR(pop, pop->heap_offset); + struct heap_header *heap = &layout->header; + + outv(v, "\nPMEMOBJ Heap Header:\n"); + outv_hexdump(v && pip->args.vhdrdump, heap, sizeof(*heap), + pop->heap_offset, 1); + + outv_field(v, "Signature", "%s", heap->signature); + outv_field(v, "Major", "%ld", heap->major); + outv_field(v, "Minor", "%ld", heap->minor); + outv_field(v, "Chunk size", "%s", + out_get_size_str(heap->chunksize, pip->args.human)); + outv_field(v, "Chunks per zone", "%ld", heap->chunks_per_zone); + outv_field(v, "Checksum", "%s", out_get_checksum(heap, sizeof(*heap), + &heap->checksum, 0)); +} + +/* + * info_obj_zone -- print information about zone + */ +static void +info_obj_zone_hdr(struct pmem_info *pip, int v, struct zone_header *zone) +{ + outv_hexdump(v && pip->args.vhdrdump, zone, sizeof(*zone), + PTR_TO_OFF(pip->obj.pop, zone), 1); + outv_field(v, "Magic", "%s", out_get_zone_magic_str(zone->magic)); + outv_field(v, "Size idx", "%u", zone->size_idx); +} + +/* + * info_obj_object -- print information about object + */ +static void +info_obj_object(struct pmem_info *pip, const struct memory_block *m, + uint64_t objid) +{ + if (!util_ranges_contain(&pip->args.ranges, objid)) + return; + + uint64_t type_num = m->m_ops->get_extra(m); + + if (!util_ranges_contain(&pip->args.obj.type_ranges, type_num)) + return; + + uint64_t real_size = m->m_ops->get_real_size(m); + pip->obj.stats.n_total_objects++; + pip->obj.stats.n_total_bytes += real_size; + + struct pmem_obj_type_stats *type_stats = + pmem_obj_stats_get_type(&pip->obj.stats, type_num); + + type_stats->n_objects++; + type_stats->n_bytes += real_size; + + int vid = pip->args.obj.vobjects; + int v = pip->args.obj.vobjects; + + outv_indent(v, 1); + info_obj_object_hdr(pip, v, vid, m, objid); + outv_indent(v, -1); +} + +/* + * info_obj_run_bitmap -- print chunk run's bitmap + */ +static void +info_obj_run_bitmap(int v, struct run_bitmap *b) +{ + /* print only used values for lower verbosity */ + uint32_t i; + for (i = 0; i < b->nbits / RUN_BITS_PER_VALUE; i++) + outv(v, "%s\n", get_bitmap_str(b->values[i], + RUN_BITS_PER_VALUE)); 
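+ /* each row above covers one full bitmap word of RUN_BITS_PER_VALUE bits; the tail below prints the remaining nbits % RUN_BITS_PER_VALUE bits */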
+ + unsigned mod = b->nbits % RUN_BITS_PER_VALUE; + if (mod != 0) { + outv(v, "%s\n", get_bitmap_str(b->values[i], mod)); + } +} + +/* + * info_obj_memblock_is_root -- (internal) checks whether the object is root + */ +static int +info_obj_memblock_is_root(struct pmem_info *pip, const struct memory_block *m) +{ + uint64_t roff = pip->obj.pop->root_offset; + if (roff == 0) + return 0; + + struct memory_block rm = memblock_from_offset(pip->obj.heap, roff); + + return MEMORY_BLOCK_EQUALS(*m, rm); +} + +/* + * info_obj_run_cb -- (internal) run object callback + */ +static int +info_obj_run_cb(const struct memory_block *m, void *arg) +{ + struct pmem_info *pip = arg; + + if (info_obj_memblock_is_root(pip, m)) + return 0; + + info_obj_object(pip, m, pip->obj.objid++); + + return 0; +} + +static struct pmem_obj_class_stats * +info_obj_class_stats_get_or_insert(struct pmem_obj_zone_stats *stats, + uint64_t unit_size, uint64_t alignment, + uint32_t nallocs, uint16_t flags) +{ + struct pmem_obj_class_stats *cstats; + VEC_FOREACH_BY_PTR(cstats, &stats->class_stats) { + if (cstats->alignment == alignment && + cstats->flags == flags && + cstats->nallocs == nallocs && + cstats->unit_size == unit_size) + return cstats; + } + + struct pmem_obj_class_stats s = {0, 0, unit_size, + alignment, nallocs, flags}; + + if (VEC_PUSH_BACK(&stats->class_stats, s) != 0) + return NULL; + + return &VEC_BACK(&stats->class_stats); +} + +/* + * info_obj_chunk -- print chunk info + */ +static void +info_obj_chunk(struct pmem_info *pip, uint64_t c, uint64_t z, + struct chunk_header *chunk_hdr, struct chunk *chunk, + struct pmem_obj_zone_stats *stats) +{ + int v = pip->args.obj.vchunkhdr; + outv(v, "\n"); + outv_field(v, "Chunk", "%lu", c); + + struct pmemobjpool *pop = pip->obj.pop; + + outv_hexdump(v && pip->args.vhdrdump, chunk_hdr, sizeof(*chunk_hdr), + PTR_TO_OFF(pop, chunk_hdr), 1); + + outv_field(v, "Type", "%s", out_get_chunk_type_str(chunk_hdr->type)); + outv_field(v, "Flags", "0x%x %s", chunk_hdr->flags, + out_get_chunk_flags(chunk_hdr->flags)); + outv_field(v, "Size idx", "%u", chunk_hdr->size_idx); + + struct memory_block m = MEMORY_BLOCK_NONE; + m.zone_id = (uint32_t)z; + m.chunk_id = (uint32_t)c; + m.size_idx = (uint32_t)chunk_hdr->size_idx; + memblock_rebuild_state(pip->obj.heap, &m); + + if (chunk_hdr->type == CHUNK_TYPE_USED || + chunk_hdr->type == CHUNK_TYPE_FREE) { + VEC_FRONT(&stats->class_stats).n_units += + chunk_hdr->size_idx; + + if (chunk_hdr->type == CHUNK_TYPE_USED) { + VEC_FRONT(&stats->class_stats).n_used += + chunk_hdr->size_idx; + + /* skip root object */ + if (!info_obj_memblock_is_root(pip, &m)) { + info_obj_object(pip, &m, pip->obj.objid++); + } + } + } else if (chunk_hdr->type == CHUNK_TYPE_RUN) { + struct chunk_run *run = (struct chunk_run *)chunk; + + outv_hexdump(v && pip->args.vhdrdump, run, + sizeof(run->hdr.block_size) + + sizeof(run->hdr.alignment), + PTR_TO_OFF(pop, run), 1); + + struct run_bitmap bitmap; + m.m_ops->get_bitmap(&m, &bitmap); + + struct pmem_obj_class_stats *cstats = + info_obj_class_stats_get_or_insert(stats, + run->hdr.block_size, run->hdr.alignment, bitmap.nbits, + chunk_hdr->flags); + if (cstats == NULL) { + outv_err("out of memory, can't allocate statistics"); + return; + } + + outv_field(v, "Block size", "%s", + out_get_size_str(run->hdr.block_size, + pip->args.human)); + + uint32_t units = bitmap.nbits; + uint32_t free_space = 0; + uint32_t max_free_block = 0; + m.m_ops->calc_free(&m, &free_space, &max_free_block); + uint32_t used = units - free_space; + + 
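/* fold this run's capacity and usage into its allocation-class statistics */ +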
cstats->n_units += units; + cstats->n_used += used; + + outv_field(v, "Bitmap", "%u / %u", used, units); + + info_obj_run_bitmap(v && pip->args.obj.vbitmap, &bitmap); + + m.m_ops->iterate_used(&m, info_obj_run_cb, pip); + } +} + +/* + * info_obj_zone_chunks -- print chunk headers from specified zone + */ +static void +info_obj_zone_chunks(struct pmem_info *pip, struct zone *zone, uint64_t z, + struct pmem_obj_zone_stats *stats) +{ + VEC_INIT(&stats->class_stats); + + struct pmem_obj_class_stats default_class_stats = {0, 0, + CHUNKSIZE, 0, 0, 0}; + VEC_PUSH_BACK(&stats->class_stats, default_class_stats); + + uint64_t c = 0; + while (c < zone->header.size_idx) { + enum chunk_type type = zone->chunk_headers[c].type; + uint64_t size_idx = zone->chunk_headers[c].size_idx; + if (util_ranges_contain(&pip->args.obj.chunk_ranges, c)) { + if (pip->args.obj.chunk_types & (1ULL << type)) { + stats->n_chunks++; + stats->n_chunks_type[type]++; + + stats->size_chunks += size_idx; + stats->size_chunks_type[type] += size_idx; + + info_obj_chunk(pip, c, z, + &zone->chunk_headers[c], + &zone->chunks[c], stats); + + } + + if (size_idx > 1 && type != CHUNK_TYPE_RUN && + pip->args.obj.chunk_types & + (1 << CHUNK_TYPE_FOOTER)) { + size_t f = c + size_idx - 1; + info_obj_chunk(pip, f, z, + &zone->chunk_headers[f], + &zone->chunks[f], stats); + } + } + + c += size_idx; + } +} + +/* + * info_obj_root_obj -- print root object + */ +static void +info_obj_root_obj(struct pmem_info *pip) +{ + int v = pip->args.obj.vroot; + + struct pmemobjpool *pop = pip->obj.pop; + if (!pop->root_offset) { + outv(v, "\nNo root object...\n"); + return; + } + + outv_title(v, "Root object"); + outv_field(v, "Offset", "0x%016zx", pop->root_offset); + uint64_t root_size = pop->root_size; + outv_field(v, "Size", "%s", + out_get_size_str(root_size, pip->args.human)); + + struct memory_block m = memblock_from_offset( + pip->obj.heap, pop->root_offset); + + /* do not print object id and offset for root object */ + info_obj_object_hdr(pip, v, VERBOSE_SILENT, &m, 0); +} + +/* + * info_obj_zones -- print zones and chunks + */ +static void +info_obj_zones_chunks(struct pmem_info *pip) +{ + if (!outv_check(pip->args.obj.vheap) && + !outv_check(pip->args.vstats) && + !outv_check(pip->args.obj.vobjects)) + return; + + struct pmemobjpool *pop = pip->obj.pop; + struct heap_layout *layout = OFF_TO_PTR(pop, pop->heap_offset); + size_t maxzone = util_heap_max_zone(pop->heap_size); + pip->obj.stats.n_zones = maxzone; + pip->obj.stats.zone_stats = calloc(maxzone, + sizeof(struct pmem_obj_zone_stats)); + if (!pip->obj.stats.zone_stats) + err(1, "Cannot allocate memory for zone stats"); + + for (size_t i = 0; i < maxzone; i++) { + struct zone *zone = ZID_TO_ZONE(layout, i); + + if (util_ranges_contain(&pip->args.obj.zone_ranges, i)) { + int vvv = pip->args.obj.vheap && + (pip->args.obj.vzonehdr || + pip->args.obj.vchunkhdr); + + outv_title(vvv, "Zone %zu", i); + + if (zone->header.magic == ZONE_HEADER_MAGIC) + pip->obj.stats.n_zones_used++; + + info_obj_zone_hdr(pip, pip->args.obj.vheap && + pip->args.obj.vzonehdr, + &zone->header); + + outv_indent(vvv, 1); + info_obj_zone_chunks(pip, zone, i, + &pip->obj.stats.zone_stats[i]); + outv_indent(vvv, -1); + } + } +} + +/* + * info_obj_descriptor -- print pmemobj descriptor + */ +static void +info_obj_descriptor(struct pmem_info *pip) +{ + int v = VERBOSE_DEFAULT; + + if (!outv_check(v)) + return; + + outv(v, "\nPMEM OBJ Header:\n"); + struct pmemobjpool *pop = pip->obj.pop; + + uint8_t *hdrptr = (uint8_t *)pop + 
sizeof(pop->hdr); + size_t hdrsize = sizeof(*pop) - sizeof(pop->hdr); + size_t hdroff = sizeof(pop->hdr); + outv_hexdump(pip->args.vhdrdump, hdrptr, hdrsize, hdroff, 1); + + /* check if layout is zeroed */ + char *layout = util_check_memory((uint8_t *)pop->layout, + sizeof(pop->layout), 0) ? + pop->layout : "(null)"; + + /* address for checksum */ + void *dscp = (void *)((uintptr_t)(pop) + sizeof(struct pool_hdr)); + + outv_field(v, "Layout", "%s", layout); + outv_field(v, "Lanes offset", "0x%lx", pop->lanes_offset); + outv_field(v, "Number of lanes", "%lu", pop->nlanes); + outv_field(v, "Heap offset", "0x%lx", pop->heap_offset); + outv_field(v, "Heap size", "%lu", pop->heap_size); + outv_field(v, "Checksum", "%s", out_get_checksum(dscp, OBJ_DSC_P_SIZE, + &pop->checksum, 0)); + outv_field(v, "Root offset", "0x%lx", pop->root_offset); + + /* run id with -v option */ + outv_field(v + 1, "Run id", "%lu", pop->run_id); +} + +/* + * info_obj_stats_objects -- print objects' statistics + */ +static void +info_obj_stats_objects(struct pmem_info *pip, int v, + struct pmem_obj_stats *stats) +{ + outv_field(v, "Number of objects", "%lu", + stats->n_total_objects); + outv_field(v, "Number of bytes", "%s", out_get_size_str( + stats->n_total_bytes, pip->args.human)); + + outv_title(v, "Objects by type"); + + outv_indent(v, 1); + struct pmem_obj_type_stats *type_stats; + PMDK_TAILQ_FOREACH(type_stats, &pip->obj.stats.type_stats, next) { + if (!type_stats->n_objects) + continue; + + double n_objects_perc = 100.0 * + (double)type_stats->n_objects / + (double)stats->n_total_objects; + double n_bytes_perc = 100.0 * + (double)type_stats->n_bytes / + (double)stats->n_total_bytes; + + outv_nl(v); + outv_field(v, "Type number", "%lu", type_stats->type_num); + outv_field(v, "Number of objects", "%lu [%s]", + type_stats->n_objects, + out_get_percentage(n_objects_perc)); + outv_field(v, "Number of bytes", "%s [%s]", + out_get_size_str( + type_stats->n_bytes, + pip->args.human), + out_get_percentage(n_bytes_perc)); + } + outv_indent(v, -1); +} + +/* + * info_obj_stats_alloc_classes -- print allocation classes' statistics + */ +static void +info_obj_stats_alloc_classes(struct pmem_info *pip, int v, + struct pmem_obj_zone_stats *stats) +{ + uint64_t total_bytes = 0; + uint64_t total_used = 0; + + outv_indent(v, 1); + + struct pmem_obj_class_stats *cstats; + VEC_FOREACH_BY_PTR(cstats, &stats->class_stats) { + if (cstats->n_units == 0) + continue; + + double used_perc = 100.0 * + (double)cstats->n_used / (double)cstats->n_units; + + outv_nl(v); + outv_field(v, "Unit size", "%s", out_get_size_str( + cstats->unit_size, pip->args.human)); + outv_field(v, "Units", "%lu", cstats->n_units); + outv_field(v, "Used units", "%lu [%s]", + cstats->n_used, + out_get_percentage(used_perc)); + + uint64_t bytes = cstats->unit_size * + cstats->n_units; + uint64_t used = cstats->unit_size * + cstats->n_used; + + total_bytes += bytes; + total_used += used; + + double used_bytes_perc = 100.0 * (double)used / (double)bytes; + + outv_field(v, "Bytes", "%s", + out_get_size_str(bytes, pip->args.human)); + outv_field(v, "Used bytes", "%s [%s]", + out_get_size_str(used, pip->args.human), + out_get_percentage(used_bytes_perc)); + } + + outv_indent(v, -1); + + double used_bytes_perc = total_bytes ?
100.0 * + (double)total_used / (double)total_bytes : 0.0; + + outv_nl(v); + outv_field(v, "Total bytes", "%s", + out_get_size_str(total_bytes, pip->args.human)); + outv_field(v, "Total used bytes", "%s [%s]", + out_get_size_str(total_used, pip->args.human), + out_get_percentage(used_bytes_perc)); + +} + +/* + * info_obj_stats_chunks -- print chunks' statistics + */ +static void +info_obj_stats_chunks(struct pmem_info *pip, int v, + struct pmem_obj_zone_stats *stats) +{ + outv_field(v, "Number of chunks", "%lu", stats->n_chunks); + + outv_indent(v, 1); + for (unsigned type = 0; type < MAX_CHUNK_TYPE; type++) { + double type_perc = 100.0 * + (double)stats->n_chunks_type[type] / + (double)stats->n_chunks; + if (stats->n_chunks_type[type]) { + outv_field(v, out_get_chunk_type_str(type), + "%lu [%s]", + stats->n_chunks_type[type], + out_get_percentage(type_perc)); + } + } + outv_indent(v, -1); + + outv_nl(v); + outv_field(v, "Total chunks size", "%s", out_get_size_str( + stats->size_chunks, pip->args.human)); + + outv_indent(v, 1); + for (unsigned type = 0; type < MAX_CHUNK_TYPE; type++) { + double type_perc = 100.0 * + (double)stats->size_chunks_type[type] / + (double)stats->size_chunks; + if (stats->size_chunks_type[type]) { + outv_field(v, out_get_chunk_type_str(type), + "%lu [%s]", + stats->size_chunks_type[type], + out_get_percentage(type_perc)); + } + + } + outv_indent(v, -1); +} + +/* + * info_obj_add_zone_stats -- add stats to total + */ +static void +info_obj_add_zone_stats(struct pmem_obj_zone_stats *total, + struct pmem_obj_zone_stats *stats) +{ + total->n_chunks += stats->n_chunks; + total->size_chunks += stats->size_chunks; + + for (int type = 0; type < MAX_CHUNK_TYPE; type++) { + total->n_chunks_type[type] += + stats->n_chunks_type[type]; + total->size_chunks_type[type] += + stats->size_chunks_type[type]; + } + + struct pmem_obj_class_stats *cstats; + VEC_FOREACH_BY_PTR(cstats, &stats->class_stats) { + struct pmem_obj_class_stats *ctotal = + info_obj_class_stats_get_or_insert(total, cstats->unit_size, + cstats->alignment, cstats->nallocs, cstats->flags); + if (ctotal == NULL) { + outv_err("out of memory, can't allocate statistics"); + return; + } + ctotal->n_units += cstats->n_units; + ctotal->n_used += cstats->n_used; + } +} + +/* + * info_obj_stats_zones -- print zones' statistics + */ +static void +info_obj_stats_zones(struct pmem_info *pip, int v, struct pmem_obj_stats *stats, + struct pmem_obj_zone_stats *total) +{ + double used_zones_perc = 100.0 * (double)stats->n_zones_used / + (double)stats->n_zones; + + outv_field(v, "Number of zones", "%lu", stats->n_zones); + outv_field(v, "Number of used zones", "%lu [%s]", stats->n_zones_used, + out_get_percentage(used_zones_perc)); + + outv_indent(v, 1); + for (uint64_t i = 0; i < stats->n_zones_used; i++) { + outv_title(v, "Zone %" PRIu64, i); + + struct pmem_obj_zone_stats *zstats = &stats->zone_stats[i]; + + info_obj_stats_chunks(pip, v, zstats); + + outv_title(v, "Zone's allocation classes"); + info_obj_stats_alloc_classes(pip, v, zstats); + + info_obj_add_zone_stats(total, zstats); + } + outv_indent(v, -1); +} + +/* + * info_obj_stats -- print statistics + */ +static void +info_obj_stats(struct pmem_info *pip) +{ + int v = pip->args.vstats; + + if (!outv_check(v)) + return; + + struct pmem_obj_stats *stats = &pip->obj.stats; + struct pmem_obj_zone_stats total; + memset(&total, 0, sizeof(total)); + + outv_title(v, "Statistics"); + + outv_title(v, "Objects"); + info_obj_stats_objects(pip, v, stats); + + outv_title(v, "Heap"); + 
info_obj_stats_zones(pip, v, stats, &total); + + if (stats->n_zones_used > 1) { + outv_title(v, "Total zone's statistics"); + outv_title(v, "Chunks statistics"); + info_obj_stats_chunks(pip, v, &total); + + outv_title(v, "Allocation classes"); + info_obj_stats_alloc_classes(pip, v, &total); + } + VEC_DELETE(&total.class_stats); +} + +static struct pmem_info *Pip; +#ifndef _WIN32 +static void +info_obj_sa_sigaction(int signum, siginfo_t *info, void *context) +{ + uintptr_t offset = (uintptr_t)info->si_addr - (uintptr_t)Pip->obj.pop; + outv_err("Invalid offset 0x%lx\n", offset); + exit(EXIT_FAILURE); +} + +static struct sigaction info_obj_sigaction = { + .sa_sigaction = info_obj_sa_sigaction, + .sa_flags = SA_SIGINFO +}; +#else +#define CALL_FIRST 1 + +static LONG CALLBACK +exception_handler(_In_ PEXCEPTION_POINTERS ExceptionInfo) +{ + PEXCEPTION_RECORD record = ExceptionInfo->ExceptionRecord; + if (record->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) { + return EXCEPTION_CONTINUE_SEARCH; + } + uintptr_t offset = (uintptr_t)record->ExceptionInformation[1] - + (uintptr_t)Pip->obj.pop; + outv_err("Invalid offset 0x%lx\n", offset); + exit(EXIT_FAILURE); +} +#endif + +/* + * pmempool_info_obj -- print information about obj pool type + */ +int +pmempool_info_obj(struct pmem_info *pip) +{ + pip->obj.pop = pool_set_file_map(pip->pfile, 0); + if (pip->obj.pop == NULL) + return -1; + + pip->obj.size = pip->pfile->size; + + struct palloc_heap *heap = calloc(1, sizeof(*heap)); + if (heap == NULL) + err(1, "Cannot allocate memory for heap data"); + + heap->layout = OFF_TO_PTR(pip->obj.pop, pip->obj.pop->heap_offset); + heap->base = pip->obj.pop; + pip->obj.alloc_classes = alloc_class_collection_new(); + pip->obj.heap = heap; + + Pip = pip; +#ifndef _WIN32 + if (sigaction(SIGSEGV, &info_obj_sigaction, NULL)) { +#else + if (AddVectoredExceptionHandler(CALL_FIRST, exception_handler) == + NULL) { +#endif + perror("sigaction"); + return -1; + } + + pip->obj.uuid_lo = pmemobj_get_uuid_lo(pip->obj.pop); + + info_obj_descriptor(pip); + info_obj_lanes(pip); + info_obj_root_obj(pip); + info_obj_heap(pip); + info_obj_zones_chunks(pip); + info_obj_stats(pip); + + free(heap); + alloc_class_collection_delete(pip->obj.alloc_classes); + + return 0; +} diff --git a/src/pmdk/src/tools/pmempool/output.c b/src/pmdk/src/tools/pmempool/output.c new file mode 100644 index 000000000..ba960fafa --- /dev/null +++ b/src/pmdk/src/tools/pmempool/output.c @@ -0,0 +1,844 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * output.c -- definitions of output printing related functions + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <time.h> +#include <err.h> +#include <float.h> +#include <endian.h> +#include <errno.h> +#include "feature.h" +#include "common.h" +#include "output.h" + +#define _STR(s) #s +#define STR(s) _STR(s) +#define TIME_STR_FMT "%a %b %d %Y %H:%M:%S" +#define UUID_STR_MAX 37 +#define HEXDUMP_ROW_WIDTH 16 +/* + * 2 chars + space per byte + + * space after 8 bytes and terminating NULL + */ +#define HEXDUMP_ROW_HEX_LEN (HEXDUMP_ROW_WIDTH * 3 + 1 + 1) +/* 1 printable char per byte + terminating NULL */ +#define HEXDUMP_ROW_ASCII_LEN (HEXDUMP_ROW_WIDTH + 1) +#define SEPARATOR_CHAR '-' +#define MAX_INDENT 32 +#define INDENT_CHAR ' ' + +static char out_indent_str[MAX_INDENT + 1]; +static int out_indent_level; +static int out_vlevel; +static unsigned out_column_width = 20; +static FILE *out_fh; +static const char *out_prefix; + +#define STR_MAX 256 + +/* + * outv_check -- verify verbosity level + */ +int
+outv_check(int vlevel) +{ + return vlevel && (out_vlevel >= vlevel); +} + +/* + * out_set_col_width -- set column width + * + * See: outv_field() function + */ +void +out_set_col_width(unsigned col_width) +{ + out_column_width = col_width; +} + +/* + * out_set_vlevel -- set verbosity level + */ +void +out_set_vlevel(int vlevel) +{ + out_vlevel = vlevel; + if (out_fh == NULL) + out_fh = stdout; +} + +/* + * out_set_prefix -- set prefix to output format + */ +void +out_set_prefix(const char *prefix) +{ + out_prefix = prefix; +} + +/* + * out_set_stream -- set output stream + */ +void +out_set_stream(FILE *stream) +{ + out_fh = stream; + + memset(out_indent_str, INDENT_CHAR, MAX_INDENT); +} + +/* + * outv_err -- print error message + */ +void +outv_err(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + outv_err_vargs(fmt, ap); + va_end(ap); +} + +/* + * outv_err_vargs -- print error message + */ +void +outv_err_vargs(const char *fmt, va_list ap) +{ + char *_str = strdup(fmt); + if (!_str) + err(1, "strdup"); + char *str = _str; + + fprintf(stderr, "error: "); + int errstr = str[0] == '!'; + if (errstr) + str++; + + char *nl = strchr(str, '\n'); + if (nl) + *nl = '\0'; + + vfprintf(stderr, str, ap); + if (errstr) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); + + free(_str); +} + +/* + * outv_indent -- change indentation level by factor + */ +void +outv_indent(int vlevel, int i) +{ + if (!outv_check(vlevel)) + return; + + out_indent_str[out_indent_level] = INDENT_CHAR; + out_indent_level += i; + if (out_indent_level < 0) + out_indent_level = 0; + if (out_indent_level > MAX_INDENT) + out_indent_level = MAX_INDENT; + + out_indent_str[out_indent_level] = '\0'; +} + +/* + * _out_prefix -- print prefix if defined + */ +static void +_out_prefix(void) +{ + if (out_prefix) + fprintf(out_fh, "%s: ", out_prefix); +} + +/* + * _out_indent -- print indent + */ +static void +_out_indent(void) +{ + fprintf(out_fh, "%s", out_indent_str); +} + +/* + * outv -- print message taking into account verbosity level + */ +void +outv(int vlevel, const char *fmt, ...) +{ + va_list ap; + + if (!outv_check(vlevel)) + return; + + _out_prefix(); + _out_indent(); + va_start(ap, fmt); + vfprintf(out_fh, fmt, ap); + va_end(ap); +} + +/* + * outv_nl -- print new line without indentation + */ +void +outv_nl(int vlevel) +{ + if (!outv_check(vlevel)) + return; + + _out_prefix(); + fprintf(out_fh, "\n"); +} + +void +outv_title(int vlevel, const char *fmt, ...) +{ + va_list ap; + if (!outv_check(vlevel)) + return; + + fprintf(out_fh, "\n"); + _out_prefix(); + _out_indent(); + va_start(ap, fmt); + vfprintf(out_fh, fmt, ap); + va_end(ap); + fprintf(out_fh, ":\n"); +} + +/* + * outv_field -- print field name and value in specified format + * + * Field name will have fixed width which can be changed by + * out_set_column_width() function. + * vlevel - verbosity level + * field - field name + * fmt - format form value + */ +void +outv_field(int vlevel, const char *field, const char *fmt, ...) 
+{ + va_list ap; + + if (!outv_check(vlevel)) + return; + + _out_prefix(); + _out_indent(); + va_start(ap, fmt); + fprintf(out_fh, "%-*s : ", out_column_width, field); + vfprintf(out_fh, fmt, ap); + fprintf(out_fh, "\n"); + va_end(ap); +} + +/* + * out_get_percentage -- return percentage string + */ +const char * +out_get_percentage(double perc) +{ + static char str_buff[STR_MAX] = {0, }; + int ret = 0; + + if (perc > 0.0 && perc < 0.0001) { + ret = util_snprintf(str_buff, STR_MAX, "%e %%", perc); + if (ret < 0) + return ""; + } else { + int decimal = 0; + if (perc >= 100.0 || perc < DBL_EPSILON) + decimal = 0; + else + decimal = 6; + + ret = util_snprintf(str_buff, STR_MAX, "%.*f %%", decimal, + perc); + if (ret < 0) + return ""; + } + + return str_buff; +} + +/* + * out_get_size_str -- return size string + * + * human - if 1 return size in human-readable format + * if 2 return size in bytes and human-readable format + * otherwise return size in bytes. + */ +const char * +out_get_size_str(uint64_t size, int human) +{ + static char str_buff[STR_MAX] = {0, }; + char units[] = { + 'K', 'M', 'G', 'T', '\0' + }; + const int nunits = sizeof(units) / sizeof(units[0]); + int ret = 0; + + if (!human) { + ret = util_snprintf(str_buff, STR_MAX, "%"PRIu64, size); + } else { + int i = -1; + double dsize = (double)size; + uint64_t csize = size; + + while (csize >= 1024 && i < nunits) { + csize /= 1024; + dsize /= 1024.0; + i++; + } + + if (i >= 0 && i < nunits) + if (human == 1) + ret = util_snprintf(str_buff, STR_MAX, + "%.1f%c", dsize, units[i]); + else + ret = util_snprintf(str_buff, STR_MAX, + "%.1f%c [%" PRIu64"]", dsize, + units[i], size); + else + ret = util_snprintf(str_buff, STR_MAX, "%"PRIu64, + size); + } + + if (ret < 0) + return ""; + + return str_buff; +} + +/* + * out_get_uuid_str -- returns uuid in human readable format + */ +const char * +out_get_uuid_str(uuid_t uuid) +{ + static char uuid_str[UUID_STR_MAX] = {0, }; + + int ret = util_uuid_to_string(uuid, uuid_str); + if (ret != 0) { + outv(2, "failed to convert uuid to string"); + return NULL; + } + return uuid_str; +} + +/* + * out_get_time_str -- returns time in human readable format + */ +const char * +out_get_time_str(time_t time) +{ + static char str_buff[STR_MAX] = {0, }; + struct tm *tm = util_localtime(&time); + + if (tm) { + strftime(str_buff, STR_MAX, TIME_STR_FMT, tm); + } else { + int ret = util_snprintf(str_buff, STR_MAX, "unknown"); + if (ret < 0) + return ""; + } + + return str_buff; +} + +/* + * out_get_ascii_str -- get string with printable ASCII dump buffer + * + * Convert non-printable ASCII characters to dot '.' + * See: util_get_printable_ascii() function. + */ +static int +out_get_ascii_str(char *str, size_t str_len, const uint8_t *datap, size_t len) +{ + int c = 0; + size_t i; + char pch; + + if (str_len < len) + return -1; + + for (i = 0; i < len; i++) { + pch = util_get_printable_ascii((char)datap[i]); + int t = util_snprintf(str + c, str_len - (size_t)c, "%c", pch); + if (t < 0) + return -1; + c += t; + } + + return c; +} + +/* + * out_get_hex_str -- get string with hexadecimal dump of buffer + * + * Hexadecimal bytes in format %02x, each one followed by space, + * additional space after every 8th byte.
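+ * For example, a full 16-byte row renders as: + * "00 01 02 03 04 05 06 07  08 09 0a 0b 0c 0d 0e 0f "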
+ */ +static int +out_get_hex_str(char *str, size_t str_len, const uint8_t *datap, size_t len) +{ + int c = 0; + size_t i; + int t; + + if (str_len < (3 * len + 1)) + return -1; + + for (i = 0; i < len; i++) { + /* add space after n*8 byte */ + if (i && (i % 8) == 0) { + t = util_snprintf(str + c, str_len - (size_t)c, " "); + if (t < 0) + return -1; + c += t; + } + t = util_snprintf(str + c, str_len - (size_t)c, "%02x ", + datap[i]); + if (t < 0) + return -1; + c += t; + } + + return c; +} + +/* + * outv_hexdump -- print buffer in canonical hex+ASCII format + * + * Print offset in hexadecimal, + * sixteen space-separated, two column, hexadecimal bytes, + * followed by the same sixteen bytes converted to printable ASCII characters + * enclosed in '|' characters. + */ +void +outv_hexdump(int vlevel, const void *addr, size_t len, size_t offset, int sep) +{ + if (!outv_check(vlevel) || len <= 0) + return; + + const uint8_t *datap = (uint8_t *)addr; + uint8_t row_hex_str[HEXDUMP_ROW_HEX_LEN] = {0, }; + uint8_t row_ascii_str[HEXDUMP_ROW_ASCII_LEN] = {0, }; + size_t curr = 0; + size_t prev = 0; + int repeated = 0; + int n = 0; + + while (len) { + size_t curr_len = min(len, HEXDUMP_ROW_WIDTH); + + /* + * Check if current row is the same as the previous one + * don't check it for first and last rows. + */ + if (len != curr_len && curr && + !memcmp(datap + prev, datap + curr, curr_len)) { + if (!repeated) { + /* print star only for the first repeated */ + fprintf(out_fh, "*\n"); + repeated = 1; + } + } else { + repeated = 0; + + /* row with hexadecimal bytes */ + int rh = out_get_hex_str((char *)row_hex_str, + HEXDUMP_ROW_HEX_LEN, datap + curr, curr_len); + /* row with printable ascii chars */ + int ra = out_get_ascii_str((char *)row_ascii_str, + HEXDUMP_ROW_ASCII_LEN, datap + curr, curr_len); + + if (ra && rh) + n = fprintf(out_fh, "%08zx %-*s|%-*s|\n", + curr + offset, + HEXDUMP_ROW_HEX_LEN, row_hex_str, + HEXDUMP_ROW_WIDTH, row_ascii_str); + prev = curr; + } + + len -= curr_len; + curr += curr_len; + } + + if (sep && n) { + while (--n) + fprintf(out_fh, "%c", SEPARATOR_CHAR); + fprintf(out_fh, "\n"); + } +} + +/* + * out_get_checksum -- return checksum string with result + */ +const char * +out_get_checksum(void *addr, size_t len, uint64_t *csump, size_t skip_off) +{ + static char str_buff[STR_MAX] = {0, }; + int ret = 0; + + uint64_t csum = util_checksum_compute(addr, len, csump, skip_off); + + if (*csump == htole64(csum)) + ret = util_snprintf(str_buff, STR_MAX, "0x%" PRIx64" [OK]", + le64toh(csum)); + else + ret = util_snprintf(str_buff, STR_MAX, + "0x%" PRIx64 " [wrong! should be: 0x%" PRIx64 "]", + le64toh(*csump), le64toh(csum)); + + if (ret < 0) + return ""; + + return str_buff; +} + +/* + * out_get_btt_map_entry -- return BTT map entry with flags strings + */ +const char * +out_get_btt_map_entry(uint32_t map) +{ + static char str_buff[STR_MAX] = {0, }; + + int is_init = (map & ~BTT_MAP_ENTRY_LBA_MASK) == 0; + int is_zero = (map & ~BTT_MAP_ENTRY_LBA_MASK) == + BTT_MAP_ENTRY_ZERO; + int is_error = (map & ~BTT_MAP_ENTRY_LBA_MASK) == + BTT_MAP_ENTRY_ERROR; + int is_normal = (map & ~BTT_MAP_ENTRY_LBA_MASK) == + BTT_MAP_ENTRY_NORMAL; + + uint32_t lba = map & BTT_MAP_ENTRY_LBA_MASK; + + int ret = util_snprintf(str_buff, STR_MAX, "0x%08x state: %s", lba, + is_init ? "init" : + is_zero ? "zero" : + is_error ? "error" : + is_normal ? 
"normal" : "unknown"); + + if (ret < 0) + return ""; + + return str_buff; +} + +/* + * out_get_pool_type_str -- get pool type string + */ +const char * +out_get_pool_type_str(pmem_pool_type_t type) +{ + switch (type) { + case PMEM_POOL_TYPE_LOG: + return "log"; + case PMEM_POOL_TYPE_BLK: + return "blk"; + case PMEM_POOL_TYPE_OBJ: + return "obj"; + case PMEM_POOL_TYPE_BTT: + return "btt"; + default: + return "unknown"; + } +} + +/* + * out_get_pool_signature -- return signature of specified pool type + */ +const char * +out_get_pool_signature(pmem_pool_type_t type) +{ + switch (type) { + case PMEM_POOL_TYPE_LOG: + return LOG_HDR_SIG; + case PMEM_POOL_TYPE_BLK: + return BLK_HDR_SIG; + case PMEM_POOL_TYPE_OBJ: + return OBJ_HDR_SIG; + default: + return NULL; + } +} + +/* + * out_get_chunk_type_str -- get chunk type string + */ +const char * +out_get_chunk_type_str(enum chunk_type type) +{ + switch (type) { + case CHUNK_TYPE_FOOTER: + return "footer"; + case CHUNK_TYPE_FREE: + return "free"; + case CHUNK_TYPE_USED: + return "used"; + case CHUNK_TYPE_RUN: + return "run"; + case CHUNK_TYPE_UNKNOWN: + default: + return "unknown"; + } +} + +/* + * out_get_chunk_flags -- get names of set flags for chunk header + */ +const char * +out_get_chunk_flags(uint16_t flags) +{ + if (flags & CHUNK_FLAG_COMPACT_HEADER) + return "compact header"; + else if (flags & CHUNK_FLAG_HEADER_NONE) + return "header none"; + + return ""; +} + +/* + * out_get_zone_magic_str -- get zone magic string with additional + * information about correctness of the magic value + */ +const char * +out_get_zone_magic_str(uint32_t magic) +{ + static char str_buff[STR_MAX] = {0, }; + + const char *correct = NULL; + switch (magic) { + case 0: + correct = "uninitialized"; + break; + case ZONE_HEADER_MAGIC: + correct = "OK"; + break; + default: + correct = "wrong! should be " STR(ZONE_HEADER_MAGIC); + break; + } + + int ret = util_snprintf(str_buff, STR_MAX, "0x%08x [%s]", magic, + correct); + + if (ret < 0) + return ""; + + return str_buff; +} + +/* + * out_get_pmemoid_str -- get PMEMoid string + */ +const char * +out_get_pmemoid_str(PMEMoid oid, uint64_t uuid_lo) +{ + static char str_buff[STR_MAX] = {0, }; + int free_cor = 0; + int ret = 0; + char *correct = "OK"; + if (oid.pool_uuid_lo && oid.pool_uuid_lo != uuid_lo) { + ret = util_snprintf(str_buff, STR_MAX, + "wrong! 
should be 0x%016"PRIx64, uuid_lo); + if (ret < 0) + err(1, "snprintf: %d", ret); + correct = strdup(str_buff); + if (!correct) + err(1, "Cannot allocate memory for PMEMoid string\n"); + free_cor = 1; + } + + ret = util_snprintf(str_buff, STR_MAX, + "off: 0x%016"PRIx64" pool_uuid_lo: 0x%016" + PRIx64" [%s]", oid.off, oid.pool_uuid_lo, correct); + + if (free_cor) + free(correct); + + if (ret < 0) + err(1, "snprintf: %d", ret); + + return str_buff; +} + +/* + * out_get_arch_machine_class_str -- get a string representation of the machine + * class + */ +const char * +out_get_arch_machine_class_str(uint8_t machine_class) +{ + + switch (machine_class) { + case PMDK_MACHINE_CLASS_64: + return "64"; + default: + return "unknown"; + } +} + +/* + * out_get_arch_data_str -- get a string representation of the data endianness + */ +const char * +out_get_arch_data_str(uint8_t data) +{ + switch (data) { + case PMDK_DATA_LE: + return "2's complement, little endian"; + case PMDK_DATA_BE: + return "2's complement, big endian"; + default: + return "unknown"; + } +} + +/* + * out_get_arch_machine_str -- get a string representation of the machine type + */ +const char * +out_get_arch_machine_str(uint16_t machine) +{ + static char str_buff[STR_MAX] = {0, }; + switch (machine) { + case PMDK_MACHINE_X86_64: + return "AMD X86-64"; + case PMDK_MACHINE_AARCH64: + return "Aarch64"; + case PMDK_MACHINE_PPC64: + return "PPC64"; + default: + break; + } + + int ret = util_snprintf(str_buff, STR_MAX, "unknown %u", machine); + if (ret < 0) + return "unknown"; + return str_buff; +} + +/* + * out_get_last_shutdown_str -- get a string representation of the finish state + */ +const char * +out_get_last_shutdown_str(uint8_t dirty) +{ + if (dirty) + return "dirty"; + else + return "clean"; +} + +/* + * out_get_alignment_descr_str -- get alignment descriptor string + */ +const char * +out_get_alignment_desc_str(uint64_t ad, uint64_t valid_ad) +{ + static char str_buff[STR_MAX] = {0, }; + int ret = 0; + + if (ad == valid_ad) + ret = util_snprintf(str_buff, STR_MAX, "0x%016"PRIx64"[OK]", + ad); + else + ret = util_snprintf(str_buff, STR_MAX, "0x%016"PRIx64" " + "[wrong! should be 0x%016"PRIx64"]", ad, valid_ad); + + if (ret < 0) + return ""; + + return str_buff; +} + +/* + * out_concat -- concatenate the new element to the list of strings + * + * If concatenation is successful it increments current position in the output + * string and number of elements in the list. Elements are separated with ", ". + */ +static int +out_concat(char *str_buff, int *curr, int *count, const char *str) +{ + ASSERTne(str_buff, NULL); + ASSERTne(curr, NULL); + ASSERTne(str, NULL); + + const char *separator = (count != NULL && *count > 0) ? 
", " : ""; + int ret = util_snprintf(str_buff + *curr, + (size_t)(STR_MAX - *curr), "%s%s", separator, str); + if (ret < 0) + return -1; + *curr += ret; + if (count) + ++(*count); + return 0; +} + +/* + * out_get_incompat_features_str -- (internal) get a string with names of + * incompatibility flags + */ +const char * +out_get_incompat_features_str(uint32_t incompat) +{ + static char str_buff[STR_MAX] = {0}; + features_t features = {POOL_FEAT_ZERO, incompat, POOL_FEAT_ZERO}; + int ret = 0; + + if (incompat == 0) { + /* print the value only */ + return "0x0"; + } else { + /* print the value and the left square bracket */ + ret = util_snprintf(str_buff, STR_MAX, "0x%x [", incompat); + if (ret < 0) { + ERR("snprintf for incompat features: %d", ret); + return ""; + } + + /* print names of known options */ + int count = 0; + int curr = ret; + features_t found; + const char *feat; + + while (((feat = util_feature2str(features, &found))) != NULL) { + util_feature_disable(&features, found); + ret = out_concat(str_buff, &curr, &count, feat); + if (ret < 0) + return ""; + } + + /* check if any unknown flags are set */ + if (!util_feature_is_zero(features)) { + if (out_concat(str_buff, &curr, &count, + "?UNKNOWN_FLAG?")) + return ""; + } + + /* print the right square bracket */ + if (out_concat(str_buff, &curr, NULL, "]")) + return ""; + } + return str_buff; +} diff --git a/src/pmdk/src/tools/pmempool/output.h b/src/pmdk/src/tools/pmempool/output.h new file mode 100644 index 000000000..4b6460fa5 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/output.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * output.h -- declarations of output printing related functions + */ + +#include +#include +#include + +void out_set_vlevel(int vlevel); +void out_set_stream(FILE *stream); +void out_set_prefix(const char *prefix); +void out_set_col_width(unsigned col_width); +void outv_err(const char *fmt, ...) FORMAT_PRINTF(1, 2); +void out_err(const char *file, int line, const char *func, + const char *fmt, ...) FORMAT_PRINTF(4, 5); +void outv_err_vargs(const char *fmt, va_list ap); +void outv_indent(int vlevel, int i); +void outv(int vlevel, const char *fmt, ...) FORMAT_PRINTF(2, 3); +void outv_nl(int vlevel); +int outv_check(int vlevel); +void outv_title(int vlevel, const char *fmt, ...) FORMAT_PRINTF(2, 3); +void outv_field(int vlevel, const char *field, const char *fmt, + ...) 
FORMAT_PRINTF(3, 4); +void outv_hexdump(int vlevel, const void *addr, size_t len, size_t offset, + int sep); +const char *out_get_uuid_str(uuid_t uuid); +const char *out_get_time_str(time_t time); +const char *out_get_size_str(uint64_t size, int human); +const char *out_get_percentage(double percentage); +const char *out_get_checksum(void *addr, size_t len, uint64_t *csump, + uint64_t skip_off); +const char *out_get_btt_map_entry(uint32_t map); +const char *out_get_pool_type_str(pmem_pool_type_t type); +const char *out_get_pool_signature(pmem_pool_type_t type); +const char *out_get_tx_state_str(uint64_t state); +const char *out_get_chunk_type_str(enum chunk_type type); +const char *out_get_chunk_flags(uint16_t flags); +const char *out_get_zone_magic_str(uint32_t magic); +const char *out_get_pmemoid_str(PMEMoid oid, uint64_t uuid_lo); +const char *out_get_arch_machine_class_str(uint8_t machine_class); +const char *out_get_arch_data_str(uint8_t data); +const char *out_get_arch_machine_str(uint16_t machine); +const char *out_get_last_shutdown_str(uint8_t dirty); +const char *out_get_alignment_desc_str(uint64_t ad, uint64_t cur_ad); +const char *out_get_incompat_features_str(uint32_t incompat); diff --git a/src/pmdk/src/tools/pmempool/pmempool.c b/src/pmdk/src/tools/pmempool/pmempool.c new file mode 100644 index 000000000..077294f2b --- /dev/null +++ b/src/pmdk/src/tools/pmempool/pmempool.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2020, Intel Corporation */ + +/* + * pmempool.c -- pmempool main source file + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/stat.h> +#include "common.h" +#include "output.h" +#include "info.h" +#include "create.h" +#include "dump.h" +#include "check.h" +#include "rm.h" +#include "convert.h" +#include "synchronize.h" +#include "transform.h" +#include "feature.h" +#include "set.h" +#include "pmemcommon.h" + +#ifndef _WIN32 +#include "rpmem_common.h" +#include "rpmem_util.h" +#endif + +#define APPNAME "pmempool" + +#define PMEMPOOL_TOOL_LOG_PREFIX "pmempool" +#define PMEMPOOL_TOOL_LOG_LEVEL_VAR "PMEMPOOL_TOOL_LOG_LEVEL" +#define PMEMPOOL_TOOL_LOG_FILE_VAR "PMEMPOOL_TOOL_LOG_FILE" + +/* + * command -- struct for pmempool commands definition + */ +struct command { + const char *name; + const char *brief; + int (*func)(const char *, int, char *[]); + void (*help)(const char *); +}; + +static const struct command *get_command(const char *cmd_str); +static void print_help(const char *appname); + +/* + * long_options -- pmempool command line arguments + */ +static const struct option long_options[] = { + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0 }, +}; + +/* + * help_help -- prints help message for help command + */ +static void +help_help(const char *appname) +{ + printf("Usage: %s help <command>\n", appname); +} + +/* + * help_func -- prints help message for specified command + */ +static int +help_func(const char *appname, int argc, char *argv[]) +{ + if (argc > 1) { + char *cmd_str = argv[1]; + const struct command *cmdp = get_command(cmd_str); + + if (cmdp && cmdp->help) { + cmdp->help(appname); + return 0; + } else { + outv_err("No help text for '%s' command\n", cmd_str); + return -1; + } + } else { + print_help(appname); + return -1; + } +} + +/* + * commands -- definition of all pmempool commands + */ +static const struct command commands[] = { + { + .name = "info", + .brief = "print information and statistics about a pool", + .func = pmempool_info_func, + .help =
pmempool_info_help, + }, + { + .name = "create", + .brief = "create a pool", + .func = pmempool_create_func, + .help = pmempool_create_help, + }, + { + .name = "dump", + .brief = "dump user data from a pool", + .func = pmempool_dump_func, + .help = pmempool_dump_help, + }, + { + .name = "check", + .brief = "check consistency of a pool", + .func = pmempool_check_func, + .help = pmempool_check_help, + }, + { + .name = "rm", + .brief = "remove pool or poolset", + .func = pmempool_rm_func, + .help = pmempool_rm_help, + }, + { + .name = "convert", + .brief = "perform pool layout conversion", + .func = pmempool_convert_func, + .help = pmempool_convert_help, + }, + { + .name = "sync", + .brief = "synchronize data between replicas", + .func = pmempool_sync_func, + .help = pmempool_sync_help, + }, + { + .name = "transform", + .brief = "modify internal structure of a poolset", + .func = pmempool_transform_func, + .help = pmempool_transform_help, + }, + { + .name = "feature", + .brief = "toggle / query pool features", + .func = pmempool_feature_func, + .help = pmempool_feature_help, + }, + { + .name = "help", + .brief = "print help text about a command", + .func = help_func, + .help = help_help, + }, +}; + +/* + * number of pmempool commands + */ +#define COMMANDS_NUMBER (sizeof(commands) / sizeof(commands[0])) + +/* + * print_version -- prints pmempool version message + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * print_usage -- prints pmempool usage message + */ +static void +print_usage(const char *appname) +{ + printf("usage: %s [--version] [--help] <command> [<args>]\n", appname); +} + +/* + * print_help -- prints pmempool help message + */ +static void +print_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf("\n"); + printf("Options:\n"); + printf(" -V, --version display version\n"); + printf(" -h, --help display this help and exit\n"); + printf("\n"); + printf("The available commands are:\n"); + unsigned i; + for (i = 0; i < COMMANDS_NUMBER; i++) { + const char *format = (strlen(commands[i].name) / 8) + ?
"%s\t- %s\n" : "%s\t\t- %s\n"; + printf(format, commands[i].name, commands[i].brief); + } + printf("\n"); + printf("For complete documentation see %s(1) manual page.\n", appname); +} + +/* + * get_command -- returns command for specified command name + */ +static const struct command * +get_command(const char *cmd_str) +{ + unsigned i; + for (i = 0; i < COMMANDS_NUMBER; i++) { + if (strcmp(cmd_str, commands[i].name) == 0) + return &commands[i]; + } + + return NULL; +} + +int +main(int argc, char *argv[]) +{ + int opt; + int option_index; + int ret = 0; +#ifdef _WIN32 + util_suppress_errmsg(); + wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &argc); + for (int i = 0; i < argc; i++) { + argv[i] = util_toUTF8(wargv[i]); + if (argv[i] == NULL) { + for (i--; i >= 0; i--) + free(argv[i]); + outv_err("Error during arguments conversion\n"); + return 1; + } + } +#endif + + common_init(PMEMPOOL_TOOL_LOG_PREFIX, + PMEMPOOL_TOOL_LOG_LEVEL_VAR, + PMEMPOOL_TOOL_LOG_FILE_VAR, + 0 /* major version */, + 0 /* minor version */); + +#ifndef _WIN32 + util_remote_init(); + rpmem_util_cmds_init(); +#endif + + if (argc < 2) { + print_usage(APPNAME); + goto end; + } + + while ((opt = getopt_long(2, argv, "Vh", + long_options, &option_index)) != -1) { + switch (opt) { + case 'V': + print_version(APPNAME); + goto end; + case 'h': + print_help(APPNAME); + goto end; + default: + print_usage(APPNAME); + ret = 1; + goto end; + } + } + + char *cmd_str = argv[optind]; + + const struct command *cmdp = get_command(cmd_str); + + if (cmdp) { + ret = cmdp->func(APPNAME, argc - 1, argv + 1); + } else { + outv_err("'%s' -- unknown command\n", cmd_str); + ret = 1; + } + +end: + +#ifndef _WIN32 + util_remote_fini(); + rpmem_util_cmds_fini(); +#endif + + common_fini(); + +#ifdef _WIN32 + for (int i = argc; i > 0; i--) + free(argv[i - 1]); +#endif + if (ret) + return 1; + + return 0; +} diff --git a/src/pmdk/src/tools/pmempool/pmempool.rc b/src/pmdk/src/tools/pmempool/pmempool.rc new file mode 100644 index 000000000..19b8de30a Binary files /dev/null and b/src/pmdk/src/tools/pmempool/pmempool.rc differ diff --git a/src/pmdk/src/tools/pmempool/pmempool.vcxproj b/src/pmdk/src/tools/pmempool/pmempool.vcxproj new file mode 100644 index 000000000..15eaf831f --- /dev/null +++ b/src/pmdk/src/tools/pmempool/pmempool.vcxproj @@ -0,0 +1,178 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {492baa3d-0d5d-478e-9765-500463ae69aa} + + + {f7c6c6b6-4142-4c82-8699-4a9d8183181b} + + + {0b1818eb-bdc8-4865-964f-db8bf05cfd86} + + + {1baa1617-93ae-4196-8a1a-bd492fb18aef} + + + {cf9a0883-6334-44c7-ac29-349468c78e27} + + + {9e9e3d25-2139-4a5d-9200-18148ddead45} + + + {9186eac4-2f34-4f17-b940-6585d7869bcd} + + + + + + + {7DC3B3DD-73ED-4602-9AF3-8D7053620DEA} + Win32Proj + pmempool + 10.0.17134.0 + + + + Application + true + v140 + NotSet + + + Application + false + v140 + false + NotSet + + + + + + + + + + + + + + + + + $(SolutionDir)\core;$(SolutionDir)\common;$(SolutionDir)\test\unittest;$(SolutionDir)\windows\include;$(SolutionDir)\include;$(SolutionDir)\windows\getopt;$(SolutionDir)\libpmemlog;$(SolutionDir)\libpmemblk;$(SolutionDir)\libpmemobj;$(SolutionDir)\libpmem2;$(IncludePath) + $(SolutionDir)$(Platform)\$(Configuration)\libs\ + + + 
$(SolutionDir)\core;$(SolutionDir)\common;$(SolutionDir)\test\unittest;$(SolutionDir)\windows\include;$(SolutionDir)\include;$(SolutionDir)\windows\getopt;$(SolutionDir)\libpmemlog;$(SolutionDir)\libpmemblk;$(SolutionDir)\libpmemobj;$(SolutionDir)\libpmem2;$(IncludePath) + $(SolutionDir)$(Platform)\$(Configuration)\libs\ + + + + NotUsing + Level3 + PMDK_UTF8_API;SDS_ENABLED; NTDDI_VERSION=NTDDI_WIN10_RS1;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + platform.h + CompileAsC + false + true + + + Console + Shlwapi.lib;%(AdditionalDependencies) + true + Debug + + + + + + + _DEBUG + + + + + Level3 + NotUsing + true + PMDK_UTF8_API;SDS_ENABLED; NTDDI_VERSION=NTDDI_WIN10_RS1;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + CompileAsC + platform.h + false + true + + + Console + Shlwapi.lib;%(AdditionalDependencies) + true + DebugFastLink + + + + + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/tools/pmempool/pmempool.vcxproj.filters b/src/pmdk/src/tools/pmempool/pmempool.vcxproj.filters new file mode 100644 index 000000000..f4f60a932 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/pmempool.vcxproj.filters @@ -0,0 +1,157 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {c91552dc-7579-447b-ad7f-7b2307c52502} + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + libs + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + \ No newline at end of file diff --git a/src/pmdk/src/tools/pmempool/rm.c b/src/pmdk/src/tools/pmempool/rm.c new file mode 100644 index 000000000..0f21403e6 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/rm.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2014-2018, Intel Corporation */ + +/* + * rm.c -- pmempool rm command main source file + */ + +#include +#include +#include +#include +#include +#include + +#include "os.h" +#include "out.h" +#include "common.h" +#include "output.h" +#include "file.h" +#include "rm.h" +#include "set.h" + +#ifdef USE_RPMEM +#include "librpmem.h" +#endif + +enum ask_type { + ASK_SOMETIMES, /* ask before removing write-protected files */ + ASK_ALWAYS, /* always ask */ + ASK_NEVER, /* never ask */ +}; + +/* verbosity level */ +static int vlevel; +/* force remove and ignore errors */ +static int force; +/* poolset files options */ +#define RM_POOLSET_NONE (0) +#define RM_POOLSET_LOCAL (1 << 0) +#define RM_POOLSET_REMOTE (1 << 1) +#define RM_POOLSET_ALL (RM_POOLSET_LOCAL | RM_POOLSET_REMOTE) +static int rm_poolset_mode; +/* mode of interaction */ +static enum ask_type ask_mode; +/* indicates whether librpmem is available */ +static int rpmem_avail; + +/* help message */ +static const char * const help_str = +"Remove pool file or all files from poolset\n" +"\n" +"Available options:\n" +" -h, --help Print this help message.\n" +" -v, --verbose Be verbose.\n" +" -s, --only-pools 
+"  -a, --all            Remove all poolset files - local and remote.\n"
+"  -l, --local          Remove local poolset files.\n"
+"  -r, --remote         Remove remote poolset files.\n"
+"  -f, --force          Ignore nonexistent files.\n"
+"  -i, --interactive    Prompt before every single removal.\n"
+"\n"
+"For complete documentation see %s-rm(1) manual page.\n";
+
+/* short options string */
+static const char *optstr = "hvsfialr";
+/* long options */
+static const struct option long_options[] = {
+	{"help",	no_argument,	NULL,	'h'},
+	{"verbose",	no_argument,	NULL,	'v'},
+	{"only-pools",	no_argument,	NULL,	's'},
+	{"all",		no_argument,	NULL,	'a'},
+	{"local",	no_argument,	NULL,	'l'},
+	{"remote",	no_argument,	NULL,	'r'},
+	{"force",	no_argument,	NULL,	'f'},
+	{"interactive",	no_argument,	NULL,	'i'},
+	{NULL,		0,		NULL,	 0 },
+};
+
+/*
+ * print_usage -- print usage message
+ */
+static void
+print_usage(const char *appname)
+{
+	printf("Usage: %s rm [] \n", appname);
+}
+
+/*
+ * pmempool_rm_help -- print help message
+ */
+void
+pmempool_rm_help(const char *appname)
+{
+	print_usage(appname);
+	printf(help_str, appname);
+}
+
+/*
+ * rm_file -- remove single file
+ */
+static int
+rm_file(const char *file)
+{
+	int write_protected = os_access(file, W_OK) != 0;
+	char cask = 'y';
+	switch (ask_mode) {
+	case ASK_ALWAYS:
+		cask = '?';
+		break;
+	case ASK_NEVER:
+		cask = 'y';
+		break;
+	case ASK_SOMETIMES:
+		cask = write_protected ? '?' : 'y';
+		break;
+	default:
+		outv_err("unknown state");
+		return 1;
+	}
+
+	const char *pre_msg = write_protected ? "write-protected " : "";
+	char ans = ask_Yn(cask, "remove %sfile '%s' ?", pre_msg, file);
+	if (ans == 'y') {
+		if (util_unlink(file)) {
+			outv_err("cannot remove file '%s'", file);
+			return 1;
+		}
+
+		outv(1, "removed '%s'\n", file);
+	}
+
+	return 0;
+}
+
+/*
+ * remove_remote -- (internal) remove remote pool
+ */
+static int
+remove_remote(const char *target, const char *pool_set)
+{
+#ifdef USE_RPMEM
+	char cask = 'y';
+	switch (ask_mode) {
+	case ASK_ALWAYS:
+		cask = '?';
+		break;
+	case ASK_NEVER:
+	case ASK_SOMETIMES:
+		cask = 'y';
+		break;
+	default:
+		outv_err("unknown state");
+		return 1;
+	}
+
+	char ans = ask_Yn(cask, "remove remote pool '%s' on '%s'?",
+			pool_set, target);
+	if (ans == INV_ANS)
+		outv(1, "invalid answer\n");
+
+	if (ans != 'y')
+		return 0;
+
+	if (!rpmem_avail) {
+		if (force) {
+			outv(1, "cannot remove '%s' on '%s' -- "
+				"librpmem not available", pool_set, target);
+			return 0;
+		}
+
+		outv_err("!cannot remove '%s' on '%s' -- "
+			"librpmem not available", pool_set, target);
+		return 1;
+	}
+
+	int flags = 0;
+	if (rm_poolset_mode & RM_POOLSET_REMOTE)
+		flags |= RPMEM_REMOVE_POOL_SET;
+	if (force)
+		flags |= RPMEM_REMOVE_FORCE;
+
+	int ret = Rpmem_remove(target, pool_set, flags);
+	if (ret) {
+		if (force) {
+			ret = 0;
+			outv(1, "cannot remove '%s' on '%s'",
+				pool_set, target);
+		} else {
+			/*
+			 * Callback cannot return < 0 value because it
+			 * is interpreted as error in parsing poolset file.
+ */ + ret = 1; + outv_err("!cannot remove '%s' on '%s'", + pool_set, target); + } + } else { + outv(1, "removed '%s' on '%s'\n", + pool_set, target); + } + + return ret; +#else + outv_err("remote replication not supported"); + return 1; +#endif +} + +/* + * rm_poolset_cb -- (internal) callback for removing replicas + */ +static int +rm_poolset_cb(struct part_file *pf, void *arg) +{ + int *error = (int *)arg; + int ret; + if (pf->is_remote) { + ret = remove_remote(pf->remote->node_addr, + pf->remote->pool_desc); + } else { + const char *part_file = pf->part->path; + + outv(2, "part file : %s\n", part_file); + + int exists = util_file_exists(part_file); + if (exists < 0) + ret = 1; + else if (!exists) { + /* + * Ignore not accessible file if force + * flag is set. + */ + if (force) + return 0; + + ret = 1; + outv_err("!cannot remove file '%s'", part_file); + } else { + ret = rm_file(part_file); + } + } + + if (ret) + *error = ret; + + return 0; +} + +/* + * rm_poolset -- remove files parsed from poolset file + */ +static int +rm_poolset(const char *file) +{ + int error = 0; + int ret = util_poolset_foreach_part(file, rm_poolset_cb, &error); + if (ret == -1) { + outv_err("parsing poolset failed: %s\n", + out_get_errormsg()); + return ret; + } + + if (error && !force) { + outv_err("!removing '%s' failed\n", file); + return error; + } + + return 0; +} + +/* + * pmempool_rm_func -- main function for rm command + */ +int +pmempool_rm_func(const char *appname, int argc, char *argv[]) +{ + /* by default do not remove any poolset files */ + rm_poolset_mode = RM_POOLSET_NONE; + + int opt; + while ((opt = getopt_long(argc, argv, optstr, + long_options, NULL)) != -1) { + switch (opt) { + case 'h': + pmempool_rm_help(appname); + return 0; + case 'v': + vlevel++; + break; + case 's': + rm_poolset_mode = RM_POOLSET_NONE; + break; + case 'a': + rm_poolset_mode |= RM_POOLSET_ALL; + break; + case 'l': + rm_poolset_mode |= RM_POOLSET_LOCAL; + break; + case 'r': + rm_poolset_mode |= RM_POOLSET_REMOTE; + break; + case 'f': + force = 1; + ask_mode = ASK_NEVER; + break; + case 'i': + ask_mode = ASK_ALWAYS; + break; + default: + print_usage(appname); + return 1; + } + } + + out_set_vlevel(vlevel); + + if (optind == argc) { + print_usage(appname); + return 1; + } + +#ifdef USE_RPMEM + /* + * Try to load librpmem, if loading failed - + * assume it is not available. 
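The loop above decides, per argument, whether it is looking at a plain pool file or a poolset descriptor before removing anything. A rough Python sketch of the same decision flow (helper names are hypothetical; it relies only on the fact that a poolset descriptor begins with the PMEMPOOLSET signature):

    import os

    POOLSET_HDR = b"PMEMPOOLSET"   # first line of every poolset descriptor

    def is_poolset_file(path):
        # rough equivalent of util_is_poolset_file()
        with open(path, "rb") as f:
            return f.read(len(POOLSET_HDR)) == POOLSET_HDR

    def rm_files(files, force=False, rm_local=False):
        ret = 0
        for path in files:
            if not os.access(path, os.F_OK | os.R_OK):
                if force:
                    continue                  # -f: ignore missing files
                print("cannot remove '%s'" % path)
                ret = 1
                continue
            if is_poolset_file(path):
                # a real implementation unlinks every part listed inside;
                # the descriptor itself goes only in the -l/-a modes
                if rm_local:
                    os.unlink(path)
            else:
                os.unlink(path)
        return ret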
+ */ + util_remote_init(); + rpmem_avail = !util_remote_load(); +#endif + + int lret = 0; + for (int i = optind; i < argc; i++) { + char *file = argv[i]; + /* check if file exists and we can read it */ + int exists = os_access(file, F_OK | R_OK) == 0; + if (!exists) { + /* ignore not accessible file if force flag is set */ + if (force) + continue; + + outv_err("!cannot remove '%s'", file); + lret = 1; + continue; + } + + int is_poolset = util_is_poolset_file(file); + if (is_poolset < 0) { + outv(1, "%s: cannot determine type of file", file); + if (force) + continue; + } + + if (is_poolset) + outv(2, "poolset file: %s\n", file); + else + outv(2, "pool file : %s\n", file); + + int ret; + if (is_poolset) { + ret = rm_poolset(file); + if (!ret && (rm_poolset_mode & RM_POOLSET_LOCAL)) + ret = rm_file(file); + } else { + ret = rm_file(file); + } + + if (ret) + lret = ret; + } + + return lret; +} diff --git a/src/pmdk/src/tools/pmempool/rm.h b/src/pmdk/src/tools/pmempool/rm.h new file mode 100644 index 000000000..519157416 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/rm.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2014-2020, Intel Corporation */ + +/* + * rm.h -- pmempool rm command header file + */ + +void pmempool_rm_help(const char *appname); +int pmempool_rm_func(const char *appname, int argc, char *argv[]); diff --git a/src/pmdk/src/tools/pmempool/synchronize.c b/src/pmdk/src/tools/pmempool/synchronize.c new file mode 100644 index 000000000..7015336a3 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/synchronize.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * synchronize.c -- pmempool sync command source file + */ + +#include "synchronize.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" +#include "output.h" +#include "libpmempool.h" + +/* + * pmempool_sync_context -- context and arguments for sync command + */ +struct pmempool_sync_context { + unsigned flags; /* flags which modify the command execution */ + char *poolset_file; /* a path to a poolset file */ +}; + +/* + * pmempool_sync_default -- default arguments for sync command + */ +static const struct pmempool_sync_context pmempool_sync_default = { + .flags = 0, + .poolset_file = NULL, +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Check consistency of a pool\n" +"\n" +"Common options:\n" +" -b, --bad-blocks fix bad blocks - it requires creating or reading special recovery files\n" +" -d, --dry-run do not apply changes, only check for viability of synchronization\n" +" -v, --verbose increase verbosity level\n" +" -h, --help display this help and exit\n" +"\n" +"For complete documentation see %s-sync(1) manual page.\n" +; + +/* + * long_options -- command line options + */ +static const struct option long_options[] = { + {"bad-blocks", no_argument, NULL, 'b'}, + {"dry-run", no_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, + {"verbose", no_argument, NULL, 'v'}, + {NULL, 0, NULL, 0 }, +}; + +/* + * print_usage -- (internal) print application usage short description + */ +static void +print_usage(const char *appname) +{ + printf("usage: %s sync [] \n", appname); +} + +/* + * print_version -- (internal) print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_sync_help -- print help message for the sync command + */ +void 
+pmempool_sync_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * pmempool_sync_parse_args -- (internal) parse command line arguments + */ +static int +pmempool_sync_parse_args(struct pmempool_sync_context *ctx, const char *appname, + int argc, char *argv[]) +{ + int opt; + while ((opt = getopt_long(argc, argv, "bdhv", + long_options, NULL)) != -1) { + switch (opt) { + case 'd': + ctx->flags |= PMEMPOOL_SYNC_DRY_RUN; + break; + case 'b': + ctx->flags |= PMEMPOOL_SYNC_FIX_BAD_BLOCKS; + break; + case 'h': + pmempool_sync_help(appname); + exit(EXIT_SUCCESS); + case 'v': + out_set_vlevel(1); + break; + default: + print_usage(appname); + exit(EXIT_FAILURE); + } + } + + if (optind < argc) { + ctx->poolset_file = argv[optind]; + } else { + print_usage(appname); + exit(EXIT_FAILURE); + } + + return 0; +} + +/* + * pmempool_sync_func -- main function for the sync command + */ +int +pmempool_sync_func(const char *appname, int argc, char *argv[]) +{ + int ret = 0; + struct pmempool_sync_context ctx = pmempool_sync_default; + + /* parse command line arguments */ + if ((ret = pmempool_sync_parse_args(&ctx, appname, argc, argv))) + return ret; + + ret = pmempool_sync(ctx.poolset_file, ctx.flags); + + if (ret) { + outv_err("failed to synchronize: %s\n", pmempool_errormsg()); + if (errno) + outv_err("%s\n", strerror(errno)); + return -1; + } else { + outv(1, "%s: synchronized\n", ctx.poolset_file); + return 0; + } +} diff --git a/src/pmdk/src/tools/pmempool/synchronize.h b/src/pmdk/src/tools/pmempool/synchronize.h new file mode 100644 index 000000000..32c044de2 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/synchronize.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * synchronize.h -- pmempool sync command header file + */ + +int pmempool_sync_func(const char *appname, int argc, char *argv[]); +void pmempool_sync_help(const char *appname); diff --git a/src/pmdk/src/tools/pmempool/transform.c b/src/pmdk/src/tools/pmempool/transform.c new file mode 100644 index 000000000..a749880e3 --- /dev/null +++ b/src/pmdk/src/tools/pmempool/transform.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2018, Intel Corporation */ + +/* + * transform.c -- pmempool transform command source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" +#include "output.h" +#include "transform.h" +#include "libpmempool.h" + +/* + * pmempool_transform_context -- context and arguments for transform command + */ +struct pmempool_transform_context { + unsigned flags; /* flags which modify the command execution */ + char *poolset_file_src; /* a path to a source poolset file */ + char *poolset_file_dst; /* a path to a target poolset file */ +}; + +/* + * pmempool_transform_default -- default arguments for transform command + */ +static const struct pmempool_transform_context pmempool_transform_default = { + .flags = 0, + .poolset_file_src = NULL, + .poolset_file_dst = NULL, +}; + +/* + * help_str -- string for help message + */ +static const char * const help_str = +"Modify internal structure of a poolset\n" +"\n" +"Common options:\n" +" -d, --dry-run do not apply changes, only check for viability of" +" transformation\n" +" -v, --verbose increase verbosity level\n" +" -h, --help display this help and exit\n" +"\n" +"For complete documentation see %s-transform(1) manual page.\n" +; + +/* + * long_options 
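Since pmempool_sync() comes from libpmempool, the same operation is reachable from any language that can load the shared library. A hedged ctypes sketch; the flag value below is an assumption, the authoritative constants live in libpmempool.h:

    import ctypes

    PMEMPOOL_SYNC_DRY_RUN = 1 << 1      # assumed value; verify in libpmempool.h

    lib = ctypes.CDLL("libpmempool.so.1")
    lib.pmempool_sync.argtypes = [ctypes.c_char_p, ctypes.c_uint]
    lib.pmempool_sync.restype = ctypes.c_int
    lib.pmempool_errormsg.restype = ctypes.c_char_p

    def sync(poolset, dry_run=False):
        flags = PMEMPOOL_SYNC_DRY_RUN if dry_run else 0
        if lib.pmempool_sync(poolset.encode(), flags) != 0:
            raise RuntimeError(lib.pmempool_errormsg().decode())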
-- command line options + */ +static const struct option long_options[] = { + {"dry-run", no_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, + {"verbose", no_argument, NULL, 'v'}, + {NULL, 0, NULL, 0 }, +}; + +/* + * print_usage -- print application usage short description + */ +static void +print_usage(const char *appname) +{ + printf("usage: %s transform [] " + " \n", appname); +} + +/* + * print_version -- print version string + */ +static void +print_version(const char *appname) +{ + printf("%s %s\n", appname, SRCVERSION); +} + +/* + * pmempool_transform_help -- print help message for the transform command + */ +void +pmempool_transform_help(const char *appname) +{ + print_usage(appname); + print_version(appname); + printf(help_str, appname); +} + +/* + * pmempool_check_parse_args -- parse command line arguments + */ +static int +pmempool_transform_parse_args(struct pmempool_transform_context *ctx, + const char *appname, int argc, char *argv[]) +{ + int opt; + while ((opt = getopt_long(argc, argv, "dhv", + long_options, NULL)) != -1) { + switch (opt) { + case 'd': + ctx->flags = PMEMPOOL_TRANSFORM_DRY_RUN; + break; + case 'h': + pmempool_transform_help(appname); + exit(EXIT_SUCCESS); + case 'v': + out_set_vlevel(1); + break; + default: + print_usage(appname); + exit(EXIT_FAILURE); + } + } + + if (optind + 1 < argc) { + ctx->poolset_file_src = argv[optind]; + ctx->poolset_file_dst = argv[optind + 1]; + } else { + print_usage(appname); + exit(EXIT_FAILURE); + } + + return 0; +} + +/* + * pmempool_transform_func -- main function for the transform command + */ +int +pmempool_transform_func(const char *appname, int argc, char *argv[]) +{ + int ret; + struct pmempool_transform_context ctx = pmempool_transform_default; + + /* parse command line arguments */ + if ((ret = pmempool_transform_parse_args(&ctx, appname, argc, argv))) + return ret; + + ret = pmempool_transform(ctx.poolset_file_src, ctx.poolset_file_dst, + ctx.flags); + + if (ret) { + if (errno) + outv_err("%s\n", strerror(errno)); + outv_err("failed to transform %s -> %s: %s\n", + ctx.poolset_file_src, ctx.poolset_file_dst, + pmempool_errormsg()); + return -1; + } else { + outv(1, "%s -> %s: transformed\n", ctx.poolset_file_src, + ctx.poolset_file_dst); + return 0; + } +} diff --git a/src/pmdk/src/tools/pmempool/transform.h b/src/pmdk/src/tools/pmempool/transform.h new file mode 100644 index 000000000..6f0192fbd --- /dev/null +++ b/src/pmdk/src/tools/pmempool/transform.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * transform.h -- pmempool transform command header file + */ + +int pmempool_transform_func(const char *appname, int argc, char *argv[]); +void pmempool_transform_help(const char *appname); diff --git a/src/pmdk/src/tools/pmreorder/.gitignore b/src/pmdk/src/tools/pmreorder/.gitignore new file mode 100644 index 000000000..15bf9f0c9 --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +*.pyc +pmreorder diff --git a/src/pmdk/src/tools/pmreorder/Makefile b/src/pmdk/src/tools/pmreorder/Makefile new file mode 100644 index 000000000..f6bcc4d7f --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation +# +# Makefile -- Makefile for pmreorder +# + +include ../Makefile.inc + +FLAKE8 := $(shell flake8 --version 2>/dev/null) + +cstyle: +ifdef FLAKE8 + flake8 . +else + @echo "Flake8 not found. Python files check skipped." 
+endif diff --git a/src/pmdk/src/tools/pmreorder/binaryoutputhandler.py b/src/pmdk/src/tools/pmreorder/binaryoutputhandler.py new file mode 100644 index 000000000..868ccb4ee --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/binaryoutputhandler.py @@ -0,0 +1,218 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +import utils +from reorderexceptions import InconsistentFileException + + +class BinaryOutputHandler: + """ + Handle :class:`BinaryFile` objects. + + Creates and aggregates :class:`BinaryFile` objects for ease of use. + Implements methods for batch handling of aggregated files. + + :ivar _files: A list of registered files, most recent last. + :type _files: list + """ + + def __init__(self, checker): + """ + Binary handler constructor. + + :param checker: consistency checker object + :type checker: ConsistencyCheckerBase + """ + self._files = [] + self._checker = checker + + def add_file(self, file, map_base, size): + """ + Create and append a mapped file to :attr:`_files`. + + :param file: Full path of the mapped file to be added. + :type file: str + :param map_base: Base address of the mapped file. + :type map_base: int + :param size: Size of the file. + :type size: int + :return: None + """ + self._files.append(BinaryFile(file, map_base, size, self._checker)) + + def remove_file(self, file): + """Remove file from :attr:`_files`. + + :param file: File to be removed. + :type file: str + :return: None + """ + for bf in self._files: + if bf.file_name is file: + self._files.remove(bf) + + def do_store(self, store_op): + """ + Perform a store to the given file. + + The file is chosen based on the address and size + of the store. + + :param store_op: The store operation to be performed. + :type store_op: Store + :return: None + :raises: Generic exception - to be precised later. + """ + store_ok = False + for bf in self._files: + if utils.range_cmp(store_op, bf) == 0: + bf.do_store(store_op) + store_ok = True + if not store_ok: + raise OSError( + "No suitable file found for store {}" + .format(store_op)) + + def do_revert(self, store_op): + """ + Reverts a store made to a file. + + Performing a revert on a store that has not been made + previously yields undefined behavior. + + :param store_op: The store to be reverted. + :type store_op: Store + :return: None + :raises: Generic exception - to be precised later. + """ + revert_ok = False + for bf in self._files: + if utils.range_cmp(store_op, bf) == 0: + bf.do_revert(store_op) + revert_ok = True + if not revert_ok: + raise OSError( + "No suitable file found for store {}" + .format(store_op)) + + def check_consistency(self): + """ + Checks consistency of each registered file. + + :return: None + :raises: Generic exception - to be precised later. + """ + for bf in self._files: + if not bf.check_consistency(): + raise InconsistentFileException( + "File {} inconsistent".format(bf)) + + +class BinaryFile(utils.Rangeable): + """Binary file handler. + + It is a handler for binary file operations. Internally it + uses mmap to write to and read from the file. + + :ivar _file_name: Full path of the mapped file. + :type _file_name: str + :ivar _map_base: Base address of the mapped file. + :type _map_base: int + :ivar _map_max: Max address of the mapped file. + :type _map_max: int + :ivar _file_map: Memory mapped from the file. 
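Taken together, add_file/do_store/do_revert give the replay loop everything it needs: write one candidate sequence of stores, run the checker, then roll the image back. A minimal sketch of that cycle, assuming a checker, a reorder engine and a list of collected Store objects are already at hand:

    handler = BinaryOutputHandler(checker)
    handler.add_file("/mnt/pmem/pool.obj", map_base=0x10000000, size=8 << 20)

    for seq in engine.generate_sequence(collected_stores):
        for store in seq:
            handler.do_store(store)       # writes new_value, records old_value
        handler.check_consistency()       # raises InconsistentFileException
        for store in reversed(seq):
            handler.do_revert(store)      # restores old_value for the next run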
+ :type _file_map: mmap.mmap + :ivar _checker: consistency checker object + :type _checker: ConsistencyCheckerBase + """ + + def __init__(self, file_name, map_base, size, checker): + """ + Initializes the binary file handler. + + :param file_name: Full path of the mapped file to be added. + :type file_name: str + :param map_base: Base address of the mapped file. + :type map_base: int + :param size: Size of the file. + :type size: int + :param checker: consistency checker object + :type checker: ConsistencyCheckerBase + :return: None + """ + self._file_name = file_name + self._map_base = map_base + self._map_max = map_base + size + # TODO consider mmaping only necessary parts on demand + self._file_map = utils.memory_map(file_name) + self._checker = checker + + def __str__(self): + return self._file_name + + def do_store(self, store_op): + """ + Perform the store on the file. + + The store records the old value for reverting. + + :param store_op: The store to be performed. + :type store_op: Store + :return: None + """ + base_off = store_op.get_base_address() - self._map_base + max_off = store_op.get_max_address() - self._map_base + # read and save old value + store_op.old_value = bytes(self._file_map[base_off:max_off]) + # write out the new value + self._file_map[base_off:max_off] = store_op.new_value + self._file_map.flush(base_off & ~4095, 4096) + + def do_revert(self, store_op): + """ + Reverts the store. + + Write back the old value recorded while doing the store. + Reverting a store which has not been made previously has + undefined behavior. + + :param store_op: The store to be reverted. + :type store_op: Store + :return: None + """ + base_off = store_op.get_base_address() - self._map_base + max_off = store_op.get_max_address() - self._map_base + # write out the old value + self._file_map[base_off:max_off] = store_op.old_value + self._file_map.flush(base_off & ~4095, 4096) + + def check_consistency(self): + """ + Check consistency of the file. + + :return: True if consistent, False otherwise. + :rtype: bool + """ + return self._checker.check_consistency(self._file_name) == 0 + + def get_base_address(self): + """ + Returns the base address of the file. + + Overrides from :class:`utils.Rangeable`. + + :return: The base address of the mapping passed to the constructor. + :rtype: int + """ + return self._map_base + + def get_max_address(self): + """ + Get max address of the file mapping. + + Overrides from :class:`utils.Rangeable`. + + :return: The max address of the mapping. + :rtype: int + """ + return self._map_max diff --git a/src/pmdk/src/tools/pmreorder/consistencycheckwrap.py b/src/pmdk/src/tools/pmreorder/consistencycheckwrap.py new file mode 100644 index 000000000..91fa46a93 --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/consistencycheckwrap.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +from sys import exit +from os import path +from ctypes import cdll, c_char_p, c_int +import os + +checkers = ["prog", "lib"] + + +class ConsistencyCheckerBase: + """ + Base class for consistency checker classes. + Checker of each type should implement check_consistency method. + """ + def check_consistency(self, filename): + pass + + +class LibChecker(ConsistencyCheckerBase): + """ + Allows registration of a consistency checking function and verifying + the consistency of a file. + + The function has to be in a shared library. It is then used to check + consistency of an arbitrary file. 
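What LibChecker automates can be reproduced in a few lines. The sketch below loads a hypothetical libchecker.so exposing int check(const char *path), using the same 1-means-consistent convention described here:

    from ctypes import cdll, c_char_p, c_int

    check = cdll.LoadLibrary("./libchecker.so").check   # hypothetical names
    check.argtypes = [c_char_p]
    check.restype = c_int

    if check(b"/mnt/pmem/pool.obj") != 1:
        print("pool image is inconsistent")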
The function has to take a file name + as the only parameter and return an int: 0 for inconsistent, 1 for + consistent. The prototype of the function:: + + int func_name(const char* file_name) + """ + + def __init__(self, library_name, func_name): + """ + Loads the consistency checking function from the given library. + + :param library_name: The full name of the library. + :type library_name: str + :param func_name: The name of the consistency + checking function within the library. + :type func_name: str + :return: None + """ + self._lib_func = getattr(cdll.LoadLibrary(library_name), func_name) + self._lib_func.argtypes = [c_char_p] + self._lib_func.restype = c_int + + def check_consistency(self, filename): + """ + Checks the consistency of a given file + using the previously loaded function. + + :param filename: The full name of the file to be checked. + :type filename: str + :return: 1 if file is consistent, 0 otherwise. + :rtype: int + :raises: Generic exception, when no function has been loaded. + """ + if self._lib_func is None: + raise RuntimeError("Consistency check function not loaded") + return self._lib_func(filename) + + +class ProgChecker(ConsistencyCheckerBase): + """ + Allows registration of a consistency checking program and verifying + the consistency of a file. + """ + + def __init__(self, bin_path, bin_args): + self._bin_path = bin_path + self._bin_cmd = bin_args + + def check_consistency(self, filename): + """ + Checks the consistency of a given file + using the previously loaded function. + + :param filename: The full name of the file to be checked. + :type filename: str + :return: 1 if file is consistent, 0 otherwise. + :rtype: int + :raises: Generic exception, when no function has been loaded. + """ + if self._bin_path is None or self._bin_cmd is None: + raise RuntimeError("consistency check handle not set") + return os.system(self._bin_path + " " + self._bin_cmd + " " + filename) + + +def get_checker(checker_type, checker_path_args, name): + + checker_path_args = checker_path_args.split(" ", 1) + checker_path = checker_path_args[0] + + # check for params + if len(checker_path_args) > 1: + args = checker_path_args[1] + else: + args = "" + + if not path.exists(checker_path): + print("Invalid path:" + checker_path) + exit(1) + + checker = None + if checker_type == "prog": + checker = ProgChecker(checker_path, args) + elif checker_type == "lib": + checker = LibChecker(checker_path, name) + + return checker diff --git a/src/pmdk/src/tools/pmreorder/loggingfacility.py b/src/pmdk/src/tools/pmreorder/loggingfacility.py new file mode 100644 index 000000000..c6f2fab19 --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/loggingfacility.py @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2020, Intel Corporation + +import logging + +log_levels = ["debug", "info", "warning", "error", "critical"] + + +class LoggingBase: + def debug(self, text): + pass + + def info(self, text): + pass + + def warning(self, text): + pass + + def error(self, text): + pass + + def critical(self, text): + pass + + +class DefaultFileLogger(LoggingBase): + def __init__(self, name="pmreorder", **kwargs): + logging.basicConfig(**kwargs) + self.__logger = logging.getLogger(name) + + def debug(self, text): + self.__logger.debug(text) + + def info(self, text): + self.__logger.info(text) + + def warning(self, text): + self.__logger.warning(text) + + def error(self, text): + self.__logger.error(text) + + def critical(self, text): + self.__logger.critical(text) + + +class 
DefaultPrintLogger(LoggingBase): + + def debug(self, text): + print("DEBUG:", text) + + def info(self, text): + print("INFO:", text) + + def warning(self, text): + print("WARNING:", text) + + def error(self, text): + print("ERROR:", text) + + def critical(self, text): + print("CRITICAL:", text) + + +def get_logger(log_output, log_level=None): + logger = None + # check if log_level is valid + log_level = "warning" if log_level is None else log_level + numeric_level = getattr(logging, log_level.upper()) + if not isinstance(numeric_level, int): + raise ValueError('Invalid log level: {}'.format(log_level.upper())) + + if log_output is None: + logger = DefaultPrintLogger() + else: + logger = DefaultFileLogger(filename=log_output, level=numeric_level) + return logger diff --git a/src/pmdk/src/tools/pmreorder/markerparser.py b/src/pmdk/src/tools/pmreorder/markerparser.py new file mode 100644 index 000000000..c9e41c26e --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/markerparser.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + + +import os +import json + + +class MarkerParser: + """ + Parse marker config file and command line arg provided by user + via -x parameter. + """ + def marker_file_parser(self, macros): + """ + Parse markers passed by file. + They should be in json format: + { "MARKER_NAME"="ENGINE_TYPE" } and separated by commas. + """ + markers = {} + try: + with open(macros) as config_file: + markers = json.load(config_file) + except json.decoder.JSONDecodeError: + print("Invalid config macros file format: ", macros, + "Use: {\"MARKER_NAME1\"=\"ENGINE_TYPE1\"," + "\"MARKER_NAME2\"=\"ENGINE_TYPE2\"}") + + return markers + + def marker_cli_parser(self, macros): + """ + Parse markers passed by cli. + They should be in specific format: + MARKER_NAME=ENGINE_TYPE and separated by commas. + """ + try: + markers_array = macros.split(",") + return dict(pair.split('=') for pair in markers_array) + except ValueError: + print("Invalid extended macros format: ", macros, + "Use: MARKER_NAME1=ENGINE_TYPE1,MARKER_NAME2=ENGINE_TYPE2") + + def get_markers(self, markerset): + """ + Parse markers based on their format. + """ + if markerset is not None: + if os.path.exists(markerset): + return self.marker_file_parser(markerset) + else: + return self.marker_cli_parser(markerset) diff --git a/src/pmdk/src/tools/pmreorder/memoryoperations.py b/src/pmdk/src/tools/pmreorder/memoryoperations.py new file mode 100644 index 000000000..509ab7eab --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/memoryoperations.py @@ -0,0 +1,413 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +from utils import Rangeable +from utils import range_cmp +from utils import StackTrace +from sys import byteorder + + +class BaseOperation: + """ + Base class for all memory operations. + """ + pass + + +class Fence(BaseOperation): + """ + Describes a fence operation. + + The exact type of the memory barrier is not important, + it is interpreted as an SFENCE or MFENCE. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New Fence object. + :rtype: Fence + """ + return Fence() + + +class Store(BaseOperation, Rangeable): + """ + Describes a store operation. + + :ivar address: The virtual address at which to store the new value. 
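Both marker formats accepted through -x reduce to the same dictionary, and the engine names must match the marker classes in memoryoperations. For example (marker names are made up):

    import json

    cli = "TX_BEGIN=NoReorderNoCheck,HASHMAP_INSERT=ReorderPartial"
    markers = dict(pair.split("=") for pair in cli.split(","))
    # {'TX_BEGIN': 'NoReorderNoCheck', 'HASHMAP_INSERT': 'ReorderPartial'}

    # the file form carries the same mapping as JSON:
    markers = json.loads(
        '{"TX_BEGIN": "NoReorderNoCheck", "HASHMAP_INSERT": "ReorderPartial"}')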
+ :type address: int + :ivar new_value: The new value to be written. + :type new_value: bytearray + :ivar size: The size of the store in bytes. + :type size: int + :ivar old_value: The old value read from the file. + :type old_value: bytearray + :ivar flushed: Indicates whether the store has been flushed. + :type flushed: bool + """ + def __init__(self, values): + """ + Initializes the object based on the describing string. + + :param values: Pre-formatted string describing the store. + :type values: str + :return: None + """ + params = values.split(";") + # calculate the offset given the registered file mapping + self.address = int(params[1], 16) + self.size = int(params[3], 16) + self.new_value = \ + int(params[2], 16).to_bytes(self.size, byteorder=byteorder) + if len(params) > 4: + self.trace = StackTrace(params[4:]) + else: + self.trace = StackTrace(["No trace available", ]) + self.old_value = None + self.flushed = False + + def __str__(self): + return "addr: " + hex(self.address) + " size " + \ + str(self.size) + " value " + str(self.new_value) + + def get_base_address(self): + """ + Override from :class:`utils.Rangeable`. + + :return: Virtual address of the store. + :rtype: int + """ + return self.address + + def get_max_address(self): + """ + Override from :class:`utils.Rangeable`. + + :return: Virtual address of the first byte after the store. + :rtype: int + """ + return self.address + self.size + + class Factory(): + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Pre-formatted string describing the store. + :type values: str + :return: New Store object. + :rtype: Store + """ + return Store(values) + + +class FlushBase(BaseOperation, Rangeable): + """ + Base class for flush operations. + """ + def is_in_flush(self, store_op): + """ + Check if a given store is within the flush. + + :param store_op: Store operation to check. + :return: True if store is in flush, false otherwise. + :rtype: bool + """ + raise NotImplementedError + + +class Flush(FlushBase): + """ + Describes a flush operation. + + Examples of flush instructions are CLFLUSH, CLFLUSHOPT or CLWB. + + :ivar _address: Virtual address of the flush. + :type _address: int + :ivar _size: The size of the flush in bytes (should be cache line aligned). + :type _size: int + """ + def __init__(self, values): + """ + Initializes the object based on the describing string. + + :param values: Pre-formatted string describing the flush. + :type values: str + :return: None + """ + params = values.split(";") + self._address = int(params[1], 16) + self._size = int(params[2], 16) + + def is_in_flush(self, store_op): + """ + Override from :class:`FlushBase`. + + :param store_op: Store operation to check. + :return: True if store is in flush, false otherwise. + :rtype: bool + """ + if range_cmp(store_op, self) == 0: + return True + else: + return False + + def get_base_address(self): + """ + Override from :class:`utils.Rangeable`. + + :return: Virtual address of the flush. + :rtype: int + """ + return self._address + + def get_max_address(self): + """ + Override from :class:`utils.Rangeable`. + + :return: Virtual address of the first byte after the flush. + :rtype: int + """ + return self._address + self._size + + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Pre-formatted string describing the flush. 
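A store record in the log is a handful of semicolon-separated hex fields, so the parsing done in Store's constructor can be traced by hand (the concrete entry is made up):

    from sys import byteorder

    entry = "STORE;0x7f1000;0x2a;0x8"          # name;address;value;size
    params = entry.split(";")
    address = int(params[1], 16)               # 0x7f1000
    size = int(params[3], 16)                  # an 8-byte store
    new_value = int(params[2], 16).to_bytes(size, byteorder=byteorder)
    # b'*\x00\x00\x00\x00\x00\x00\x00' on a little-endian machine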
+ :type values: str + :return: New Flush object. + :rtype: Flush + """ + return Flush(values) + + +class ReorderBase(BaseOperation): + """ + Base class for all reorder type classes. + """ + pass + + +class NoReorderDoCheck(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing the whole sequence of stores + between barriers. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New NoReorderDoCheck object. + :rtype: NoReorderDoCheck + """ + return NoReorderDoCheck() + + +class ReorderFull(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing all possible sequences of stores + between barriers. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New ReorderFull object. + :rtype: ReorderFull + """ + return ReorderFull() + + +class ReorderAccumulative(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing all + possible accumulative sequences of stores + between barriers. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New ReorderAccumulative object. + :rtype: ReorderAccumulative + """ + return ReorderAccumulative() + + +class ReorderReverseAccumulative(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing all + possible reverted accumulative sequences of stores + between barriers. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New ReorderReverseAccumulative object. + :rtype: ReorderReverseAccumulative + """ + return ReorderReverseAccumulative() + + +class NoReorderNoCheck(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing the whole sequence of stores + between barriers. It additionally marks that no consistency checking + is to be made. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New NoReorderNoCheck object. + :rtype: NoReorderNoCheck + """ + return NoReorderNoCheck() + + +class ReorderDefault(ReorderBase): + """ + Describes the default reordering engine to be used. + + This marker class triggers default reordering. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: ReorderDefault object. + :rtype: ReorderDefault + """ + return ReorderDefault() + + +class ReorderPartial(ReorderBase): + """ + Describes the type of reordering engine to be used. + + This marker class triggers writing a subset of all possible + sequences of stores between barriers. 
+ + The type of partial reordering is chosen at runtime. Not yet + implemented. + """ + class Factory: + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Ignored. + :type values: str + :return: New ReorderPartial object. + :rtype: ReorderPartial + """ + return ReorderPartial() + + +class Register_file(BaseOperation): + """ + Describes the file to be mapped into processes address space. + + :ivar name: The full name of the file. + :type name: str + :ivar address: The base address where the file was mapped. + :type address: int + :ivar size: The size of the mapping. + :type size: int + :ivar offset: The start offset of the mapping within the file. + :type offset: int + """ + def __init__(self, values): + """ + Initializes the object based on the describing string. + + :param values: Pre-formatted string describing the flush. + :type values: str + :return: None + """ + params = values.split(";") + self.name = params[1] + self.address = int(params[2], 16) + self.size = int(params[3], 16) + self.offset = int(params[4], 16) + + class Factory(): + """ + Internal factory class to be used in dynamic object creation. + """ + def create(self, values): + """ + Factory object creation method. + + :param values: Pre-formatted string + describing the file registration. + :type values: str + :return: New Register_file object. + :rtype: Register_file + """ + return Register_file(values) diff --git a/src/pmdk/src/tools/pmreorder/operationfactory.py b/src/pmdk/src/tools/pmreorder/operationfactory.py new file mode 100644 index 000000000..81362acb4 --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/operationfactory.py @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2019, Intel Corporation + +import memoryoperations +from reorderexceptions import NotSupportedOperationException + + +class OperationFactory: + """ + An abstract memory operation factory. + + This object factory puts special constraints on names of classes. + It creates objects based on log in string format, as such the + classes have to start with a capital letter and the rest of the + name has to be in lowercase. For example:: + + STORE -> Store + FULL_REORDER -> Full_reorder + + The object to be created has to have and internal **Factory** class + with a :func:`create` method taking a string parameter. For example see + :class:`memoryoperations.Store`. + + :cvar __factories: The registered object factories. + :type __factories: dict + """ + __factories = {} + __suffix = ['.BEGIN', '.END'] + memoryoperations.BaseOperation() + + @staticmethod + def add_factory(id_, operation_factory): + """ + Explicitly register an object factory. + + This method should be used when the factory cannot be inferred + from the name of the object to be created. + + :param id_: The id under which this factory is to be registered + in the dictionary. + :type id_: str + :param operation_factory: The operation factory to be registered. + :return: None + """ + OperationFactory.__factories[id_] = operation_factory + + @staticmethod + def create_operation(string_operation, markers, stack): + + def check_marker_format(marker): + """ + Checks if marker has proper suffix. + """ + for s in OperationFactory.__suffix: + if marker.endswith(s): + return + + raise NotSupportedOperationException( + "Incorrect marker format {}, suffix is missing." 
+ .format(marker)) + + def check_pair_consistency(stack, marker): + """ + Checks if markers do not cross. + You can pop from stack only if end + marker match previous one. + + Example OK: + MACRO1.BEGIN + MACRO2.BEGIN + MACRO2.END + MACRO1.END + + Example NOT OK: + MACRO1.BEGIN + MACRO2.BEGIN + MACRO1.END + MACRO2.END + """ + top = stack[-1][0] + if top.endswith(OperationFactory.__suffix[0]): + top = top[:-len(OperationFactory.__suffix[0])] + if marker.endswith(OperationFactory.__suffix[-1]): + marker = marker[:-len(OperationFactory.__suffix[-1])] + + if top != marker: + raise NotSupportedOperationException( + "Cannot cross markers: {0}, {1}" + .format(top, marker)) + + """ + Creates the object based on the pre-formatted string. + + The string needs to be in the specific format. Each specific value + in the string has to be separated with a `;`. The first field + has to be the name of the operation, the rest are operation + specific values. + + :param string_operation: The string describing the operation. + :param markers: The dict describing the pair marker-engine. + :param stack: The stack describing the order of engine changes. + :return: The specific object instantiated based on the string. + """ + id_ = string_operation.split(";")[0] + id_case_sensitive = id_.lower().capitalize() + + # checks if id_ is one of memoryoperation classes + mem_ops = getattr(memoryoperations, id_case_sensitive, None) + + # if class is not one of memoryoperations + # it means it can be user defined marker + if mem_ops is None: + check_marker_format(id_) + # if id_ is section BEGIN + if id_.endswith(OperationFactory.__suffix[0]): + # BEGIN defined by user + marker_name = id_.partition('.')[0] + if markers is not None and marker_name in markers: + engine = markers[marker_name] + try: + mem_ops = getattr(memoryoperations, engine) + except AttributeError: + raise NotSupportedOperationException( + "Not supported reorder engine: {}" + .format(engine)) + # BEGIN but not defined by user + else: + mem_ops = stack[-1][1] + + if issubclass(mem_ops, memoryoperations.ReorderBase): + stack.append((id_, mem_ops)) + + # END section + elif id_.endswith(OperationFactory.__suffix[-1]): + check_pair_consistency(stack, id_) + stack.pop() + mem_ops = stack[-1][1] + + # here we have proper memory operation to perform, + # it can be Store, Fence, ReorderDefault etc. + id_ = mem_ops.__name__ + if id_ not in OperationFactory.__factories: + OperationFactory.__factories[id_] = mem_ops.Factory() + + return OperationFactory.__factories[id_].create(string_operation) diff --git a/src/pmdk/src/tools/pmreorder/opscontext.py b/src/pmdk/src/tools/pmreorder/opscontext.py new file mode 100644 index 000000000..a134c4d34 --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/opscontext.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +from operationfactory import OperationFactory +from binaryoutputhandler import BinaryOutputHandler +import reorderengines +import memoryoperations +from itertools import repeat + + +class OpsContext: + """ + Holds the context of the performed operations. + + :ivar _operations: The operations to be performed, based on the log file. + :type _operations: list of strings + :ivar reorder_engine: The reordering engine used at the moment. + :type one of the reorderengine Class + :ivar default_engine: The default reordering engine. + :type default_engine: One of the reorderengines Class + :ivar test_on_barrier: Check consistency on barrier. 
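The naming convention is what does the dispatch: an upper-case log tag maps onto a class in memoryoperations simply by lower-casing and capitalizing it. Stripped to its core, the lookup is:

    import memoryoperations
    from reorderexceptions import NotSupportedOperationException

    def resolve(entry):
        # "FENCE" -> "Fence", "STORE;..." -> "Store"
        name = entry.split(";")[0].lower().capitalize()
        cls = getattr(memoryoperations, name, None)
        if cls is None:
            # in the full version this is where marker handling kicks in
            raise NotSupportedOperationException(entry)
        return cls.Factory().create(entry)

    op = resolve("FENCE")        # a memoryoperations.Fence instance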
+ :type test_on_barrier: bool + :ivar default_barrier: Default consistency barrier status. + :type default_barrier: bool + :ivar file_handler: The file handler used. + """ + def __init__(self, log_file, checker, logger, arg_engine, markers): + """ + Splits the operations in the log file and sets the instance variables + to default values. + + :param log_file: The full name of the log file. + :type log_file: str + :return: None + """ + # TODO reading the whole file at once is rather naive + # change in the future + self._operations = open(log_file).read().split("|") + engine = reorderengines.get_engine(arg_engine) + self.reorder_engine = engine + self.test_on_barrier = engine.test_on_barrier + self.default_engine = self.reorder_engine + self.default_barrier = self.default_engine.test_on_barrier + self.file_handler = BinaryOutputHandler(checker) + self.checker = checker + self.logger = logger + self.markers = markers + self.stack_engines = [('START', getattr(memoryoperations, arg_engine))] + + # TODO this should probably be made a generator + def extract_operations(self): + """ + Creates specific operation objects based on the labels available + in the split log file. + + :return: list of subclasses of :class:`memoryoperations.BaseOperation` + """ + stop_index = start_index = 0 + + for i, elem in enumerate(self._operations): + if "START" in elem: + start_index = i + elif "STOP" in elem: + stop_index = i + + return list(map(OperationFactory.create_operation, + self._operations[start_index + 1:stop_index], + repeat(self.markers), repeat(self.stack_engines))) diff --git a/src/pmdk/src/tools/pmreorder/pmreorder.py b/src/pmdk/src/tools/pmreorder/pmreorder.py new file mode 100644 index 000000000..e0f3e539e --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/pmreorder.py @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2019, Intel Corporation + +import argparse +import statemachine +import opscontext +import consistencycheckwrap +import loggingfacility +import markerparser +import sys +import reorderengines + + +def main(): + pmreorder_version = "unknown" + + ''' + Argv[1] should be given in order to use -v or --version flag. It is passed + from the installed script. We check whether argv[1] was given and if it's + not any of regular parameters we use it as a version of pmreorder and + remove it from the arguments list. 
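extract_operations() above trims the log to the window between the last START and STOP markers before handing entries to the factory; the slicing itself is plain list work:

    ops = "START|STORE;0x10;0x1;0x1|FLUSH;0x10;0x40|FENCE|STOP".split("|")

    start = stop = 0
    for i, elem in enumerate(ops):
        if "START" in elem:
            start = i
        elif "STOP" in elem:
            stop = i

    payload = ops[start + 1:stop]
    # ['STORE;0x10;0x1;0x1', 'FLUSH;0x10;0x40', 'FENCE']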
+ ''' + if len(sys.argv) > 1 and sys.argv[1][0] != "-": + pmreorder_version = sys.argv[1] + del sys.argv[1] + + # TODO unicode support + # TODO parameterize reorder engine type + parser = argparse.ArgumentParser(description="Store reordering tool") + parser.add_argument("-l", "--logfile", + required=True, + help="the pmemcheck log file to process") + parser.add_argument("-c", "--checker", + choices=consistencycheckwrap.checkers, + default=consistencycheckwrap.checkers[0], + help="choose consistency checker type") + parser.add_argument("-p", "--path", + required=True, + help="path to the consistency checker and arguments", + nargs='+') + parser.add_argument("-n", "--name", + help="consistency check function " + + "for the 'lib' checker") + parser.add_argument("-o", "--output", + help="set the logger output file") + parser.add_argument("-e", "--output-level", + choices=loggingfacility.log_levels, + help="set the output log level") + parser.add_argument("-x", "--extended-macros", + help="list of pairs MARKER=ENGINE or " + + "json config file") + parser.add_argument("-v", "--version", + help="print version of the pmreorder", + action="version", + version="%(prog)s " + pmreorder_version) + engines_keys = list(reorderengines.engines.keys()) + parser.add_argument("-r", "--default-engine", + help="set default reorder engine " + + "default=NoReorderNoChecker", + choices=engines_keys, + default=engines_keys[0]) + args = parser.parse_args() + logger = loggingfacility.get_logger( + args.output, + args.output_level) + checker = consistencycheckwrap.get_checker( + args.checker, + ' '.join(args.path), + args.name) + + markers = markerparser.MarkerParser().get_markers(args.extended_macros) + + # create the script context + context = opscontext.OpsContext( + args.logfile, + checker, + logger, + args.default_engine, + markers) + + # init and run the state machine + a = statemachine.StateMachine(statemachine.InitState(context)) + if a.run_all(context.extract_operations()) is False: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/pmdk/src/tools/pmreorder/reorderengines.py b/src/pmdk/src/tools/pmreorder/reorderengines.py new file mode 100644 index 000000000..75a70ea8d --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/reorderengines.py @@ -0,0 +1,341 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + + +from itertools import combinations +from itertools import permutations +from itertools import islice +from itertools import chain +from random import sample +from functools import partial +from reorderexceptions import NotSupportedOperationException +import collections + + +class FullReorderEngine: + def __init__(self): + self.test_on_barrier = True + """ + Realizes a full reordering of stores within a given list. + Example: + input: (a, b, c) + output: + () + ('a',) + ('b',) + ('c',) + ('a', 'b') + ('a', 'c') + ('b', 'a') + ('b', 'c') + ('c', 'a') + ('c', 'b') + ('a', 'b', 'c') + ('a', 'c', 'b') + ('b', 'a', 'c') + ('b', 'c', 'a') + ('c', 'a', 'b') + ('c', 'b', 'a') + """ + def generate_sequence(self, store_list): + """ + Generates all possible combinations of all possible lengths, + based on the operations in the list. + + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields all combinations of stores. 
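The 16-element listing for (a, b, c) in the docstring follows directly from summing permutations of every length, and the same two lines also show why full reordering gets expensive fast:

    from itertools import permutations

    def full_reorder(stores):
        for length in range(len(stores) + 1):
            yield from permutations(stores, length)

    print(sum(1 for _ in full_reorder("abc")))      # 16
    print(sum(1 for _ in full_reorder(range(6))))   # already 1957 for 6 stores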
+ :rtype: iterable + """ + for length in range(0, len(store_list) + 1): + for permutation in permutations(store_list, length): + yield permutation + + +class AccumulativeReorderEngine: + def __init__(self): + self.test_on_barrier = True + """ + Realizes an accumulative reorder of stores within a given list. + Example: + input: (a, b, c) + output: + () + ('a') + ('a', 'b') + ('a', 'b', 'c') + """ + def generate_sequence(self, store_list): + """ + Generates all accumulative lists, + based on the operations in the store list. + + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields all accumulative combinations of stores. + :rtype: iterable + """ + + for i in range(0, len(store_list) + 1): + out_list = [store_list[i] for i in range(0, i)] + yield out_list + + +class AccumulativeReverseReorderEngine: + def __init__(self): + self.test_on_barrier = True + """ + Realizes an accumulative reorder of stores + within a given list in reverse order. + Example: + input: (a, b, c) + output: + () + ('c') + ('c', 'b') + ('c', 'b', 'a') + """ + def generate_sequence(self, store_list): + """ + Reverse all elements order and + generates all accumulative lists. + + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields all accumulative combinations of stores. + :rtype: iterable + """ + store_list = list(reversed(store_list)) + for i in range(len(store_list) + 1): + yield [store_list[j] for j in range(i)] + + +class SlicePartialReorderEngine: + """ + Generates a slice of the full reordering of stores within a given list. + Example: + input: (a, b, c), start = 2, stop = None, step = 2 + output: + ('b') + ('a', 'b') + ('b', 'c') + """ + def __init__(self, start, stop, step=1): + """ + Initializes the generator with the provided parameters. + + :param start: Number of preceding elements to be skipped. + :param stop: The element at which the slice is to stop. + :param step: How many values are skipped between successive calls. + """ + self._start = start + self._stop = stop + self._step = step + self.test_on_barrier = True + + def generate_sequence(self, store_list): + """ + This generator yields a slice of all possible combinations. + + The result may be a set of combinations of different lengths, + depending on the slice parameters provided at object creation. + + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields a slice of all combinations of stores. + :rtype: iterable + """ + for sl in islice(chain(*map(lambda x: combinations(store_list, x), + range(0, len(store_list) + 1))), + self._start, self._stop, self._step): + yield sl + + +class FilterPartialReorderEngine: + """ + Generates a filtered set of the combinations + without duplication of stores within a given list. + Example: + input: (a, b, c), filter = filter_min_elem, kwarg1 = 2 + output: + (a, b) + (a, c) + (b, c) + (a, b, c) + + input: (a, b, c), filter = filter_max_elem, kwarg1 = 2 + output: + () + (a) + (b) + (c) + (a, b) + (a, c) + (b, c) + + input: (a, b, c), filter = filter_between_elem, kwarg1 = 2, kwarg2 = 2 + output: + (a, b) + (a, c) + (b, c) + """ + def __init__(self, func, **kwargs): + """ + Initializes the generator with the provided parameters. + + :param func: The filter function. + :param **kwargs: Arguments to the filter function. 
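The slice engine's documented example can be checked directly: the 3-element input yields 8 subsets in combinations order, and start=2, step=2 picks indices 2, 4 and 6:

    from itertools import chain, combinations, islice

    stores = "abc"
    subsets = chain.from_iterable(
        combinations(stores, n) for n in range(len(stores) + 1))
    print(list(islice(subsets, 2, None, 2)))
    # [('b',), ('a', 'b'), ('b', 'c')]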
+ """ + self._filter = func + self._filter_kwargs = kwargs + self.test_on_barrier = True + + @staticmethod + def filter_min_elem(store_list, **kwargs): + """ + Filter stores list if number of element is less than kwarg1 + """ + if (len(store_list) < kwargs["kwarg1"]): + return False + return True + + @staticmethod + def filter_max_elem(store_list, **kwargs): + """ + Filter stores list if number of element is greater than kwarg1. + """ + if (len(store_list) > kwargs["kwarg1"]): + return False + return True + + @staticmethod + def filter_between_elem(store_list, **kwargs): + """ + Filter stores list if number of element is + greater or equal kwarg1 and less or equal kwarg2. + """ + store_len = len(store_list) + if (store_len >= kwargs["kwarg1"] and store_len <= kwargs["kwarg2"]): + return True + return False + + def generate_sequence(self, store_list): + """ + This generator yields a filtered set of combinations. + + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields a filtered set of combinations. + :rtype: iterable + """ + filter_fun = getattr(self, self._filter, None) + for elem in filter( + partial(filter_fun, **self._filter_kwargs), chain( + *map(lambda x: combinations(store_list, x), range( + 0, len(store_list) + 1)))): + yield elem + + +class RandomPartialReorderEngine: + """ + Generates a random sequence of combinations of stores. + Example: + input: (a, b, c), max_seq = 3 + output: + ('b', 'c') + ('b',) + ('a', 'b', 'c') + """ + def __init__(self, max_seq=3): + """ + Initializes the generator with the provided parameters. + + :param max_seq: The number of combinations to be generated. + """ + self.test_on_barrier = True + self._max_seq = max_seq + + def generate_sequence(self, store_list): + """ + This generator yields a random sequence of combinations. + Number of combinations without replacement has to be limited to + 1000 because of exponential growth of elements. + Example: + for 10 element from 80 -> 1646492110120 combinations + for 20 element from 80 -> 3.5353161422122E+18 combinations + for 40 element from 80 -> 1.0750720873334E+23 combinations + :param store_list: The list of stores to be reordered. + :type store_list: list of :class:`memoryoperations.Store` + :return: Yields a random sequence of combinations. + :rtype: iterable + """ + population = list(chain(*map( + lambda x: islice(combinations(store_list, x), 1000), + range(0, len(store_list) + 1)))) + population_size = len(population) + for elem in sample(population, self._max_seq if self._max_seq <= + population_size else population_size): + yield elem + + +class NoReorderEngine: + def __init__(self): + self.test_on_barrier = True + """ + A NULL reorder engine. + Example: + input: (a, b, c) + output: (a, b, c) + """ + def generate_sequence(self, store_list): + """ + This generator does not modify the provided store list. + + :param store_list: The list of stores to be reordered. + :type store_list: The list of :class:`memoryoperations.Store` + :return: The unmodified list of stores. + :rtype: iterable + """ + return [store_list] + + +class NoCheckerEngine: + def __init__(self): + self.test_on_barrier = False + """ + A NULL reorder engine. + Example: + input: (a, b, c) + output: (a, b, c) + """ + def generate_sequence(self, store_list): + """ + This generator does not modify the provided store list + and does not do the check. + + :param store_list: The list of stores to be reordered. 
+ :type store_list: The list of :class:`memoryoperations.Store` + :return: The unmodified list of stores. + :rtype: iterable + """ + return [store_list] + + +def get_engine(engine): + if engine in engines: + reorder_engine = engines[engine]() + else: + raise NotSupportedOperationException( + "Not supported reorder engine: {}" + .format(engine)) + + return reorder_engine + + +engines = collections.OrderedDict([ + ('NoReorderNoCheck', NoCheckerEngine), + ('ReorderFull', FullReorderEngine), + ('NoReorderDoCheck', NoReorderEngine), + ('ReorderAccumulative', AccumulativeReorderEngine), + ('ReorderReverseAccumulative', AccumulativeReverseReorderEngine), + ('ReorderPartial', RandomPartialReorderEngine)]) diff --git a/src/pmdk/src/tools/pmreorder/reorderexceptions.py b/src/pmdk/src/tools/pmreorder/reorderexceptions.py new file mode 100644 index 000000000..6c384151d --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/reorderexceptions.py @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + + +class InconsistentFileException(Exception): + pass + + +class NotSupportedOperationException(Exception): + pass diff --git a/src/pmdk/src/tools/pmreorder/statemachine.py b/src/pmdk/src/tools/pmreorder/statemachine.py new file mode 100644 index 000000000..9df52c30c --- /dev/null +++ b/src/pmdk/src/tools/pmreorder/statemachine.py @@ -0,0 +1,364 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2020, Intel Corporation + +import memoryoperations as memops +import reorderengines +from reorderexceptions import InconsistentFileException +from reorderexceptions import NotSupportedOperationException + + +class State: + """ + The base class of all states. + + :ivar _context: The reordering context. + :type _context: opscontext.OpsContext + :ivar trans_stores: The list of unflushed stores. + :type trans_stores: list of :class:`memoryoperations.Store` + """ + trans_stores = [] + + def __init__(self, context): + """ + Default state constructor. + + :param context: The context of the reordering. + :type context: opscontext.OpsContext + """ + self._context = context + + def next(self, in_op): + """ + Go to the next state based on the input. + + :Note: + The next state might in fact be the same state. + + :param in_op: The state switch trigger operation. + :type in_op: subclass of :class:`memoryoperations.BaseOperation` + :return: The next state. + :rtype: subclass of :class:`State` + """ + raise NotImplementedError + + def run(self, in_op): + """ + Perform the required operation in this state. + + :param in_op: The operation to be performed in this state. + :type in_op: subclass of :class:`memoryoperations.BaseOperation` + :return: None + """ + raise NotImplementedError + + +class InitState(State): + """ + The initial no-op state. + """ + def __init__(self, context): + """ + Saves the reordering context. + + :param context: The reordering context. + :type context: opscontext.OpsContext + """ + super(InitState, self).__init__(context) + + def next(self, in_op): + """ + Switch to the next valid state. + + :param in_op: Ignored. + :return: The next valid state. + :rtype: CollectingState + """ + return CollectingState(self._context) + + def run(self, in_op): + """ + Does nothing. + + :param in_op: Ignored. + :return: always True + """ + return True + + +class CollectingState(State): + """ + Collects appropriate operations. + + This state mostly aggregates stores and flushes. It also + validates which stores will be made persistent and passes + them on to the next state. 
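The StateMachine driver that pmreorder.py instantiates is presumably defined later in this file, outside the hunk shown here; a plausible minimal driver consistent with the next()/run() contract above, not the verbatim upstream definition, would be:

    class StateMachine:
        def __init__(self, initial_state):
            self._state = initial_state

        def run_all(self, operations):
            # switch state on each operation, then let the state consume it
            all_consistent = True
            for op in operations:
                self._state = self._state.next(op)
                if self._state.run(op) is False:
                    all_consistent = False
            return all_consistent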
+    def __init__(self, context):
+        """
+        Saves the reordering context.
+
+        :param context: The reordering context.
+        :type context: opscontext.OpsContext
+        """
+        super(CollectingState, self).__init__(context)
+        self._ops_list = []
+        self._ops_list += State.trans_stores
+        self._inner_state = "init"
+
+    def next(self, in_op):
+        """
+        Switch to the next valid state.
+
+        :param in_op: The state switch trigger operation.
+        :type in_op: subclass of :class:`memoryoperations.BaseOperation`
+        :return: The next state.
+        :rtype: subclass of :class:`State`
+        """
+        if isinstance(in_op, memops.Fence) and \
+                self._inner_state == "flush":
+            return ReplayingState(self._ops_list, self._context)
+        else:
+            return self
+
+    def run(self, in_op):
+        """
+        Perform operations in this state.
+
+        Based on the type of operation, different handling is employed.
+        The recognized and handled types of operations are:
+
+        * :class:`memoryoperations.ReorderBase`
+        * :class:`memoryoperations.FlushBase`
+        * :class:`memoryoperations.Store`
+        * :class:`memoryoperations.Register_file`
+
+        :param in_op: The operation to be performed in this state.
+        :type in_op: subclass of :class:`memoryoperations.BaseOperation`
+        :return: always True
+        """
+        self.move_inner_state(in_op)
+        if isinstance(in_op, memops.ReorderBase):
+            self.substitute_reorder(in_op)
+        elif isinstance(in_op, memops.FlushBase):
+            self.flush_stores(in_op)
+        elif isinstance(in_op, memops.Store):
+            self._ops_list.append(in_op)
+        elif isinstance(in_op, memops.Register_file):
+            self.reg_file(in_op)
+
+        return True
+
+    def substitute_reorder(self, order_ops):
+        """
+        Changes the reordering engine based on the log marker class.
+
+        :param order_ops: The reordering marker class.
+        :type order_ops: subclass of :class:`memoryoperations.ReorderBase`
+        :return: None
+        """
+        if isinstance(order_ops, memops.ReorderFull):
+            self._context.reorder_engine = \
+                reorderengines.FullReorderEngine()
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.ReorderPartial):
+            # TODO add macro in valgrind or
+            # parameter inside the tool to support parameters?
+            self._context.reorder_engine = \
+                reorderengines.RandomPartialReorderEngine(3)
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.ReorderAccumulative):
+            self._context.reorder_engine = \
+                reorderengines.AccumulativeReorderEngine()
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.ReorderReverseAccumulative):
+            self._context.reorder_engine = \
+                reorderengines.AccumulativeReverseReorderEngine()
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.NoReorderDoCheck):
+            self._context.reorder_engine = reorderengines.NoReorderEngine()
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.NoReorderNoCheck):
+            self._context.reorder_engine = reorderengines.NoCheckerEngine()
+            self._context.test_on_barrier = \
+                self._context.reorder_engine.test_on_barrier
+        elif isinstance(order_ops, memops.ReorderDefault):
+            self._context.reorder_engine = self._context.default_engine
+            self._context.test_on_barrier = self._context.default_barrier
+        else:
+            raise NotSupportedOperationException(
+                "Not supported reorder engine: {}"
+                .format(order_ops))
+
+    def flush_stores(self, flush_op):
+        """
+        Marks appropriate stores as flushed.
+
+        Does not align the flush; the log is expected to have the
+        flushes properly aligned.
+
+        :param flush_op: The flush operation marker.
+        :type flush_op: subclass of :class:`memoryoperations.FlushBase`
+        :return: None
+        """
+        for st in self._ops_list:
+            if flush_op.is_in_flush(st):
+                st.flushed = True
+
+    def reg_file(self, file_op):
+        """
+        Register a new file mapped into virtual memory.
+
+        :param file_op: File registration operation marker.
+        :type file_op: memoryoperations.Register_file
+        :return: None
+        """
+        self._context.file_handler.add_file(file_op.name,
+                                            file_op.address,
+                                            file_op.size)
+
+    def move_inner_state(self, in_op):
+        """
+        Tracks the internal state of the collection.
+
+        The collected stores need to be processed only at specific moments -
+        after full persistent memory barriers (flush-fence).
+
+        :param in_op: The performed operation.
+        :type in_op: subclass of :class:`memoryoperations.BaseOperation`
+        :return: None
+        """
+        if isinstance(in_op, memops.Store) and \
+                self._inner_state == "init":
+            self._inner_state = "dirty"
+        elif isinstance(in_op, memops.FlushBase) and \
+                self._inner_state == "dirty":
+            self._inner_state = "flush"
+        elif isinstance(in_op, memops.Fence) and \
+                self._inner_state == "flush":
+            self._inner_state = "fence"
+        elif isinstance(in_op, memops.Flush) and \
+                self._inner_state == "init":
+            self._inner_state = "flush"
+
+
+class ReplayingState(State):
+    """
+    Replays all collected stores according to the reordering context.
+
+    :ivar _ops_list: The list of stores to be reordered and replayed.
+    :type _ops_list: list of :class:`memoryoperations.Store`
+    """
+    def __init__(self, in_ops_list, context):
+        """
+        Saves the list of stores and the reordering context.
+
+        :param in_ops_list: The list of stores to be replayed.
+        :type in_ops_list: list of :class:`memoryoperations.Store`
+        :param context: The reordering context.
+        :type context: opscontext.OpsContext
+        """
+        super(ReplayingState, self).__init__(context)
+        self._ops_list = in_ops_list
+
+    def next(self, in_op):
+        """
+        Switches to the collecting state regardless of the input.
+
+        :param in_op: Ignored.
+        :type in_op: subclass of :class:`memoryoperations.BaseOperation`
+        :return: The next state.
+        :rtype: CollectingState
+        """
+        return CollectingState(self._context)
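Worth pausing on the hand-off: CollectingState only yields to ReplayingState once its inner state has walked init -> dirty -> flush and a fence arrives, i.e. after a full persistent memory barrier. A compact sketch of that progression, with strings standing in for the memoryoperations classes (the real code distinguishes FlushBase from Flush; this simplifies both to "flush"):

    def advance(state, op):
        # init -> dirty on a store, dirty -> flush on a flush,
        # flush -> fence on a fence; a flush with nothing dirty
        # also counts as flushed (the init -> flush edge above).
        transitions = {
            ("init", "store"): "dirty",
            ("dirty", "flush"): "flush",
            ("flush", "fence"): "fence",
            ("init", "flush"): "flush",
        }
        return transitions.get((state, op), state)

    state = "init"
    for op in ("store", "store", "flush", "fence"):
        state = advance(state, op)
    print(state)  # fence -- a full barrier; replay may start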
+
+    def run(self, in_op):
+        """
+        Perform operations in this state.
+
+        The replaying state performs reordering and, if necessary, checks
+        the consistency of the registered files. The decision whether to
+        check and the type of reordering to be used are defined by the
+        context.
+
+        :param in_op: The operation to be performed in this state.
+        :type in_op: subclass of :class:`memoryoperations.BaseOperation`
+        :return: State of the consistency check.
+        """
+        # specifies consistency state of the sequence
+        consistency = True
+
+        # consider only flushed stores
+        flushed_stores = list(filter(lambda x: x.flushed, self._ops_list))
+
+        # not-flushed stores should be passed to the next state
+        State.trans_stores = list(filter(lambda x: x.flushed is False,
+                                         self._ops_list))
+
+        if self._context.test_on_barrier:
+            for seq in self._context.reorder_engine.generate_sequence(
+                    flushed_stores):
+                for op in seq:
+                    # do stores
+                    self._context.file_handler.do_store(op)
+                # check consistency of all files
+                try:
+                    self._context.file_handler.check_consistency()
+                except InconsistentFileException as e:
+                    consistency = False
+                    self._context.logger.warning(e)
+                    stacktrace = "Call trace:\n"
+                    for num, op in enumerate(seq):
+                        stacktrace += "Store [{}]:\n".format(num)
+                        stacktrace += str(op.trace)
+                    self._context.logger.warning(stacktrace)
+
+                for op in reversed(seq):
+                    # revert the changes
+                    self._context.file_handler.do_revert(op)
+        # write all flushed stores
+        for op in flushed_stores:
+            self._context.file_handler.do_store(op)
+
+        return consistency
+
+
+class StateMachine:
+    """
+    The state machine driver.
+
+    :ivar _curr_state: The current state.
+    :type _curr_state: subclass of :class:`State`
+    """
+    def __init__(self, init_state):
+        """
+        Initialize the state machine with a specified state.
+
+        :param init_state: The initial state to be used.
+        :type init_state: subclass of :class:`State`
+        """
+        self._curr_state = init_state
+
+    def run_all(self, operations):
+        """
+        Runs the state machine across all operations.
+
+        :param operations: The operations to be performed by the state
+            machine.
+        :type operations: list of :class:`memoryoperations.BaseOperation`
+        :return: True when all checked sequences were consistent,
+            False otherwise.
+        """
+        all_consistent = True
+        for ops in operations:
+            self._curr_state = self._curr_state.next(ops)
+            check = self._curr_state.run(ops)
+            if check is False:
+                all_consistent = check
+
+        return all_consistent
diff --git a/src/pmdk/src/tools/pmreorder/utils.py b/src/pmdk/src/tools/pmreorder/utils.py
new file mode 100644
index 000000000..6ee6e67e2
--- /dev/null
+++ b/src/pmdk/src/tools/pmreorder/utils.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018, Intel Corporation
+
+
+import os
+import mmap
+
+
+class Rangeable:
+    """
+    Interface for all rangeable objects.
+
+    All rangeable objects must be able to return their base and max
+    addresses.
+    """
+    def get_base_address(self):
+        """
+        Getter for the base address of the object.
+
+        :return: The base address of the object.
+        :rtype: int
+        """
+        raise NotImplementedError
+
+    def get_max_address(self):
+        """
+        Getter for the max address of the object.
+
+        :return: The max address of the object.
+        :rtype: int
+        """
+        raise NotImplementedError
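The Rangeable interface above feeds the range_cmp() comparator defined later in this file; ranges behave as half-open [base, max) intervals, so mappings that merely touch compare as ordered and any true overlap compares as equal. A usage sketch with a hypothetical Span implementation (range_cmp is restated so the sketch runs standalone):

    class Span:
        # Hypothetical Rangeable over a [base, max) address interval.
        def __init__(self, base, end):
            self._base, self._end = base, end

        def get_base_address(self):
            return self._base

        def get_max_address(self):
            return self._end

    def range_cmp(lhs, rhs):
        # Restated from utils.py for a self-contained run.
        if lhs.get_max_address() <= rhs.get_base_address():
            return -1
        elif lhs.get_base_address() >= rhs.get_max_address():
            return 1
        return 0

    print(range_cmp(Span(0, 8), Span(8, 16)))    # -1: touching is disjoint
    print(range_cmp(Span(16, 24), Span(8, 16)))  # +1
    print(range_cmp(Span(4, 12), Span(8, 16)))   # 0: genuine overlap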
+
+
+class StackTrace:
+    def __init__(self, trace=None):
+        self.trace = trace
+
+    def __str__(self):
+        ret = ""
+        if self.trace is not None:
+            for line in self.trace:
+                ret += " by\t{}\n".format(line)
+        return ret
+
+
+def memory_map(filename, size=0, access=mmap.ACCESS_WRITE, offset=0):
+    """
+    Memory map a file.
+
+    :Warning:
+        `offset` has to be a non-negative multiple of PAGESIZE or
+        ALLOCATIONGRANULARITY
+
+    :param filename: The file to be mapped.
+    :type filename: str
+    :param size: Number of bytes to be mapped. If it is equal to 0,
+        the whole file at the moment of the call will be mapped.
+    :type size: int
+    :param access: The type of access provided to mmap.
+    :param offset: The offset within the file to be mapped.
+    :type offset: int
+    :return: The mapped file.
+    :rtype: mmap.mmap
+    """
+    fd = os.open(filename, os.O_RDWR)
+    m_file = mmap.mmap(fd, size, access=access, offset=offset)
+    os.close(fd)
+    return m_file
+
+
+def range_cmp(lhs, rhs):
+    """
+    A range compare function.
+
+    :param lhs: The left hand side of the comparison.
+    :type lhs: Rangeable
+    :param rhs: The right hand side of the comparison.
+    :type rhs: Rangeable
+    :return: -1 if lhs is before rhs, 1 when after and 0 on overlap.
+    :rtype: int
+
+    The comparison function may be explained as::
+
+        Will return -1:
+        |___lhs___|
+                    |___rhs___|
+
+        Will return +1:
+                    |___rhs___|
+        |___lhs___|
+
+        Will return 0:
+           |___lhs___|
+        |___rhs___|
+    """
+    if lhs.get_max_address() <= rhs.get_base_address():
+        return -1
+    elif lhs.get_base_address() >= rhs.get_max_address():
+        return 1
+    else:
+        return 0
diff --git a/src/pmdk/src/tools/rpmemd/.gitignore b/src/pmdk/src/tools/rpmemd/.gitignore
new file mode 100644
index 000000000..131edf728
--- /dev/null
+++ b/src/pmdk/src/tools/rpmemd/.gitignore
@@ -0,0 +1 @@
+rpmemd
diff --git a/src/pmdk/src/tools/rpmemd/Makefile b/src/pmdk/src/tools/rpmemd/Makefile
new file mode 100644
index 000000000..f0cdced63
--- /dev/null
+++ b/src/pmdk/src/tools/rpmemd/Makefile
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+#
+# Makefile -- top Makefile for rpmemd
+#
+
+vpath %.c ../../rpmem_common/
+
+TOP = ../../..
+include $(TOP)/src/common.inc
+
+ifeq ($(BUILD_RPMEM),y)
+SCP_TO_REMOTE_NODES = y
+TARGET = rpmemd
+OBJS = rpmemd.o\
+	rpmemd_log.o\
+	rpmemd_config.o\
+	rpmem_common.o\
+	rpmemd_obc.o\
+	rpmemd_db.o\
+	rpmemd_fip.o\
+	rpmem_fip_common.o\
+	rpmemd_util.o
+
+LIBPMEM=y
+TOOLS_COMMON=y
+LIBPMEMBLK_PRIV=btt_info_convert2h
+
+INCS += -I$(TOP)/src/rpmem_common
+CFLAGS += -DRPMEMC_LOG_RPMEMD
+ifneq ($(DEBUG),)
+CFLAGS += -DDEBUG
+endif
+CFLAGS += $(LIBFABRIC_CFLAGS)
+
+LIBS += -pthread
+LIBS += $(LIBFABRIC_LIBS)
+
+INSTALL_TARGET=y
+
+else
+$(info NOTE: Skipping rpmemd because $(BUILD_RPMEM_INFO))
+endif
+
+include ../Makefile.inc
+
+.PHONY: test check
diff --git a/src/pmdk/src/tools/rpmemd/README b/src/pmdk/src/tools/rpmemd/README
new file mode 100644
index 000000000..ffba03dfa
--- /dev/null
+++ b/src/pmdk/src/tools/rpmemd/README
@@ -0,0 +1,8 @@
+This directory contains a tool which supports remote access
+to persistent memory over RDMA on the server side.
+ +** DEPENDENCIES: ** +The rpmemd depends on libfabric library: + +https://github.com/ofiwg/libfabric + diff --git a/src/pmdk/src/tools/rpmemd/rpmemd.c b/src/pmdk/src/tools/rpmemd/rpmemd.c new file mode 100644 index 000000000..8a4b9edb6 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd.c @@ -0,0 +1,803 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd.c -- rpmemd main source file + */ + +#include +#include +#include +#include +#include +#include + +#include "librpmem.h" +#include "rpmemd.h" +#include "rpmemd_log.h" +#include "rpmemd_config.h" +#include "rpmem_common.h" +#include "rpmemd_fip.h" +#include "rpmemd_obc.h" +#include "rpmemd_db.h" +#include "rpmemd_util.h" +#include "pool_hdr.h" +#include "os.h" +#include "os_thread.h" +#include "util.h" +#include "uuid.h" +#include "set.h" + +/* + * rpmemd -- rpmem handle + */ +struct rpmemd { + struct rpmemd_obc *obc; /* out-of-band connection handle */ + struct rpmemd_db *db; /* pool set database handle */ + struct rpmemd_db_pool *pool; /* pool handle */ + char *pool_desc; /* pool descriptor */ + struct rpmemd_fip *fip; /* fabric provider handle */ + struct rpmemd_config config; /* configuration */ + enum rpmem_persist_method persist_method; + int closing; /* set when closing connection */ + int created; /* pool created */ + os_thread_t fip_thread; + int fip_running; +}; + +#ifdef DEBUG +/* + * bool2str -- convert bool to yes/no string + */ +static inline const char * +bool2str(int v) +{ + return v ? "yes" : "no"; +} +#endif + +/* + * str_or_null -- return null string instead of NULL pointer + */ +static inline const char * +_str(const char *str) +{ + if (!str) + return "(null)"; + return str; +} + +/* + * uuid2str -- convert uuid to string + */ +static const char * +uuid2str(const uuid_t uuid) +{ + static char uuid_str[64] = {0, }; + + int ret = util_uuid_to_string(uuid, uuid_str); + if (ret != 0) { + return "(error)"; + } + + return uuid_str; +} + +/* + * rpmemd_get_pm -- returns persist method based on configuration + */ +static enum rpmem_persist_method +rpmemd_get_pm(struct rpmemd_config *config) +{ + enum rpmem_persist_method ret = RPMEM_PM_GPSPM; + + if (config->persist_apm) + ret = RPMEM_PM_APM; + + return ret; +} + +/* + * rpmemd_db_get_status -- convert error number to status for db operation + */ +static int +rpmemd_db_get_status(int err) +{ + switch (err) { + case EEXIST: + return RPMEM_ERR_EXISTS; + case EACCES: + return RPMEM_ERR_NOACCESS; + case ENOENT: + return RPMEM_ERR_NOEXIST; + case EWOULDBLOCK: + return RPMEM_ERR_BUSY; + case EBADF: + return RPMEM_ERR_BADNAME; + case EINVAL: + return RPMEM_ERR_POOL_CFG; + default: + return RPMEM_ERR_FATAL; + } +} + +/* + * rpmemd_check_pool -- verify pool parameters + */ +static int +rpmemd_check_pool(struct rpmemd *rpmemd, const struct rpmem_req_attr *req, + int *status) +{ + if (rpmemd->pool->pool_size < RPMEM_MIN_POOL) { + RPMEMD_LOG(ERR, "invalid pool size -- must be >= %zu", + RPMEM_MIN_POOL); + *status = RPMEM_ERR_POOL_CFG; + return -1; + } + + if (rpmemd->pool->pool_size < req->pool_size) { + RPMEMD_LOG(ERR, "requested size is too big"); + *status = RPMEM_ERR_BADSIZE; + return -1; + } + + return 0; +} + +/* + * rpmemd_deep_persist -- perform deep persist operation + */ +static int +rpmemd_deep_persist(const void *addr, size_t size, void *ctx) +{ + struct rpmemd *rpmemd = (struct rpmemd *)ctx; + return util_replica_deep_persist(addr, size, rpmemd->pool->set, 0); +} + +/* + * rpmemd_common_fip_init -- initialize fabric 
provider + */ +static int +rpmemd_common_fip_init(struct rpmemd *rpmemd, const struct rpmem_req_attr *req, + struct rpmem_resp_attr *resp, int *status) +{ + /* register the whole pool with header in RDMA */ + void *addr = (void *)((uintptr_t)rpmemd->pool->pool_addr); + struct rpmemd_fip_attr fip_attr = { + .addr = addr, + .size = req->pool_size, + .nlanes = req->nlanes, + .nthreads = rpmemd->config.nthreads, + .provider = req->provider, + .persist_method = rpmemd->persist_method, + .deep_persist = rpmemd_deep_persist, + .ctx = rpmemd, + .buff_size = req->buff_size, + }; + + const int is_pmem = rpmemd_db_pool_is_pmem(rpmemd->pool); + if (rpmemd_apply_pm_policy(&fip_attr.persist_method, + &fip_attr.persist, + &fip_attr.memcpy_persist, + is_pmem)) { + *status = RPMEM_ERR_FATAL; + goto err_fip_init; + } + + const char *node = rpmem_get_ssh_conn_addr(); + enum rpmem_err err; + + rpmemd->fip = rpmemd_fip_init(node, NULL, &fip_attr, resp, &err); + if (!rpmemd->fip) { + *status = (int)err; + goto err_fip_init; + } + + return 0; +err_fip_init: + return -1; +} + +/* + * rpmemd_print_req_attr -- print request attributes + */ +static void +rpmemd_print_req_attr(const struct rpmem_req_attr *req) +{ + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "pool descriptor: '%s'", + _str(req->pool_desc)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "pool size: %lu", req->pool_size); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "nlanes: %u", req->nlanes); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "provider: %s", + rpmem_provider_to_str(req->provider)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "buff_size: %lu", req->buff_size); +} + +/* + * rpmemd_print_pool_attr -- print pool attributes + */ +static void +rpmemd_print_pool_attr(const struct rpmem_pool_attr *attr) +{ + if (attr == NULL) { + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "NULL"); + } else { + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "signature: '%s'", + _str(attr->signature)); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "major: %u", attr->major); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "compat_features: 0x%x", + attr->compat_features); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "incompat_features: 0x%x", + attr->incompat_features); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "ro_compat_features: 0x%x", + attr->ro_compat_features); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "poolset_uuid: %s", + uuid2str(attr->poolset_uuid)); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "uuid: %s", + uuid2str(attr->uuid)); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "next_uuid: %s", + uuid2str(attr->next_uuid)); + RPMEMD_LOG(INFO, RPMEMD_LOG_INDENT "prev_uuid: %s", + uuid2str(attr->prev_uuid)); + } +} + +/* + * rpmemd_print_resp_attr -- print response attributes + */ +static void +rpmemd_print_resp_attr(const struct rpmem_resp_attr *attr) +{ + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "port: %u", attr->port); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "rkey: 0x%lx", attr->rkey); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "raddr: 0x%lx", attr->raddr); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "nlanes: %u", attr->nlanes); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist method: %s", + rpmem_persist_method_to_str(attr->persist_method)); +} + +/* + * rpmemd_fip_thread -- background thread for establishing in-band connection + */ +static void * +rpmemd_fip_thread(void *arg) +{ + struct rpmemd *rpmemd = (struct rpmemd *)arg; + int ret; + + RPMEMD_LOG(INFO, "waiting for in-band connection"); + + ret = rpmemd_fip_accept(rpmemd->fip, RPMEM_ACCEPT_TIMEOUT); + if (ret) + goto err_accept; + + RPMEMD_LOG(NOTICE, "in-band connection established"); + + ret = 
rpmemd_fip_process_start(rpmemd->fip); + if (ret) + goto err_process_start; + + return NULL; +err_process_start: + rpmemd_fip_close(rpmemd->fip); +err_accept: + return (void *)(uintptr_t)ret; +} + +/* + * rpmemd_fip_start_thread -- start background thread for establishing + * in-band connection + */ +static int +rpmemd_fip_start_thread(struct rpmemd *rpmemd) +{ + errno = os_thread_create(&rpmemd->fip_thread, NULL, + rpmemd_fip_thread, rpmemd); + if (errno) { + RPMEMD_LOG(ERR, "!creating in-band thread"); + goto err_os_thread_create; + } + + rpmemd->fip_running = 1; + + return 0; +err_os_thread_create: + return -1; +} + +/* + * rpmemd_fip_stop_thread -- stop background thread for in-band connection + */ +static int +rpmemd_fip_stop_thread(struct rpmemd *rpmemd) +{ + RPMEMD_ASSERT(rpmemd->fip_running); + void *tret; + errno = os_thread_join(&rpmemd->fip_thread, &tret); + if (errno) + RPMEMD_LOG(ERR, "!waiting for in-band thread"); + + int ret = (int)(uintptr_t)tret; + if (ret) + RPMEMD_LOG(ERR, "in-band thread failed -- '%d'", ret); + + return ret; +} + +/* + * rpmemd_fip-stop -- stop in-band thread and stop processing thread + */ +static int +rpmemd_fip_stop(struct rpmemd *rpmemd) +{ + int ret; + + int fip_ret = rpmemd_fip_stop_thread(rpmemd); + if (fip_ret) { + RPMEMD_LOG(ERR, "!in-band thread failed"); + } + + if (!fip_ret) { + ret = rpmemd_fip_process_stop(rpmemd->fip); + if (ret) { + RPMEMD_LOG(ERR, "!stopping fip process failed"); + } + } + + rpmemd->fip_running = 0; + + return fip_ret; +} + +/* + * rpmemd_close_pool -- close pool and remove it if required + */ +static int +rpmemd_close_pool(struct rpmemd *rpmemd, int remove) +{ + int ret = 0; + + RPMEMD_LOG(NOTICE, "closing pool"); + rpmemd_db_pool_close(rpmemd->db, rpmemd->pool); + RPMEMD_LOG(INFO, "pool closed"); + + if (remove) { + RPMEMD_LOG(NOTICE, "removing '%s'", rpmemd->pool_desc); + ret = rpmemd_db_pool_remove(rpmemd->db, + rpmemd->pool_desc, 0, 0); + if (ret) { + RPMEMD_LOG(ERR, "!removing pool '%s' failed", + rpmemd->pool_desc); + } else { + RPMEMD_LOG(INFO, "removed '%s'", rpmemd->pool_desc); + } + } + + free(rpmemd->pool_desc); + + return ret; +} + +/* + * rpmemd_req_cleanup -- cleanup in-band connection and all resources allocated + * during open/create requests + */ +static void +rpmemd_req_cleanup(struct rpmemd *rpmemd) +{ + if (!rpmemd->fip_running) + return; + + int ret; + + ret = rpmemd_fip_stop(rpmemd); + if (!ret) { + rpmemd_fip_close(rpmemd->fip); + rpmemd_fip_fini(rpmemd->fip); + } + + int remove = rpmemd->created && ret; + rpmemd_close_pool(rpmemd, remove); +} + +/* + * rpmemd_req_create -- handle create request + */ +static int +rpmemd_req_create(struct rpmemd_obc *obc, void *arg, + const struct rpmem_req_attr *req, + const struct rpmem_pool_attr *pool_attr) +{ + RPMEMD_ASSERT(arg != NULL); + RPMEMD_LOG(NOTICE, "create request:"); + rpmemd_print_req_attr(req); + RPMEMD_LOG(NOTICE, "pool attributes:"); + rpmemd_print_pool_attr(pool_attr); + + struct rpmemd *rpmemd = (struct rpmemd *)arg; + + int ret; + int status = 0; + int err_send = 1; + struct rpmem_resp_attr resp; + memset(&resp, 0, sizeof(resp)); + + if (rpmemd->pool) { + RPMEMD_LOG(ERR, "pool already opened"); + ret = -1; + status = RPMEM_ERR_FATAL; + goto err_pool_opened; + } + + rpmemd->pool_desc = strdup(req->pool_desc); + if (!rpmemd->pool_desc) { + RPMEMD_LOG(ERR, "!allocating pool descriptor"); + ret = -1; + status = RPMEM_ERR_FATAL; + goto err_strdup; + } + + rpmemd->pool = rpmemd_db_pool_create(rpmemd->db, + req->pool_desc, 0, pool_attr); + if 
(!rpmemd->pool) { + ret = -1; + status = rpmemd_db_get_status(errno); + goto err_pool_create; + } + + rpmemd->created = 1; + + ret = rpmemd_check_pool(rpmemd, req, &status); + if (ret) + goto err_pool_check; + + ret = rpmemd_common_fip_init(rpmemd, req, &resp, &status); + if (ret) + goto err_fip_init; + + RPMEMD_LOG(NOTICE, "create request response: (status = %u)", status); + if (!status) + rpmemd_print_resp_attr(&resp); + ret = rpmemd_obc_create_resp(obc, status, &resp); + if (ret) + goto err_create_resp; + + ret = rpmemd_fip_start_thread(rpmemd); + if (ret) + goto err_fip_start; + + return 0; +err_fip_start: +err_create_resp: + err_send = 0; + rpmemd_fip_fini(rpmemd->fip); +err_fip_init: +err_pool_check: + rpmemd_db_pool_close(rpmemd->db, rpmemd->pool); + rpmemd_db_pool_remove(rpmemd->db, req->pool_desc, 0, 0); +err_pool_create: + free(rpmemd->pool_desc); +err_strdup: +err_pool_opened: + if (err_send) + ret = rpmemd_obc_create_resp(obc, status, &resp); + rpmemd->closing = 1; + return ret; +} + +/* + * rpmemd_req_open -- handle open request + */ +static int +rpmemd_req_open(struct rpmemd_obc *obc, void *arg, + const struct rpmem_req_attr *req) +{ + RPMEMD_ASSERT(arg != NULL); + RPMEMD_LOG(NOTICE, "open request:"); + rpmemd_print_req_attr(req); + struct rpmemd *rpmemd = (struct rpmemd *)arg; + + int ret; + int status = 0; + int err_send = 1; + struct rpmem_resp_attr resp; + memset(&resp, 0, sizeof(resp)); + + struct rpmem_pool_attr pool_attr; + memset(&pool_attr, 0, sizeof(pool_attr)); + + if (rpmemd->pool) { + RPMEMD_LOG(ERR, "pool already opened"); + ret = -1; + status = RPMEM_ERR_FATAL; + goto err_pool_opened; + } + + rpmemd->pool_desc = strdup(req->pool_desc); + if (!rpmemd->pool_desc) { + RPMEMD_LOG(ERR, "!allocating pool descriptor"); + ret = -1; + status = RPMEM_ERR_FATAL; + goto err_strdup; + } + + rpmemd->pool = rpmemd_db_pool_open(rpmemd->db, + req->pool_desc, 0, &pool_attr); + if (!rpmemd->pool) { + ret = -1; + status = rpmemd_db_get_status(errno); + goto err_pool_open; + } + + RPMEMD_LOG(NOTICE, "pool attributes:"); + rpmemd_print_pool_attr(&pool_attr); + + ret = rpmemd_check_pool(rpmemd, req, &status); + if (ret) + goto err_pool_check; + + ret = rpmemd_common_fip_init(rpmemd, req, &resp, &status); + if (ret) + goto err_fip_init; + + RPMEMD_LOG(NOTICE, "open request response: (status = %u)", status); + if (!status) + rpmemd_print_resp_attr(&resp); + + ret = rpmemd_obc_open_resp(obc, status, &resp, &pool_attr); + if (ret) + goto err_open_resp; + + ret = rpmemd_fip_start_thread(rpmemd); + if (ret) + goto err_fip_start; + + return 0; +err_fip_start: +err_open_resp: + err_send = 0; + rpmemd_fip_fini(rpmemd->fip); +err_fip_init: +err_pool_check: + rpmemd_db_pool_close(rpmemd->db, rpmemd->pool); +err_pool_open: + free(rpmemd->pool_desc); +err_strdup: +err_pool_opened: + if (err_send) + ret = rpmemd_obc_open_resp(obc, status, &resp, &pool_attr); + rpmemd->closing = 1; + return ret; +} + +/* + * rpmemd_req_close -- handle close request + */ +static int +rpmemd_req_close(struct rpmemd_obc *obc, void *arg, int flags) +{ + RPMEMD_ASSERT(arg != NULL); + RPMEMD_LOG(NOTICE, "close request"); + + struct rpmemd *rpmemd = (struct rpmemd *)arg; + + rpmemd->closing = 1; + + int ret; + int status = 0; + + if (!rpmemd->pool) { + RPMEMD_LOG(ERR, "pool not opened"); + status = RPMEM_ERR_FATAL; + return rpmemd_obc_close_resp(obc, status); + } + + ret = rpmemd_fip_stop(rpmemd); + if (ret) { + status = RPMEM_ERR_FATAL; + } else { + rpmemd_fip_close(rpmemd->fip); + rpmemd_fip_fini(rpmemd->fip); + } + + 
int remove = rpmemd->created && + (status || (flags & RPMEM_CLOSE_FLAGS_REMOVE)); + if (rpmemd_close_pool(rpmemd, remove)) + RPMEMD_LOG(ERR, "closing pool failed"); + + RPMEMD_LOG(NOTICE, "close request response (status = %u)", status); + ret = rpmemd_obc_close_resp(obc, status); + + return ret; +} + +/* + * rpmemd_req_set_attr -- handle set attributes request + */ +static int +rpmemd_req_set_attr(struct rpmemd_obc *obc, void *arg, + const struct rpmem_pool_attr *pool_attr) +{ + RPMEMD_ASSERT(arg != NULL); + RPMEMD_LOG(NOTICE, "set attributes request"); + struct rpmemd *rpmemd = (struct rpmemd *)arg; + RPMEMD_ASSERT(rpmemd->pool != NULL); + + int ret; + int status = 0; + int err_send = 1; + + ret = rpmemd_db_pool_set_attr(rpmemd->pool, pool_attr); + if (ret) { + ret = -1; + status = rpmemd_db_get_status(errno); + goto err_set_attr; + } + + RPMEMD_LOG(NOTICE, "new pool attributes:"); + rpmemd_print_pool_attr(pool_attr); + + ret = rpmemd_obc_set_attr_resp(obc, status); + if (ret) + goto err_set_attr_resp; + + return ret; +err_set_attr_resp: + err_send = 0; +err_set_attr: + if (err_send) + ret = rpmemd_obc_set_attr_resp(obc, status); + return ret; +} + +static struct rpmemd_obc_requests rpmemd_req = { + .create = rpmemd_req_create, + .open = rpmemd_req_open, + .close = rpmemd_req_close, + .set_attr = rpmemd_req_set_attr, +}; + +/* + * rpmemd_print_info -- print basic info and configuration + */ +static void +rpmemd_print_info(struct rpmemd *rpmemd) +{ + RPMEMD_LOG(NOTICE, "ssh connection: %s", + _str(os_getenv("SSH_CONNECTION"))); + RPMEMD_LOG(NOTICE, "user: %s", _str(os_getenv("USER"))); + RPMEMD_LOG(NOTICE, "configuration"); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "pool set directory: '%s'", + _str(rpmemd->config.poolset_dir)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist method: %s", + rpmem_persist_method_to_str(rpmemd->persist_method)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "number of threads: %lu", + rpmemd->config.nthreads); + RPMEMD_DBG(RPMEMD_LOG_INDENT "persist APM: %s", + bool2str(rpmemd->config.persist_apm)); + RPMEMD_DBG(RPMEMD_LOG_INDENT "persist GPSPM: %s", + bool2str(rpmemd->config.persist_general)); + RPMEMD_DBG(RPMEMD_LOG_INDENT "use syslog: %s", + bool2str(rpmemd->config.use_syslog)); + RPMEMD_DBG(RPMEMD_LOG_INDENT "log file: %s", + _str(rpmemd->config.log_file)); + RPMEMD_DBG(RPMEMD_LOG_INDENT "log level: %s", + rpmemd_log_level_to_str(rpmemd->config.log_level)); +} + +int +main(int argc, char *argv[]) +{ + util_init(); + + int send_status = 1; + int ret = 1; + + struct rpmemd *rpmemd = calloc(1, sizeof(*rpmemd)); + if (!rpmemd) { + RPMEMD_LOG(ERR, "!calloc"); + goto err_rpmemd; + } + + rpmemd->obc = rpmemd_obc_init(STDIN_FILENO, STDOUT_FILENO); + if (!rpmemd->obc) { + RPMEMD_LOG(ERR, "out-of-band connection initialization"); + goto err_obc; + } + + if (rpmemd_log_init(DAEMON_NAME, NULL, 0)) { + RPMEMD_LOG(ERR, "logging subsystem initialization failed"); + goto err_log_init; + } + + if (rpmemd_config_read(&rpmemd->config, argc, argv) != 0) { + RPMEMD_LOG(ERR, "reading configuration failed"); + goto err_config; + } + + rpmemd_log_close(); + rpmemd_log_level = rpmemd->config.log_level; + if (rpmemd_log_init(DAEMON_NAME, rpmemd->config.log_file, + rpmemd->config.use_syslog)) { + RPMEMD_LOG(ERR, "logging subsystem initialization" + " failed (%s, %d)", rpmemd->config.log_file, + rpmemd->config.use_syslog); + goto err_log_init_config; + } + + RPMEMD_LOG(INFO, "%s version %s", DAEMON_NAME, SRCVERSION); + rpmemd->persist_method = rpmemd_get_pm(&rpmemd->config); + + rpmemd->db = 
rpmemd_db_init(rpmemd->config.poolset_dir, 0666); + if (!rpmemd->db) { + RPMEMD_LOG(ERR, "!pool set db initialization"); + goto err_db_init; + } + + if (rpmemd->config.rm_poolset) { + RPMEMD_LOG(INFO, "removing '%s'", + rpmemd->config.rm_poolset); + if (rpmemd_db_pool_remove(rpmemd->db, + rpmemd->config.rm_poolset, + rpmemd->config.force, + rpmemd->config.pool_set)) { + RPMEMD_LOG(ERR, "removing '%s' failed", + rpmemd->config.rm_poolset); + ret = errno; + } else { + RPMEMD_LOG(NOTICE, "removed '%s'", + rpmemd->config.rm_poolset); + ret = 0; + } + send_status = 0; + goto out_rm; + } + + ret = rpmemd_obc_status(rpmemd->obc, 0); + if (ret) { + RPMEMD_LOG(ERR, "writing status failed"); + goto err_status; + } + + rpmemd_print_info(rpmemd); + + while (!ret) { + ret = rpmemd_obc_process(rpmemd->obc, &rpmemd_req, rpmemd); + if (ret) { + RPMEMD_LOG(ERR, "out-of-band connection" + " process failed"); + goto err; + } + + if (rpmemd->closing) + break; + } + + rpmemd_db_fini(rpmemd->db); + rpmemd_config_free(&rpmemd->config); + rpmemd_log_close(); + rpmemd_obc_fini(rpmemd->obc); + free(rpmemd); + + return 0; +err: + rpmemd_req_cleanup(rpmemd); +err_status: +out_rm: + rpmemd_db_fini(rpmemd->db); +err_db_init: +err_log_init_config: + rpmemd_config_free(&rpmemd->config); +err_config: + rpmemd_log_close(); +err_log_init: + if (send_status) { + if (rpmemd_obc_status(rpmemd->obc, (uint32_t)errno)) + RPMEMD_LOG(ERR, "writing status failed"); + } + rpmemd_obc_fini(rpmemd->obc); +err_obc: + free(rpmemd); +err_rpmemd: + return ret; +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd.h b/src/pmdk/src/tools/rpmemd/rpmemd.h new file mode 100644 index 000000000..8fda7db72 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd.h -- rpmemd main header file + */ + +#define DAEMON_NAME "rpmemd" diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_config.c b/src/pmdk/src/tools/rpmemd/rpmemd_config.c new file mode 100644 index 000000000..a544cd1a5 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_config.c @@ -0,0 +1,640 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_config.c -- rpmemd config source file + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rpmemd.h" +#include "rpmemd_log.h" +#include "rpmemd_config.h" +#include "os.h" + +#define CONFIG_LINE_SIZE_INIT 50 +#define INVALID_CHAR_POS UINT64_MAX + +struct rpmemd_special_chars_pos { + uint64_t equal_char; + uint64_t comment_char; + uint64_t EOL_char; +}; + +enum rpmemd_option { + RPD_OPT_LOG_FILE, + RPD_OPT_POOLSET_DIR, + RPD_OPT_PERSIST_APM, + RPD_OPT_PERSIST_GENERAL, + RPD_OPT_USE_SYSLOG, + RPD_OPT_LOG_LEVEL, + RPD_OPT_RM_POOLSET, + + RPD_OPT_MAX_VALUE, + RPD_OPT_INVALID = UINT64_MAX, +}; + +static const char *optstr = "c:hVr:fst:"; + +/* + * options -- cl and config file options + */ +static const struct option options[] = { +{"config", required_argument, NULL, 'c'}, +{"help", no_argument, NULL, 'h'}, +{"version", no_argument, NULL, 'V'}, +{"log-file", required_argument, NULL, RPD_OPT_LOG_FILE}, +{"poolset-dir", required_argument, NULL, RPD_OPT_POOLSET_DIR}, +{"persist-apm", no_argument, NULL, RPD_OPT_PERSIST_APM}, +{"persist-general", no_argument, NULL, RPD_OPT_PERSIST_GENERAL}, +{"use-syslog", no_argument, NULL, RPD_OPT_USE_SYSLOG}, +{"log-level", required_argument, NULL, RPD_OPT_LOG_LEVEL}, 
+{"remove", required_argument, NULL, 'r'}, +{"force", no_argument, NULL, 'f'}, +{"pool-set", no_argument, NULL, 's'}, +{"nthreads", required_argument, NULL, 't'}, +{NULL, 0, NULL, 0}, +}; + +#define VALUE_INDENT " " + +static const char * const help_str = +"\n" +"Options:\n" +" -c, --config configuration file location\n" +" -r, --remove remove pool described by given poolset file\n" +" -f, --force ignore errors when removing a pool\n" +" -t, --nthreads number of processing threads\n" +" -h, --help display help message and exit\n" +" -V, --version display target daemon version and exit\n" +" --log-file log file location\n" +" --poolset-dir pool set files directory\n" +" --persist-apm enable Appliance Persistency Method\n" +" --persist-general enable General Server Persistency Mechanism\n" +" --use-syslog use syslog(3) for logging messages\n" +" --log-level set log level value\n" +VALUE_INDENT "err error conditions\n" +VALUE_INDENT "warn warning conditions\n" +VALUE_INDENT "notice normal, but significant, condition\n" +VALUE_INDENT "info informational message\n" +VALUE_INDENT "debug debug-level message\n" +"\n" +"For complete documentation see %s(1) manual page."; + +/* + * print_version -- (internal) prints version message + */ +static void +print_version(void) +{ + RPMEMD_LOG(ERR, "%s version %s", DAEMON_NAME, SRCVERSION); +} + +/* + * print_usage -- (internal) prints usage message + */ +static void +print_usage(const char *name) +{ + RPMEMD_LOG(ERR, "usage: %s [--version] [--help] []", + name); +} + +/* + * print_help -- (internal) prints help message + */ +static void +print_help(const char *name) +{ + print_usage(name); + print_version(); + RPMEMD_LOG(ERR, help_str, DAEMON_NAME); +} + +/* + * parse_config_string -- (internal) parse string value + */ +static inline char * +parse_config_string(const char *value) +{ + if (strlen(value) == 0) { + errno = EINVAL; + return NULL; + } + char *output = strdup(value); + if (output == NULL) + RPMEMD_FATAL("!strdup"); + return output; +} + +/* + * parse_config_bool -- (internal) parse yes / no flag + */ +static inline int +parse_config_bool(bool *config_value, const char *value) +{ + if (value == NULL) + *config_value = true; + else if (strcmp("yes", value) == 0) + *config_value = true; + else if (strcmp("no", value) == 0) + *config_value = false; + else { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * set_option -- (internal) set single config option + */ +static int +set_option(enum rpmemd_option option, const char *value, + struct rpmemd_config *config) +{ + int ret = 0; + + switch (option) { + case RPD_OPT_LOG_FILE: + free(config->log_file); + config->log_file = parse_config_string(value); + if (config->log_file == NULL) + return -1; + else + config->use_syslog = false; + break; + case RPD_OPT_POOLSET_DIR: + free(config->poolset_dir); + config->poolset_dir = parse_config_string(value); + if (config->poolset_dir == NULL) + return -1; + break; + case RPD_OPT_PERSIST_APM: + ret = parse_config_bool(&config->persist_apm, value); + break; + case RPD_OPT_PERSIST_GENERAL: + ret = parse_config_bool(&config->persist_general, value); + break; + case RPD_OPT_USE_SYSLOG: + ret = parse_config_bool(&config->use_syslog, value); + break; + case RPD_OPT_LOG_LEVEL: + config->log_level = rpmemd_log_level_from_str(value); + if (config->log_level == MAX_RPD_LOG) { + errno = EINVAL; + return -1; + } + break; + default: + errno = EINVAL; + return -1; + } + + return ret; +} + +/* + * get_config_line -- (internal) read single line from file + */ +static 
int +get_config_line(FILE *file, char **line, uint64_t *line_max, + uint8_t *line_max_increased, struct rpmemd_special_chars_pos *pos) +{ + uint8_t line_complete = 0; + uint64_t line_length = 0; + char *line_part = *line; + do { + char *ret = fgets(line_part, + (int)(*line_max - line_length), file); + if (ret == NULL) + return 0; + for (uint64_t i = 0; i < *line_max; ++i) { + if (line_part[i] == '\n') + line_complete = 1; + else if (line_part[i] == '\0') { + line_length += i; + if (line_length + 1 < *line_max) + line_complete = 1; + break; + } else if (line_part[i] == '#' && + pos->comment_char == UINT64_MAX) + pos->comment_char = line_length + i; + else if (line_part[i] == '=' && + pos->equal_char == UINT64_MAX) + pos->equal_char = line_length + i; + } + if (line_complete == 0) { + *line = realloc(*line, sizeof(char) * (*line_max) * 2); + if (*line == NULL) { + RPMEMD_FATAL("!realloc"); + } + line_part = *line + *line_max - 1; + line_length = *line_max - 1; + *line_max *= 2; + *line_max_increased = 1; + } + } while (line_complete != 1); + + pos->EOL_char = line_length; + return 0; +} + +/* + * trim_line_element -- (internal) remove white characters + */ +static char * +trim_line_element(char *line, uint64_t start, uint64_t end) +{ + for (; start <= end; ++start) { + if (!isspace(line[start])) + break; + } + + for (; end > start; --end) { + if (!isspace(line[end - 1])) + break; + } + + if (start == end) + return NULL; + + line[end] = '\0'; + return &line[start]; +} + +/* + * parse_config_key -- (internal) lookup config key + */ +static enum rpmemd_option +parse_config_key(const char *key) +{ + for (int i = 0; options[i].name != 0; ++i) { + if (strcmp(key, options[i].name) == 0) + return (enum rpmemd_option)options[i].val; + } + + return RPD_OPT_INVALID; +} + +/* + * parse_config_line -- (internal) parse single config line + * + * Return newly written option flag. Store possible errors in errno. + */ +static int +parse_config_line(char *line, struct rpmemd_special_chars_pos *pos, + struct rpmemd_config *config, uint64_t disabled) +{ + if (pos->comment_char < pos->equal_char) + pos->equal_char = INVALID_CHAR_POS; + + uint64_t end_of_content = pos->comment_char != INVALID_CHAR_POS ? 
+ pos->comment_char : pos->EOL_char; + + if (pos->equal_char == INVALID_CHAR_POS) { + char *leftover = trim_line_element(line, 0, end_of_content); + if (leftover != NULL) { + errno = EINVAL; + return -1; + } else { + return 0; + } + } + + char *key_name = trim_line_element(line, 0, pos->equal_char); + char *value = trim_line_element(line, pos->equal_char + 1, + end_of_content); + + if (key_name == NULL || value == NULL) { + errno = EINVAL; + return -1; + } + + enum rpmemd_option key = parse_config_key(key_name); + if (key != RPD_OPT_INVALID) { + if ((disabled & (uint64_t)(1 << key)) == 0) + if (set_option(key, value, config) != 0) + return -1; + } else { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * parse_config_file -- (internal) parse config file + */ +static int +parse_config_file(const char *filename, struct rpmemd_config *config, + uint64_t disabled, int required) +{ + RPMEMD_ASSERT(filename != NULL); + + FILE *file = os_fopen(filename, "r"); + if (file == NULL) { + if (required) { + RPMEMD_LOG(ERR, "!%s", filename); + goto error_fopen; + } else { + goto optional_config_missing; + } + } + + uint8_t line_max_increased = 0; + uint64_t line_max = CONFIG_LINE_SIZE_INIT; + uint64_t line_num = 1; + char *line = (char *)malloc(sizeof(char) * line_max); + if (line == NULL) { + RPMEMD_LOG(ERR, "!malloc"); + goto error_malloc_line; + } + + char *line_copy = (char *)malloc(sizeof(char) * line_max); + if (line_copy == NULL) { + RPMEMD_LOG(ERR, "!malloc"); + goto error_malloc_line_copy; + } + + struct rpmemd_special_chars_pos pos; + + do { + memset(&pos, 0xff, sizeof(pos)); + if (get_config_line(file, &line, &line_max, + &line_max_increased, &pos) != 0) + goto error; + + if (line_max_increased) { + char *line_new = (char *)realloc(line_copy, + sizeof(char) * line_max); + if (line_new == NULL) { + RPMEMD_LOG(ERR, "!malloc"); + goto error; + } + line_copy = line_new; + line_max_increased = 0; + } + + if (pos.EOL_char != INVALID_CHAR_POS) { + strcpy(line_copy, line); + int ret = parse_config_line(line_copy, &pos, config, + disabled); + if (ret != 0) { + size_t len = strlen(line); + if (len > 0 && line[len - 1] == '\n') + line[len - 1] = '\0'; + RPMEMD_LOG(ERR, "Invalid config file line at " + "%s:%lu\n%s", + filename, line_num, line); + goto error; + } + } + ++line_num; + } while (pos.EOL_char != INVALID_CHAR_POS); + + free(line_copy); + free(line); + fclose(file); +optional_config_missing: + return 0; + +error: + free(line_copy); +error_malloc_line_copy: + free(line); +error_malloc_line: + fclose(file); +error_fopen: + return -1; +} + +/* + * parse_cl_args -- (internal) parse command line arguments + */ +static void +parse_cl_args(int argc, char *argv[], struct rpmemd_config *config, + const char **config_file, uint64_t *cl_options) +{ + RPMEMD_ASSERT(argv != NULL); + RPMEMD_ASSERT(config != NULL); + + int opt; + int option_index = 0; + + while ((opt = getopt_long(argc, argv, optstr, options, + &option_index)) != -1) { + + switch (opt) { + case 'c': + (*config_file) = optarg; + break; + case 'r': + config->rm_poolset = optarg; + break; + case 'f': + config->force = true; + break; + case 's': + config->pool_set = true; + break; + case 't': + errno = 0; + char *endptr; + config->nthreads = strtoul(optarg, &endptr, 10); + if (errno || *endptr != '\0') { + RPMEMD_LOG(ERR, + "invalid number of threads -- '%s'", + optarg); + exit(-1); + } + break; + case 'h': + print_help(argv[0]); + exit(0); + case 'V': + print_version(); + exit(0); + break; + default: + if (set_option((enum 
rpmemd_option)opt, optarg, config) + == 0) { + *cl_options |= (UINT64_C(1) << opt); + } else { + print_usage(argv[0]); + exit(-1); + } + } + } +} + +/* + * get_home_dir -- (internal) return user home directory + * + * Function will lookup user home directory in order: + * 1. HOME environment variable + * 2. Password file entry using real user ID + */ +static void +get_home_dir(char *str, size_t size) +{ + char *home = os_getenv(HOME_ENV); + if (home) { + int r = util_snprintf(str, size, "%s", home); + if (r < 0) + RPMEMD_FATAL("!snprintf"); + } else { + uid_t uid = getuid(); + struct passwd *pw = getpwuid(uid); + if (pw == NULL) + RPMEMD_FATAL("!getpwuid"); + + int r = util_snprintf(str, size, "%s", pw->pw_dir); + if (r < 0) + RPMEMD_FATAL("!snprintf"); + } +} + +/* + * concat_dir_and_file_name -- (internal) concatenate directory and file name + * into single string path + */ +static void +concat_dir_and_file_name(char *path, size_t size, const char *dir, + const char *file) +{ + int r = util_snprintf(path, size, "%s/%s", dir, file); + if (r < 0) + RPMEMD_FATAL("!snprintf"); +} + +/* + * str_replace_home -- (internal) replace $HOME string with user home directory + * + * If function does not find $HOME string it will return haystack untouched. + * Otherwise it will allocate new string with $HOME replaced with provided + * home_dir path. haystack will be released and newly created string returned. + */ +static char * +str_replace_home(char *haystack, const char *home_dir) +{ + const size_t placeholder_len = strlen(HOME_STR_PLACEHOLDER); + const size_t home_len = strlen(home_dir); + size_t haystack_len = strlen(haystack); + + char *pos = strstr(haystack, HOME_STR_PLACEHOLDER); + if (!pos) + return haystack; + + const char *after = pos + placeholder_len; + if (isalnum(*after)) + return haystack; + + haystack_len += home_len - placeholder_len + 1; + char *buf = malloc(sizeof(char) * haystack_len); + if (!buf) + RPMEMD_FATAL("!malloc"); + + *pos = '\0'; + int r = util_snprintf(buf, haystack_len, "%s%s%s", haystack, home_dir, + after); + if (r < 0) + RPMEMD_FATAL("!snprintf"); + + free(haystack); + return buf; +} + +/* + * config_set_default -- (internal) load default config + */ +static void +config_set_default(struct rpmemd_config *config, const char *poolset_dir) +{ + config->log_file = strdup(RPMEMD_DEFAULT_LOG_FILE); + if (!config->log_file) + RPMEMD_FATAL("!strdup"); + + config->poolset_dir = strdup(poolset_dir); + if (!config->poolset_dir) + RPMEMD_FATAL("!strdup"); + + config->persist_apm = false; + config->persist_general = true; + config->use_syslog = true; + config->max_lanes = RPMEM_DEFAULT_MAX_LANES; + config->log_level = RPD_LOG_ERR; + config->rm_poolset = NULL; + config->force = false; + config->nthreads = RPMEM_DEFAULT_NTHREADS; +} + +/* + * rpmemd_config_read -- read config from cl and config files + * + * cl param overwrites configuration from any config file. Config file are read + * in order: + * 1. Global config file + * 2. 
User config file + * or + * cl provided config file + */ +int +rpmemd_config_read(struct rpmemd_config *config, int argc, char *argv[]) +{ + const char *cl_config_file = NULL; + char user_config_file[PATH_MAX]; + char home_dir[PATH_MAX]; + uint64_t cl_options = 0; + + get_home_dir(home_dir, PATH_MAX); + config_set_default(config, home_dir); + parse_cl_args(argc, argv, config, &cl_config_file, &cl_options); + + if (cl_config_file) { + if (parse_config_file(cl_config_file, config, cl_options, 1)) { + rpmemd_config_free(config); + return 1; + } + } else { + if (parse_config_file(RPMEMD_GLOBAL_CONFIG_FILE, config, + cl_options, 0)) { + rpmemd_config_free(config); + return 1; + } + + concat_dir_and_file_name(user_config_file, PATH_MAX, home_dir, + RPMEMD_USER_CONFIG_FILE); + if (parse_config_file(user_config_file, config, cl_options, + 0)) { + rpmemd_config_free(config); + return 1; + } + } + + config->poolset_dir = str_replace_home(config->poolset_dir, home_dir); + return 0; +} + +/* + * rpmemd_config_free -- rpmemd config release + */ +void +rpmemd_config_free(struct rpmemd_config *config) +{ + free(config->log_file); + free(config->poolset_dir); +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_config.h b/src/pmdk/src/tools/rpmemd/rpmemd_config.h new file mode 100644 index 000000000..530865926 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_config.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_config.h -- internal definitions for rpmemd config + */ + +#include +#include + +#ifndef RPMEMD_DEFAULT_LOG_FILE +#define RPMEMD_DEFAULT_LOG_FILE ("/var/log/" DAEMON_NAME ".log") +#endif + +#ifndef RPMEMD_GLOBAL_CONFIG_FILE +#define RPMEMD_GLOBAL_CONFIG_FILE ("/etc/" DAEMON_NAME "/" DAEMON_NAME\ + ".conf") +#endif + +#define RPMEMD_USER_CONFIG_FILE ("." 
DAEMON_NAME ".conf") + +#define RPMEM_DEFAULT_MAX_LANES 1024 + +#define RPMEM_DEFAULT_NTHREADS 0 + +#define HOME_ENV "HOME" + +#define HOME_STR_PLACEHOLDER ("$" HOME_ENV) + +struct rpmemd_config { + char *log_file; + char *poolset_dir; + const char *rm_poolset; + bool force; + bool pool_set; + bool persist_apm; + bool persist_general; + bool use_syslog; + uint64_t max_lanes; + enum rpmemd_log_level log_level; + size_t nthreads; +}; + +int rpmemd_config_read(struct rpmemd_config *config, int argc, char *argv[]); +void rpmemd_config_free(struct rpmemd_config *config); diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_db.c b/src/pmdk/src/tools/rpmemd/rpmemd_db.c new file mode 100644 index 000000000..63fa78731 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_db.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_db.c -- rpmemd database of pool set files + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "queue.h" +#include "set.h" +#include "os.h" +#include "out.h" +#include "file.h" +#include "sys_util.h" + +#include "librpmem.h" +#include "rpmemd_db.h" +#include "rpmemd_log.h" + +/* + * struct rpmemd_db -- pool set database structure + */ +struct rpmemd_db { + os_mutex_t lock; + char *root_dir; + mode_t mode; +}; + +/* + * declaration of the 'struct list_head' type + */ +PMDK_LIST_HEAD(list_head, rpmemd_db_entry); + +/* + * struct rpmemd_db_entry -- entry in the pool set list + */ +struct rpmemd_db_entry { + PMDK_LIST_ENTRY(rpmemd_db_entry) next; + char *pool_desc; + struct pool_set *set; +}; + +/* + * rpmemd_db_init -- initialize the rpmem database of pool set files + */ +struct rpmemd_db * +rpmemd_db_init(const char *root_dir, mode_t mode) +{ + if (root_dir[0] != '/') { + RPMEMD_LOG(ERR, "root directory is not an absolute path" + " -- '%s'", root_dir); + errno = EINVAL; + return NULL; + } + struct rpmemd_db *db = calloc(1, sizeof(*db)); + if (!db) { + RPMEMD_LOG(ERR, "!allocating the rpmem database structure"); + return NULL; + } + + db->root_dir = strdup(root_dir); + if (!db->root_dir) { + RPMEMD_LOG(ERR, "!allocating the root dir path"); + free(db); + return NULL; + } + + db->mode = mode; + + util_mutex_init(&db->lock); + + return db; +} + +/* + * rpmemd_db_concat -- (internal) concatenate two paths + */ +static char * +rpmemd_db_concat(const char *path1, const char *path2) +{ + size_t len1 = strlen(path1); + size_t len2 = strlen(path2); + size_t new_len = len1 + len2 + 2; /* +1 for '/' in snprintf() */ + + if (path1[0] != '/') { + RPMEMD_LOG(ERR, "the first path is not an absolute one -- '%s'", + path1); + errno = EINVAL; + return NULL; + } + if (path2[0] == '/') { + RPMEMD_LOG(ERR, "the second path is not a relative one -- '%s'", + path2); + /* set to EBADF to distinguish this case from other errors */ + errno = EBADF; + return NULL; + } + + char *new_str = malloc(new_len); + if (new_str == NULL) { + RPMEMD_LOG(ERR, "!allocating path buffer"); + return NULL; + } + + int ret = util_snprintf(new_str, new_len, "%s/%s", path1, path2); + if (ret < 0) { + RPMEMD_LOG(ERR, "!snprintf"); + free(new_str); + errno = EINVAL; + return NULL; + } + + return new_str; +} + +/* + * rpmemd_db_get_path -- (internal) get the full path of the pool set file + */ +static char * +rpmemd_db_get_path(struct rpmemd_db *db, const char *pool_desc) +{ + return rpmemd_db_concat(db->root_dir, pool_desc); +} + +/* + * rpmemd_db_pool_madvise -- (internal) workaround device dax alignment issue + */ 
+static int +rpmemd_db_pool_madvise(struct pool_set *set) +{ + /* + * This is a workaround for an issue with using device dax with + * libibverbs. The problem is that we use ibv_fork_init(3) which + * makes all registered memory being madvised with MADV_DONTFORK + * flag. In libpmemobj the remote replication is performed without + * pool header (first 4k). In such case the address passed to + * madvise(2) is aligned to 4k, but device dax can require different + * alignment (default is 2MB). This workaround madvises the entire + * memory region before registering it by ibv_reg_mr(3). + */ + const struct pool_set_part *part = &set->replica[0]->part[0]; + if (part->is_dev_dax) { + int ret = os_madvise(part->addr, part->filesize, + MADV_DONTFORK); + if (ret) { + ERR("!madvise"); + return -1; + } + } + return 0; +} + +/* + * rpmemd_get_attr -- (internal) get pool attributes from remote pool attributes + */ +static void +rpmemd_get_attr(struct pool_attr *attr, const struct rpmem_pool_attr *rattr) +{ + LOG(3, "attr %p, rattr %p", attr, rattr); + memcpy(attr->signature, rattr->signature, POOL_HDR_SIG_LEN); + attr->major = rattr->major; + attr->features.compat = rattr->compat_features; + attr->features.incompat = rattr->incompat_features; + attr->features.ro_compat = rattr->ro_compat_features; + memcpy(attr->poolset_uuid, rattr->poolset_uuid, POOL_HDR_UUID_LEN); + memcpy(attr->first_part_uuid, rattr->uuid, POOL_HDR_UUID_LEN); + memcpy(attr->prev_repl_uuid, rattr->prev_uuid, POOL_HDR_UUID_LEN); + memcpy(attr->next_repl_uuid, rattr->next_uuid, POOL_HDR_UUID_LEN); + memcpy(attr->arch_flags, rattr->user_flags, POOL_HDR_ARCH_LEN); +} + +/* + * rpmemd_db_pool_create -- create a new pool set + */ +struct rpmemd_db_pool * +rpmemd_db_pool_create(struct rpmemd_db *db, const char *pool_desc, + size_t pool_size, const struct rpmem_pool_attr *rattr) +{ + RPMEMD_ASSERT(db != NULL); + + util_mutex_lock(&db->lock); + + struct rpmemd_db_pool *prp = NULL; + struct pool_set *set; + char *path; + int ret; + + prp = malloc(sizeof(struct rpmemd_db_pool)); + if (!prp) { + RPMEMD_LOG(ERR, "!allocating pool set db entry"); + goto err_unlock; + } + + path = rpmemd_db_get_path(db, pool_desc); + if (!path) { + goto err_free_prp; + } + + struct pool_attr attr; + struct pool_attr *pattr = NULL; + if (rattr != NULL) { + rpmemd_get_attr(&attr, rattr); + pattr = &attr; + } + + ret = util_pool_create_uuids(&set, path, 0, RPMEM_MIN_POOL, + RPMEM_MIN_PART, pattr, NULL, REPLICAS_DISABLED, + POOL_REMOTE); + if (ret) { + RPMEMD_LOG(ERR, "!cannot create pool set -- '%s'", path); + goto err_free_path; + } + + ret = util_poolset_chmod(set, db->mode); + if (ret) { + RPMEMD_LOG(ERR, "!cannot change pool set mode bits to 0%o", + db->mode); + } + + if (rpmemd_db_pool_madvise(set)) + goto err_poolset_close; + + /* mark as opened */ + prp->pool_addr = set->replica[0]->part[0].addr; + prp->pool_size = set->poolsize; + prp->set = set; + + free(path); + util_mutex_unlock(&db->lock); + + return prp; + +err_poolset_close: + util_poolset_close(set, DO_NOT_DELETE_PARTS); +err_free_path: + free(path); +err_free_prp: + free(prp); +err_unlock: + util_mutex_unlock(&db->lock); + return NULL; +} + +/* + * rpmemd_db_pool_open -- open a pool set + */ +struct rpmemd_db_pool * +rpmemd_db_pool_open(struct rpmemd_db *db, const char *pool_desc, + size_t pool_size, struct rpmem_pool_attr *rattr) +{ + RPMEMD_ASSERT(db != NULL); + RPMEMD_ASSERT(rattr != NULL); + + util_mutex_lock(&db->lock); + + struct rpmemd_db_pool *prp = NULL; + struct pool_set *set; + char *path; 
+ int ret; + + prp = malloc(sizeof(struct rpmemd_db_pool)); + if (!prp) { + RPMEMD_LOG(ERR, "!allocating pool set db entry"); + goto err_unlock; + } + + path = rpmemd_db_get_path(db, pool_desc); + if (!path) { + goto err_free_prp; + } + + ret = util_pool_open_remote(&set, path, 0, RPMEM_MIN_PART, rattr); + if (ret) { + RPMEMD_LOG(ERR, "!cannot open pool set -- '%s'", path); + goto err_free_path; + } + + if (rpmemd_db_pool_madvise(set)) + goto err_poolset_close; + + /* mark as opened */ + prp->pool_addr = set->replica[0]->part[0].addr; + prp->pool_size = set->poolsize; + prp->set = set; + + free(path); + util_mutex_unlock(&db->lock); + + return prp; + +err_poolset_close: + util_poolset_close(set, DO_NOT_DELETE_PARTS); +err_free_path: + free(path); +err_free_prp: + free(prp); +err_unlock: + util_mutex_unlock(&db->lock); + return NULL; +} + +/* + * rpmemd_db_pool_close -- close a pool set + */ +void +rpmemd_db_pool_close(struct rpmemd_db *db, struct rpmemd_db_pool *prp) +{ + RPMEMD_ASSERT(db != NULL); + + util_mutex_lock(&db->lock); + + util_poolset_close(prp->set, DO_NOT_DELETE_PARTS); + free(prp); + + util_mutex_unlock(&db->lock); +} + +/* + * rpmemd_db_pool_set_attr -- overwrite pool attributes + */ +int +rpmemd_db_pool_set_attr(struct rpmemd_db_pool *prp, + const struct rpmem_pool_attr *rattr) +{ + RPMEMD_ASSERT(prp != NULL); + RPMEMD_ASSERT(prp->set != NULL); + RPMEMD_ASSERT(prp->set->nreplicas == 1); + + return util_replica_set_attr(prp->set->replica[0], rattr); +} + +struct rm_cb_args { + int force; + int ret; +}; + +/* + * rm_poolset_cb -- (internal) callback for removing part files + */ +static int +rm_poolset_cb(struct part_file *pf, void *arg) +{ + struct rm_cb_args *args = (struct rm_cb_args *)arg; + if (pf->is_remote) { + RPMEMD_LOG(ERR, "removing remote replica not supported"); + return -1; + } + + int ret = util_unlink_flock(pf->part->path); + if (!args->force && ret) { + RPMEMD_LOG(ERR, "!unlink -- '%s'", pf->part->path); + args->ret = ret; + } + + return 0; +} + +/* + * rpmemd_db_pool_remove -- remove a pool set + */ +int +rpmemd_db_pool_remove(struct rpmemd_db *db, const char *pool_desc, + int force, int pool_set) +{ + RPMEMD_ASSERT(db != NULL); + RPMEMD_ASSERT(pool_desc != NULL); + + util_mutex_lock(&db->lock); + + struct rm_cb_args args; + args.force = force; + args.ret = 0; + char *path; + + path = rpmemd_db_get_path(db, pool_desc); + if (!path) { + args.ret = -1; + goto err_unlock; + } + + int ret = util_poolset_foreach_part(path, rm_poolset_cb, &args); + if (!force && ret) { + RPMEMD_LOG(ERR, "!removing '%s' failed", path); + args.ret = ret; + goto err_free_path; + } + + if (pool_set) + os_unlink(path); + +err_free_path: + free(path); +err_unlock: + util_mutex_unlock(&db->lock); + return args.ret; +} + +/* + * rpmemd_db_fini -- deinitialize the rpmem database of pool set files + */ +void +rpmemd_db_fini(struct rpmemd_db *db) +{ + RPMEMD_ASSERT(db != NULL); + + util_mutex_destroy(&db->lock); + free(db->root_dir); + free(db); +} + +/* + * rpmemd_db_check_dups_set -- (internal) check for duplicates in the database + */ +static inline int +rpmemd_db_check_dups_set(struct pool_set *set, const char *path) +{ + for (unsigned r = 0; r < set->nreplicas; r++) { + struct pool_replica *rep = set->replica[r]; + for (unsigned p = 0; p < rep->nparts; p++) { + if (strcmp(path, rep->part[p].path) == 0) + return -1; + } + } + return 0; +} + +/* + * rpmemd_db_check_dups -- (internal) check for duplicates in the database + */ +static int +rpmemd_db_check_dups(struct list_head *head, struct 
rpmemd_db *db, + const char *pool_desc, struct pool_set *set) +{ + struct rpmemd_db_entry *edb; + + PMDK_LIST_FOREACH(edb, head, next) { + for (unsigned r = 0; r < edb->set->nreplicas; r++) { + struct pool_replica *rep = edb->set->replica[r]; + for (unsigned p = 0; p < rep->nparts; p++) { + if (rpmemd_db_check_dups_set(set, + rep->part[p].path)) { + RPMEMD_LOG(ERR, "part file '%s' from " + "pool set '%s' duplicated in " + "pool set '%s'", + rep->part[p].path, + pool_desc, + edb->pool_desc); + errno = EEXIST; + return -1; + } + + } + } + } + return 0; +} + +/* + * rpmemd_db_add -- (internal) add an entry for a given set to the database + */ +static struct rpmemd_db_entry * +rpmemd_db_add(struct list_head *head, struct rpmemd_db *db, + const char *pool_desc, struct pool_set *set) +{ + struct rpmemd_db_entry *edb; + + edb = calloc(1, sizeof(*edb)); + if (!edb) { + RPMEMD_LOG(ERR, "!allocating database entry"); + goto err_calloc; + } + + edb->set = set; + edb->pool_desc = strdup(pool_desc); + if (!edb->pool_desc) { + RPMEMD_LOG(ERR, "!allocating path for database entry"); + goto err_strdup; + } + + PMDK_LIST_INSERT_HEAD(head, edb, next); + + return edb; + +err_strdup: + free(edb); +err_calloc: + return NULL; +} + +/* + * new_paths -- (internal) create two new paths + */ +static int +new_paths(const char *dir, const char *name, const char *old_desc, + char **path, char **new_desc) +{ + *path = rpmemd_db_concat(dir, name); + if (!(*path)) + return -1; + + if (old_desc[0] != 0) + *new_desc = rpmemd_db_concat(old_desc, name); + else { + *new_desc = strdup(name); + if (!(*new_desc)) { + RPMEMD_LOG(ERR, "!allocating new descriptor"); + } + } + if (!(*new_desc)) { + free(*path); + return -1; + } + return 0; +} + +/* + * rpmemd_db_check_dir_r -- (internal) recursively check given directory + * for duplicates + */ +static int +rpmemd_db_check_dir_r(struct list_head *head, struct rpmemd_db *db, + const char *dir, char *pool_desc) +{ + char *new_dir, *new_desc, *full_path; + struct dirent *dentry; + struct pool_set *set = NULL; + DIR *dirp; + int ret = 0; + + dirp = opendir(dir); + if (dirp == NULL) { + RPMEMD_LOG(ERR, "cannot open the directory -- %s", dir); + return -1; + } + + while ((dentry = readdir(dirp)) != NULL) { + if (strcmp(dentry->d_name, ".") == 0 || + strcmp(dentry->d_name, "..") == 0) + continue; + + if (dentry->d_type == DT_DIR) { /* directory */ + if (new_paths(dir, dentry->d_name, pool_desc, + &new_dir, &new_desc)) + goto err_closedir; + + /* call recursively for a new directory */ + ret = rpmemd_db_check_dir_r(head, db, new_dir, + new_desc); + free(new_dir); + free(new_desc); + if (ret) + goto err_closedir; + continue; + + } + + if (new_paths(dir, dentry->d_name, pool_desc, + &full_path, &new_desc)) { + goto err_closedir; + } + if (util_poolset_read(&set, full_path)) { + RPMEMD_LOG(ERR, "!error reading pool set file -- %s", + full_path); + goto err_free_paths; + } + if (rpmemd_db_check_dups(head, db, new_desc, set)) { + RPMEMD_LOG(ERR, "!duplicate found in pool set file" + " -- %s", full_path); + goto err_free_set; + } + if (rpmemd_db_add(head, db, new_desc, set) == NULL) { + goto err_free_set; + } + + free(new_desc); + free(full_path); + } + + closedir(dirp); + return 0; + +err_free_set: + util_poolset_close(set, DO_NOT_DELETE_PARTS); +err_free_paths: + free(new_desc); + free(full_path); +err_closedir: + closedir(dirp); + return -1; +} + +/* + * rpmemd_db_check_dir -- check given directory for duplicates + */ +int +rpmemd_db_check_dir(struct rpmemd_db *db) +{ + RPMEMD_ASSERT(db != 
NULL); + + util_mutex_lock(&db->lock); + + struct list_head head; + PMDK_LIST_INIT(&head); + + int ret = rpmemd_db_check_dir_r(&head, db, db->root_dir, ""); + + while (!PMDK_LIST_EMPTY(&head)) { + struct rpmemd_db_entry *edb = PMDK_LIST_FIRST(&head); + PMDK_LIST_REMOVE(edb, next); + util_poolset_close(edb->set, DO_NOT_DELETE_PARTS); + free(edb->pool_desc); + free(edb); + } + + util_mutex_unlock(&db->lock); + + return ret; +} + +/* + * rpmemd_db_pool_is_pmem -- true if pool is in PMEM + */ +int +rpmemd_db_pool_is_pmem(struct rpmemd_db_pool *pool) +{ + return REP(pool->set, 0)->is_pmem; +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_db.h b/src/pmdk/src/tools/rpmemd/rpmemd_db.h new file mode 100644 index 000000000..17d087b27 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_db.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_db.h -- internal definitions for rpmemd database of pool set files + */ + +struct rpmemd_db; +struct rpmem_pool_attr; + +/* + * struct rpmemd_db_pool -- remote pool context + */ +struct rpmemd_db_pool { + void *pool_addr; + size_t pool_size; + struct pool_set *set; +}; + +struct rpmemd_db *rpmemd_db_init(const char *root_dir, mode_t mode); +struct rpmemd_db_pool *rpmemd_db_pool_create(struct rpmemd_db *db, + const char *pool_desc, size_t pool_size, + const struct rpmem_pool_attr *rattr); +struct rpmemd_db_pool *rpmemd_db_pool_open(struct rpmemd_db *db, + const char *pool_desc, size_t pool_size, struct rpmem_pool_attr *rattr); +int rpmemd_db_pool_remove(struct rpmemd_db *db, const char *pool_desc, + int force, int pool_set); +int rpmemd_db_pool_set_attr(struct rpmemd_db_pool *prp, + const struct rpmem_pool_attr *rattr); +void rpmemd_db_pool_close(struct rpmemd_db *db, struct rpmemd_db_pool *prp); +void rpmemd_db_fini(struct rpmemd_db *db); +int rpmemd_db_check_dir(struct rpmemd_db *db); +int rpmemd_db_pool_is_pmem(struct rpmemd_db_pool *pool); diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_fip.c b/src/pmdk/src/tools/rpmemd/rpmemd_fip.c new file mode 100644 index 000000000..3e4ff5284 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_fip.c @@ -0,0 +1,1216 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_fip.c -- rpmemd libfabric provider module source file + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "rpmemd_log.h" + +#include "rpmem_common.h" +#include "rpmem_proto.h" +#include "rpmem_fip_msg.h" +#include "rpmem_fip_common.h" +#include "rpmemd_fip.h" + +#include "os_thread.h" +#include "util.h" +#include "valgrind_internal.h" + +#define RPMEMD_FI_ERR(e, fmt, args...)\ + RPMEMD_LOG(ERR, fmt ": %s", ## args, fi_strerror((e))) + +#define RPMEMD_FI_CLOSE(f, fmt, args...) 
(\ +{\ + int ret = fi_close(&(f)->fid);\ + if (ret)\ + RPMEMD_FI_ERR(ret, fmt, ## args);\ + ret;\ +}) + +/* + * rpmem_fip_lane -- base lane structure + */ +struct rpmem_fip_lane { + struct fid_ep *ep; + struct fid_cq *cq; +}; + +/* + * rpmemd_fip_lane -- daemon's lane + */ +struct rpmemd_fip_lane { + struct rpmem_fip_lane base; /* lane base structure */ + struct rpmem_fip_msg recv; /* RECV message */ + struct rpmem_fip_msg send; /* SEND message */ + struct rpmem_msg_persist_resp resp; /* persist response msg buffer */ + int send_posted; /* send buffer has been posted */ + int recv_posted; /* recv buffer has been posted */ +}; + +/* + * rpmemd_fip_thread -- thread context + */ +struct rpmemd_fip_thread { + struct rpmemd_fip *fip; /* main context */ + os_thread_t thread; /* thread structure */ + struct fid_cq *cq; /* per-thread completion queue */ + struct rpmemd_fip_lane **lanes; /* lanes processed by this thread */ + size_t nlanes; /* number of lanes processed by this thread */ +}; + +/* + * rpmemd_fip -- main context of rpmemd_fip + */ +struct rpmemd_fip { + struct fi_info *fi; /* fabric interface information */ + struct fid_fabric *fabric; /* fabric domain */ + struct fid_domain *domain; /* fabric protection domain */ + struct fid_eq *eq; /* event queue */ + struct fid_pep *pep; /* passive endpoint - listener */ + struct fid_mr *mr; /* memory region for pool */ + + int (*persist)(const void *addr, size_t len); /* persist function */ + void *(*memcpy_persist)(void *pmemdest, const void *src, size_t len); + int (*deep_persist)(const void *addr, size_t len, void *ctx); + void *ctx; + void *addr; /* pool's address */ + size_t size; /* size of the pool */ + enum rpmem_persist_method persist_method; + + volatile int closing; /* flag for closing background threads */ + unsigned nlanes; /* number of lanes */ + size_t nthreads; /* number of threads for processing */ + size_t cq_size; /* size of completion queue */ + size_t lanes_per_thread; /* number of lanes per thread */ + size_t buff_size; /* size of buffer for inlined data */ + + struct rpmemd_fip_lane *lanes; + struct rpmem_fip_lane rd_lane; /* lane for read operation */ + + void *pmsg; /* persist message buffer */ + size_t pmsg_size; /* persist message buffer size including alignment */ + struct fid_mr *pmsg_mr; /* persist message memory region */ + void *pmsg_mr_desc; /* persist message local descriptor */ + + struct rpmem_msg_persist_resp *pres; /* persist response buffer */ + struct fid_mr *pres_mr; /* persist response memory region */ + void *pres_mr_desc; /* persist response local descriptor */ + + struct rpmemd_fip_thread *threads; +}; + +/* + * rpmemd_fip_get_pmsg -- return persist message buffer + */ +static inline struct rpmem_msg_persist * +rpmemd_fip_get_pmsg(struct rpmemd_fip *fip, size_t idx) +{ + return (struct rpmem_msg_persist *) + ((uintptr_t)fip->pmsg + idx * fip->pmsg_size); +} + +/* + * rpmemd_fip_getinfo -- obtain fabric interface information + */ +static int +rpmemd_fip_getinfo(struct rpmemd_fip *fip, const char *service, + const char *node, enum rpmem_provider provider) +{ + int ret; + + struct fi_info *hints = rpmem_fip_get_hints(provider); + if (!hints) { + RPMEMD_LOG(ERR, "getting fabric interface hints"); + ret = -1; + goto err_fi_get_hints; + } + + ret = fi_getinfo(RPMEM_FIVERSION, node, service, FI_SOURCE, + hints, &fip->fi); + if (ret) { + RPMEMD_FI_ERR(ret, "getting fabric interface information"); + goto err_fi_getinfo; + } + + rpmem_fip_print_info(fip->fi); + + fi_freeinfo(hints); + return 0; +err_fi_getinfo: 
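The RPMEMD_FI_CLOSE macro above is written as a GCC/Clang statement expression, so the whole block can be used as a value: the block evaluates to its last expression, ret. A minimal standalone sketch of the same idiom, with a hypothetical close_resource() standing in for fi_close():

#include <stdio.h>

static int close_resource(int handle) { return handle < 0 ? -22 : 0; }

/* the ({ ... }) block yields its last expression, _ret */
#define CLOSE_AND_LOG(h, what) ({				\
	int _ret = close_resource(h);				\
	if (_ret)						\
		fprintf(stderr, "%s: error %d\n", what, _ret);	\
	_ret;							\
})

int main(void)
{
	int err = CLOSE_AND_LOG(-1, "closing resource");
	return err ? 1 : 0;
}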
+ fi_freeinfo(hints); +err_fi_get_hints: + return ret; +} + +/* + * rpmemd_fip_set_resp -- fill the response structure + */ +static int +rpmemd_fip_set_resp(struct rpmemd_fip *fip, struct rpmem_resp_attr *resp) +{ + int ret; + if (fip->fi->addr_format == FI_SOCKADDR_IN) { + struct sockaddr_in addr_in; + size_t addrlen = sizeof(addr_in); + + ret = fi_getname(&fip->pep->fid, &addr_in, &addrlen); + if (ret) { + RPMEMD_FI_ERR(ret, "getting local endpoint address"); + goto err_fi_getname; + } + + if (!addr_in.sin_port) { + RPMEMD_LOG(ERR, "dynamic allocation of port failed"); + goto err_port; + } + + resp->port = htons(addr_in.sin_port); + } else if (fip->fi->addr_format == FI_SOCKADDR_IN6) { + struct sockaddr_in6 addr_in6; + size_t addrlen = sizeof(addr_in6); + + ret = fi_getname(&fip->pep->fid, &addr_in6, &addrlen); + if (ret) { + RPMEMD_FI_ERR(ret, "getting local endpoint address"); + goto err_fi_getname; + } + + if (!addr_in6.sin6_port) { + RPMEMD_LOG(ERR, "dynamic allocation of port failed"); + goto err_port; + } + + resp->port = htons(addr_in6.sin6_port); + } else { + RPMEMD_LOG(ERR, "invalid address format"); + return -1; + } + + resp->rkey = fi_mr_key(fip->mr); + resp->persist_method = fip->persist_method; + resp->raddr = (uint64_t)fip->addr; + resp->nlanes = fip->nlanes; + + return 0; +err_port: +err_fi_getname: + return -1; +} + +/* + * rpmemd_fip_init_fabric_res -- initialize common fabric's resources + */ +static int +rpmemd_fip_init_fabric_res(struct rpmemd_fip *fip) +{ + int ret; + ret = fi_fabric(fip->fi->fabric_attr, &fip->fabric, NULL); + if (ret) { + RPMEMD_FI_ERR(ret, "opening fabric domain"); + goto err_fi_fabric; + } + + ret = fi_domain(fip->fabric, fip->fi, &fip->domain, NULL); + if (ret) { + RPMEMD_FI_ERR(ret, "opening fabric access domain"); + goto err_fi_domain; + } + + struct fi_eq_attr eq_attr = { + .size = 0, /* use default */ + .flags = 0, + .wait_obj = FI_WAIT_UNSPEC, + .signaling_vector = 0, + .wait_set = NULL, + }; + + ret = fi_eq_open(fip->fabric, &eq_attr, &fip->eq, NULL); + if (ret) { + RPMEMD_FI_ERR(ret, "opening event queue"); + goto err_eq_open; + } + + ret = fi_passive_ep(fip->fabric, fip->fi, &fip->pep, NULL); + if (ret) { + RPMEMD_FI_ERR(ret, "allocating passive endpoint"); + goto err_pep; + } + + ret = fi_pep_bind(fip->pep, &fip->eq->fid, 0); + if (ret) { + RPMEMD_FI_ERR(ret, "binding event queue to passive endpoint"); + goto err_pep_bind_eq; + } + + return 0; +err_pep_bind_eq: + RPMEMD_FI_CLOSE(fip->pep, "closing passive endpoint"); +err_pep: + RPMEMD_FI_CLOSE(fip->eq, "closing event queue"); +err_eq_open: + RPMEMD_FI_CLOSE(fip->domain, "closing fabric access domain"); +err_fi_domain: + RPMEMD_FI_CLOSE(fip->fabric, "closing fabric domain"); +err_fi_fabric: + return ret; +} + +/* + * rpmemd_fip_fini_fabric_res -- deinitialize common fabric resources + */ +static void +rpmemd_fip_fini_fabric_res(struct rpmemd_fip *fip) +{ + RPMEMD_FI_CLOSE(fip->pep, "closing passive endpoint"); + RPMEMD_FI_CLOSE(fip->eq, "closing event queue"); + RPMEMD_FI_CLOSE(fip->domain, "closing fabric access domain"); + RPMEMD_FI_CLOSE(fip->fabric, "closing fabric domain"); +} + +/* + * rpmemd_fip_init_memory -- initialize memory pool's resources + */ +static int +rpmemd_fip_init_memory(struct rpmemd_fip *fip) +{ + int ret; + + /* + * Register memory region with appropriate access bits: + * - FI_REMOTE_READ - remote peer can issue READ operation, + * - FI_REMOTE_WRITE - remote peer can issue WRITE operation, + */ + ret = fi_mr_reg(fip->domain, fip->addr, fip->size, + FI_REMOTE_READ 
+			| FI_REMOTE_WRITE, 0, 0, 0,
+			&fip->mr, NULL);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "registering memory");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmemd_fip_fini_memory -- deinitialize memory pool's resources
+ */
+static void
+rpmemd_fip_fini_memory(struct rpmemd_fip *fip)
+{
+	RPMEMD_FI_CLOSE(fip->mr, "unregistering memory");
+}
+
+/*
+ * rpmemd_fip_init_ep -- initialize active endpoint
+ */
+static int
+rpmemd_fip_init_ep(struct rpmemd_fip *fip, struct fi_info *info,
+	struct rpmem_fip_lane *lanep)
+{
+	int ret;
+
+	info->tx_attr->size = rpmem_fip_wq_size(fip->persist_method,
+			RPMEM_FIP_NODE_SERVER);
+
+	info->rx_attr->size = rpmem_fip_rx_size(fip->persist_method,
+			RPMEM_FIP_NODE_SERVER);
+
+	/* create an endpoint from fabric interface info */
+	ret = fi_endpoint(fip->domain, info, &lanep->ep, NULL);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "allocating endpoint");
+		goto err_endpoint;
+	}
+
+	/* bind event queue to the endpoint */
+	ret = fi_ep_bind(lanep->ep, &fip->eq->fid, 0);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "binding event queue to endpoint");
+		goto err_bind_eq;
+	}
+
+	/*
+	 * Bind completion queue to the endpoint.
+	 * Use a single completion queue for outbound and inbound work
+	 * requests. Using selective completion implies adding the
+	 * FI_COMPLETION flag to each WR which needs a completion.
+	 */
+	ret = fi_ep_bind(lanep->ep, &lanep->cq->fid,
+			FI_RECV | FI_TRANSMIT | FI_SELECTIVE_COMPLETION);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "binding completion queue to endpoint");
+		goto err_bind_cq;
+	}
+
+	/* enable the endpoint */
+	ret = fi_enable(lanep->ep);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "enabling endpoint");
+		goto err_enable;
+	}
+
+	return 0;
+err_enable:
+err_bind_cq:
+err_bind_eq:
+	RPMEMD_FI_CLOSE(lanep->ep, "closing endpoint");
+err_endpoint:
+	return -1;
+}
+
+/*
+ * rpmemd_fip_fini_ep -- close endpoint
+ */
+static int
+rpmemd_fip_fini_ep(struct rpmem_fip_lane *lanep)
+{
+	return RPMEMD_FI_CLOSE(lanep->ep, "closing endpoint");
+}
+
+/*
+ * rpmemd_fip_post_msg -- post RECV buffer
+ */
+static inline int
+rpmemd_fip_post_msg(struct rpmemd_fip_lane *lanep)
+{
+	int ret = rpmem_fip_recvmsg(lanep->base.ep, &lanep->recv);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "posting recv buffer");
+		return ret;
+	}
+
+	lanep->recv_posted = 1;
+
+	return 0;
+}
+
+/*
+ * rpmemd_fip_post_resp -- post SEND buffer
+ */
+static inline int
+rpmemd_fip_post_resp(struct rpmemd_fip_lane *lanep)
+{
+	int ret = rpmem_fip_sendmsg(lanep->base.ep, &lanep->send,
+			sizeof(struct rpmem_msg_persist_resp));
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "posting send buffer");
+		return ret;
+	}
+
+	lanep->send_posted = 1;
+
+	return 0;
+}
+
+/*
+ * rpmemd_fip_post_common -- post the lane's initial RECV message
+ */
+static int
+rpmemd_fip_post_common(struct rpmemd_fip *fip, struct rpmemd_fip_lane *lanep)
+{
+	int ret = rpmem_fip_recvmsg(lanep->base.ep, &lanep->recv);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "posting recv buffer");
+		return ret;
+	}
+
+	lanep->recv_posted = 1;
+
+	return 0;
+}
+
+/*
+ * rpmemd_fip_lanes_init -- initialize all lanes
+ */
+static int
+rpmemd_fip_lanes_init(struct rpmemd_fip *fip)
+{
+	fip->lanes = calloc(fip->nlanes, sizeof(*fip->lanes));
+	if (!fip->lanes) {
+		RPMEMD_ERR("!allocating lanes");
+		goto err_alloc;
+	}
+
+	return 0;
+err_alloc:
+	return -1;
+}
+
+/*
+ * rpmemd_fip_fini_lanes -- deinitialize all lanes
+ */
+static void
+rpmemd_fip_fini_lanes(struct rpmemd_fip *fip)
+{
+	free(fip->lanes);
+}
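Before rpmemd_fip_init_common below, it may help to see the per-lane buffer arithmetic in isolation: the persist-message area is a single allocation of nlanes * pmsg_size bytes, and rpmemd_fip_get_pmsg (defined earlier in this file) carves out one slot per lane by offset. A minimal self-contained sketch, with made-up sizes:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t nlanes = 4;
	size_t pmsg_size = 64;	/* per-lane slot, 64-byte aligned as above */
	unsigned char *pmsg = malloc(nlanes * pmsg_size);
	if (!pmsg)
		return 1;

	for (size_t i = 0; i < nlanes; i++) {
		/* same arithmetic as rpmemd_fip_get_pmsg() */
		void *slot = (void *)((uintptr_t)pmsg + i * pmsg_size);
		printf("lane %zu -> offset %zu\n", i,
			(size_t)((uintptr_t)slot - (uintptr_t)pmsg));
	}

	free(pmsg);
	return 0;
}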
+/*
+ * rpmemd_fip_init_common -- initialize common resources
+ */
+static int
+rpmemd_fip_init_common(struct rpmemd_fip *fip)
+{
+	int ret;
+
+	/* allocate persist message buffer */
+	size_t msg_size = fip->nlanes * fip->pmsg_size;
+	fip->pmsg = malloc(msg_size);
+	if (!fip->pmsg) {
+		RPMEMD_LOG(ERR, "!allocating messages buffer");
+		goto err_msg_malloc;
+	}
+
+	/* register persist message buffer */
+	ret = fi_mr_reg(fip->domain, fip->pmsg, msg_size, FI_RECV,
+			0, 0, 0, &fip->pmsg_mr, NULL);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "registering messages buffer");
+		goto err_mr_reg_msg;
+	}
+
+	/* get persist message buffer's local descriptor */
+	fip->pmsg_mr_desc = fi_mr_desc(fip->pmsg_mr);
+
+	/* allocate persist response message buffer */
+	size_t msg_resp_size = fip->nlanes *
+			sizeof(struct rpmem_msg_persist_resp);
+	fip->pres = malloc(msg_resp_size);
+	if (!fip->pres) {
+		RPMEMD_LOG(ERR, "!allocating messages response buffer");
+		goto err_msg_resp_malloc;
+	}
+
+	/* register persist response message buffer */
+	ret = fi_mr_reg(fip->domain, fip->pres, msg_resp_size, FI_SEND,
+			0, 0, 0, &fip->pres_mr, NULL);
+	if (ret) {
+		RPMEMD_FI_ERR(ret, "registering messages "
+				"response buffer");
+		goto err_mr_reg_msg_resp;
+	}
+
+	/* get persist response buffer's local descriptor */
+	fip->pres_mr_desc = fi_mr_desc(fip->pres_mr);
+
+	/* initialize lanes */
+	unsigned i;
+	for (i = 0; i < fip->nlanes; i++) {
+		struct rpmemd_fip_lane *lanep = &fip->lanes[i];
+
+		/* initialize RECV message */
+		rpmem_fip_msg_init(&lanep->recv,
+				fip->pmsg_mr_desc, 0,
+				lanep,
+				rpmemd_fip_get_pmsg(fip, i),
+				fip->pmsg_size,
+				FI_COMPLETION);
+
+		/* initialize SEND message */
+		rpmem_fip_msg_init(&lanep->send,
+				fip->pres_mr_desc, 0,
+				lanep,
+				&fip->pres[i],
+				sizeof(fip->pres[i]),
+				FI_COMPLETION);
+	}
+
+	return 0;
+err_mr_reg_msg_resp:
+	free(fip->pres);
+err_msg_resp_malloc:
+	RPMEMD_FI_CLOSE(fip->pmsg_mr,
+			"unregistering messages buffer");
+err_mr_reg_msg:
+	free(fip->pmsg);
+err_msg_malloc:
+	return -1;
+}
+
+/*
+ * rpmemd_fip_fini_common -- deinitialize common resources and return last
+ * error code
+ */
+static int
+rpmemd_fip_fini_common(struct rpmemd_fip *fip)
+{
+	int lret = 0;
+	int ret;
+
+	ret = RPMEMD_FI_CLOSE(fip->pmsg_mr,
+			"unregistering messages buffer");
+	if (ret)
+		lret = ret;
+
+	ret = RPMEMD_FI_CLOSE(fip->pres_mr,
+			"unregistering messages response buffer");
+	if (ret)
+		lret = ret;
+
+	free(fip->pmsg);
+	free(fip->pres);
+
+	return lret;
+}
+
+/*
+ * rpmemd_fip_check_pmsg -- verify persist message
+ */
+static inline int
+rpmemd_fip_check_pmsg(struct rpmemd_fip *fip, struct rpmem_msg_persist *pmsg)
+{
+	if (pmsg->lane >= fip->nlanes) {
+		RPMEMD_LOG(ERR, "invalid lane number -- %u", pmsg->lane);
+		return -1;
+	}
+
+	uintptr_t raddr = pmsg->addr;
+	uintptr_t laddr = (uintptr_t)fip->addr;
+
+	if (raddr < laddr || raddr + pmsg->size > laddr + fip->size) {
+		RPMEMD_LOG(ERR, "invalid address or size requested "
+			"for persist operation (0x%lx, %lu)",
+			raddr, pmsg->size);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * rpmemd_fip_process_send -- process FI_SEND completion
+ */
+static int
+rpmemd_fip_process_send(struct rpmemd_fip *fip, struct rpmemd_fip_lane *lanep)
+{
+	lanep->send_posted = 0;
+
+	if (lanep->recv_posted)
+		return 0;
+
+	struct rpmem_msg_persist_resp *pres =
+		rpmem_fip_msg_get_pres(&lanep->send);
+
+	*pres = lanep->resp;
+
+	int ret;
+
+	/* post lane's RECV buffer */
+	ret = rpmemd_fip_post_msg(lanep);
+	if (unlikely(ret))
+		goto err;
+
+	/* post lane's SEND buffer */
+	ret = rpmemd_fip_post_resp(lanep);
+err:
+	return ret;
+}
+
+/*
+ * rpmemd_fip_process_recv -- process FI_RECV completion
+ */
+static int
+rpmemd_fip_process_recv(struct rpmemd_fip *fip, struct rpmemd_fip_lane *lanep) +{ + int ret = 0; + + lanep->recv_posted = 0; + + /* + * Get persist message and persist message response from appropriate + * buffers. The persist message is in lane's RECV buffer and the + * persist response message in lane's SEND buffer. + */ + struct rpmem_msg_persist *pmsg = rpmem_fip_msg_get_pmsg(&lanep->recv); + VALGRIND_DO_MAKE_MEM_DEFINED(pmsg, sizeof(*pmsg)); + + /* verify persist message */ + ret = rpmemd_fip_check_pmsg(fip, pmsg); + if (unlikely(ret)) + goto err; + unsigned mode = pmsg->flags & RPMEM_FLUSH_PERSIST_MASK; + + if (mode == RPMEM_DEEP_PERSIST) { + fip->deep_persist((void *)pmsg->addr, pmsg->size, fip->ctx); + } else if (mode == RPMEM_PERSIST_SEND) { + fip->memcpy_persist((void *)pmsg->addr, pmsg->data, pmsg->size); + } else { + fip->persist((void *)pmsg->addr, pmsg->size); + } + + struct rpmem_msg_persist_resp *pres = lanep->send_posted ? + &lanep->resp : rpmem_fip_msg_get_pres(&lanep->send); + + /* return back the lane id */ + pres->lane = pmsg->lane; + + if (!lanep->send_posted) { + /* post lane's RECV buffer */ + ret = rpmemd_fip_post_msg(lanep); + if (unlikely(ret)) + goto err; + + /* post lane's SEND buffer */ + ret = rpmemd_fip_post_resp(lanep); + } + +err: + return ret; +} + +/* + * rpmemd_fip_cq_read -- wait for specific events on completion queue + */ +static int +rpmemd_fip_cq_read(struct rpmemd_fip *fip, struct fid_cq *cq, + struct rpmemd_fip_lane **lanep, uint64_t *event, uint64_t event_mask) +{ + struct fi_cq_err_entry err; + struct fi_cq_msg_entry cq_entry; + const char *str_err; + ssize_t sret; + int ret; + + while (!fip->closing) { + sret = fi_cq_sread(cq, &cq_entry, 1, NULL, + RPMEM_FIP_CQ_WAIT_MS); + + if (unlikely(fip->closing)) + break; + + if (unlikely(sret == -FI_EAGAIN || sret == 0)) + continue; + + if (unlikely(sret < 0)) { + ret = (int)sret; + goto err_cq_read; + } + + if (!(cq_entry.flags & event_mask)) { + RPMEMD_LOG(ERR, "unexpected event received %lx", + cq_entry.flags); + ret = -1; + goto err; + } + + if (!cq_entry.op_context) { + RPMEMD_LOG(ERR, "null context received"); + ret = -1; + goto err; + } + + *event = cq_entry.flags & event_mask; + *lanep = cq_entry.op_context; + + return 0; + } + + return 0; +err_cq_read: + sret = fi_cq_readerr(cq, &err, 0); + if (sret < 0) { + RPMEMD_FI_ERR((int)sret, "error reading from completion queue: " + "cannot read error from completion queue"); + goto err; + } + + str_err = fi_cq_strerror(cq, err.prov_errno, NULL, NULL, 0); + RPMEMD_LOG(ERR, "error reading from completion queue: %s", str_err); +err: + return ret; +} + +/* + * rpmemd_fip_thread -- thread callback which processes persist + * operation + */ +static void * +rpmemd_fip_thread(void *arg) +{ + struct rpmemd_fip_thread *thread = arg; + struct rpmemd_fip *fip = thread->fip; + struct rpmemd_fip_lane *lanep = NULL; + uint64_t event = 0; + int ret = 0; + + while (!fip->closing) { + ret = rpmemd_fip_cq_read(fip, thread->cq, &lanep, &event, + FI_SEND|FI_RECV); + if (ret) + goto err; + + if (unlikely(fip->closing)) + break; + + RPMEMD_ASSERT(lanep != NULL); + if (event & FI_RECV) + ret = rpmemd_fip_process_recv(fip, lanep); + else if (event & FI_SEND) + ret = rpmemd_fip_process_send(fip, lanep); + if (ret) + goto err; + } + + return 0; +err: + return (void *)(uintptr_t)ret; +} + +/* + * rpmemd_fip_get_def_nthreads -- get default number of threads for given + * persistency method + */ +static size_t +rpmemd_fip_get_def_nthreads(struct rpmemd_fip *fip) +{ + 
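The thread and lane sizing just below (rpmemd_fip_set_attr and rpmemd_fip_init_threads) uses a ceiling division for lanes_per_thread and then deals lanes out round-robin across the worker threads. The arithmetic in isolation, with illustrative numbers:

#include <stdio.h>

int main(void)
{
	size_t nlanes = 10;
	size_t nthreads = 4;

	/* ceiling division, as in rpmemd_fip_set_attr() */
	size_t lanes_per_thread = (nlanes - 1) / nthreads + 1;	/* == 3 */
	printf("at most %zu lanes per thread\n", lanes_per_thread);

	/* round-robin assignment, as in rpmemd_fip_init_threads() */
	for (size_t i = 0; i < nlanes; i++)
		printf("lane %zu -> thread %zu\n", i, i % nthreads);

	return 0;
}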
RPMEMD_ASSERT(fip->nlanes > 0); + switch (fip->persist_method) { + case RPMEM_PM_APM: + case RPMEM_PM_GPSPM: + return fip->nlanes; + default: + RPMEMD_ASSERT(0); + return 0; + } +} + +/* + * rpmemd_fip_set_attr -- save required attributes in rpmemd_fip handle + */ +static void +rpmemd_fip_set_attr(struct rpmemd_fip *fip, struct rpmemd_fip_attr *attr) +{ + fip->addr = attr->addr; + fip->size = attr->size; + fip->persist_method = attr->persist_method; + fip->persist = attr->persist; + fip->memcpy_persist = attr->memcpy_persist; + fip->deep_persist = attr->deep_persist; + fip->ctx = attr->ctx; + fip->buff_size = attr->buff_size; + fip->pmsg_size = roundup(sizeof(struct rpmem_msg_persist) + + fip->buff_size, (size_t)64); + + size_t max_nlanes = rpmem_fip_max_nlanes(fip->fi); + RPMEMD_ASSERT(max_nlanes < UINT_MAX); + fip->nlanes = min((unsigned)max_nlanes, attr->nlanes); + + if (attr->nthreads) { + fip->nthreads = attr->nthreads; + } else { + /* use default */ + fip->nthreads = rpmemd_fip_get_def_nthreads(fip); + } + + fip->lanes_per_thread = (fip->nlanes - 1) / fip->nthreads + 1; + size_t cq_size_per_lane = rpmem_fip_cq_size(fip->persist_method, + RPMEM_FIP_NODE_SERVER); + + fip->cq_size = fip->lanes_per_thread * cq_size_per_lane; + + RPMEMD_ASSERT(fip->persist_method < MAX_RPMEM_PM); +} + +/* + * rpmemd_fip_init_thread -- init worker thread + */ +static int +rpmemd_fip_init_thread(struct rpmemd_fip *fip, struct rpmemd_fip_thread *thread) +{ + thread->fip = fip; + thread->lanes = malloc(fip->lanes_per_thread * sizeof(*thread->lanes)); + if (!thread->lanes) { + RPMEMD_LOG(ERR, "!allocating thread lanes"); + goto err_alloc_lanes; + } + + struct fi_cq_attr cq_attr = { + .size = fip->cq_size, + .flags = 0, + .format = FI_CQ_FORMAT_MSG, /* need context and flags */ + .wait_obj = FI_WAIT_UNSPEC, + .signaling_vector = 0, + .wait_cond = FI_CQ_COND_NONE, + .wait_set = NULL, + }; + + int ret = fi_cq_open(fip->domain, &cq_attr, &thread->cq, NULL); + if (ret) { + RPMEMD_FI_ERR(ret, "opening completion queue"); + goto err_cq_open; + } + + return 0; +err_cq_open: + free(thread->lanes); +err_alloc_lanes: + return -1; +} + +/* + * rpmemd_fip_fini_thread -- deinitialize worker thread + */ +static void +rpmemd_fip_fini_thread(struct rpmemd_fip *fip, struct rpmemd_fip_thread *thread) +{ + RPMEMD_FI_CLOSE(thread->cq, "closing completion queue"); + free(thread->lanes); +} + +/* + * rpmemd_fip_init_threads -- initialize worker threads + */ +static int +rpmemd_fip_init_threads(struct rpmemd_fip *fip) +{ + RPMEMD_ASSERT(fip->lanes != NULL); + RPMEMD_ASSERT(fip->nthreads > 0); + + fip->threads = calloc(fip->nthreads, sizeof(*fip->threads)); + if (!fip->threads) { + RPMEMD_LOG(ERR, "!allocating threads"); + goto err_alloc_threads; + } + + int ret; + size_t i; + for (i = 0; i < fip->nthreads; i++) { + ret = rpmemd_fip_init_thread(fip, &fip->threads[i]); + if (ret) { + RPMEMD_LOG(ERR, "!initializing thread %zu", i); + goto err_init_thread; + } + } + + for (size_t i = 0; i < fip->nlanes; i++) { + size_t w = i % fip->nthreads; + struct rpmemd_fip_thread *thread = &fip->threads[w]; + fip->lanes[i].base.cq = thread->cq; + thread->lanes[thread->nlanes++] = &fip->lanes[i]; + } + + return 0; +err_init_thread: + for (size_t j = 0; j < i; j++) + rpmemd_fip_fini_thread(fip, &fip->threads[j]); + free(fip->threads); +err_alloc_threads: + return -1; +} + +/* + * rpmemd_fip_fini_threads -- deinitialize worker threads + */ +static void +rpmemd_fip_fini_threads(struct rpmemd_fip *fip) +{ + for (size_t i = 0; i < fip->nthreads; i++) + 
rpmemd_fip_fini_thread(fip, &fip->threads[i]); + free(fip->threads); +} + +/* + * rpmemd_fip_init -- initialize fabric provider + */ +struct rpmemd_fip * +rpmemd_fip_init(const char *node, const char *service, + struct rpmemd_fip_attr *attr, struct rpmem_resp_attr *resp, + enum rpmem_err *err) +{ + int ret; + + RPMEMD_ASSERT(resp); + RPMEMD_ASSERT(err); + RPMEMD_ASSERT(attr); + RPMEMD_ASSERT(attr->persist); + + struct rpmemd_fip *fip = calloc(1, sizeof(*fip)); + if (!fip) { + RPMEMD_LOG(ERR, "!allocating fabric handle"); + *err = RPMEM_ERR_FATAL; + return NULL; + } + + ret = rpmemd_fip_getinfo(fip, service, node, attr->provider); + if (ret) { + *err = RPMEM_ERR_BADPROVIDER; + goto err_getinfo; + } + + rpmemd_fip_set_attr(fip, attr); + + ret = rpmemd_fip_init_fabric_res(fip); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_init_fabric_res; + } + + ret = rpmemd_fip_init_memory(fip); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_init_memory; + } + + ret = rpmemd_fip_lanes_init(fip); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_init_lanes; + } + + ret = rpmemd_fip_init_threads(fip); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_init_threads; + } + + ret = rpmemd_fip_init_common(fip); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_init; + } + + ret = fi_listen(fip->pep); + if (ret) { + *err = RPMEM_ERR_FATAL_CONN; + goto err_fi_listen; + } + + ret = rpmemd_fip_set_resp(fip, resp); + if (ret) { + *err = RPMEM_ERR_FATAL; + goto err_set_resp; + } + + return fip; +err_set_resp: + RPMEMD_FI_CLOSE(fip->pep, "closing passive endpoint"); +err_fi_listen: + rpmemd_fip_fini_common(fip); +err_init: + rpmemd_fip_fini_threads(fip); +err_init_threads: + rpmemd_fip_fini_lanes(fip); +err_init_lanes: + rpmemd_fip_fini_memory(fip); +err_init_memory: + rpmemd_fip_fini_fabric_res(fip); +err_init_fabric_res: + fi_freeinfo(fip->fi); +err_getinfo: + free(fip); + return NULL; +} + +/* + * rpmemd_fip_fini -- deinitialize fabric provider + */ +void +rpmemd_fip_fini(struct rpmemd_fip *fip) +{ + rpmemd_fip_fini_common(fip); + rpmemd_fip_fini_threads(fip); + rpmemd_fip_fini_lanes(fip); + rpmemd_fip_fini_memory(fip); + rpmemd_fip_fini_fabric_res(fip); + fi_freeinfo(fip->fi); + free(fip); +} + +/* + * rpmemd_fip_accept_one -- accept a single connection + */ +static int +rpmemd_fip_accept_one(struct rpmemd_fip *fip, + struct fi_info *info, struct rpmemd_fip_lane *lanep) +{ + int ret; + + ret = rpmemd_fip_init_ep(fip, info, &lanep->base); + if (ret) + goto err_init_ep; + + ret = rpmemd_fip_post_common(fip, lanep); + if (ret) + goto err_post; + + ret = fi_accept(lanep->base.ep, NULL, 0); + if (ret) { + RPMEMD_FI_ERR(ret, "accepting connection request"); + goto err_accept; + } + + fi_freeinfo(info); + + return 0; +err_accept: +err_post: + rpmemd_fip_fini_ep(&lanep->base); +err_init_ep: + fi_freeinfo(info); + return -1; +} + +/* + * rpmemd_fip_accept -- accept a single connection request + */ +int +rpmemd_fip_accept(struct rpmemd_fip *fip, int timeout) +{ + int ret; + struct fi_eq_cm_entry entry; + uint32_t event; + unsigned nreq = 0; /* number of connection requests */ + unsigned ncon = 0; /* number of connected endpoints */ + int connecting = 1; + + while (connecting && (nreq < fip->nlanes || ncon < fip->nlanes)) { + ret = rpmem_fip_read_eq(fip->eq, &entry, + &event, timeout); + if (ret) + goto err_read_eq; + + switch (event) { + case FI_CONNREQ: + ret = rpmemd_fip_accept_one(fip, entry.info, + &fip->lanes[nreq]); + if (ret) + goto err_accept_one; + nreq++; + break; + case FI_CONNECTED: + ncon++; + break; + 
+		case FI_SHUTDOWN:
+			connecting = 0;
+			break;
+		default:
+			RPMEMD_ERR("unexpected event received (%u)", event);
+			goto err_read_eq;
+
+		}
+	}
+
+	return 0;
+err_accept_one:
+err_read_eq:
+	return -1;
+}
+
+/*
+ * rpmemd_fip_wait_close -- wait specified time for connection closed event
+ */
+int
+rpmemd_fip_wait_close(struct rpmemd_fip *fip, int timeout)
+{
+	struct fi_eq_cm_entry entry;
+	int lret = 0;
+	uint32_t event;
+	int ret;
+
+	for (unsigned i = 0; i < fip->nlanes; i++) {
+		ret = rpmem_fip_read_eq(fip->eq, &entry, &event, timeout);
+		if (ret)
+			lret = ret;
+		if (event != FI_SHUTDOWN) {
+			RPMEMD_ERR("unexpected event received "
+					"(is %u expected %u)",
+					event, FI_SHUTDOWN);
+			errno = EINVAL;
+			lret = -1;
+		}
+	}
+
+	return lret;
+}
+
+/*
+ * rpmemd_fip_close -- close the connection
+ */
+int
+rpmemd_fip_close(struct rpmemd_fip *fip)
+{
+	int lret = 0;
+	int ret;
+
+	for (unsigned i = 0; i < fip->nlanes; i++) {
+		ret = rpmemd_fip_fini_ep(&fip->lanes[i].base);
+		if (ret)
+			lret = ret;
+	}
+
+	return lret;
+}
+
+/*
+ * rpmemd_fip_process_start -- start processing
+ */
+int
+rpmemd_fip_process_start(struct rpmemd_fip *fip)
+{
+	unsigned i;
+	for (i = 0; i < fip->nthreads; i++) {
+		errno = os_thread_create(&fip->threads[i].thread, NULL,
+				rpmemd_fip_thread, &fip->threads[i]);
+		if (errno) {
+			RPMEMD_ERR("!running worker thread");
+			goto err_thread_create;
+		}
+	}
+
+	return 0;
+err_thread_create:
+	return -1;
+}
+
+/*
+ * rpmemd_fip_process_stop -- stop processing
+ */
+int
+rpmemd_fip_process_stop(struct rpmemd_fip *fip)
+{
+	/* this stops all threads */
+	util_fetch_and_or32(&fip->closing, 1);
+	int ret;
+	int lret = 0;
+
+	for (size_t i = 0; i < fip->nthreads; i++) {
+		struct rpmemd_fip_thread *thread = &fip->threads[i];
+		ret = fi_cq_signal(thread->cq);
+		if (ret) {
+			RPMEMD_FI_ERR(ret, "sending signal to CQ");
+			lret = ret;
+		}
+		void *tret;
+		errno = os_thread_join(&thread->thread, &tret);
+		if (errno) {
+			RPMEMD_LOG(ERR, "!joining cq thread");
+			lret = -1;
+		} else {
+			ret = (int)(uintptr_t)tret;
+			if (ret) {
+				RPMEMD_LOG(ERR,
+					"cq thread failed with code -- %d",
+					ret);
+				lret = ret;
+			}
+		}
+	}
+
+	return lret;
+}
diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_fip.h b/src/pmdk/src/tools/rpmemd/rpmemd_fip.h
new file mode 100644
index 000000000..ba4520431
--- /dev/null
+++ b/src/pmdk/src/tools/rpmemd/rpmemd_fip.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * rpmemd_fip.h -- rpmemd libfabric provider module header file
+ */
+
+#include <stddef.h>
+
+struct rpmemd_fip;
+
+struct rpmemd_fip_attr {
+	void *addr;
+	size_t size;
+	unsigned nlanes;
+	size_t nthreads;
+	size_t buff_size;
+	enum rpmem_provider provider;
+	enum rpmem_persist_method persist_method;
+	int (*persist)(const void *addr, size_t len);
+	void *(*memcpy_persist)(void *pmemdest, const void *src, size_t len);
+	int (*deep_persist)(const void *addr, size_t len, void *ctx);
+	void *ctx;
+};
+
+struct rpmemd_fip *rpmemd_fip_init(const char *node,
+	const char *service,
+	struct rpmemd_fip_attr *attr,
+	struct rpmem_resp_attr *resp,
+	enum rpmem_err *err);
+void rpmemd_fip_fini(struct rpmemd_fip *fip);
+
+int rpmemd_fip_accept(struct rpmemd_fip *fip, int timeout);
+int rpmemd_fip_process_start(struct rpmemd_fip *fip);
+int rpmemd_fip_process_stop(struct rpmemd_fip *fip);
+int rpmemd_fip_wait_close(struct rpmemd_fip *fip, int timeout);
+int rpmemd_fip_close(struct rpmemd_fip *fip);
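Taken together with rpmemd_fip.c above, this header suggests the call sequence a server would drive. The sketch below is hedged, not the daemon's actual code: the node, service, timeouts, lane count, and provider constant are illustrative assumptions, rpmemd_pmem_persist comes from rpmemd_util.c later in this patch, and error handling is reduced to a single bail-out label.

#include <stddef.h>

/* assumes the declarations from rpmemd_fip.h and librpmem headers above */
static int
serve_pool(void *pool_addr, size_t pool_size)
{
	struct rpmemd_fip_attr attr = {
		.addr = pool_addr,
		.size = pool_size,
		.nlanes = 64,			/* illustrative */
		.provider = RPMEM_PROV_LIBFABRIC_VERBS,	/* assumed constant */
		.persist_method = RPMEM_PM_GPSPM,
		.persist = rpmemd_pmem_persist,	/* from rpmemd_util.c */
	};
	struct rpmem_resp_attr resp;	/* would go to the client via obc */
	enum rpmem_err err;

	struct rpmemd_fip *fip = rpmemd_fip_init("0.0.0.0", "7636",
			&attr, &resp, &err);
	if (!fip)
		return -1;

	if (rpmemd_fip_accept(fip, 30000))	/* 30 s, arbitrary */
		goto out;
	if (rpmemd_fip_process_start(fip))
		goto out;

	/* ... serve persist requests until shutdown is requested ... */

	rpmemd_fip_process_stop(fip);
	rpmemd_fip_wait_close(fip, 10000);
	rpmemd_fip_close(fip);
out:
	rpmemd_fip_fini(fip);
	return 0;
}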
diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_log.c b/src/pmdk/src/tools/rpmemd/rpmemd_log.c
new file mode 100644
index 000000000..75330c897
--- /dev/null
+++ b/src/pmdk/src/tools/rpmemd/rpmemd_log.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2016-2020, Intel Corporation */
+
+/*
+ * rpmemd_log.c -- rpmemd logging functions definitions
+ */
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include "rpmemd_log.h"
+#include "os.h"
+#include "valgrind_internal.h"
+
+#define RPMEMD_SYSLOG_OPTS	(LOG_NDELAY | LOG_PID)
+#define RPMEMD_SYSLOG_FACILITY	(LOG_USER)
+#define RPMEMD_DEFAULT_FH	stderr
+#define RPMEMD_MAX_MSG		((size_t)8192)
+#define RPMEMD_MAX_PREFIX	((size_t)256)
+
+enum rpmemd_log_level rpmemd_log_level;
+static char *rpmemd_ident;
+static int rpmemd_use_syslog;
+static FILE *rpmemd_log_file;
+static char rpmemd_prefix_buff[RPMEMD_MAX_PREFIX];
+
+static const char *rpmemd_log_level_str[MAX_RPD_LOG] = {
+	[RPD_LOG_ERR]		= "err",
+	[RPD_LOG_WARN]		= "warn",
+	[RPD_LOG_NOTICE]	= "notice",
+	[RPD_LOG_INFO]		= "info",
+	[_RPD_LOG_DBG]		= "debug",
+};
+
+static int rpmemd_level2prio[MAX_RPD_LOG] = {
+	[RPD_LOG_ERR]		= LOG_ERR,
+	[RPD_LOG_WARN]		= LOG_WARNING,
+	[RPD_LOG_NOTICE]	= LOG_NOTICE,
+	[RPD_LOG_INFO]		= LOG_INFO,
+	[_RPD_LOG_DBG]		= LOG_DEBUG,
+};
+
+/*
+ * rpmemd_log_basename -- similar to POSIX basename, but without handling for
+ * trailing slashes.
+ */
+static const char *
+rpmemd_log_basename(const char *fname)
+{
+	const char *s;
+
+	if (fname == NULL)
+		return "(null)";
+	s = strrchr(fname, '/');
+	if (s != NULL)
+		return s + 1;
+	else
+		return fname;
+}
+
+/*
+ * rpmemd_log_level_from_str -- converts string to log level value
+ */
+enum rpmemd_log_level
+rpmemd_log_level_from_str(const char *str)
+{
+	if (!str)
+		return MAX_RPD_LOG;
+
+	for (enum rpmemd_log_level level = 0; level < MAX_RPD_LOG; level++) {
+		if (strcmp(rpmemd_log_level_str[level], str) == 0)
+			return level;
+	}
+
+	return MAX_RPD_LOG;
+}
+
+/*
+ * rpmemd_log_level_to_str -- converts log level enum to string
+ */
+const char *
+rpmemd_log_level_to_str(enum rpmemd_log_level level)
+{
+	if (level >= MAX_RPD_LOG)
+		return NULL;
+	return rpmemd_log_level_str[level];
+}
+
+/*
+ * rpmemd_log_init -- initialize logging subsystem
+ *
+ * ident - string prepended to every message
+ * use_syslog - use syslog instead of stderr or a log file
+ */
+int
+rpmemd_log_init(const char *ident, const char *fname, int use_syslog)
+{
+	rpmemd_use_syslog = use_syslog;
+
+	if (rpmemd_use_syslog) {
+		openlog(rpmemd_ident, RPMEMD_SYSLOG_OPTS,
+				RPMEMD_SYSLOG_FACILITY);
+	} else {
+		rpmemd_ident = strdup(ident);
+		if (!rpmemd_ident) {
+			perror("strdup");
+			return -1;
+		}
+
+		if (fname) {
+			rpmemd_log_file = os_fopen(fname, "a");
+			if (!rpmemd_log_file) {
+				perror(fname);
+				free(rpmemd_ident);
+				rpmemd_ident = NULL;
+				return -1;
+			}
+		} else {
+			rpmemd_log_file = RPMEMD_DEFAULT_FH;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * rpmemd_log_close -- deinitialize logging subsystem
+ */
+void
+rpmemd_log_close(void)
+{
+	if (rpmemd_use_syslog) {
+		closelog();
+	} else {
+		if (rpmemd_log_file != RPMEMD_DEFAULT_FH)
+			fclose(rpmemd_log_file);
+		rpmemd_log_file = NULL;
+
+		free(rpmemd_ident);
+		rpmemd_ident = NULL;
+	}
+}
+
+/*
+ * rpmemd_prefix -- set prefix for every message
+ */
+int
+rpmemd_prefix(const char *fmt, ...)
+{ + if (!fmt) { + rpmemd_prefix_buff[0] = '\0'; + return 0; + } + + va_list ap; + va_start(ap, fmt); + int ret = vsnprintf(rpmemd_prefix_buff, RPMEMD_MAX_PREFIX, + fmt, ap); + va_end(ap); + if (ret < 0) + return -1; + + return 0; +} + +/* + * rpmemd_log -- main logging function + */ +void +rpmemd_log(enum rpmemd_log_level level, const char *fname, int lineno, + const char *fmt, ...) +{ + if (!rpmemd_use_syslog && level > rpmemd_log_level) + return; + + char buff[RPMEMD_MAX_MSG]; + + size_t cnt = 0; + int ret; + if (fname) { + ret = util_snprintf(&buff[cnt], RPMEMD_MAX_MSG - cnt, + "[%s:%d] ", rpmemd_log_basename(fname), lineno); + if (ret < 0) + RPMEMD_FATAL("snprintf failed: %d", errno); + + cnt += (size_t)ret; + } + if (rpmemd_prefix_buff[0]) { + ret = util_snprintf(&buff[cnt], RPMEMD_MAX_MSG - cnt, + "%s ", rpmemd_prefix_buff); + if (ret < 0) + RPMEMD_FATAL("snprintf failed: %d", errno); + + cnt += (size_t)ret; + } + + const char *errorstr = ""; + const char *prefix = ""; + const char *suffix = "\n"; + if (fmt) { + if (*fmt == '!') { + fmt++; + errorstr = strerror(errno); + prefix = ": "; + } + + va_list ap; + va_start(ap, fmt); + ret = vsnprintf(&buff[cnt], RPMEMD_MAX_MSG - cnt, fmt, ap); + va_end(ap); + + if (ret < 0) + RPMEMD_FATAL("vsnprintf failed"); + if ((unsigned)ret >= RPMEMD_MAX_MSG - cnt) + RPMEMD_FATAL("overflow(3): %d >= %lu", ret, + RPMEMD_MAX_MSG - cnt); + + cnt += (size_t)ret; + + ret = util_snprintf(&buff[cnt], RPMEMD_MAX_MSG - cnt, + "%s%s%s", prefix, errorstr, suffix); + if (ret < 0) + RPMEMD_FATAL("snprintf failed: %d", errno); + + cnt += (size_t)ret; + } + buff[cnt] = 0; + + if (rpmemd_use_syslog) { + int prio = rpmemd_level2prio[level]; + syslog(prio, "%s", buff); + } else { + /* to suppress drd false-positive */ + /* XXX: confirm real nature of this issue: pmem/issues#863 */ +#ifdef SUPPRESS_FPUTS_DRD_ERROR + VALGRIND_ANNOTATE_IGNORE_READS_BEGIN(); + VALGRIND_ANNOTATE_IGNORE_WRITES_BEGIN(); +#endif + fprintf(rpmemd_log_file, "%s", buff); + fflush(rpmemd_log_file); +#ifdef SUPPRESS_FPUTS_DRD_ERROR + VALGRIND_ANNOTATE_IGNORE_READS_END(); + VALGRIND_ANNOTATE_IGNORE_WRITES_END(); +#endif + } + +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_log.h b/src/pmdk/src/tools/rpmemd/rpmemd_log.h new file mode 100644 index 000000000..d9895679e --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_log.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_log.h -- rpmemd logging functions declarations + */ + +#include +#include "util.h" + +#define FORMAT_PRINTF(a, b) __attribute__((__format__(__printf__, (a), (b)))) + +/* + * The tab character is not allowed in rpmemd log, + * because it is not well handled by syslog. + * Please use RPMEMD_LOG_INDENT instead. + */ +#define RPMEMD_LOG_INDENT " " + +#ifdef DEBUG +#define RPMEMD_LOG(level, fmt, arg...) do {\ + COMPILE_ERROR_ON(strchr(fmt, '\t') != 0);\ + rpmemd_log(RPD_LOG_##level, __FILE__, __LINE__, fmt, ## arg);\ +} while (0) +#else +#define RPMEMD_LOG(level, fmt, arg...) do {\ + COMPILE_ERROR_ON(strchr(fmt, '\t') != 0);\ + rpmemd_log(RPD_LOG_##level, NULL, 0, fmt, ## arg);\ +} while (0) +#endif + +#ifdef DEBUG +#define RPMEMD_DBG(fmt, arg...) do {\ + COMPILE_ERROR_ON(strchr(fmt, '\t') != 0);\ + rpmemd_log(_RPD_LOG_DBG, __FILE__, __LINE__, fmt, ## arg);\ +} while (0) +#else +#define RPMEMD_DBG(fmt, arg...) do {} while (0) +#endif + +#define RPMEMD_ERR(fmt, arg...) do {\ + RPMEMD_LOG(ERR, fmt, ## arg);\ +} while (0) + +#define RPMEMD_FATAL(fmt, arg...) 
do {\ + RPMEMD_LOG(ERR, fmt, ## arg);\ + abort();\ +} while (0) + +#define RPMEMD_ASSERT(cond) do {\ + if (!(cond)) {\ + rpmemd_log(RPD_LOG_ERR, __FILE__, __LINE__,\ + "assertion fault: %s", #cond);\ + abort();\ + }\ +} while (0) + +enum rpmemd_log_level { + RPD_LOG_ERR, + RPD_LOG_WARN, + RPD_LOG_NOTICE, + RPD_LOG_INFO, + _RPD_LOG_DBG, /* disallow to use this with LOG macro */ + MAX_RPD_LOG, +}; + +enum rpmemd_log_level rpmemd_log_level_from_str(const char *str); +const char *rpmemd_log_level_to_str(enum rpmemd_log_level level); + +extern enum rpmemd_log_level rpmemd_log_level; +int rpmemd_log_init(const char *ident, const char *fname, int use_syslog); +void rpmemd_log_close(void); +int rpmemd_prefix(const char *fmt, ...) FORMAT_PRINTF(1, 2); +void rpmemd_log(enum rpmemd_log_level level, const char *fname, + int lineno, const char *fmt, ...) FORMAT_PRINTF(4, 5); diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_obc.c b/src/pmdk/src/tools/rpmemd/rpmemd_obc.c new file mode 100644 index 000000000..a50576712 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_obc.c @@ -0,0 +1,548 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2019, Intel Corporation */ + +/* + * rpmemd_obc.c -- rpmemd out-of-band connection definitions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "librpmem.h" +#include "rpmemd_log.h" +#include "rpmem_proto.h" +#include "rpmem_common.h" +#include "rpmemd_obc.h" + +struct rpmemd_obc { + int fd_in; + int fd_out; +}; + +/* + * rpmemd_obc_check_proto_ver -- check protocol version + */ +static int +rpmemd_obc_check_proto_ver(unsigned major, unsigned minor) +{ + if (major != RPMEM_PROTO_MAJOR || + minor != RPMEM_PROTO_MINOR) { + RPMEMD_LOG(ERR, "unsupported protocol version -- %u.%u", + major, minor); + return -1; + } + + return 0; +} + +/* + * rpmemd_obc_check_msg_hdr -- check message header + */ +static int +rpmemd_obc_check_msg_hdr(struct rpmem_msg_hdr *hdrp) +{ + switch (hdrp->type) { + case RPMEM_MSG_TYPE_OPEN: + case RPMEM_MSG_TYPE_CREATE: + case RPMEM_MSG_TYPE_CLOSE: + case RPMEM_MSG_TYPE_SET_ATTR: + /* all messages from obc to server are fine */ + break; + default: + RPMEMD_LOG(ERR, "invalid message type -- %u", hdrp->type); + return -1; + } + + if (hdrp->size < sizeof(struct rpmem_msg_hdr)) { + RPMEMD_LOG(ERR, "invalid message size -- %lu", hdrp->size); + return -1; + } + + return 0; +} + +/* + * rpmemd_obc_check_pool_desc -- check pool descriptor + */ +static int +rpmemd_obc_check_pool_desc(struct rpmem_msg_hdr *hdrp, size_t msg_size, + struct rpmem_msg_pool_desc *pool_desc) +{ + size_t body_size = msg_size + pool_desc->size; + if (hdrp->size != body_size) { + RPMEMD_LOG(ERR, "message and pool descriptor size mismatch " + "-- is %lu should be %lu", hdrp->size, body_size); + return -1; + } + + if (pool_desc->size < 2) { + RPMEMD_LOG(ERR, "invalid pool descriptor size -- %u " + "(must be >= 2)", pool_desc->size); + return -1; + } + + if (pool_desc->desc[pool_desc->size - 1] != '\0') { + RPMEMD_LOG(ERR, "invalid pool descriptor " + "(must be null-terminated string)"); + return -1; + } + + size_t len = strlen((char *)pool_desc->desc) + 1; + + if (pool_desc->size != len) { + RPMEMD_LOG(ERR, "invalid pool descriptor size -- is %lu " + "should be %u", len, pool_desc->size); + return -1; + } + + return 0; +} + +/* + * rpmemd_obc_check_provider -- check provider value + */ +static int +rpmemd_obc_check_provider(uint32_t provider) +{ + if (provider == 0 || provider >= MAX_RPMEM_PROV) { + RPMEMD_LOG(ERR, 
"invalid provider -- %u", provider); + return -1; + } + + return 0; +} + +/* + * rpmemd_obc_ntoh_check_msg_create -- convert and check create request message + */ +static int +rpmemd_obc_ntoh_check_msg_create(struct rpmem_msg_hdr *hdrp) +{ + int ret; + struct rpmem_msg_create *msg = (struct rpmem_msg_create *)hdrp; + + rpmem_ntoh_msg_create(msg); + + ret = rpmemd_obc_check_proto_ver(msg->c.major, msg->c.minor); + if (ret) + return ret; + + ret = rpmemd_obc_check_pool_desc(hdrp, sizeof(*msg), &msg->pool_desc); + if (ret) + return ret; + + ret = rpmemd_obc_check_provider(msg->c.provider); + if (ret) + return ret; + + return 0; +} + +/* + * rpmemd_obc_ntoh_check_msg_open -- convert and check open request message + */ +static int +rpmemd_obc_ntoh_check_msg_open(struct rpmem_msg_hdr *hdrp) +{ + int ret; + struct rpmem_msg_open *msg = (struct rpmem_msg_open *)hdrp; + + rpmem_ntoh_msg_open(msg); + + ret = rpmemd_obc_check_proto_ver(msg->c.major, msg->c.minor); + if (ret) + return ret; + + ret = rpmemd_obc_check_pool_desc(hdrp, sizeof(*msg), &msg->pool_desc); + if (ret) + return ret; + + ret = rpmemd_obc_check_provider(msg->c.provider); + if (ret) + return ret; + + return 0; +} + +/* + * rpmemd_obc_ntoh_check_msg_close -- convert and check close request message + */ +static int +rpmemd_obc_ntoh_check_msg_close(struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_close *msg = (struct rpmem_msg_close *)hdrp; + + rpmem_ntoh_msg_close(msg); + + /* nothing to do */ + return 0; +} + +/* + * rpmemd_obc_ntoh_check_msg_set_attr -- convert and check set attributes + * request message + */ +static int +rpmemd_obc_ntoh_check_msg_set_attr(struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_set_attr *msg = (struct rpmem_msg_set_attr *)hdrp; + + rpmem_ntoh_msg_set_attr(msg); + + /* nothing to do */ + return 0; +} + +typedef int (*rpmemd_obc_ntoh_check_msg_fn)(struct rpmem_msg_hdr *hdrp); + +static rpmemd_obc_ntoh_check_msg_fn rpmemd_obc_ntoh_check_msg[] = { + [RPMEM_MSG_TYPE_CREATE] = rpmemd_obc_ntoh_check_msg_create, + [RPMEM_MSG_TYPE_OPEN] = rpmemd_obc_ntoh_check_msg_open, + [RPMEM_MSG_TYPE_CLOSE] = rpmemd_obc_ntoh_check_msg_close, + [RPMEM_MSG_TYPE_SET_ATTR] = rpmemd_obc_ntoh_check_msg_set_attr, +}; + +/* + * rpmemd_obc_process_create -- process create request + */ +static int +rpmemd_obc_process_create(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg, + struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_create *msg = (struct rpmem_msg_create *)hdrp; + struct rpmem_req_attr req = { + .pool_size = msg->c.pool_size, + .nlanes = (unsigned)msg->c.nlanes, + .pool_desc = (char *)msg->pool_desc.desc, + .provider = (enum rpmem_provider)msg->c.provider, + .buff_size = msg->c.buff_size, + }; + + struct rpmem_pool_attr *rattr = NULL; + struct rpmem_pool_attr rpmem_attr; + unpack_rpmem_pool_attr(&msg->pool_attr, &rpmem_attr); + if (!util_is_zeroed(&rpmem_attr, sizeof(rpmem_attr))) + rattr = &rpmem_attr; + + return req_cb->create(obc, arg, &req, rattr); +} + +/* + * rpmemd_obc_process_open -- process open request + */ +static int +rpmemd_obc_process_open(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg, + struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_open *msg = (struct rpmem_msg_open *)hdrp; + struct rpmem_req_attr req = { + .pool_size = msg->c.pool_size, + .nlanes = (unsigned)msg->c.nlanes, + .pool_desc = (const char *)msg->pool_desc.desc, + .provider = (enum rpmem_provider)msg->c.provider, + .buff_size = msg->c.buff_size, + }; + + return req_cb->open(obc, arg, &req); +} + +/* 
+ * rpmemd_obc_process_close -- process close request + */ +static int +rpmemd_obc_process_close(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg, + struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_close *msg = (struct rpmem_msg_close *)hdrp; + return req_cb->close(obc, arg, (int)msg->flags); +} + +/* + * rpmemd_obc_process_set_attr -- process set attributes request + */ +static int +rpmemd_obc_process_set_attr(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg, + struct rpmem_msg_hdr *hdrp) +{ + struct rpmem_msg_set_attr *msg = (struct rpmem_msg_set_attr *)hdrp; + struct rpmem_pool_attr *rattr = NULL; + struct rpmem_pool_attr rpmem_attr; + unpack_rpmem_pool_attr(&msg->pool_attr, &rpmem_attr); + if (!util_is_zeroed(&rpmem_attr, sizeof(rpmem_attr))) + rattr = &rpmem_attr; + + return req_cb->set_attr(obc, arg, rattr); +} + +typedef int (*rpmemd_obc_process_fn)(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg, + struct rpmem_msg_hdr *hdrp); + +static rpmemd_obc_process_fn rpmemd_obc_process_cb[] = { + [RPMEM_MSG_TYPE_CREATE] = rpmemd_obc_process_create, + [RPMEM_MSG_TYPE_OPEN] = rpmemd_obc_process_open, + [RPMEM_MSG_TYPE_CLOSE] = rpmemd_obc_process_close, + [RPMEM_MSG_TYPE_SET_ATTR] = rpmemd_obc_process_set_attr, +}; + +/* + * rpmemd_obc_recv -- wrapper for read and decode data function + */ +static inline int +rpmemd_obc_recv(struct rpmemd_obc *obc, void *buff, size_t len) +{ + return rpmem_xread(obc->fd_in, buff, len, 0); +} + +/* + * rpmemd_obc_send -- wrapper for encode and write data function + */ +static inline int +rpmemd_obc_send(struct rpmemd_obc *obc, const void *buff, size_t len) +{ + return rpmem_xwrite(obc->fd_out, buff, len, 0); +} + +/* + * rpmemd_obc_msg_recv -- receive and check request message + * + * Return values: + * 0 - success + * < 0 - error + * 1 - obc disconnected + */ +static int +rpmemd_obc_msg_recv(struct rpmemd_obc *obc, + struct rpmem_msg_hdr **hdrpp) +{ + struct rpmem_msg_hdr hdr; + struct rpmem_msg_hdr nhdr; + struct rpmem_msg_hdr *hdrp; + int ret; + + ret = rpmemd_obc_recv(obc, &nhdr, sizeof(nhdr)); + if (ret == 1) { + RPMEMD_LOG(NOTICE, "out-of-band connection disconnected"); + return 1; + } + + if (ret < 0) { + RPMEMD_LOG(ERR, "!receiving message header failed"); + return ret; + } + + memcpy(&hdr, &nhdr, sizeof(hdr)); + rpmem_ntoh_msg_hdr(&hdr); + + ret = rpmemd_obc_check_msg_hdr(&hdr); + if (ret) { + RPMEMD_LOG(ERR, "parsing message header failed"); + return ret; + } + + hdrp = malloc(hdr.size); + if (!hdrp) { + RPMEMD_LOG(ERR, "!allocating message buffer failed"); + return -1; + } + + memcpy(hdrp, &nhdr, sizeof(*hdrp)); + + size_t body_size = hdr.size - sizeof(hdr); + ret = rpmemd_obc_recv(obc, hdrp->body, body_size); + if (ret) { + RPMEMD_LOG(ERR, "!receiving message body failed"); + goto err_recv_body; + } + + ret = rpmemd_obc_ntoh_check_msg[hdr.type](hdrp); + if (ret) { + RPMEMD_LOG(ERR, "parsing message body failed"); + goto err_body; + } + + *hdrpp = hdrp; + return 0; +err_body: +err_recv_body: + free(hdrp); + return -1; +} + +/* + * rpmemd_obc_init -- initialize rpmemd + */ +struct rpmemd_obc * +rpmemd_obc_init(int fd_in, int fd_out) +{ + struct rpmemd_obc *obc = calloc(1, sizeof(*obc)); + if (!obc) { + RPMEMD_LOG(ERR, "!allocating obc failed"); + goto err_calloc; + } + + obc->fd_in = fd_in; + obc->fd_out = fd_out; + + return obc; +err_calloc: + return NULL; +} + +/* + * rpmemd_obc_fini -- destroy obc + */ +void +rpmemd_obc_fini(struct rpmemd_obc *obc) +{ + free(obc); +} + +/* + * 
rpmemd_obc_status -- sends initial status to the client + */ +int +rpmemd_obc_status(struct rpmemd_obc *obc, uint32_t status) +{ + return rpmemd_obc_send(obc, &status, sizeof(status)); +} + +/* + * rpmemd_obc_process -- wait for and process a message from client + * + * Return values: + * 0 - success + * < 0 - error + * 1 - client disconnected + */ +int +rpmemd_obc_process(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg) +{ + RPMEMD_ASSERT(req_cb != NULL); + RPMEMD_ASSERT(req_cb->create != NULL); + RPMEMD_ASSERT(req_cb->open != NULL); + RPMEMD_ASSERT(req_cb->close != NULL); + RPMEMD_ASSERT(req_cb->set_attr != NULL); + + struct rpmem_msg_hdr *hdrp = NULL; + int ret; + + ret = rpmemd_obc_msg_recv(obc, &hdrp); + if (ret) + return ret; + + RPMEMD_ASSERT(hdrp != NULL); + + ret = rpmemd_obc_process_cb[hdrp->type](obc, req_cb, arg, hdrp); + + free(hdrp); + + return ret; +} + +/* + * rpmemd_obc_create_resp -- send create request response message + */ +int +rpmemd_obc_create_resp(struct rpmemd_obc *obc, + int status, const struct rpmem_resp_attr *res) +{ + struct rpmem_msg_create_resp resp = { + .hdr = { + .type = RPMEM_MSG_TYPE_CREATE_RESP, + .size = sizeof(struct rpmem_msg_create_resp), + .status = (uint32_t)status, + }, + .ibc = { + .port = res->port, + .rkey = res->rkey, + .raddr = res->raddr, + .persist_method = res->persist_method, + .nlanes = res->nlanes, + }, + }; + + rpmem_hton_msg_create_resp(&resp); + + return rpmemd_obc_send(obc, &resp, sizeof(resp)); +} + +/* + * rpmemd_obc_open_resp -- send open request response message + */ +int +rpmemd_obc_open_resp(struct rpmemd_obc *obc, + int status, const struct rpmem_resp_attr *res, + const struct rpmem_pool_attr *pool_attr) +{ + struct rpmem_msg_open_resp resp = { + .hdr = { + .type = RPMEM_MSG_TYPE_OPEN_RESP, + .size = sizeof(struct rpmem_msg_open_resp), + .status = (uint32_t)status, + }, + .ibc = { + .port = res->port, + .rkey = res->rkey, + .raddr = res->raddr, + .persist_method = res->persist_method, + .nlanes = res->nlanes, + }, + }; + + pack_rpmem_pool_attr(pool_attr, &resp.pool_attr); + rpmem_hton_msg_open_resp(&resp); + + return rpmemd_obc_send(obc, &resp, sizeof(resp)); +} + +/* + * rpmemd_obc_close_resp -- send close request response message + */ +int +rpmemd_obc_close_resp(struct rpmemd_obc *obc, + int status) +{ + struct rpmem_msg_close_resp resp = { + .hdr = { + .type = RPMEM_MSG_TYPE_CLOSE_RESP, + .size = sizeof(struct rpmem_msg_close_resp), + .status = (uint32_t)status, + }, + }; + + rpmem_hton_msg_close_resp(&resp); + + return rpmemd_obc_send(obc, &resp, sizeof(resp)); +} + +/* + * rpmemd_obc_set_attr_resp -- send set attributes request response message + */ +int +rpmemd_obc_set_attr_resp(struct rpmemd_obc *obc, int status) +{ + struct rpmem_msg_set_attr_resp resp = { + .hdr = { + .type = RPMEM_MSG_TYPE_SET_ATTR_RESP, + .size = sizeof(struct rpmem_msg_set_attr_resp), + .status = (uint32_t)status, + }, + }; + + rpmem_hton_msg_set_attr_resp(&resp); + + return rpmemd_obc_send(obc, &resp, sizeof(resp)); +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_obc.h b/src/pmdk/src/tools/rpmemd/rpmemd_obc.h new file mode 100644 index 000000000..78c8f8ab0 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_obc.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * rpmemd_obc.h -- rpmemd out-of-band connection declarations + */ +#include +#include +#include + +struct rpmemd_obc; + +struct rpmemd_obc_requests { + int (*create)(struct rpmemd_obc *obc, void 
*arg, + const struct rpmem_req_attr *req, + const struct rpmem_pool_attr *pool_attr); + int (*open)(struct rpmemd_obc *obc, void *arg, + const struct rpmem_req_attr *req); + int (*close)(struct rpmemd_obc *obc, void *arg, int flags); + int (*set_attr)(struct rpmemd_obc *obc, void *arg, + const struct rpmem_pool_attr *pool_attr); +}; + +struct rpmemd_obc *rpmemd_obc_init(int fd_in, int fd_out); +void rpmemd_obc_fini(struct rpmemd_obc *obc); + +int rpmemd_obc_status(struct rpmemd_obc *obc, uint32_t status); + +int rpmemd_obc_process(struct rpmemd_obc *obc, + struct rpmemd_obc_requests *req_cb, void *arg); + +int rpmemd_obc_create_resp(struct rpmemd_obc *obc, + int status, const struct rpmem_resp_attr *res); +int rpmemd_obc_open_resp(struct rpmemd_obc *obc, + int status, const struct rpmem_resp_attr *res, + const struct rpmem_pool_attr *pool_attr); +int rpmemd_obc_set_attr_resp(struct rpmemd_obc *obc, int status); +int rpmemd_obc_close_resp(struct rpmemd_obc *obc, + int status); diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_util.c b/src/pmdk/src/tools/rpmemd/rpmemd_util.c new file mode 100644 index 000000000..db149e935 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_util.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2018, Intel Corporation */ + +/* + * rpmemd_util.c -- rpmemd utility functions definitions + */ + +#include +#include + +#include "libpmem.h" +#include "rpmem_common.h" +#include "rpmemd_log.h" +#include "rpmemd_util.h" + +/* + * rpmemd_pmem_persist -- pmem_persist wrapper required to unify function + * pointer type with pmem_msync + */ +int +rpmemd_pmem_persist(const void *addr, size_t len) +{ + pmem_persist(addr, len); + return 0; +} + +/* + * rpmemd_flush_fatal -- APM specific flush function which should never be + * called because APM does not require flushes + */ +int +rpmemd_flush_fatal(const void *addr, size_t len) +{ + RPMEMD_FATAL("rpmemd_flush_fatal should never be called"); +} + +/* + * rpmemd_persist_to_str -- convert persist function pointer to string + */ +static const char * +rpmemd_persist_to_str(int (*persist)(const void *addr, size_t len)) +{ + if (persist == rpmemd_pmem_persist) { + return "pmem_persist"; + } else if (persist == pmem_msync) { + return "pmem_msync"; + } else if (persist == rpmemd_flush_fatal) { + return "none"; + } else { + return NULL; + } +} + +/* + * rpmem_print_pm_policy -- print persistency method policy + */ +static void +rpmem_print_pm_policy(enum rpmem_persist_method persist_method, + int (*persist)(const void *addr, size_t len)) +{ + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist method: %s", + rpmem_persist_method_to_str(persist_method)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist flush: %s", + rpmemd_persist_to_str(persist)); +} + +/* + * rpmem_memcpy_msync -- memcpy and msync + */ +static void * +rpmem_memcpy_msync(void *pmemdest, const void *src, size_t len) +{ + void *ret = pmem_memcpy(pmemdest, src, len, PMEM_F_MEM_NOFLUSH); + pmem_msync(pmemdest, len); + + return ret; +} + +/* + * rpmemd_apply_pm_policy -- choose the persistency method and the flush + * function according to the pool type and the persistency method read from the + * config + */ +int +rpmemd_apply_pm_policy(enum rpmem_persist_method *persist_method, + int (**persist)(const void *addr, size_t len), + void *(**memcpy_persist)(void *pmemdest, const void *src, size_t len), + const int is_pmem) +{ + switch (*persist_method) { + case RPMEM_PM_APM: + if (is_pmem) { + *persist_method = RPMEM_PM_APM; + *persist = 
diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_util.c b/src/pmdk/src/tools/rpmemd/rpmemd_util.c new file mode 100644 index 000000000..db149e935 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_util.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2017-2018, Intel Corporation */ + +/* + * rpmemd_util.c -- rpmemd utility functions definitions + */ + +#include +#include + +#include "libpmem.h" +#include "rpmem_common.h" +#include "rpmemd_log.h" +#include "rpmemd_util.h" + +/* + * rpmemd_pmem_persist -- pmem_persist wrapper required to unify function + * pointer type with pmem_msync + */ +int +rpmemd_pmem_persist(const void *addr, size_t len) +{ + pmem_persist(addr, len); + return 0; +} + +/* + * rpmemd_flush_fatal -- APM-specific flush function which should never be + * called, because APM does not require flushes + */ +int +rpmemd_flush_fatal(const void *addr, size_t len) +{ + RPMEMD_FATAL("rpmemd_flush_fatal should never be called"); +} + +/* + * rpmemd_persist_to_str -- convert persist function pointer to string + */ +static const char * +rpmemd_persist_to_str(int (*persist)(const void *addr, size_t len)) +{ + if (persist == rpmemd_pmem_persist) { + return "pmem_persist"; + } else if (persist == pmem_msync) { + return "pmem_msync"; + } else if (persist == rpmemd_flush_fatal) { + return "none"; + } else { + return NULL; + } +} + +/* + * rpmem_print_pm_policy -- print persistency method policy + */ +static void +rpmem_print_pm_policy(enum rpmem_persist_method persist_method, + int (*persist)(const void *addr, size_t len)) +{ + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist method: %s", + rpmem_persist_method_to_str(persist_method)); + RPMEMD_LOG(NOTICE, RPMEMD_LOG_INDENT "persist flush: %s", + rpmemd_persist_to_str(persist)); +} + +/* + * rpmem_memcpy_msync -- memcpy and msync + */ +static void * +rpmem_memcpy_msync(void *pmemdest, const void *src, size_t len) +{ + void *ret = pmem_memcpy(pmemdest, src, len, PMEM_F_MEM_NOFLUSH); + pmem_msync(pmemdest, len); + + return ret; +} + +/* + * rpmemd_apply_pm_policy -- choose the persistency method and the flush + * function according to the pool type and the persistency method read from the + * config + */ +int +rpmemd_apply_pm_policy(enum rpmem_persist_method *persist_method, + int (**persist)(const void *addr, size_t len), + void *(**memcpy_persist)(void *pmemdest, const void *src, size_t len), + const int is_pmem) +{ + switch (*persist_method) { + case RPMEM_PM_APM: + if (is_pmem) { + *persist_method = RPMEM_PM_APM; + *persist = rpmemd_flush_fatal; + } else { + *persist_method = RPMEM_PM_GPSPM; + *persist = pmem_msync; + } + break; + case RPMEM_PM_GPSPM: + *persist_method = RPMEM_PM_GPSPM; + *persist = is_pmem ? rpmemd_pmem_persist : pmem_msync; + break; + default: + RPMEMD_FATAL("invalid persist method: %d", *persist_method); + return -1; + } + + /* this is for RPMEM_PERSIST_INLINE */ + if (is_pmem) + *memcpy_persist = pmem_memcpy_persist; + else + *memcpy_persist = rpmem_memcpy_msync; + + RPMEMD_LOG(NOTICE, "persistency policy:"); + rpmem_print_pm_policy(*persist_method, *persist); + + return 0; +} diff --git a/src/pmdk/src/tools/rpmemd/rpmemd_util.h b/src/pmdk/src/tools/rpmemd/rpmemd_util.h new file mode 100644 index 000000000..6f18178b2 --- /dev/null +++ b/src/pmdk/src/tools/rpmemd/rpmemd_util.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * rpmemd_util.h -- rpmemd utility functions declarations + */ + +int rpmemd_pmem_persist(const void *addr, size_t len); +int rpmemd_flush_fatal(const void *addr, size_t len); +int rpmemd_apply_pm_policy(enum rpmem_persist_method *persist_method, + int (**persist)(const void *addr, size_t len), + void *(**memcpy_persist)(void *pmemdest, const void *src, size_t len), + const int is_pmem);
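As a worked illustration of the policy above (an editorial sketch, not part of the patch): when the pool does not reside on persistent memory, a configured APM method is downgraded to GPSPM and pmem_msync becomes the flush routine.

#include <stddef.h>
#include "rpmem_common.h"
#include "rpmemd_util.h"

static int
choose_policy(void)
{
    enum rpmem_persist_method method = RPMEM_PM_APM; /* e.g. from config */
    int (*persist)(const void *addr, size_t len);
    void *(*memcpy_persist)(void *pmemdest, const void *src, size_t len);
    const int is_pmem = 0; /* pool is not on pmem */

    if (rpmemd_apply_pm_policy(&method, &persist, &memcpy_persist, is_pmem))
        return -1;

    /* here: method == RPMEM_PM_GPSPM and persist == pmem_msync */
    return 0;
}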
diff --git a/src/pmdk/src/windows/README b/src/pmdk/src/windows/README new file mode 100644 index 000000000..18067ab92 --- /dev/null +++ b/src/pmdk/src/windows/README @@ -0,0 +1,19 @@ +Persistent Memory Development Kit + +This is src/windows/README. + +This directory contains the Windows-specific source for the PMDK. + +The subdirectory "include" contains header files that have no equivalents +on Windows when building PMDK with the VC++ compiler. +Some of those files are empty, which is a cheap trick to avoid preprocessor +errors when including non-existent files. This way we don't need a lot +of preprocessor conditionals in all the source code files. + +The "platform.h" file contains definitions of all the basic types and macros +that are not available under VC++. When building PMDK with Visual Studio, +the "platform.h" file is included in each source file using the "/FI" +(forced include) option. + +The subdirectory "getopt" contains a Windows implementation of getopt() and +getopt_long(). diff --git a/src/pmdk/src/windows/getopt/.cstyleignore b/src/pmdk/src/windows/getopt/.cstyleignore new file mode 100644 index 000000000..760e0cd36 --- /dev/null +++ b/src/pmdk/src/windows/getopt/.cstyleignore @@ -0,0 +1,2 @@ +getopt.c +getopt.h diff --git a/src/pmdk/src/windows/getopt/LICENSE.txt b/src/pmdk/src/windows/getopt/LICENSE.txt new file mode 100644 index 000000000..340303a96 --- /dev/null +++ b/src/pmdk/src/windows/getopt/LICENSE.txt @@ -0,0 +1,24 @@ +Copyright (c) 2012, Kim Gräsman +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Kim Gräsman nor the + names of contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL KIM GRÄSMAN BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/pmdk/src/windows/getopt/README b/src/pmdk/src/windows/getopt/README new file mode 100644 index 000000000..437e7a5df --- /dev/null +++ b/src/pmdk/src/windows/getopt/README @@ -0,0 +1,9 @@ +Persistent Memory Development Kit + +This is src/windows/getopt/README. + +This directory contains the Windows getopt implementation downloaded from: + + https://github.com/kimgr/getopt_port + +with changes applied so that it compiles with the "compile as C code (/TC)" option. diff --git a/src/pmdk/src/windows/getopt/getopt.c b/src/pmdk/src/windows/getopt/getopt.c new file mode 100644 index 000000000..16e52cdba --- /dev/null +++ b/src/pmdk/src/windows/getopt/getopt.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2012, Kim Gräsman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Kim Gräsman nor the + * names of contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL KIM GRÄSMAN BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "getopt.h" + +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +char* optarg; +int optopt; +/* The variable optind [...] shall be initialized to 1 by the system. */ +int optind = 1; +int opterr; + +static char* optcursor = NULL; +static char *first = NULL; + +/* rotates argv array */ +static void rotate(char **argv, int argc) { + if (argc <= 1) + return; + char *tmp = argv[0]; + memmove(argv, argv + 1, (argc - 1) * sizeof(char *)); + argv[argc - 1] = tmp; +} + +/* Implemented based on [1] and [2] for optional arguments. + optopt is handled FreeBSD-style, per [3].
+ Other GNU and FreeBSD extensions are purely accidental. + +[1] https://pubs.opengroup.org/onlinepubs/000095399/functions/getopt.html +[2] https://www.kernel.org/doc/man-pages/online/pages/man3/getopt.3.html +[3] https://www.freebsd.org/cgi/man.cgi?query=getopt&sektion=3&manpath=FreeBSD+9.0-RELEASE +*/ +int getopt(int argc, char* const argv[], const char* optstring) { + int optchar = -1; + const char* optdecl = NULL; + + optarg = NULL; + opterr = 0; + optopt = 0; + + /* Unspecified, but we need it to avoid overrunning the argv bounds. */ + if (optind >= argc) + goto no_more_optchars; + + /* If, when getopt() is called argv[optind] is a null pointer, getopt() + shall return -1 without changing optind. */ + if (argv[optind] == NULL) + goto no_more_optchars; + + /* If, when getopt() is called *argv[optind] is not the character '-', + permute argv to move non options to the end */ + if (*argv[optind] != '-') { + if (argc - optind <= 1) + goto no_more_optchars; + + if (!first) + first = argv[optind]; + + do { + rotate((char **)(argv + optind), argc - optind); + } while (*argv[optind] != '-' && argv[optind] != first); + + if (argv[optind] == first) + goto no_more_optchars; + } + + /* If, when getopt() is called argv[optind] points to the string "-", + getopt() shall return -1 without changing optind. */ + if (strcmp(argv[optind], "-") == 0) + goto no_more_optchars; + + /* If, when getopt() is called argv[optind] points to the string "--", + getopt() shall return -1 after incrementing optind. */ + if (strcmp(argv[optind], "--") == 0) { + ++optind; + if (first) { + do { + rotate((char **)(argv + optind), argc - optind); + } while (argv[optind] != first); + } + goto no_more_optchars; + } + + if (optcursor == NULL || *optcursor == '\0') + optcursor = argv[optind] + 1; + + optchar = *optcursor; + + /* FreeBSD: The variable optopt saves the last known option character + returned by getopt(). */ + optopt = optchar; + + /* The getopt() function shall return the next option character (if one is + found) from argv that matches a character in optstring, if there is + one that matches. */ + optdecl = strchr(optstring, optchar); + if (optdecl) { + /* [I]f a character is followed by a colon, the option takes an + argument. */ + if (optdecl[1] == ':') { + optarg = ++optcursor; + if (*optarg == '\0') { + /* GNU extension: Two colons mean an option takes an + optional arg; if there is text in the current argv-element + (i.e., in the same word as the option name itself, for example, + "-oarg"), then it is returned in optarg, otherwise optarg is set + to zero. */ + if (optdecl[2] != ':') { + /* If the option was the last character in the string pointed to by + an element of argv, then optarg shall contain the next element + of argv, and optind shall be incremented by 2. If the resulting + value of optind is greater than argc, this indicates a missing + option-argument, and getopt() shall return an error indication. + + Otherwise, optarg shall point to the string following the + option character in that element of argv, and optind shall be + incremented by 1. + */ + if (++optind < argc) { + optarg = argv[optind]; + } else { + /* If it detects a missing option-argument, it shall return the + colon character ( ':' ) if the first character of optstring + was a colon, or a question-mark character ( '?' ) otherwise. + */ + optarg = NULL; + fprintf(stderr, "%s: option requires an argument -- '%c'\n", argv[0], optchar); + optchar = (optstring[0] == ':') ? 
':' : '?'; + } + } else { + optarg = NULL; + } + } + optcursor = NULL; + } + } else { + fprintf(stderr,"%s: invalid option -- '%c'\n", argv[0], optchar); + /* If getopt() encounters an option character that is not contained in + optstring, it shall return the question-mark ( '?' ) character. */ + optchar = '?'; + } + + if (optcursor == NULL || *++optcursor == '\0') + ++optind; + + return optchar; + +no_more_optchars: + optcursor = NULL; + first = NULL; + return -1; +} + +/* Implementation based on [1]. + +[1] https://www.kernel.org/doc/man-pages/online/pages/man3/getopt.3.html +*/ +int getopt_long(int argc, char* const argv[], const char* optstring, + const struct option* longopts, int* longindex) { + const struct option* o = longopts; + const struct option* match = NULL; + int num_matches = 0; + size_t argument_name_length = 0; + const char* current_argument = NULL; + int retval = -1; + + optarg = NULL; + optopt = 0; + + if (optind >= argc) + return -1; + + /* If, when getopt() is called argv[optind] is a null pointer, getopt_long() + shall return -1 without changing optind. */ + if (argv[optind] == NULL) + goto no_more_optchars; + + /* If, when getopt_long() is called *argv[optind] is not the character '-', + permute argv to move non options to the end */ + if (*argv[optind] != '-') { + if (argc - optind <= 1) + goto no_more_optchars; + + if (!first) + first = argv[optind]; + + do { + rotate((char **)(argv + optind), argc - optind); + } while (*argv[optind] != '-' && argv[optind] != first); + + if (argv[optind] == first) + goto no_more_optchars; + } + + if (strlen(argv[optind]) < 3 || strncmp(argv[optind], "--", 2) != 0) + return getopt(argc, argv, optstring); + + /* It's an option; starts with -- and is longer than two chars. */ + current_argument = argv[optind] + 2; + argument_name_length = strcspn(current_argument, "="); + for (; o->name; ++o) { + if (strncmp(o->name, current_argument, argument_name_length) == 0) { + match = o; + ++num_matches; + if (strlen(o->name) == argument_name_length) { + /* found match is exactly the one which we are looking for */ + num_matches = 1; + break; + } + } + } + + if (num_matches == 1) { + /* If longindex is not NULL, it points to a variable which is set to the + index of the long option relative to longopts. */ + if (longindex) + *longindex = (int)(match - longopts); + + /* If flag is NULL, then getopt_long() shall return val. + Otherwise, getopt_long() returns 0, and flag shall point to a variable + which shall be set to val if the option is found, but left unchanged if + the option is not found. */ + if (match->flag) + *(match->flag) = match->val; + + retval = match->flag ? 0 : match->val; + + if (match->has_arg != no_argument) { + optarg = strchr(argv[optind], '='); + if (optarg != NULL) + ++optarg; + + if (match->has_arg == required_argument) { + /* Only scan the next argv for required arguments. Behavior is not + specified, but has been observed with Ubuntu and Mac OSX. */ + if (optarg == NULL && ++optind < argc) { + optarg = argv[optind]; + } + + if (optarg == NULL) + retval = ':'; + } + } else if (strchr(argv[optind], '=')) { + /* An argument was provided to a non-argument option. + I haven't seen this specified explicitly, but both GNU and BSD-based + implementations show this behavior. + */ + retval = '?'; + } + } else { + /* Unknown option or ambiguous match. 
*/ + retval = '?'; + if (num_matches == 0) { + fprintf(stderr, "%s: unrecognized option -- '%s'\n", argv[0], argv[optind]); + } else { + fprintf(stderr, "%s: option '%s' is ambiguous\n", argv[0], argv[optind]); + } + } + + ++optind; + return retval; + +no_more_optchars: + first = NULL; + return -1; +}
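To make the long-option handling and argv permutation above concrete, a small driver might look like this (an illustrative sketch by the editor, not part of the imported sources):

#include <stdio.h>
#include "getopt.h"

int
main(int argc, char *argv[])
{
    static const struct option longopts[] = {
        { "verbose", no_argument, NULL, 'v' },
        { "level", required_argument, NULL, 'l' },
        { NULL, 0, NULL, 0 },
    };

    int opt;
    while ((opt = getopt_long(argc, argv, "vl:", longopts, NULL)) != -1) {
        switch (opt) {
        case 'v':
            printf("verbose\n");
            break;
        case 'l':
            printf("level %s\n", optarg); /* from "-l 7" or "--level=7" */
            break;
        default: /* '?' (unknown/ambiguous) or ':' (missing argument) */
            return 1;
        }
    }

    /* non-option arguments were permuted to argv[optind..argc-1] */
    return 0;
}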
diff --git a/src/pmdk/src/windows/getopt/getopt.h b/src/pmdk/src/windows/getopt/getopt.h new file mode 100644 index 000000000..bb9ee4eae --- /dev/null +++ b/src/pmdk/src/windows/getopt/getopt.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012, Kim Gräsman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Kim Gräsman nor the + * names of contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL KIM GRÄSMAN BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef INCLUDED_GETOPT_PORT_H +#define INCLUDED_GETOPT_PORT_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +extern char* optarg; +extern int optind, opterr, optopt; + +struct option { + const char* name; + int has_arg; + int* flag; + int val; +}; + +int getopt(int argc, char* const argv[], const char* optstring); + +int getopt_long(int argc, char* const argv[], + const char* optstring, const struct option* longopts, int* longindex); + +#if defined(__cplusplus) +} +#endif + +#endif // INCLUDED_GETOPT_PORT_H diff --git a/src/pmdk/src/windows/getopt/getopt.vcxproj b/src/pmdk/src/windows/getopt/getopt.vcxproj new file mode 100644 index 000000000..350ea37e3 --- /dev/null +++ b/src/pmdk/src/windows/getopt/getopt.vcxproj @@ -0,0 +1,88 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {9186EAC4-2F34-4F17-B940-6585D7869BCD} + getopt + 10.0.17134.0 + + + + StaticLibrary + true + v140 + NotSet + + + StaticLibrary + false + v140 + NotSet + + + + + + + + + + + + + + + + + Level3 + Disabled + + + CompileAsC + true + NTDDI_VERSION=NTDDI_WIN10_RS1;_DEBUG;_CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) + 4819 + + + true + + + + + Level3 + MaxSpeed + true + true + + + CompileAsC + true + NTDDI_VERSION=NTDDI_WIN10_RS1;NDEBUG;_CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) + + + true + true + + + true + + + + + + + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/windows/getopt/getopt.vcxproj.filters b/src/pmdk/src/windows/getopt/getopt.vcxproj.filters new file mode 100644 index 000000000..0db0b7978 --- /dev/null +++ b/src/pmdk/src/windows/getopt/getopt.vcxproj.filters @@ -0,0 +1,23 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/src/pmdk/src/windows/include/.cstyleignore b/src/pmdk/src/windows/include/.cstyleignore new file mode 100644 index 000000000..ad7ade0f9 --- /dev/null +++ b/src/pmdk/src/windows/include/.cstyleignore @@ -0,0 +1 @@ +srcversion.h diff --git a/src/pmdk/src/windows/include/dirent.h b/src/pmdk/src/windows/include/dirent.h new file mode 100644 index 000000000..ecc069a86 --- /dev/null +++ b/src/pmdk/src/windows/include/dirent.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * fake dirent.h + */ diff --git a/src/pmdk/src/windows/include/endian.h b/src/pmdk/src/windows/include/endian.h new file mode 100644 index 000000000..29b7e1008 --- /dev/null +++ b/src/pmdk/src/windows/include/endian.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * endian.h -- convert values between host and big-/little-endian byte order + */ + +#ifndef ENDIAN_H +#define ENDIAN_H 1 + +/* + * XXX: On Windows we can assume little-endian architecture + */ +#include <stdlib.h> + +#define htole16(a) (a) +#define htole32(a) (a) +#define htole64(a) (a) + +#define le16toh(a) (a) +#define le32toh(a) (a) +#define le64toh(a) (a) + +#define htobe16(x) _byteswap_ushort(x) +#define htobe32(x) _byteswap_ulong(x) +#define htobe64(x) _byteswap_uint64(x) + +#define be16toh(x) _byteswap_ushort(x) +#define be32toh(x) _byteswap_ulong(x) +#define be64toh(x) _byteswap_uint64(x) + +#endif /* ENDIAN_H */
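A quick hypothetical illustration of the header above (not part of the patch): on little-endian Windows the little-endian conversions are identities, while the big-endian ones byte-swap via the MSVC intrinsics.

#include <assert.h>
#include <stdint.h>
#include "endian.h"

static void
endian_demo(void)
{
    uint32_t v = 0x11223344;

    assert(htole32(v) == 0x11223344);   /* no-op on little-endian */
    assert(htobe32(v) == 0x44332211);   /* _byteswap_ulong */
    assert(be64toh(htobe64(v)) == v);   /* round trip */
}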
diff --git a/src/pmdk/src/windows/include/err.h b/src/pmdk/src/windows/include/err.h new file mode 100644 index 000000000..f9d2afda5 --- /dev/null +++ b/src/pmdk/src/windows/include/err.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * err.h - error and warning messages + */ + +#ifndef ERR_H +#define ERR_H 1 + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +/* + * err - Windows implementation of the Unix err() function + */ +__declspec(noreturn) static void +err(int eval, const char *fmt, ...) +{ + va_list vl; + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + exit(eval); +} + +/* + * warn - Windows implementation of the Unix warn() function + */ +static void +warn(const char *fmt, ...) +{ + va_list vl; + va_start(vl, fmt); + fprintf(stderr, "Warning: "); + vfprintf(stderr, fmt, vl); + va_end(vl); +} + +#endif /* ERR_H */ diff --git a/src/pmdk/src/windows/include/features.h b/src/pmdk/src/windows/include/features.h new file mode 100644 index 000000000..fe343931f --- /dev/null +++ b/src/pmdk/src/windows/include/features.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * fake features.h + */ diff --git a/src/pmdk/src/windows/include/libgen.h b/src/pmdk/src/windows/include/libgen.h new file mode 100644 index 000000000..932081fca --- /dev/null +++ b/src/pmdk/src/windows/include/libgen.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * fake libgen.h + */ diff --git a/src/pmdk/src/windows/include/linux/limits.h b/src/pmdk/src/windows/include/linux/limits.h new file mode 100644 index 000000000..c14ca5c36 --- /dev/null +++ b/src/pmdk/src/windows/include/linux/limits.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * linux/limits.h -- fake header file + */ + +/* + * XXX - The only purpose of this empty file is to avoid preprocessor + * errors when including a Linux-specific header file that has no equivalent + * on Windows. With this cheap trick, we don't need a lot of preprocessor + * conditionals in all the source code files. + * + * In the future, this will be addressed in some other way. + */ diff --git a/src/pmdk/src/windows/include/platform.h b/src/pmdk/src/windows/include/platform.h new file mode 100644 index 000000000..54b1395b2 --- /dev/null +++ b/src/pmdk/src/windows/include/platform.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * platform.h -- dirty hacks to compile Linux code on Windows using VC++ + * + * This is included to each source file using "/FI" (forced include) option. + * + * XXX - it is a subject for refactoring + */ + +#ifndef PLATFORM_H +#define PLATFORM_H 1 + +#pragma warning(disable : 4996) +#pragma warning(disable : 4200) /* allow flexible array member */ +#pragma warning(disable : 4819) /* non unicode characters */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prevent PMDK compilation for 32-bit platforms */ +#if defined(_WIN32) && !defined(_WIN64) +#error "32-bit builds of PMDK are not supported!" +#endif + +#define _CRT_RAND_S /* rand_s() */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* use uuid_t definition from util.h */ +#ifdef uuid_t +#undef uuid_t +#endif + +/* a few trivial substitutions */ +#define PATH_MAX MAX_PATH +#define __thread __declspec(thread) +#define __func__ __FUNCTION__ +#ifdef _DEBUG +#define DEBUG +#endif + +/* + * The inline keyword is available only in VC++. 
+ * https://msdn.microsoft.com/en-us/library/bw1hbe6y.aspx + */ +#ifndef __cplusplus +#define inline __inline +#endif + +/* XXX - no equivalents in VC++ */ +#define __attribute__(a) +#define __builtin_constant_p(cnd) 0 + +/* + * missing definitions + */ + +/* errno.h */ +#define ELIBACC 79 /* cannot access a needed shared library */ + +/* sys/stat.h */ +#define S_IRUSR S_IREAD +#define S_IWUSR S_IWRITE +#define S_IRGRP S_IRUSR +#define S_IWGRP S_IWUSR + +#define O_SYNC 0 + +typedef int mode_t; + +#define fchmod(fd, mode) 0 /* XXX - dummy */ +#define setlinebuf(fp) setvbuf(fp, NULL, _IOLBF, BUFSIZ); + +/* unistd.h */ +typedef long long os_off_t; +typedef long long ssize_t; + +int setenv(const char *name, const char *value, int overwrite); +int unsetenv(const char *name); + +/* fcntl.h */ +int posix_fallocate(int fd, os_off_t offset, os_off_t len); + +/* string.h */ +#define strtok_r strtok_s + +/* time.h */ +#define CLOCK_MONOTONIC 1 +#define CLOCK_REALTIME 2 + +int clock_gettime(int id, struct timespec *ts); + +/* signal.h */ +typedef unsigned long long sigset_t; /* one bit for each signal */ +C_ASSERT(NSIG <= sizeof(sigset_t) * 8); + +struct sigaction { + void (*sa_handler) (int signum); + /* void (*sa_sigaction)(int, siginfo_t *, void *); */ + sigset_t sa_mask; + int sa_flags; + void (*sa_restorer) (void); +}; + +__inline int +sigemptyset(sigset_t *set) +{ + *set = 0; + return 0; +} + +__inline int +sigfillset(sigset_t *set) +{ + *set = ~0; + return 0; +} + +__inline int +sigaddset(sigset_t *set, int signum) +{ + if (signum <= 0 || signum >= NSIG) { + errno = EINVAL; + return -1; + } + *set |= (1ULL << (signum - 1)); + return 0; +} + +__inline int +sigdelset(sigset_t *set, int signum) +{ + if (signum <= 0 || signum >= NSIG) { + errno = EINVAL; + return -1; + } + *set &= ~(1ULL << (signum - 1)); + return 0; +} + +__inline int +sigismember(const sigset_t *set, int signum) +{ + if (signum <= 0 || signum >= NSIG) { + errno = EINVAL; + return -1; + } + return ((*set & (1ULL << (signum - 1))) ? 
1 : 0); +} + +/* sched.h */ + +/* + * sched_yield -- yield the processor + */ +__inline int +sched_yield(void) +{ + SwitchToThread(); + return 0; /* always succeeds */ +} + +/* + * helper macros for library ctor/dtor function declarations + */ +#define MSVC_CONSTR(func) \ +void func(void); \ +__pragma(comment(linker, "/include:_" #func)) \ +__pragma(section(".CRT$XCU", read)) \ +__declspec(allocate(".CRT$XCU")) \ +const void (WINAPI *_##func)(void) = (const void (WINAPI *)(void))func; + +#define MSVC_DESTR(func) \ +void func(void); \ +static void _##func##_reg(void) { atexit(func); }; \ +MSVC_CONSTR(_##func##_reg) + +#ifdef __cplusplus +} +#endif + +#endif /* PLATFORM_H */ diff --git a/src/pmdk/src/windows/include/sched.h b/src/pmdk/src/windows/include/sched.h new file mode 100644 index 000000000..8303bb25f --- /dev/null +++ b/src/pmdk/src/windows/include/sched.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2017-2020, Intel Corporation */ + +/* + * fake sched.h + */ diff --git a/src/pmdk/src/windows/include/strings.h b/src/pmdk/src/windows/include/strings.h new file mode 100644 index 000000000..33fb468b0 --- /dev/null +++ b/src/pmdk/src/windows/include/strings.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * fake strings.h + */
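For illustration (an editorial sketch, not part of the patch), a library source file built with this forced-include header could emulate GCC-style constructor/destructor functions through the MSVC_CONSTR/MSVC_DESTR macros above, which plant a function pointer in the CRT initializer section .CRT$XCU:

#include <stdio.h>
#include <stdlib.h>

void
libfoo_init(void)
{
    /* runs before main(), via the .CRT$XCU initializer section */
    printf("init\n");
}
MSVC_CONSTR(libfoo_init)

void
libfoo_fini(void)
{
    /* MSVC_DESTR registers this with atexit() from a generated ctor */
    printf("fini\n");
}
MSVC_DESTR(libfoo_fini)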
diff --git a/src/pmdk/src/windows/include/sys/file.h b/src/pmdk/src/windows/include/sys/file.h new file mode 100644 index 000000000..4cbce497d --- /dev/null +++ b/src/pmdk/src/windows/include/sys/file.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * sys/file.h -- file locking + */ diff --git a/src/pmdk/src/windows/include/sys/mman.h b/src/pmdk/src/windows/include/sys/mman.h new file mode 100644 index 000000000..5feb2e309 --- /dev/null +++ b/src/pmdk/src/windows/include/sys/mman.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * sys/mman.h -- memory-mapped files for Windows + */ + +#ifndef SYS_MMAN_H +#define SYS_MMAN_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#define PROT_NONE 0x0 +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define PROT_EXEC 0x4 + +#define MAP_SHARED 0x1 +#define MAP_PRIVATE 0x2 + +#define MAP_FIXED 0x10 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS + +#define MAP_NORESERVE 0x04000 + +#define MS_ASYNC 1 +#define MS_SYNC 4 +#define MS_INVALIDATE 2 + +#define MAP_FAILED ((void *)(-1)) + +void *mmap(void *addr, size_t len, int prot, int flags, + int fd, os_off_t offset); +int munmap(void *addr, size_t len); +int msync(void *addr, size_t len, int flags); + +int mprotect(void *addr, size_t len, int prot); + +#ifdef __cplusplus +} +#endif + +#endif /* SYS_MMAN_H */ diff --git a/src/pmdk/src/windows/include/sys/mount.h b/src/pmdk/src/windows/include/sys/mount.h new file mode 100644 index 000000000..80dc93e05 --- /dev/null +++ b/src/pmdk/src/windows/include/sys/mount.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * fake sys/mount.h + */ diff --git a/src/pmdk/src/windows/include/sys/param.h b/src/pmdk/src/windows/include/sys/param.h new file mode 100644 index 000000000..7200cfd3e --- /dev/null +++ b/src/pmdk/src/windows/include/sys/param.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * sys/param.h -- a few useful macros + */ + +#ifndef SYS_PARAM_H +#define SYS_PARAM_H 1 + +#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define howmany(x, y) (((x) + ((y) - 1)) / (y)) + +#define BPB 8 /* bits per byte */ + +#define setbit(b, i) ((b)[(i) / BPB] |= 1 << ((i) % BPB)) +#define isset(b, i) ((b)[(i) / BPB] & (1 << ((i) % BPB))) +#define isclr(b, i) (((b)[(i) / BPB] & (1 << ((i) % BPB))) == 0) + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#endif /* SYS_PARAM_H */
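Worked values for the macros above (an editorial example, not part of the patch):

#include <assert.h>
#include <sys/param.h>

static void
param_demo(void)
{
    assert(roundup(5000, 4096) == 8192); /* align a length up to page size */
    assert(howmany(5000, 4096) == 2);    /* pages needed for 5000 bytes */

    unsigned char map[2] = { 0, 0 };
    setbit(map, 9);                      /* sets bit 1 of map[1] */
    assert(isset(map, 9));
    assert(isclr(map, 10));

    assert(MIN(3, 7) == 3 && MAX(3, 7) == 7);
}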
diff --git a/src/pmdk/src/windows/include/sys/resource.h b/src/pmdk/src/windows/include/sys/resource.h new file mode 100644 index 000000000..77a36ed98 --- /dev/null +++ b/src/pmdk/src/windows/include/sys/resource.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2018-2020, Intel Corporation */ + +/* + * fake sys/resource.h + */ diff --git a/src/pmdk/src/windows/include/sys/statvfs.h b/src/pmdk/src/windows/include/sys/statvfs.h new file mode 100644 index 000000000..3a52b7c06 --- /dev/null +++ b/src/pmdk/src/windows/include/sys/statvfs.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * fake statvfs.h + */ diff --git a/src/pmdk/src/windows/include/sys/uio.h b/src/pmdk/src/windows/include/sys/uio.h new file mode 100644 index 000000000..4addf6ac9 --- /dev/null +++ b/src/pmdk/src/windows/include/sys/uio.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * sys/uio.h -- definition of iovec structure + */ + +#ifndef SYS_UIO_H +#define SYS_UIO_H 1 + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +ssize_t writev(int fd, const struct iovec *iov, int iovcnt); + +#ifdef __cplusplus +} +#endif + +#endif /* SYS_UIO_H */ diff --git a/src/pmdk/src/windows/include/sys/wait.h b/src/pmdk/src/windows/include/sys/wait.h new file mode 100644 index 000000000..1becec53f --- /dev/null +++ b/src/pmdk/src/windows/include/sys/wait.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * fake sys/wait.h + */ diff --git a/src/pmdk/src/windows/include/unistd.h b/src/pmdk/src/windows/include/unistd.h new file mode 100644 index 000000000..9dc41f86e --- /dev/null +++ b/src/pmdk/src/windows/include/unistd.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ + +/* + * unistd.h -- compatibility layer for POSIX operating system API + */ + +#ifndef UNISTD_H +#define UNISTD_H 1 + +#include + +#define _SC_PAGESIZE 0 +#define _SC_NPROCESSORS_ONLN 1 + +#define R_OK 04 +#define W_OK 02 +#define X_OK 00 /* execute permission doesn't exist on Windows */ +#define F_OK 00 + +/* + * sysconf -- get configuration information at run time + */ +static __inline long +sysconf(int p) +{ + SYSTEM_INFO si; + int ret = 0; + + switch (p) { + case _SC_PAGESIZE: + GetSystemInfo(&si); + return si.dwPageSize; + + case _SC_NPROCESSORS_ONLN: + for (int i = 0; i < GetActiveProcessorGroupCount(); i++) { + ret += GetActiveProcessorCount(i); + } + return ret; + + default: + return 0; + } +} + +#define getpid _getpid + +/* + * pread -- read from a file descriptor at given offset + */ +static ssize_t +pread(int fd, void *buf, size_t count, os_off_t offset) +{ + __int64 position = _lseeki64(fd, 0, SEEK_CUR); + _lseeki64(fd, offset, SEEK_SET); + int ret = _read(fd, buf, (unsigned)count); + _lseeki64(fd, position, SEEK_SET); + return ret; +} + +/* + * pwrite -- write to a file descriptor at given offset + */ +static ssize_t +pwrite(int fd, const void *buf, size_t count, os_off_t offset) +{ + __int64 position = _lseeki64(fd, 0, SEEK_CUR); + _lseeki64(fd, offset, SEEK_SET); + int ret = _write(fd, buf, (unsigned)count); + _lseeki64(fd, position, SEEK_SET); + return ret; +} + +#define S_ISBLK(x) 0 /* block devices don't exist on Windows */ + +/* + * basename -- parse pathname and return filename component + */ +static char * +basename(char *path)
+{ + char fname[_MAX_FNAME]; + char ext[_MAX_EXT]; + _splitpath(path, NULL, NULL, fname, ext); + + sprintf(path, "%s%s", fname, ext); + + return path; +} + +/* + * dirname -- parse pathname and return directory component + */ +static char * +dirname(char *path) +{ + if (path == NULL) + return "."; + + size_t len = strlen(path); + if (len == 0) + return "."; + + char *end = path + len; + + /* strip trailing forward slashes and backslashes */ + while ((--end) > path) { + if (*end != '\\' && *end != '/') { + *(end + 1) = '\0'; + break; + } + } + + /* strip basename */ + while ((--end) > path) { + if (*end == '\\' || *end == '/') { + *end = '\0'; + break; + } + } + + if (end != path) { + return path; + /* handle edge cases */ + } else if (*end == '\\' || *end == '/') { + *(end + 1) = '\0'; + } else { + *end++ = '.'; + *end = '\0'; + } + + return path; +} + +#endif /* UNISTD_H */ diff --git a/src/pmdk/src/windows/include/win_mmap.h b/src/pmdk/src/windows/include/win_mmap.h new file mode 100644 index 000000000..4e2250f8c --- /dev/null +++ b/src/pmdk/src/windows/include/win_mmap.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2015-2020, Intel Corporation */ +/* + * Copyright (c) 2016, Microsoft Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * win_mmap.h -- (internal) tracks the regions mapped by mmap + */ + +#ifndef WIN_MMAP_H +#define WIN_MMAP_H 1 + +#include "queue.h" + +#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define rounddown(x, y) (((x) / (y)) * (y)) + +void win_mmap_init(void); +void win_mmap_fini(void); + +/* allocation/mmap granularity */ +extern unsigned long long Mmap_align; + +typedef enum FILE_MAPPING_TRACKER_FLAGS { + FILE_MAPPING_TRACKER_FLAG_DIRECT_MAPPED = 0x0001, + + /* + * This should hold the value of all flags ORed for debug purposes.
+ */ + FILE_MAPPING_TRACKER_FLAGS_MASK = + FILE_MAPPING_TRACKER_FLAG_DIRECT_MAPPED +} FILE_MAPPING_TRACKER_FLAGS; + +/* + * this structure tracks the file mappings outstanding per file handle + */ +typedef struct FILE_MAPPING_TRACKER { + PMDK_SORTEDQ_ENTRY(FILE_MAPPING_TRACKER) ListEntry; + HANDLE FileHandle; + HANDLE FileMappingHandle; + void *BaseAddress; + void *EndAddress; + DWORD Access; + os_off_t Offset; + size_t FileLen; + FILE_MAPPING_TRACKER_FLAGS Flags; +} FILE_MAPPING_TRACKER, *PFILE_MAPPING_TRACKER; + +extern SRWLOCK FileMappingQLock; +extern PMDK_SORTEDQ_HEAD(FMLHead, FILE_MAPPING_TRACKER) FileMappingQHead; + +#endif /* WIN_MMAP_H */ diff --git a/src/pmdk/src/windows/libs_debug.props b/src/pmdk/src/windows/libs_debug.props new file mode 100644 index 000000000..32d4306b0 --- /dev/null +++ b/src/pmdk/src/windows/libs_debug.props @@ -0,0 +1,34 @@ + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\libs\ + $(FrameworkSDKdir)bin\$(TargetPlatformVersion)\$(Platform);$(ExecutablePath) + + + + $(SolutionDir)\include;$(SolutionDir)\windows\include;$(SolutionDir)\common;$(SolutionDir)\core;$(SolutionDir)\$(TargetName) + PMDK_UTF8_API;SDS_ENABLED;NTDDI_VERSION=NTDDI_WIN10_RS4;_CRT_SECURE_NO_WARNINGS;_WINDLL;_DEBUG;%(PreprocessorDefinitions) + CompileAsC + true + platform.h + Level3 + true + true + false + + + true + shlwapi.lib;ntdll.lib;mincore.lib;%(AdditionalDependencies) + $(TargetName).def + true + true + false + + + _DEBUG + $(SolutionDir)\common;$(SolutionDir)\windows\include + + + \ No newline at end of file diff --git a/src/pmdk/src/windows/libs_release.props b/src/pmdk/src/windows/libs_release.props new file mode 100644 index 000000000..20728c2aa --- /dev/null +++ b/src/pmdk/src/windows/libs_release.props @@ -0,0 +1,36 @@ + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\libs\ + $(FrameworkSDKdir)bin\$(TargetPlatformVersion)\$(Platform);$(ExecutablePath) + + + + $(SolutionDir)\include;$(SolutionDir)\windows\include;$(SolutionDir)\common;$(SolutionDir)\core;$(SolutionDir)\$(TargetName) + PMDK_UTF8_API;SDS_ENABLED;NTDDI_VERSION=NTDDI_WIN10_RS4;_CRT_SECURE_NO_WARNINGS;_WINDLL;NDEBUG;%(PreprocessorDefinitions) + CompileAsC + true + platform.h + Level3 + true + true + false + Neither + + + true + shlwapi.lib;ntdll.lib;mincore.lib;%(AdditionalDependencies) + $(TargetName).def + DebugFastLink + false + false + + + + + $(SolutionDir)\common;$(SolutionDir)\windows\include + + + \ No newline at end of file diff --git a/src/pmdk/src/windows/srcversion/srcversion.vcxproj b/src/pmdk/src/windows/srcversion/srcversion.vcxproj new file mode 100644 index 000000000..29f1dbf62 --- /dev/null +++ b/src/pmdk/src/windows/srcversion/srcversion.vcxproj @@ -0,0 +1,108 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + {901F04DB-E1A5-4A41-8B81-9D31C19ACD59} + Win32Proj + srcversion + 10.0.17134.0 + + + + Application + true + v140 + NotSet + + + Application + true + v140 + NotSet + + + + + + + + + + + + + + + true + + + true + + + + NotUsing + Level3 + _DEBUG;_CONSOLE;WINAPI_PARTITION_SYSTEM;%(PreprocessorDefinitions) + platform.h + 4996 + CompileAsC + MultiThreadedDebugDLL + + + Console + true + + + + + + + + powershell.exe -ExecutionPolicy Bypass -file "$(SolutionDir)..\utils\SRCVERSION.ps1" $(SRCVERSION) + __NON_EXISTENT_FILE__ + generate srcversion.h + + + + + NotUsing + Level3 + NDEBUG;_CONSOLE;WINAPI_PARTITION_SYSTEM;%(PreprocessorDefinitions) + platform.h + 4996 + CompileAsC + MaxSpeed + MultiThreadedDLL + Default + + + Console + true + + + + + + + + powershell.exe -ExecutionPolicy 
Bypass -file "$(SolutionDir)..\utils\SRCVERSION.ps1" $(SRCVERSION) + __NON_EXISTENT_FILE__ + generate srcversion.h + + + + + + \ No newline at end of file diff --git a/src/pmdk/src/windows/win_mmap.c b/src/pmdk/src/windows/win_mmap.c new file mode 100644 index 000000000..1f1585dc1 --- /dev/null +++ b/src/pmdk/src/windows/win_mmap.c @@ -0,0 +1,1132 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2015-2019, Intel Corporation */ +/* + * Copyright (c) 2015-2017, Microsoft Corporation. All rights reserved. + * Copyright (c) 2016, Hewlett Packard Enterprise Development LP + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * win_mmap.c -- memory-mapped files for Windows + */ + +/* + * XXX - The initial approach to the PMDK for Windows port was to minimize the + * number of changes required in the core part of the library, and to avoid + * preprocessor conditionals, if possible. For that reason, some of the + * Linux system calls that have no equivalents on Windows have been emulated + * using the Windows API. + * Note that it was not a goal to fully emulate the POSIX-compliant behavior + * of the mentioned functions. They are used only internally, so the current + * implementation is just good enough to satisfy PMDK's needs and to make it + * work on Windows. + * + * This is a subject for change in the future. Likely, all these functions + * will be replaced with "util_xxx" wrappers with OS-specific implementations + * for Linux and Windows. + * + * Known issues: + * - on Windows, mapping granularity/alignment is 64KB, not 4KB; + * - mprotect() behavior and protection flag handling in mmap() are slightly + * different than on Linux (see the comments below).
+ */ + +#include +#include "mmap.h" +#include "util.h" +#include "out.h" +#include "win_mmap.h" + +/* uncomment for more debug information on mmap trackers */ +/* #define MMAP_DEBUG_INFO */ + +NTSTATUS +NtFreeVirtualMemory(_In_ HANDLE ProcessHandle, _Inout_ PVOID *BaseAddress, + _Inout_ PSIZE_T RegionSize, _In_ ULONG FreeType); + +/* + * XXX Unify the Linux and Windows code and replace this structure with + * the map tracking list defined in mmap.h. + */ +SRWLOCK FileMappingQLock = SRWLOCK_INIT; +struct FMLHead FileMappingQHead = + PMDK_SORTEDQ_HEAD_INITIALIZER(FileMappingQHead); + +/* + * mmap_file_mapping_comparer -- (internal) compares the two file mapping + * trackers + */ +static LONG_PTR +mmap_file_mapping_comparer(PFILE_MAPPING_TRACKER a, PFILE_MAPPING_TRACKER b) +{ + return ((LONG_PTR)a->BaseAddress - (LONG_PTR)b->BaseAddress); +} + +#ifdef MMAP_DEBUG_INFO +/* + * mmap_info -- (internal) dump info about all the mapping trackers + */ +static void +mmap_info(void) +{ + LOG(4, NULL); + + AcquireSRWLockShared(&FileMappingQLock); + + PFILE_MAPPING_TRACKER mt; + for (mt = PMDK_SORTEDQ_FIRST(&FileMappingQHead); + mt != (void *)&FileMappingQHead; + mt = PMDK_SORTEDQ_NEXT(mt, ListEntry)) { + + LOG(4, "FH %08x FMH %08x AD %p-%p (%zu) " + "OF %08x FL %zu AC %d F %d", + mt->FileHandle, + mt->FileMappingHandle, + mt->BaseAddress, + mt->EndAddress, + (char *)mt->EndAddress - (char *)mt->BaseAddress, + mt->Offset, + mt->FileLen, + mt->Access, + mt->Flags); + } + + ReleaseSRWLockShared(&FileMappingQLock); +} +#endif + +/* + * mmap_reserve -- (internal) reserve virtual address range + */ +static void * +mmap_reserve(void *addr, size_t len) +{ + LOG(4, "addr %p len %zu", addr, len); + + ASSERTeq((uintptr_t)addr % Mmap_align, 0); + ASSERTeq(len % Mmap_align, 0); + + void *reserved_addr = VirtualAlloc(addr, len, + MEM_RESERVE, PAGE_NOACCESS); + if (reserved_addr == NULL) { + ERR("cannot find a contiguous region - " + "addr: %p, len: %lx, gle: 0x%08x", + addr, len, GetLastError()); + errno = ENOMEM; + return MAP_FAILED; + } + + return reserved_addr; +} + +/* + * mmap_unreserve -- (internal) frees the range that's previously reserved + */ +static int +mmap_unreserve(void *addr, size_t len) +{ + LOG(4, "addr %p len %zu", addr, len); + + ASSERTeq((uintptr_t)addr % Mmap_align, 0); + ASSERTeq(len % Mmap_align, 0); + + size_t bytes_returned; + MEMORY_BASIC_INFORMATION basic_info; + + bytes_returned = VirtualQuery(addr, &basic_info, sizeof(basic_info)); + + if (bytes_returned != sizeof(basic_info)) { + ERR("cannot query the virtual address properties of the range " + "- addr: %p, len: %d", addr, len); + errno = EINVAL; + return -1; + } + + if (basic_info.State == MEM_RESERVE) { + DWORD nt_status; + void *release_addr = addr; + size_t release_size = len; + nt_status = NtFreeVirtualMemory(GetCurrentProcess(), + &release_addr, &release_size, MEM_RELEASE); + if (nt_status != 0) { + ERR("cannot release the reserved virtual space - " + "addr: %p, len: %d, nt_status: 0x%08x", + addr, len, nt_status); + errno = EINVAL; + return -1; + } + ASSERTeq(release_addr, addr); + ASSERTeq(release_size, len); + LOG(4, "freed reservation - addr: %p, size: %d", release_addr, + release_size); + } else { + LOG(4, "range not reserved - addr: %p, size: %d", addr, len); + } + + return 0; +} + +/* + * win_mmap_init -- initialization of file mapping tracker + */ +void +win_mmap_init(void) +{ + AcquireSRWLockExclusive(&FileMappingQLock); + PMDK_SORTEDQ_INIT(&FileMappingQHead); + ReleaseSRWLockExclusive(&FileMappingQLock); +} + +/* + * 
win_mmap_fini -- file mapping tracker cleanup routine + */ +void +win_mmap_fini(void) +{ + /* + * Let's make sure that no one is in the middle of updating the + * list by grabbing the lock. + */ + AcquireSRWLockExclusive(&FileMappingQLock); + + while (!PMDK_SORTEDQ_EMPTY(&FileMappingQHead)) { + PFILE_MAPPING_TRACKER mt; + mt = (PFILE_MAPPING_TRACKER)PMDK_SORTEDQ_FIRST( + &FileMappingQHead); + + PMDK_SORTEDQ_REMOVE(&FileMappingQHead, mt, ListEntry); + + if (mt->BaseAddress != NULL) + UnmapViewOfFile(mt->BaseAddress); + + size_t release_size = + (char *)mt->EndAddress - (char *)mt->BaseAddress; + /* + * Free reservation after file mapping (if reservation was + * bigger than length of mapped file) + */ + void *release_addr = (char *)mt->BaseAddress + mt->FileLen; + mmap_unreserve(release_addr, release_size - mt->FileLen); + + if (mt->FileMappingHandle != NULL) + CloseHandle(mt->FileMappingHandle); + + if (mt->FileHandle != NULL) + CloseHandle(mt->FileHandle); + + free(mt); + } + ReleaseSRWLockExclusive(&FileMappingQLock); +} + +#define PROT_ALL (PROT_READ|PROT_WRITE|PROT_EXEC) + +/* + * mmap -- map file into memory + * + * XXX - If read-only mapping was created initially, it is not possible + * to change protection to R/W, even if the file itself was open in R/W mode. + * To workaround that, we could modify mmap() to create R/W mapping first, + * then change the protection to R/O. This way, it should be possible + * to elevate permissions later. + */ +void * +mmap(void *addr, size_t len, int prot, int flags, int fd, os_off_t offset) +{ + LOG(4, "addr %p len %zu prot %d flags %d fd %d offset %ju", + addr, len, prot, flags, fd, offset); + + if (len == 0) { + ERR("invalid length: %zu", len); + errno = EINVAL; + return MAP_FAILED; + } + + if ((prot & ~PROT_ALL) != 0) { + ERR("invalid flags: 0x%08x", flags); + /* invalid protection flags */ + errno = EINVAL; + return MAP_FAILED; + } + + if (((flags & MAP_PRIVATE) && (flags & MAP_SHARED)) || + ((flags & (MAP_PRIVATE | MAP_SHARED)) == 0)) { + ERR("neither MAP_PRIVATE or MAP_SHARED is set, or both: 0x%08x", + flags); + errno = EINVAL; + return MAP_FAILED; + } + + /* XXX shall we use SEC_LARGE_PAGES flag? */ + DWORD protect = 0; + DWORD access = 0; + + /* on x86, PROT_WRITE implies PROT_READ */ + if (prot & PROT_WRITE) { + if (flags & MAP_PRIVATE) { + access = FILE_MAP_COPY; + if (prot & PROT_EXEC) + protect = PAGE_EXECUTE_WRITECOPY; + else + protect = PAGE_WRITECOPY; + } else { + /* FILE_MAP_ALL_ACCESS == FILE_MAP_WRITE */ + access = FILE_MAP_ALL_ACCESS; + if (prot & PROT_EXEC) + protect = PAGE_EXECUTE_READWRITE; + else + protect = PAGE_READWRITE; + } + } else if (prot & PROT_READ) { + access = FILE_MAP_READ; + if (prot & PROT_EXEC) + protect = PAGE_EXECUTE_READ; + else + protect = PAGE_READONLY; + } else { + /* XXX - PAGE_NOACCESS is not supported by CreateFileMapping */ + ERR("PAGE_NOACCESS is not supported"); + errno = ENOTSUP; + return MAP_FAILED; + } + + if (((uintptr_t)addr % Mmap_align) != 0) { + if ((flags & MAP_FIXED) == 0) { + /* ignore invalid hint if no MAP_FIXED flag is set */ + addr = NULL; + } else { + ERR("hint address is not well-aligned: %p", addr); + errno = EINVAL; + return MAP_FAILED; + } + } + + if ((offset % Mmap_align) != 0) { + ERR("offset is not well-aligned: %ju", offset); + errno = EINVAL; + return MAP_FAILED; + } + + if ((flags & MAP_FIXED) != 0) { + /* + * Free any reservations that the caller might have, also we + * have to unmap any existing mappings in this region as per + * mmap's manual. 
+ * XXX - Ideally we should unmap only if the prot and flags + * are similar, we are deferring it as we don't rely on it + * yet. + */ + int ret = munmap(addr, len); + if (ret != 0) { + ERR("!munmap: addr %p len %zu", addr, len); + return MAP_FAILED; + } + } + + size_t len_align = roundup(len, Mmap_align); + size_t filelen; + size_t filelen_align; + HANDLE fh; + if (flags & MAP_ANON) { + /* + * In our implementation we are choosing to ignore fd when + * MAP_ANON is set, instead of failing. + */ + fh = INVALID_HANDLE_VALUE; + + /* ignore/override offset */ + offset = 0; + filelen = len; + filelen_align = len_align; + + if ((flags & MAP_NORESERVE) != 0) { + /* + * For anonymous mappings the meaning of MAP_NORESERVE + * flag is pretty much the same as SEC_RESERVE. + */ + protect |= SEC_RESERVE; + } + } else { + LARGE_INTEGER filesize; + + if (fd == -1) { + ERR("invalid file descriptor: %d", fd); + errno = EBADF; + return MAP_FAILED; + } + + /* + * We need to keep file handle open for proper + * implementation of msync() and to hold the file lock. + */ + if (!DuplicateHandle(GetCurrentProcess(), + (HANDLE)_get_osfhandle(fd), + GetCurrentProcess(), &fh, + 0, FALSE, DUPLICATE_SAME_ACCESS)) { + ERR("cannot duplicate handle - fd: %d, gle: 0x%08x", + fd, GetLastError()); + errno = ENOMEM; + return MAP_FAILED; + } + + /* + * If we are asked to map more than the file size, map till the + * file size and reserve the following. + */ + + if (!GetFileSizeEx(fh, &filesize)) { + ERR("cannot query the file size - fh: %d, gle: 0x%08x", + fd, GetLastError()); + CloseHandle(fh); + return MAP_FAILED; + } + + if (offset >= (os_off_t)filesize.QuadPart) { + errno = EINVAL; + ERR("offset is beyond the file size"); + CloseHandle(fh); + return MAP_FAILED; + } + + /* calculate length of the mapped portion of the file */ + filelen = filesize.QuadPart - offset; + if (filelen > len) + filelen = len; + filelen_align = roundup(filelen, Mmap_align); + + if ((offset + len) > (size_t)filesize.QuadPart) { + /* + * Reserve virtual address for the rest of range we need + * to map, and free a portion in the beginning for this + * allocation. 
+ */ + void *reserved_addr = mmap_reserve(addr, len_align); + if (reserved_addr == MAP_FAILED) { + ERR("cannot reserve region"); + CloseHandle(fh); + return MAP_FAILED; + } + + if (addr != reserved_addr && (flags & MAP_FIXED) != 0) { + ERR("cannot find a contiguous region - " + "addr: %p, len: %lx, gle: 0x%08x", + addr, len, GetLastError()); + if (mmap_unreserve(reserved_addr, + len_align) != 0) { + ASSERT(FALSE); + ERR("cannot free reserved region"); + } + errno = ENOMEM; + CloseHandle(fh); + return MAP_FAILED; + } + + addr = reserved_addr; + if (mmap_unreserve(reserved_addr, filelen_align) != 0) { + ASSERT(FALSE); + ERR("cannot free reserved region"); + CloseHandle(fh); + return MAP_FAILED; + } + } + } + + HANDLE fmh = CreateFileMapping(fh, + NULL, /* security attributes */ + protect, + (DWORD) ((filelen + offset) >> 32), + (DWORD) ((filelen + offset) & 0xFFFFFFFF), + NULL); + + if (fmh == NULL) { + DWORD gle = GetLastError(); + ERR("CreateFileMapping, gle: 0x%08x", gle); + if (gle == ERROR_ACCESS_DENIED) + errno = EACCES; + else + errno = EINVAL; /* XXX */ + CloseHandle(fh); + return MAP_FAILED; + } + + void *base = MapViewOfFileEx(fmh, + access, + (DWORD) (offset >> 32), + (DWORD) (offset & 0xFFFFFFFF), + filelen, + addr); /* hint address */ + + if (base == NULL) { + if (addr == NULL || (flags & MAP_FIXED) != 0) { + ERR("MapViewOfFileEx, gle: 0x%08x", GetLastError()); + errno = EINVAL; + CloseHandle(fh); + CloseHandle(fmh); + return MAP_FAILED; + } + + /* try again w/o hint */ + base = MapViewOfFileEx(fmh, + access, + (DWORD) (offset >> 32), + (DWORD) (offset & 0xFFFFFFFF), + filelen, + NULL); /* no hint address */ + } + + if (base == NULL) { + ERR("MapViewOfFileEx, gle: 0x%08x", GetLastError()); + errno = ENOMEM; + CloseHandle(fh); + CloseHandle(fmh); + return MAP_FAILED; + } + + /* + * We will track the file mapping handle on a lookaside list so that + * we don't have to modify the fact that we only return back the base + * address rather than a more elaborate structure. + */ + + PFILE_MAPPING_TRACKER mt = + malloc(sizeof(struct FILE_MAPPING_TRACKER)); + + if (mt == NULL) { + ERR("!malloc"); + CloseHandle(fh); + CloseHandle(fmh); + return MAP_FAILED; + } + + mt->Flags = 0; + mt->FileHandle = fh; + mt->FileMappingHandle = fmh; + mt->BaseAddress = base; + mt->EndAddress = (void *)((char *)base + len_align); + mt->Access = access; + mt->Offset = offset; + mt->FileLen = filelen_align; + + /* + * XXX: Use the QueryVirtualMemoryInformation when available in the new + * SDK. If the file is DAX mapped say so in the FILE_MAPPING_TRACKER + * Flags. + */ + DWORD filesystemFlags; + if (fh == INVALID_HANDLE_VALUE) { + LOG(4, "anonymous mapping - not DAX mapped - handle: %p", fh); + } else if (GetVolumeInformationByHandleW(fh, NULL, 0, NULL, NULL, + &filesystemFlags, NULL, 0)) { + if (filesystemFlags & FILE_DAX_VOLUME) { + mt->Flags |= FILE_MAPPING_TRACKER_FLAG_DIRECT_MAPPED; + } else { + LOG(4, "file is not DAX mapped - handle: %p", fh); + } + } else { + ERR("failed to query volume information : %08x", + GetLastError()); + } + + AcquireSRWLockExclusive(&FileMappingQLock); + + PMDK_SORTEDQ_INSERT(&FileMappingQHead, mt, ListEntry, + FILE_MAPPING_TRACKER, mmap_file_mapping_comparer); + + ReleaseSRWLockExclusive(&FileMappingQLock); + +#ifdef MMAP_DEBUG_INFO + mmap_info(); +#endif + + return base; +} + +/* + * mmap_split -- (internal) replace existing mapping with another one(s) + * + * Unmaps the region between [begin,end]. 
If it's in the middle of the existing
+ * mapping, it results in two new mappings and duplicated file/mapping handles.
+ */
+static int
+mmap_split(PFILE_MAPPING_TRACKER mt, void *begin, void *end)
+{
+	LOG(4, "begin %p end %p", begin, end);
+
+	ASSERTeq((uintptr_t)begin % Mmap_align, 0);
+	ASSERTeq((uintptr_t)end % Mmap_align, 0);
+
+	PFILE_MAPPING_TRACKER mtb = NULL;
+	PFILE_MAPPING_TRACKER mte = NULL;
+	HANDLE fh = mt->FileHandle;
+	HANDLE fmh = mt->FileMappingHandle;
+	size_t len;
+
+	/*
+	 * In this routine we copy flags from mt to the two subsets that we
+	 * create. Not all flags may be appropriate to propagate, so we assert
+	 * on the flags we know about; if someone adds a new flag in the
+	 * future, they will know about this copy and can take the appropriate
+	 * action.
+	 */
+	C_ASSERT(FILE_MAPPING_TRACKER_FLAGS_MASK == 1);
+
+	/*
+	 * 1)   b       e           b       e
+	 *      xxxxxxxxxxxxx  =>   xxx.......xxxx  -  mtb+mte
+	 * 2)   b       e           b       e
+	 *      xxxxxxxxxxxxx  =>   xxxxxxx.......  -  mtb
+	 * 3)   b       e           b       e
+	 *      xxxxxxxxxxxxx  =>   ........xxxxxx  -  mte
+	 * 4)   b       e           b       e
+	 *      xxxxxxxxxxxxx  =>   ..............  -  none
+	 */
+
+	if (begin > mt->BaseAddress) {
+		/* case #1/2 */
+		/* new mapping at the beginning */
+		mtb = malloc(sizeof(struct FILE_MAPPING_TRACKER));
+		if (mtb == NULL) {
+			ERR("!malloc");
+			goto err;
+		}
+
+		mtb->Flags = mt->Flags;
+		mtb->FileHandle = fh;
+		mtb->FileMappingHandle = fmh;
+		mtb->BaseAddress = mt->BaseAddress;
+		mtb->EndAddress = begin;
+		mtb->Access = mt->Access;
+		mtb->Offset = mt->Offset;
+
+		len = (char *)begin - (char *)mt->BaseAddress;
+		mtb->FileLen = len >= mt->FileLen ? mt->FileLen : len;
+	}
+
+	if (end < mt->EndAddress) {
+		/* case #1/3 */
+		/* new mapping at the end */
+		mte = malloc(sizeof(struct FILE_MAPPING_TRACKER));
+		if (mte == NULL) {
+			ERR("!malloc");
+			goto err;
+		}
+
+		if (!mtb) {
+			/* case #3 */
+			mte->FileHandle = fh;
+			mte->FileMappingHandle = fmh;
+		} else {
+			/* case #1 - need to duplicate handles */
+			mte->FileHandle = NULL;
+			mte->FileMappingHandle = NULL;
+
+			if (!DuplicateHandle(GetCurrentProcess(), fh,
+					GetCurrentProcess(),
+					&mte->FileHandle,
+					0, FALSE, DUPLICATE_SAME_ACCESS)) {
+				ERR("DuplicateHandle, gle: 0x%08x",
+					GetLastError());
+				goto err;
+			}
+
+			if (!DuplicateHandle(GetCurrentProcess(), fmh,
+					GetCurrentProcess(),
+					&mte->FileMappingHandle,
+					0, FALSE, DUPLICATE_SAME_ACCESS)) {
+				ERR("DuplicateHandle, gle: 0x%08x",
+					GetLastError());
+				goto err;
+			}
+		}
+
+		mte->Flags = mt->Flags;
+		mte->BaseAddress = end;
+		mte->EndAddress = mt->EndAddress;
+		mte->Access = mt->Access;
+		mte->Offset = mt->Offset +
+			((char *)mte->BaseAddress - (char *)mt->BaseAddress);
+
+		len = (char *)end - (char *)mt->BaseAddress;
+		mte->FileLen = len >= mt->FileLen ?
0 : mt->FileLen - len; + } + + if (mt->FileLen > 0 && UnmapViewOfFile(mt->BaseAddress) == FALSE) { + ERR("UnmapViewOfFile, gle: 0x%08x", GetLastError()); + goto err; + } + + len = (char *)mt->EndAddress - (char *)mt->BaseAddress; + if (len > mt->FileLen) { + void *addr = (char *)mt->BaseAddress + mt->FileLen; + mmap_unreserve(addr, len - mt->FileLen); + } + + if (!mtb && !mte) { + /* case #4 */ + CloseHandle(fmh); + CloseHandle(fh); + } + + /* + * free entry for the original mapping + */ + PMDK_SORTEDQ_REMOVE(&FileMappingQHead, mt, ListEntry); + free(mt); + + if (mtb) { + len = (char *)mtb->EndAddress - (char *)mtb->BaseAddress; + if (len > mtb->FileLen) { + void *addr = (char *)mtb->BaseAddress + mtb->FileLen; + void *raddr = mmap_reserve(addr, len - mtb->FileLen); + if (raddr == MAP_FAILED) { + ERR("cannot find a contiguous region - " + "addr: %p, len: %lx, gle: 0x%08x", + addr, len, GetLastError()); + goto err; + } + } + + if (mtb->FileLen > 0) { + void *base = MapViewOfFileEx(mtb->FileMappingHandle, + mtb->Access, + (DWORD) (mtb->Offset >> 32), + (DWORD) (mtb->Offset & 0xFFFFFFFF), + mtb->FileLen, + mtb->BaseAddress); /* hint address */ + + if (base == NULL) { + ERR("MapViewOfFileEx, gle: 0x%08x", + GetLastError()); + goto err; + } + } + + PMDK_SORTEDQ_INSERT(&FileMappingQHead, mtb, ListEntry, + FILE_MAPPING_TRACKER, mmap_file_mapping_comparer); + } + + if (mte) { + len = (char *)mte->EndAddress - (char *)mte->BaseAddress; + if (len > mte->FileLen) { + void *addr = (char *)mte->BaseAddress + mte->FileLen; + void *raddr = mmap_reserve(addr, len - mte->FileLen); + if (raddr == MAP_FAILED) { + ERR("cannot find a contiguous region - " + "addr: %p, len: %lx, gle: 0x%08x", + addr, len, GetLastError()); + goto err; + } + } + + if (mte->FileLen > 0) { + void *base = MapViewOfFileEx(mte->FileMappingHandle, + mte->Access, + (DWORD) (mte->Offset >> 32), + (DWORD) (mte->Offset & 0xFFFFFFFF), + mte->FileLen, + mte->BaseAddress); /* hint address */ + + if (base == NULL) { + ERR("MapViewOfFileEx, gle: 0x%08x", + GetLastError()); + goto err_mte; + } + } + + PMDK_SORTEDQ_INSERT(&FileMappingQHead, mte, ListEntry, + FILE_MAPPING_TRACKER, mmap_file_mapping_comparer); + } + + return 0; + +err: + if (mtb) { + ASSERTeq(mtb->FileMappingHandle, fmh); + ASSERTeq(mtb->FileHandle, fh); + CloseHandle(mtb->FileMappingHandle); + CloseHandle(mtb->FileHandle); + + len = (char *)mtb->EndAddress - (char *)mtb->BaseAddress; + if (len > mtb->FileLen) { + void *addr = (char *)mtb->BaseAddress + mtb->FileLen; + mmap_unreserve(addr, len - mtb->FileLen); + } + } + +err_mte: + if (mte) { + if (mte->FileMappingHandle) + CloseHandle(mte->FileMappingHandle); + if (mte->FileHandle) + CloseHandle(mte->FileHandle); + + len = (char *)mte->EndAddress - (char *)mte->BaseAddress; + if (len > mte->FileLen) { + void *addr = (char *)mte->BaseAddress + mte->FileLen; + mmap_unreserve(addr, len - mte->FileLen); + } + } + + free(mtb); + free(mte); + return -1; +} + +/* + * munmap -- delete mapping + */ +int +munmap(void *addr, size_t len) +{ + LOG(4, "addr %p len %zu", addr, len); + + if (((uintptr_t)addr % Mmap_align) != 0) { + ERR("address is not well-aligned: %p", addr); + errno = EINVAL; + return -1; + } + + if (len == 0) { + ERR("invalid length: %zu", len); + errno = EINVAL; + return -1; + } + + int retval = -1; + + if (len > UINTPTR_MAX - (uintptr_t)addr) { + /* limit len to not get beyond address space */ + len = UINTPTR_MAX - (uintptr_t)addr; + } + + void *begin = addr; + void *end = (void *)((char *)addr + len); + + 
AcquireSRWLockExclusive(&FileMappingQLock);
+
+	/*
+	 * Walk the sorted list of trackers and split every mapping that
+	 * overlaps [begin, end); any part of the range not covered by a
+	 * tracker is released as a plain reservation at the end.
+	 */
+	PFILE_MAPPING_TRACKER mt;
+	PFILE_MAPPING_TRACKER next;
+	for (mt = PMDK_SORTEDQ_FIRST(&FileMappingQHead);
+		mt != (void *)&FileMappingQHead;
+		mt = next) {
+
+		/*
+		 * Pick the next entry before we split, thereby deleting
+		 * this one (NOTE: mmap_split could delete this entry).
+		 */
+		next = PMDK_SORTEDQ_NEXT(mt, ListEntry);
+
+		if (mt->BaseAddress >= end) {
+			LOG(4, "ignoring all mapped ranges beyond given range");
+			break;
+		}
+
+		if (mt->EndAddress <= begin) {
+			LOG(4, "skipping a mapped range before given range");
+			continue;
+		}
+
+		void *begin2 = begin > mt->BaseAddress ?
+				begin : mt->BaseAddress;
+		void *end2 = end < mt->EndAddress ?
+				end : mt->EndAddress;
+
+		size_t len2 = (char *)end2 - (char *)begin2;
+
+		void *align_end = (void *)roundup((uintptr_t)end2, Mmap_align);
+		if (mmap_split(mt, begin2, align_end) != 0) {
+			LOG(2, "mapping split failed");
+			goto err;
+		}
+
+		if (len > len2) {
+			len -= len2;
+		} else {
+			len = 0;
+			break;
+		}
+	}
+
+	/*
+	 * If we didn't find any mapped regions in our list, attempt to free
+	 * the range as if it were entirely reserved.
+	 *
+	 * XXX: We don't handle a range that contains both mapped regions
+	 * and reserved regions.
+	 */
+	if (len > 0)
+		mmap_unreserve(addr, roundup(len, Mmap_align));
+
+	retval = 0;
+
+err:
+	ReleaseSRWLockExclusive(&FileMappingQLock);
+
+	if (retval == -1)
+		errno = EINVAL;
+
+#ifdef MMAP_DEBUG_INFO
+	mmap_info();
+#endif
+
+	return retval;
+}
+
+#define MS_ALL (MS_SYNC|MS_ASYNC|MS_INVALIDATE)
+
+/*
+ * msync -- synchronize a file with a memory map
+ */
+int
+msync(void *addr, size_t len, int flags)
+{
+	LOG(4, "addr %p len %zu flags %d", addr, len, flags);
+
+	if ((flags & ~MS_ALL) != 0) {
+		ERR("invalid flags: 0x%08x", flags);
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * XXX - On Linux it is allowed to call msync() with neither MS_SYNC
+	 * nor MS_ASYNC set.
+	 */
+	if (((flags & MS_SYNC) && (flags & MS_ASYNC)) ||
+			((flags & (MS_SYNC | MS_ASYNC)) == 0)) {
+		ERR("neither MS_SYNC nor MS_ASYNC is set, or both are: 0x%08x",
+			flags);
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (((uintptr_t)addr % Pagesize) != 0) {
+		ERR("address is not page-aligned: %p", addr);
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (len == 0) {
+		LOG(4, "zero-length region - do nothing");
+		return 0; /* do nothing */
+	}
+
+	if (len > UINTPTR_MAX - (uintptr_t)addr) {
+		/* limit len to not get beyond address space */
+		len = UINTPTR_MAX - (uintptr_t)addr;
+	}
+
+	int retval = -1;
+
+	void *begin = addr;
+	void *end = (void *)((char *)addr + len);
+
+	AcquireSRWLockShared(&FileMappingQLock);
+
+	PFILE_MAPPING_TRACKER mt;
+	PMDK_SORTEDQ_FOREACH(mt, &FileMappingQHead, ListEntry) {
+		if (mt->BaseAddress >= end) {
+			LOG(4, "ignoring all mapped ranges beyond given range");
+			break;
+		}
+		if (mt->EndAddress <= begin) {
+			LOG(4, "skipping a mapped range before given range");
+			continue;
+		}
+
+		void *begin2 = begin > mt->BaseAddress ?
+			begin : mt->BaseAddress;
+		void *end2 = end < mt->EndAddress ?
+ end : mt->EndAddress; + + size_t len2 = (char *)end2 - (char *)begin2; + + /* do nothing for anonymous mappings */ + if (mt->FileHandle != INVALID_HANDLE_VALUE) { + if (FlushViewOfFile(begin2, len2) == FALSE) { + ERR("FlushViewOfFile, gle: 0x%08x", + GetLastError()); + errno = ENOMEM; + goto err; + } + + if (FlushFileBuffers(mt->FileHandle) == FALSE) { + ERR("FlushFileBuffers, gle: 0x%08x", + GetLastError()); + errno = EINVAL; + goto err; + } + } + + if (len > len2) { + len -= len2; + } else { + len = 0; + break; + } + } + + if (len > 0) { + ERR("indicated memory (or part of it) was not mapped"); + errno = ENOMEM; + } else { + retval = 0; + } + +err: + ReleaseSRWLockShared(&FileMappingQLock); + return retval; +} + +#define PROT_ALL (PROT_READ|PROT_WRITE|PROT_EXEC) + +/* + * mprotect -- set protection on a region of memory + * + * XXX - If the memory range passed to mprotect() includes invalid pages, + * returned status will indicate error, and errno is set to ENOMEM. + * However, the protection change is actually applied to all the valid pages, + * ignoring the rest. + * This is different than on Linux, where it stops on the first invalid page. + */ +int +mprotect(void *addr, size_t len, int prot) +{ + LOG(4, "addr %p len %zu prot %d", addr, len, prot); + + if (((uintptr_t)addr % Pagesize) != 0) { + ERR("address is not page-aligned: %p", addr); + errno = EINVAL; + return -1; + } + + if (len == 0) { + LOG(4, "zero-length region - do nothing"); + return 0; /* do nothing */ + } + + if (len > UINTPTR_MAX - (uintptr_t)addr) { + len = UINTPTR_MAX - (uintptr_t)addr; + LOG(4, "limit len to %zu to not get beyond address space", len); + } + + DWORD protect = 0; + + if ((prot & PROT_READ) && (prot & PROT_WRITE)) { + protect |= PAGE_READWRITE; + if (prot & PROT_EXEC) + protect |= PAGE_EXECUTE_READWRITE; + } else if (prot & PROT_READ) { + protect |= PAGE_READONLY; + if (prot & PROT_EXEC) + protect |= PAGE_EXECUTE_READ; + } else { + protect |= PAGE_NOACCESS; + } + + int retval = -1; + + void *begin = addr; + void *end = (void *)((char *)addr + len); + + AcquireSRWLockShared(&FileMappingQLock); + + PFILE_MAPPING_TRACKER mt; + PMDK_SORTEDQ_FOREACH(mt, &FileMappingQHead, ListEntry) { + if (mt->BaseAddress >= end) { + LOG(4, "ignoring all mapped ranges beyond given range"); + break; + } + if (mt->EndAddress <= begin) { + LOG(4, "skipping a mapped range before given range"); + continue; + } + + void *begin2 = begin > mt->BaseAddress ? + begin : mt->BaseAddress; + void *end2 = end < mt->EndAddress ? 
+			end : mt->EndAddress;
+
+		/*
+		 * The protection passed to VirtualProtect must be compatible
+		 * with the access protection specified for this region when
+		 * the view was mapped using MapViewOfFileEx.
+		 */
+		if (mt->Access == FILE_MAP_COPY) {
+			if (protect & PAGE_READWRITE) {
+				protect &= ~PAGE_READWRITE;
+				protect |= PAGE_WRITECOPY;
+			} else if (protect & PAGE_EXECUTE_READWRITE) {
+				protect &= ~PAGE_EXECUTE_READWRITE;
+				protect |= PAGE_EXECUTE_WRITECOPY;
+			}
+		}
+
+		size_t len2 = (char *)end2 - (char *)begin2;
+
+		DWORD oldprot = 0;
+		BOOL ret;
+		ret = VirtualProtect(begin2, len2, protect, &oldprot);
+		if (ret == FALSE) {
+			DWORD gle = GetLastError();
+			ERR("VirtualProtect, gle: 0x%08x", gle);
+			/* translate error code */
+			switch (gle) {
+				case ERROR_INVALID_PARAMETER:
+					errno = EACCES;
+					break;
+				case ERROR_INVALID_ADDRESS:
+					errno = ENOMEM;
+					break;
+				default:
+					errno = EINVAL;
+					break;
+			}
+			goto err;
+		}
+
+		if (len > len2) {
+			len -= len2;
+		} else {
+			len = 0;
+			break;
+		}
+	}
+
+	if (len > 0) {
+		ERR("indicated memory (or part of it) was not mapped");
+		errno = ENOMEM;
+	} else {
+		retval = 0;
+	}
+
+err:
+	ReleaseSRWLockShared(&FileMappingQLock);
+	return retval;
+}
diff --git a/src/pmdk/utils/.gitignore b/src/pmdk/utils/.gitignore
new file mode 100644
index 000000000..c4c4ffc6a
--- /dev/null
+++ b/src/pmdk/utils/.gitignore
@@ -0,0 +1 @@
+*.zip
diff --git a/src/pmdk/utils/CHECK_WHITESPACE.PS1 b/src/pmdk/utils/CHECK_WHITESPACE.PS1
new file mode 100644
index 000000000..5ffeedc4a
--- /dev/null
+++ b/src/pmdk/utils/CHECK_WHITESPACE.PS1
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2017, Intel Corporation
+#
+# CHECK_WHITESPACE.PS1 -- script to check coding style
+#
+# XXX - integrate with VS projects and execute for each build
+#
+
+$scriptdir = Split-Path -Parent $PSCommandPath
+$rootdir = $scriptdir + "\.."
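+# Resolve the perl checker relative to this script's location so the check
+# can be run from any working directory.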
+$whitespace = $rootdir + "\utils\check_whitespace"
+
+If ( Get-Command -Name perl -ErrorAction SilentlyContinue ) {
+	&perl $whitespace -g
+	if ($LASTEXITCODE -ne 0) {
+		Exit $LASTEXITCODE
+	}
+} else {
+	Write-Output "Cannot execute check_whitespace - perl is missing"
+}
diff --git a/src/pmdk/utils/CREATE-ZIP.PS1 b/src/pmdk/utils/CREATE-ZIP.PS1
new file mode 100644
index 000000000..902cb07a9
--- /dev/null
+++ b/src/pmdk/utils/CREATE-ZIP.PS1
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2019, Intel Corporation
+#
+# CREATE-ZIP.PS1 -- script to create release zip package
+#
+
+#
+# parameter handling
+#
+[CmdletBinding(PositionalBinding=$false)]
+Param(
+    [alias("b")]
+    $build = "debug",
+    [alias("v")]
+    $version = "0",
+    [alias("e")]
+    $extended = "0"
+    )
+
+$scriptdir = Split-Path -Parent $PSCommandPath
+$rootdir = $scriptdir + "\..\"
+
+$builddir = $rootdir + "\src\x64\"
+$zipdir = $builddir + "\pmdk\"
+
+if ($version -eq "0") {
+	$git = Get-Command -Name git -ErrorAction SilentlyContinue
+	if ($git) {
+		$version = $(git describe)
+	} else {
+		$version = "0"
+	}
+}
+
+$zipfile = $builddir + "\pmdk-" + $version + "-win-x64-" + $build + ".zip"
+
+Remove-Item $zipdir -Force -Recurse -ea si
+Get-ChildItem | Where-Object {$_.Name -Match "pmdk-.*-win-x64.zip"} | Remove-Item -Force -ea si
+New-Item -ItemType directory -Path ( $zipdir) -Force | Out-Null
+New-Item -ItemType directory -Path ( $zipdir + "\bin\") -Force | Out-Null
+New-Item -ItemType directory -Path ( $zipdir + "\lib\") -Force | Out-Null
+
+$libs = @("libpmem", "libpmemblk", "libpmemlog", "libpmemobj", "libpmempool")
+$apps = @("pmempool")
+$apps_extended = @("pmempool", "pmemalloc", "pmemdetect", "pmemspoil", "pmemwrite")
+
+if ($extended -eq "1") {
+	$apps = $apps_extended
+}
+
+foreach ($lib in $libs) {
+	Copy-Item ($builddir + $build + "\libs\" + $lib + ".dll") ($zipdir + "\bin\")
+	foreach ($ex in @(".lib", ".pdb")) {
+		Copy-Item ($builddir + $build + "\libs\" + $lib + $ex) ($zipdir + "\lib\")
+	}
+}
+foreach ($app in $apps) {
+	if ($app -eq "pmempool") {
+		Copy-Item ($builddir + $build + "\libs\" + $app + ".exe") ($zipdir + "\bin\")
+		Copy-Item ($builddir + $build + "\libs\" + $app + ".pdb") ($zipdir + "\lib\")
+	} else {
+		Copy-Item ($builddir + $build + "\tests\" + $app + ".exe") ($zipdir + "\bin\")
+		Copy-Item ($builddir + $build + "\tests\" + $app + ".pdb") ($zipdir + "\lib\")
+	}
+}
+
+Copy-Item -Recurse ($rootdir + "src\include") ($zipdir)
+Remove-Item -Force ($zipdir + "include\.cstyleignore")
+Remove-Item -Force ($zipdir + "include\README")
+Remove-Item -Force ($zipdir + "include\librpmem.h")
+Copy-Item ($rootdir + "README.md") ($zipdir)
+Copy-Item ($rootdir + "LICENSE") ($zipdir)
+Copy-Item ($rootdir + "ChangeLog") ($zipdir)
+
+Add-Type -Assembly System.IO.Compression.FileSystem
+$comprlevel = [System.IO.Compression.CompressionLevel]::Optimal
+
+if (Test-Path ($zipdir)) {
+	[System.IO.Compression.ZipFile]::CreateFromDirectory($zipdir, $zipfile,
+		$comprlevel, $true)
+}
+
+Remove-Item $zipdir -Force -Recurse -ea si
diff --git a/src/pmdk/utils/CSTYLE.ps1 b/src/pmdk/utils/CSTYLE.ps1
new file mode 100644
index 000000000..42b22b34c
--- /dev/null
+++ b/src/pmdk/utils/CSTYLE.ps1
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2017, Intel Corporation
+#
+# CSTYLE.ps1 -- script to check coding style
+#
+# XXX - integrate with VS projects and execute for each build
+#
+
+$scriptdir = Split-Path -Parent $PSCommandPath
+$rootdir = $scriptdir + "\.."
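+# The check recurses over the whole tree; a per-directory .cstyleignore file
+# (one basename per line) can exclude files from the check.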
+$cstyle = $rootdir + "\utils\cstyle"
+$checkdir = $rootdir
+
+# XXX - *.cpp/*.hpp files not supported yet
+$include = @( "*.c", "*.h" )
+
+If ( Get-Command -Name perl -ErrorAction SilentlyContinue ) {
+	Get-ChildItem -Path $checkdir -Recurse -Include $include | `
+	Where-Object { $_.FullName -notlike "*jemalloc*" } | `
+	ForEach-Object {
+		$IGNORE = $_.DirectoryName + "\.cstyleignore"
+		if(Test-Path $IGNORE) {
+			if((Select-String $_.Name $IGNORE)) {
+				return
+			}
+		}
+		$_
+	} | ForEach-Object {
+		Write-Output $_.FullName
+		& perl $cstyle $_.FullName
+		if ($LASTEXITCODE -ne 0) {
+			Exit $LASTEXITCODE
+		}
+	}
+} else {
+	Write-Output "Cannot execute cstyle - perl is missing"
+}
diff --git a/src/pmdk/utils/Makefile b/src/pmdk/utils/Makefile
new file mode 100644
index 000000000..14352886f
--- /dev/null
+++ b/src/pmdk/utils/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)\
+	$(filter $(subst *,%,$2),$d)))
+
+SCRIPTS = $(call rwildcard,,*.sh)
+
+cstyle:
+	./check-shebang.sh $(SCRIPTS)
+
+.PHONY: cstyle
diff --git a/src/pmdk/utils/README b/src/pmdk/utils/README
new file mode 100644
index 000000000..07a762ca2
--- /dev/null
+++ b/src/pmdk/utils/README
@@ -0,0 +1,5 @@
+Persistent Memory Development Kit
+
+This is utils/README.
+
+The scripts found here are used during library development.
diff --git a/src/pmdk/utils/SRCVERSION.ps1 b/src/pmdk/utils/SRCVERSION.ps1
new file mode 100644
index 000000000..8e6e43639
--- /dev/null
+++ b/src/pmdk/utils/SRCVERSION.ps1
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation

+#
+# SRCVERSION.PS1 -- script to create the SRCVERSION macro and generate srcversion.h
+#

+#
+# Windows dll versioning supports only a fixed number of fields. The most
+# important are MAJOR, MINOR and REVISION. We have 3-component releases
+# (e.g. 1.5.1) with release candidates, so we have to encode this information
+# into this fixed number of fields. That's why we abuse REVISION to encode both
+# the 3rd component and the rc status:
+# REVISION = 3RDCOMP * 1000 + (!is_rc) * 100 + rc.
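+#
+# Worked example of the formula above: 1.5.2-rc4 encodes as
+# REVISION = 2 * 1000 + 0 * 100 + 4 = 2004, while the final 1.5.2 release
+# encodes as REVISION = 2 * 1000 + 1 * 100 + 0 = 2100 (see the table below).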
+#
+# Examples:
+# +---------------------+-----+-----+--------+-----+------+-------+----------+
+# |git describe --long  |MAJOR|MINOR|REVISION|BUILD|BUGFIX|PRIVATE|PRERELEASE|
+# +---------------------+-----+-----+--------+-----+------+-------+----------+
+# |1.5-rc2-0-12345678   |    1|    5|       2|    0| false|  false|      true|
+# |1.5-rc3-6-12345678   |    1|    5|       3|    6| false|   true|      true|
+# |1.5-0-12345678       |    1|    5|     100|    0| false|  false|     false|
+# |1.5-6-123345678      |    1|    5|     100|    6| false|   true|     false|
+# |1.5.2-rc1-0-12345678 |    1|    5|    2001|    0|  true|  false|      true|
+# |1.5.2-rc4-6-12345678 |    1|    5|    2004|    6|  true|   true|      true|
+# |1.5.2-0-12345678     |    1|    5|    2100|    0|  true|  false|     false|
+# |1.5.2-6-12345678     |    1|    5|    2100|    6|  true|   true|     false|
+# +---------------------+-----+-----+--------+-----+------+-------+----------+
+#
+
+$scriptPath = Split-Path -parent $MyInvocation.MyCommand.Definition
+$file_path = $scriptPath + "\..\src\windows\include\srcversion.h"
+$git_version_file = $scriptPath + "\..\GIT_VERSION"
+$version_file = $scriptPath + "\..\VERSION"
+$git = Get-Command -Name git -ErrorAction SilentlyContinue
+
+if (Test-Path $file_path) {
+	$old_src_version = Get-Content $file_path | `
+		Where-Object { $_ -like '#define SRCVERSION*' }
+} else {
+	$old_src_version = ""
+}
+
+$git_version = ""
+$git_version_hash = ""
+
+if (Test-Path $git_version_file) {
+	$git_version = Get-Content $git_version_file
+	if ($git_version -eq "`$Format:%h`$") {
+		$git_version = ""
+	} else {
+		$git_version_hash = $git_version
+	}
+}
+
+$PRERELEASE = $false
+$BUGFIX = $false
+$PRIVATE = $true
+$CUSTOM = $false
+
+if ($null -ne $args[0]) {
+	$version = $args[0]
+	$ver_array = $version.split("-+")
+} elseif (Test-Path $version_file) {
+	$version = Get-Content $version_file
+	$ver_array = $version.split("-+")
+} elseif ($git_version_hash -ne "") {
+	$MAJOR = 0
+	$MINOR = 0
+	$REVISION = 0
+	$BUILD = 0
+
+	$version = $git_version_hash
+	$CUSTOM = $true
+	$version_custom_msg = "#define VERSION_CUSTOM_MSG `"$git_version_hash`""
+} elseif ($null -ne $git) {
+	$version = $(git describe)
+	$ver_array = $(git describe --long).split("-+")
+} else {
+	$MAJOR = 0
+	$MINOR = 0
+	$REVISION = 0
+	$BUILD = 0
+
+	$version = "UNKNOWN_VERSION"
+	$CUSTOM = $true
+	$version_custom_msg = "#define VERSION_CUSTOM_MSG `"UNKNOWN_VERSION`""
+}
+
+if ($null -ne $ver_array) {
+	$ver_dots = $ver_array[0].split(".")
+	$MAJOR = $ver_dots[0]
+	$MINOR = $ver_dots[1]
+	if ($ver_dots.length -ge 3) {
+		$REV = $ver_dots[2]
+		$BUGFIX = $true
+	} else {
+		$REV = 0
+	}
+
+	$REVISION = 1000 * $REV
+	$BUILD = $ver_array[$ver_array.length - 2]
+
+	if ($ver_array.length -eq 4) {
+		# <MAJOR>.<MINOR>[.<BUGFIX>]-<SUFFIX>-<BUILD>-<HASH>
+
+		if ($ver_array[1].StartsWith("rc")) {
+			# <MAJOR>.<MINOR>[.<BUGFIX>]-rc<N>-<BUILD>-<HASH>
+			$REVISION += $ver_array[1].Substring("rc".Length)
+			$PRERELEASE = $true
+			$version = "$($ver_array[0])-$($ver_array[1])+git$($ver_array[2]).$($ver_array[3])"
+		} else {
+			# <MAJOR>.<MINOR>[.<BUGFIX>]-<UNKNOWN>-<BUILD>-<HASH>
+			throw "Unknown version format"
+		}
+	} else {
+		# <MAJOR>.<MINOR>[.<BUGFIX>]-<BUILD>-<HASH>
+		$REVISION += 100
+		$version = "$($ver_array[0])+git$($ver_array[1]).$($ver_array[2])"
+	}
+
+	if ($BUILD -eq 0) {
+		# it is not a (pre)release build
+		$PRIVATE = $false
+	}
+}
+
+$src_version = "#define SRCVERSION `"$version`""
+
+if ($old_src_version -eq $src_version) {
+	exit 0
+}
+
+Write-Output "updating source version: $version"
+Write-Output $src_version > $file_path
+
+Write-Output "#ifdef RC_INVOKED" >> $file_path
+
+Write-Output "#define MAJOR $MAJOR" >> $file_path
+Write-Output "#define MINOR $MINOR" >> $file_path
+Write-Output "#define REVISION $REVISION" >> $file_path
+Write-Output "#define 
BUILD $BUILD" >> $file_path + +if ($PRERELEASE) { + Write-Output "#define PRERELEASE 1" >> $file_path +} +if ($BUGFIX) { + Write-Output "#define BUGFIX 1" >> $file_path +} +if ($PRIVATE) { + Write-Output "#define PRIVATE 1" >> $file_path +} +if ($CUSTOM) { + Write-Output "#define CUSTOM 1" >> $file_path + Write-Output $version_custom_msg >> $file_path +} + +Write-Output "#endif" >> $file_path diff --git a/src/pmdk/utils/build-dpkg.sh b/src/pmdk/utils/build-dpkg.sh new file mode 100755 index 000000000..cb96f4185 --- /dev/null +++ b/src/pmdk/utils/build-dpkg.sh @@ -0,0 +1,836 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# build-dpkg.sh - Script for building deb packages +# + +set -e + +SCRIPT_DIR=$(dirname $0) +source $SCRIPT_DIR/pkg-common.sh + +# +# usage -- print usage message and exit +# +usage() +{ + [ "$1" ] && echo Error: $1 + cat >&2 < debian/librpmem.install +$LIB_DIR/librpmem.so.* +EOF + +cat << EOF > debian/librpmem.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +librpmem: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/librpmem-dev.install +$LIB_DIR/pmdk_debug/librpmem.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/librpmem.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/librpmem.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/librpmem.so +$LIB_DIR/pkgconfig/librpmem.pc +$INC_DIR/librpmem.h +$MAN7_DIR/librpmem.7 +$MAN3_DIR/rpmem_*.3 +EOF + +cat << EOF > debian/librpmem-dev.triggers +interest man-db +EOF + +cat << EOF > debian/librpmem-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +EOF + +cat << EOF > debian/rpmemd.install +usr/bin/rpmemd +$MAN1_DIR/rpmemd.1 +EOF + +cat << EOF > debian/rpmemd.triggers +interest man-db +EOF + +cat << EOF > debian/rpmemd.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +EOF +} + +function append_rpmem_control() { +cat << EOF >> $CONTROL_FILE + +Package: librpmem +Architecture: any +Depends: \${shlibs:Depends}, \${misc:Depends} +Description: Persistent Memory remote access support library + librpmem provides low-level support for remote access to persistent memory + (pmem) utilizing RDMA-capable RNICs. The library can be used to replicate + remotely a memory region over RDMA protocol. It utilizes appropriate + persistency mechanism based on remote node’s platform capabilities. The + librpmem utilizes the ssh client to authenticate a user on remote node and for + encryption of connection’s out-of-band configuration data. + . + This library is for applications that use remote persistent memory directly, + without the help of any library-supplied transactions or memory allocation. + Higher-level libraries that build on libpmem are available and are recommended + for most applications. + +Package: librpmem-dev +Section: libdevel +Architecture: any +Depends: librpmem (=\${binary:Version}), libpmem-dev, \${shlibs:Depends}, \${misc:Depends} +Description: Development files for librpmem + librpmem provides low-level support for remote access to persistent memory + (pmem) utilizing RDMA-capable RNICs. + . + This package contains libraries and header files used for linking programs + against librpmem. 
+ +Package: rpmemd +Section: misc +Architecture: any +Priority: optional +Depends: \${shlibs:Depends}, \${misc:Depends} +Description: rpmem daemon + Daemon for Remote Persistent Memory support. +EOF +} + +function libpmem2_install_triggers_overrides() { +cat << EOF > debian/libpmem2.install +$LIB_DIR/libpmem2.so.* +EOF + +cat << EOF > debian/libpmem2.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmem2: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmem2-dev.install +$LIB_DIR/pmdk_debug/libpmem2.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmem2.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmem2.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmem2.so +$LIB_DIR/pkgconfig/libpmem2.pc +$INC_DIR/libpmem2.h +$MAN7_DIR/libpmem2.7 +$MAN3_DIR/pmem2_*.3 +EOF + +cat << EOF > debian/libpmem2-dev.triggers +interest man-db +EOF + +cat << EOF > debian/libpmem2-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +EOF +} + +function append_libpmem2_control() { +cat << EOF >> $CONTROL_FILE + +Package: libpmem2 +Architecture: any +Depends: \${shlibs:Depends}, \${misc:Depends} +Description: Persistent Memory low level support library + libpmem2 provides low level persistent memory support. In particular, support + for the persistent memory instructions for flushing changes to pmem is + provided. + +Package: libpmem2-dev +Section: libdevel +Architecture: any +Depends: libpmem2 (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends} +Description: Development files for libpmem2 + libpmem2 provides low level persistent memory support. In particular, support + for the persistent memory instructions for flushing changes to pmem is + provided. +EOF +} + +function daxio_install_triggers_overrides() { +cat << EOF > debian/daxio.install +usr/bin/daxio +$MAN1_DIR/daxio.1 +EOF + +cat << EOF > debian/daxio.triggers +interest man-db +EOF + +cat << EOF > debian/daxio.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +EOF +} + +function append_daxio_control() { +cat << EOF >> $CONTROL_FILE + +Package: daxio +Section: misc +Architecture: any +Priority: optional +Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends} +Description: dd-like tool to read/write to a devdax device + The daxio utility performs I/O on Device DAX devices or zeroes a Device + DAX device. Since the standard I/O APIs (read/write) cannot be used + with Device DAX, data transfer is performed on a memory-mapped device. + The daxio may be used to dump Device DAX data to a file, restore data from + a backup copy, move/copy data to another device or to erase data from + a device. 
+EOF +} + +if [ "${BUILD_PACKAGE_CHECK}" == "y" ] +then +CHECK_CMD=" +override_dh_auto_test: + dh_auto_test + if [ -f $TEST_CONFIG_FILE ]; then\ + cp $TEST_CONFIG_FILE src/test/testconfig.sh;\ + else\ + echo 'PMEM_FS_DIR=/tmp' > src/test/testconfig.sh; \ + echo 'PMEM_FS_DIR_FORCE_PMEM=1' >> src/test/testconfig.sh; \ + echo 'TEST_BUILD=\"debug nondebug\"' >> src/test/testconfig.sh; \ + echo 'TEST_FS=\"pmem any none\"' >> src/test/testconfig.sh; \ + fi + make pcheck ${PCHECK_OPTS} +" +else +CHECK_CMD=" +override_dh_auto_test: + +" +fi + +check_tool debuild +check_tool dch +check_file $SCRIPT_DIR/pkg-config.sh + +source $SCRIPT_DIR/pkg-config.sh + +PACKAGE_VERSION=$(get_version $PACKAGE_VERSION_TAG) +PACKAGE_RELEASE=1 +PACKAGE_SOURCE=${PACKAGE_NAME}-${PACKAGE_VERSION} +PACKAGE_TARBALL_ORIG=${PACKAGE_NAME}_${PACKAGE_VERSION}.orig.tar.gz +MAGIC_INSTALL=utils/magic-install.sh +MAGIC_UNINSTALL=utils/magic-uninstall.sh +CONTROL_FILE=debian/control + +[ -d $WORKING_DIR ] || mkdir $WORKING_DIR +[ -d $OUT_DIR ] || mkdir $OUT_DIR + +OLD_DIR=$PWD + +cd $WORKING_DIR + +check_dir $SOURCE + +mv $SOURCE $PACKAGE_SOURCE +tar zcf $PACKAGE_TARBALL_ORIG $PACKAGE_SOURCE + +cd $PACKAGE_SOURCE + +rm -rf debian +mkdir debian + +# Generate compat file +cat << EOF > debian/compat +9 +EOF + +# Generate control file +cat << EOF > $CONTROL_FILE +Source: $PACKAGE_NAME +Maintainer: $PACKAGE_MAINTAINER +Section: libs +Priority: optional +Standards-version: 4.1.4 +Build-Depends: debhelper (>= 9) +Homepage: https://pmem.io/pmdk/ + +Package: libpmem +Architecture: any +Depends: \${shlibs:Depends}, \${misc:Depends} +Description: Persistent Memory low level support library + libpmem provides low level persistent memory support. In particular, support + for the persistent memory instructions for flushing changes to pmem is + provided. + +Package: libpmem-dev +Section: libdevel +Architecture: any +Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends} +Description: Development files for libpmem + libpmem provides low level persistent memory support. In particular, support + for the persistent memory instructions for flushing changes to pmem is + provided. + +Package: libpmemblk +Architecture: any +Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends} +Description: Persistent Memory block array support library + libpmemblk implements a pmem-resident array of blocks, all the same size, where + a block is updated atomically with respect to power failure or program + interruption (no torn blocks). + +Package: libpmemblk-dev +Section: libdevel +Architecture: any +Depends: libpmemblk (=\${binary:Version}), libpmem-dev, \${shlibs:Depends}, \${misc:Depends} +Description: Development files for libpmemblk + libpmemblk implements a pmem-resident array of blocks, all the same size, where + a block is updated atomically with respect to power failure or program + interruption (no torn blocks). + +Package: libpmemlog +Architecture: any +Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends} +Description: Persistent Memory log file support library + libpmemlog implements a pmem-resident log file. + +Package: libpmemlog-dev +Section: libdevel +Architecture: any +Depends: libpmemlog (=\${binary:Version}), libpmem-dev, \${shlibs:Depends}, \${misc:Depends} +Description: Development files for libpmemlog + libpmemlog implements a pmem-resident log file. 
+
+Package: libpmemobj
+Architecture: any
+Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends}
+Description: Persistent Memory object store support library
+ libpmemobj turns a persistent memory file into a flexible object store,
+ supporting transactions, memory management, locking, lists, and a number of
+ other features.
+
+Package: libpmemobj-dev
+Section: libdevel
+Architecture: any
+Depends: libpmemobj (=\${binary:Version}), libpmem-dev, \${shlibs:Depends}, \${misc:Depends}
+Description: Development files for libpmemobj
+ libpmemobj turns a persistent memory file into a flexible object store,
+ supporting transactions, memory management, locking, lists, and a number of
+ other features.
+ .
+ This package contains libraries and header files used for linking programs
+ against libpmemobj.
+
+Package: libpmempool
+Architecture: any
+Depends: libpmem (=\${binary:Version}), \${shlibs:Depends}, \${misc:Depends}
+Description: Persistent Memory pool management support library
+ libpmempool provides a set of utilities for management, diagnostics and repair
+ of persistent memory pools. A pool in this context means a pmemobj pool,
+ pmemblk pool, pmemlog pool or BTT layout, independent of the underlying
+ storage. libpmempool is for applications that need high reliability or
+ built-in troubleshooting. It may also be useful for testing and debugging
+ purposes.
+
+Package: libpmempool-dev
+Section: libdevel
+Architecture: any
+Depends: libpmempool (=\${binary:Version}), libpmem-dev, \${shlibs:Depends}, \${misc:Depends}
+Description: Development files for libpmempool
+ libpmempool provides a set of utilities for management, diagnostics and repair
+ of persistent memory pools.
+ .
+ This package contains libraries and header files used for linking programs
+ against libpmempool.
+
+Package: $PACKAGE_NAME-dbg
+Section: debug
+Priority: optional
+Architecture: any
+Depends: libpmem (=\${binary:Version}), libpmemblk (=\${binary:Version}), libpmemlog (=\${binary:Version}), libpmemobj (=\${binary:Version}), libpmempool (=\${binary:Version}), \${misc:Depends}
+Description: Debug symbols for PMDK libraries
+ Debug symbols for all PMDK libraries.
+
+Package: pmempool
+Section: misc
+Architecture: any
+Priority: optional
+Depends: \${shlibs:Depends}, \${misc:Depends}
+Description: utility for management and off-line analysis of PMDK memory pools
+ This utility is a standalone tool that manages Persistent Memory pools
+ created by PMDK libraries. It provides a set of utilities for
+ administration and diagnostics of Persistent Memory pools. Pmempool may be
+ useful for troubleshooting by system administrators and users of the
+ applications based on PMDK libraries.
+
+Package: pmreorder
+Section: misc
+Architecture: any
+Priority: optional
+Depends: \${shlibs:Depends}, \${misc:Depends}
+Description: tool to parse and replay pmemcheck logs
+ Pmreorder is a standalone tool, implemented as a collection of Python
+ scripts, that parses and replays the log of operations collected by
+ pmemcheck - a persistent memory checking tool. Pmreorder performs the store
+ reordering between persistent memory barriers - a sequence of flush-fence
+ operations. It uses a consistency checking routine provided in the command
+ line options to check whether files are in a consistent state.
+EOF + +cp LICENSE debian/copyright + +if [ -n "$NDCTL_ENABLE" ]; then + pass_ndctl_enable="NDCTL_ENABLE=$NDCTL_ENABLE" +else + pass_ndctl_enable="" +fi + +cat << EOF > debian/rules +#!/usr/bin/make -f +#export DH_VERBOSE=1 +%: + dh \$@ + +override_dh_strip: + dh_strip --dbg-package=$PACKAGE_NAME-dbg + +override_dh_auto_build: + dh_auto_build -- EXPERIMENTAL=${EXPERIMENTAL} prefix=/$PREFIX libdir=/$LIB_DIR includedir=/$INC_DIR docdir=/$DOC_DIR man1dir=/$MAN1_DIR man3dir=/$MAN3_DIR man5dir=/$MAN5_DIR man7dir=/$MAN7_DIR sysconfdir=/etc bashcompdir=/usr/share/bash-completion/completions NORPATH=1 ${pass_ndctl_enable} SRCVERSION=$SRCVERSION PMEM2_INSTALL=${PMEM2_INSTALL} + +override_dh_auto_install: + dh_auto_install -- EXPERIMENTAL=${EXPERIMENTAL} prefix=/$PREFIX libdir=/$LIB_DIR includedir=/$INC_DIR docdir=/$DOC_DIR man1dir=/$MAN1_DIR man3dir=/$MAN3_DIR man5dir=/$MAN5_DIR man7dir=/$MAN7_DIR sysconfdir=/etc bashcompdir=/usr/share/bash-completion/completions NORPATH=1 ${pass_ndctl_enable} SRCVERSION=$SRCVERSION PMEM2_INSTALL=${PMEM2_INSTALL} + find -path './debian/*usr/share/man/man*/*.gz' -exec gunzip {} \; + +override_dh_install: + mkdir -p debian/tmp/usr/share/pmdk/ + cp utils/pmdk.magic debian/tmp/usr/share/pmdk/ + dh_install + +${CHECK_CMD} +EOF + +chmod +x debian/rules + +mkdir debian/source + +ITP_BUG_EXCUSE="# This is our first package but we do not want to upload it yet. +# Please refer to Debian Developer's Reference section 5.1 (New packages) for details: +# https://www.debian.org/doc/manuals/developers-reference/pkgs.html#newpackage" + +cat << EOF > debian/source/format +3.0 (quilt) +EOF + +cat << EOF > debian/libpmem.install +$LIB_DIR/libpmem.so.* +usr/share/pmdk/pmdk.magic +$MAN5_DIR/poolset.5 +EOF + +cat $MAGIC_INSTALL > debian/libpmem.postinst +sed -i '1s/.*/\#\!\/bin\/bash/' debian/libpmem.postinst +echo $'\n#DEBHELPER#\n' >> debian/libpmem.postinst +cat $MAGIC_UNINSTALL > debian/libpmem.prerm +sed -i '1s/.*/\#\!\/bin\/bash/' debian/libpmem.prerm +echo $'\n#DEBHELPER#\n' >> debian/libpmem.prerm + +cat << EOF > debian/libpmem.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmem: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmem-dev.install +$LIB_DIR/pmdk_debug/libpmem.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmem.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmem.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmem.so +$LIB_DIR/pkgconfig/libpmem.pc +$INC_DIR/libpmem.h +$MAN7_DIR/libpmem.7 +$MAN3_DIR/pmem_*.3 +EOF + +cat << EOF > debian/libpmem-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +# pmdk provides second set of libraries for debugging. +# These are in /usr/lib/$arch/pmdk_dbg/, but still trigger ldconfig. 
+# Related issue: https://github.com/pmem/issues/issues/841 +libpmem-dev: package-has-unnecessary-activation-of-ldconfig-trigger + +EOF + +cat << EOF > debian/libpmemblk.install +$LIB_DIR/libpmemblk.so.* +EOF + +cat << EOF > debian/libpmemblk.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmemblk: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmemblk-dev.install +$LIB_DIR/pmdk_debug/libpmemblk.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemblk.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemblk.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmemblk.so +$LIB_DIR/pkgconfig/libpmemblk.pc +$INC_DIR/libpmemblk.h +$MAN7_DIR/libpmemblk.7 +$MAN3_DIR/pmemblk_*.3 +EOF + +cat << EOF > debian/libpmemblk-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +# pmdk provides second set of libraries for debugging. +# These are in /usr/lib/$arch/pmdk_dbg/, but still trigger ldconfig. +# Related issue: https://github.com/pmem/issues/issues/841 +libpmemblk-dev: package-has-unnecessary-activation-of-ldconfig-trigger +EOF + +cat << EOF > debian/libpmemlog.install +$LIB_DIR/libpmemlog.so.* +EOF + +cat << EOF > debian/libpmemlog.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmemlog: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmemlog-dev.install +$LIB_DIR/pmdk_debug/libpmemlog.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemlog.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemlog.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmemlog.so +$LIB_DIR/pkgconfig/libpmemlog.pc +$INC_DIR/libpmemlog.h +$MAN7_DIR/libpmemlog.7 +$MAN3_DIR/pmemlog_*.3 +EOF + +cat << EOF > debian/libpmemlog-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +# pmdk provides second set of libraries for debugging. +# These are in /usr/lib/$arch/pmdk_dbg/, but still trigger ldconfig. 
+# Related issue: https://github.com/pmem/issues/issues/841 +libpmemlog-dev: package-has-unnecessary-activation-of-ldconfig-trigger +EOF + +cat << EOF > debian/libpmemobj.install +$LIB_DIR/libpmemobj.so.* +EOF + +cat << EOF > debian/libpmemobj.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmemobj: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmemobj-dev.install +$LIB_DIR/pmdk_debug/libpmemobj.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemobj.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmemobj.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmemobj.so +$LIB_DIR/pkgconfig/libpmemobj.pc +$INC_DIR/libpmemobj.h +$INC_DIR/libpmemobj/*.h +$MAN7_DIR/libpmemobj.7 +$MAN3_DIR/pmemobj_*.3 +$MAN3_DIR/pobj_*.3 +$MAN3_DIR/oid_*.3 +$MAN3_DIR/toid*.3 +$MAN3_DIR/direct_*.3 +$MAN3_DIR/d_r*.3 +$MAN3_DIR/tx_*.3 +EOF + +cat << EOF > debian/libpmemobj-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +# pmdk provides second set of libraries for debugging. +# These are in /usr/lib/$arch/pmdk_dbg/, but still trigger ldconfig. +# Related issue: https://github.com/pmem/issues/issues/841 +libpmemobj-dev: package-has-unnecessary-activation-of-ldconfig-trigger +EOF + +cat << EOF > debian/libpmempool.install +$LIB_DIR/libpmempool.so.* +EOF + +cat << EOF > debian/libpmempool.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +libpmempool: package-name-doesnt-match-sonames +EOF + +cat << EOF > debian/libpmempool-dev.install +$LIB_DIR/pmdk_debug/libpmempool.a $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmempool.so $LIB_DIR/pmdk_dbg/ +$LIB_DIR/pmdk_debug/libpmempool.so.* $LIB_DIR/pmdk_dbg/ +$LIB_DIR/libpmempool.so +$LIB_DIR/pkgconfig/libpmempool.pc +$INC_DIR/libpmempool.h +$MAN7_DIR/libpmempool.7 +$MAN3_DIR/pmempool_*.3 +EOF + +cat << EOF > debian/libpmempool-dev.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +# The following warnings are triggered by a bug in debhelper: +# https://bugs.debian.org/204975 +postinst-has-useless-call-to-ldconfig +postrm-has-useless-call-to-ldconfig +# We do not want to compile with -O2 for debug version +hardening-no-fortify-functions $LIB_DIR/pmdk_dbg/* +# pmdk provides second set of libraries for debugging. +# These are in /usr/lib/$arch/pmdk_dbg/, but still trigger ldconfig. 
+# Related issue: https://github.com/pmem/issues/issues/841 +libpmempool-dev: package-has-unnecessary-activation-of-ldconfig-trigger +EOF + +cat << EOF > debian/$PACKAGE_NAME-dbg.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +EOF + +cat << EOF > debian/pmempool.install +usr/bin/pmempool +$MAN1_DIR/pmempool.1 +$MAN1_DIR/pmempool-*.1 +usr/share/bash-completion/completions/pmempool +EOF + +cat << EOF > debian/pmempool.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +EOF + +cat << EOF > debian/pmreorder.install +usr/bin/pmreorder +usr/share/pmreorder/*.py +$MAN1_DIR/pmreorder.1 +EOF + +cat << EOF > debian/pmreorder.lintian-overrides +$ITP_BUG_EXCUSE +new-package-should-close-itp-bug +EOF + +# librpmem & rpmemd +if [ "${BUILD_RPMEM}" = "y" -a "${RPMEM_DPKG}" = "y" ] +then + append_rpmem_control; + rpmem_install_triggers_overrides; +fi + +# libpmem2 +if [ "${PMEM2_INSTALL}" == "y" ] +then + append_libpmem2_control; + libpmem2_install_triggers_overrides; +fi + +# daxio +if [ "${NDCTL_ENABLE}" != "n" ] +then + append_daxio_control; + daxio_install_triggers_overrides; +fi + +# Convert ChangeLog to debian format +CHANGELOG_TMP=changelog.tmp +dch --create --empty --package $PACKAGE_NAME -v $PACKAGE_VERSION-$PACKAGE_RELEASE -M -c $CHANGELOG_TMP +touch debian/changelog +head -n1 $CHANGELOG_TMP >> debian/changelog +echo "" >> debian/changelog +convert_changelog ChangeLog >> debian/changelog +echo "" >> debian/changelog +tail -n1 $CHANGELOG_TMP >> debian/changelog +rm $CHANGELOG_TMP + +# This is our first release but we do +debuild --preserve-envvar=EXTRA_CFLAGS_RELEASE \ + --preserve-envvar=EXTRA_CFLAGS_DEBUG \ + --preserve-envvar=EXTRA_CFLAGS \ + --preserve-envvar=EXTRA_CXXFLAGS \ + --preserve-envvar=EXTRA_LDFLAGS \ + --preserve-envvar=NDCTL_ENABLE \ + -us -uc -b + +cd $OLD_DIR + +find $WORKING_DIR -name "*.deb"\ + -or -name "*.dsc"\ + -or -name "*.changes"\ + -or -name "*.orig.tar.gz"\ + -or -name "*.debian.tar.gz" | while read FILE +do + mv -v $FILE $OUT_DIR/ +done + +exit 0 diff --git a/src/pmdk/utils/build-rpm.sh b/src/pmdk/utils/build-rpm.sh new file mode 100755 index 000000000..759a1fe1c --- /dev/null +++ b/src/pmdk/utils/build-rpm.sh @@ -0,0 +1,263 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2020, Intel Corporation + +# +# build-rpm.sh - Script for building rpm packages +# + +set -e + +SCRIPT_DIR=$(dirname $0) +source $SCRIPT_DIR/pkg-common.sh + +check_tool rpmbuild +check_file $SCRIPT_DIR/pkg-config.sh +source $SCRIPT_DIR/pkg-config.sh + +# +# usage -- print usage message and exit +# +usage() +{ + [ "$1" ] && echo Error: $1 + cat >&2 < $RPM_SPEC_FILE + +if [ "$DISTRO" = "SLES_like" ] +then + sed -i '/^#.*bugzilla.redhat/d' $RPM_SPEC_FILE +fi + +# do not split on space +IFS=$'\n' + +# experimental features +if [ "${EXPERIMENTAL}" = "y" ] +then + # no experimental features for now + RPMBUILD_OPTS+=( ) +fi + +# libpmem2 +if [ "${PMEM2_INSTALL}" == "y" ] +then + RPMBUILD_OPTS+=(--define "_pmem2_install 1") +fi + +# librpmem & rpmemd +if [ "${BUILD_RPMEM}" = "y" ] +then + RPMBUILD_OPTS+=(--with fabric) +else + RPMBUILD_OPTS+=(--without fabric) +fi + +# daxio & RAS +if [ "${NDCTL_ENABLE}" = "n" ] +then + RPMBUILD_OPTS+=(--without ndctl) +else + RPMBUILD_OPTS+=(--with ndctl) +fi + +# use specified testconfig file or default +if [[( -n "${TEST_CONFIG_FILE}") && ( -f "$TEST_CONFIG_FILE" ) ]] +then + echo "Test config file: $TEST_CONFIG_FILE" + RPMBUILD_OPTS+=(--define "_testconfig $TEST_CONFIG_FILE") +else + echo -e "Test 
config file $TEST_CONFIG_FILE does not exist.\n"\ + "Default test config will be used." +fi + +# run make check or not +if [ "${BUILD_PACKAGE_CHECK}" == "n" ] +then + RPMBUILD_OPTS+=(--define "_skip_check 1") +fi + +tar zcf $PACKAGE_TARBALL $PACKAGE_SOURCE + +# Create directory structure for rpmbuild +mkdir -v BUILD SPECS + +echo "opts: ${RPMBUILD_OPTS[@]}" + +rpmbuild --define "_topdir `pwd`"\ + --define "_rpmdir ${OUT_DIR}"\ + --define "_srcrpmdir ${OUT_DIR}"\ + -ta $PACKAGE_TARBALL \ + ${RPMBUILD_OPTS[@]} + +echo "Building rpm packages done" + +exit 0 diff --git a/src/pmdk/utils/check-area.sh b/src/pmdk/utils/check-area.sh new file mode 100755 index 000000000..911556b91 --- /dev/null +++ b/src/pmdk/utils/check-area.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2020, Intel Corporation + +# +# Finds applicable area name for specified commit id. +# + +if [ -z "$1" ]; then + echo "Missing commit id argument." + exit 1 +fi + +files=$(git show $1 --format=oneline --name-only | grep -v -e "$1") + +git show -q $1 | cat + +echo +echo "Modified files:" +echo "$files" + +function categorize() { + category=$1 + shift + cat_files=`echo "$files" | grep $*` + + if [ -n "${cat_files}" ]; then + echo "$category" + files=`echo "$files" | grep -v $*` + fi +} + +echo +echo "Areas computed basing on the list of modified files: (see utils/check-area.sh for full algorithm)" + +categorize core -e "^src/core/" +categorize pmem -e "^src/libpmem/" -e "^src/include/libpmem.h" +categorize pmem2 -e "^src/libpmem2/" -e "^src/include/libpmem2.h" +categorize rpmem -e "^src/librpmem/" -e "^src/include/librpmem.h" -e "^src/tools/rpmemd/" -e "^src/rpmem_common/" +categorize log -e "^src/libpmemlog/" -e "^src/include/libpmemlog.h" +categorize blk -e "^src/libpmemblk/" -e "^src/include/libpmemblk.h" +categorize obj -e "^src/libpmemobj/" -e "^src/include/libpmemobj.h" -e "^src/include/libpmemobj/" +categorize pool -e "^src/libpmempool/" -e "^src/include/libpmempool.h" -e "^src/tools/pmempool/" +categorize benchmark -e "^src/benchmarks/" +categorize examples -e "^src/examples/" +categorize daxio -e "^src/tools/daxio/" +categorize pmreorder -e "^src/tools/pmreorder/" +categorize test -e "^src/test/" +categorize doc -e "^doc/" -e ".md\$" -e "^ChangeLog" -e "README" +categorize common -e "^src/common/" \ + -e "^utils/" \ + -e ".inc\$" \ + -e ".yml\$" \ + -e ".gitattributes" \ + -e ".gitignore" \ + -e "^.mailmap\$" \ + -e "^src/PMDK.sln\$" \ + -e "Makefile\$" \ + -e "^src/freebsd/" \ + -e "^src/windows/" \ + -e "^src/include/pmemcompat.h" + +echo +echo "If the above list contains more than 1 entry, please consider splitting" +echo "your change into more commits, unless those changes don't make sense " +echo "individually (they do not build, tests do not pass, etc)." +echo "For example, it's perfectly fine to use 'obj' prefix for one commit that" +echo "changes libpmemobj source code, its tests and documentation." + +if [ -n "$files" ]; then + echo + echo "Uncategorized files:" + echo "$files" +fi diff --git a/src/pmdk/utils/check-commit.sh b/src/pmdk/utils/check-commit.sh new file mode 100755 index 000000000..3985e40c3 --- /dev/null +++ b/src/pmdk/utils/check-commit.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# Used to check whether all the commit messages in a pull request +# follow the GIT/PMDK guidelines. 
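+#
+# A valid subject line starts with one of the area prefixes listed in AREAS
+# below, followed by a colon, e.g. "obj: fix locking in transaction abort"
+# (an illustrative subject, not an actual commit).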
+#
+# usage: ./check-commit.sh commit
+#
+
+if [ -z "$1" ]; then
+	echo "Usage: check-commit.sh commit-id"
+	exit 1
+fi
+
+echo "Checking $1"
+
+subject=$(git log --format="%s" -n 1 $1)
+
+if [[ $subject =~ ^Merge.* ]]; then
+	# skip
+	exit 0
+fi
+
+if [[ $subject =~ ^Revert.* ]]; then
+	# skip
+	exit 0
+fi
+
+# valid area names
+AREAS="pmem\|pmem2\|rpmem\|log\|blk\|obj\|pool\|test\|benchmark\|examples\|doc\|core\|common\|daxio\|pmreorder"
+
+prefix=$(echo $subject | sed -n "s/^\($AREAS\)\:.*/\1/p")
+
+if [ "$prefix" = "" ]; then
+	echo "FAIL: subject line in commit message does not contain valid area name"
+	echo
+	`dirname $0`/check-area.sh $1
+	exit 1
+fi
+
+commit_len=$(git log --format="%s%n%b" -n 1 $1 | wc -L)
+
+if [ $commit_len -gt 73 ]; then
+	echo "FAIL: commit message exceeds 72 chars per line ($commit_len)"
+	echo
+	git log -n 1 $1 | cat
+	exit 1
+fi
diff --git a/src/pmdk/utils/check-commits.sh b/src/pmdk/utils/check-commits.sh
new file mode 100755
index 000000000..762db22db
--- /dev/null
+++ b/src/pmdk/utils/check-commits.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# Used to check whether all the commit messages in a pull request
+# follow the GIT/PMDK guidelines.
+#
+# usage: ./check-commits.sh [range]
+#
+
+if [ -z "$1" ]; then
+	# on CI run this check only for pull requests
+	if [ -n "$CI_REPO_SLUG" ]; then
+		if [[ "$CI_REPO_SLUG" != "$GITHUB_REPO" \
+		    || $CI_EVENT_TYPE != "pull_request" ]];
+		then
+			echo "SKIP: $0 can only be executed for pull requests to $GITHUB_REPO"
+			exit 0
+		fi
+	fi
+	# CI_COMMIT_RANGE can be invalid for force pushes - use another
+	# method to determine the list of commits
+	if [[ $(git rev-list $CI_COMMIT_RANGE 2>/dev/null) || -n "$CI_COMMIT_RANGE" ]]; then
+		MERGE_BASE=$(echo $CI_COMMIT_RANGE | cut -d. -f1)
+		[ -z $MERGE_BASE ] && \
+			MERGE_BASE=$(git log --pretty="%cN:%H" | grep GitHub | head -n1 | cut -d: -f2)
+		RANGE=$MERGE_BASE..$CI_COMMIT
+	else
+		MERGE_BASE=$(git log --pretty="%cN:%H" | grep GitHub | head -n1 | cut -d: -f2)
+		RANGE=$MERGE_BASE..HEAD
+	fi
+else
+	RANGE="$1"
+fi
+
+COMMITS=$(git log --pretty=%H $RANGE)
+
+set -e
+
+for commit in $COMMITS; do
+	`dirname $0`/check-commit.sh $commit
+done
diff --git a/src/pmdk/utils/check-manpage b/src/pmdk/utils/check-manpage
new file mode 100755
index 000000000..d0c0d4d2e
--- /dev/null
+++ b/src/pmdk/utils/check-manpage
@@ -0,0 +1,62 @@
+#!/bin/bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2019, Intel Corporation

+# check-manpage -- a tool to check a single man page for errors
+#
+# While it can handle multiple files, it's recommended to use
+# check-manpages instead.
+set -e
+
+check_link()
+{
+	[ $(wc -l <"$file") = 1 ] ||
+		{ echo ".so link isn't the only line" && return; }
+	link=$(cat "$file")
+	link=${link#.so }
+	[ "${link##*/}" = "$link" ] ||
+		grep -q '^\.so man\([0-9]\)/[a-z0-9+_-]\+\.\1$' "$file" ||
+		{ echo ".so link directory does not match manX" && return; }
+	[ -e "${link##*/}" ] ||
+		{ echo ".so link target doesn't exist: ${link##*/}" && return; }
+}
+
+for m in "$@"; do
+	dir="$(dirname $m)"
+	file="$(basename $m)"
+	[ -n "$dir" ] && pushd "$dir" >/dev/null
+
+	if grep -q '^\.so' "$file"; then
+		err=$(check_link)
+		[ -z "$err" ] || {
+			echo >&2 "$file: $err"
+			FAILED=1
+		}
+		popd >/dev/null 2>/dev/null
+		continue
+	fi
+
+	# man can emit warnings and errors. Even non-fatal errors are normally
+	# suppressed if a pager is in use (i.e., all interactive usage).
Common
+	# messages include an unknown macro, an unbreakable line, etc.
+	err=$(MANWIDTH=80 man --warnings -E UTF-8 -l -Tutf8 -Z "$file" 2>&1 >/dev/null|
+		grep -v 'cannot adjust line' || true)
+	[ -z "$err" ] || {
+		echo >&2 "$file: $err"
+		FAILED=1
+	}
+
+	# If a "NAME" section exists, call lexgrog to see if it's properly
+	# formatted.
+	if grep -q '^\.SH NAME' "$file"; then
+		if ! lexgrog "$file" >/dev/null; then
+			# lexgrog doesn't give any interesting messages.
+			echo >&2 "lexgrog failed on $file"
+			FAILED=1
+		fi
+	fi
+
+	popd >/dev/null 2>/dev/null
+done
+
+exit $FAILED
diff --git a/src/pmdk/utils/check-manpages b/src/pmdk/utils/check-manpages
new file mode 100755
index 000000000..415db124c
--- /dev/null
+++ b/src/pmdk/utils/check-manpages
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2019-2020, Intel Corporation

+# check-manpages -- a tool to check man pages for common errors

+MANS="$*"
+[ -n "$MANS" ] || MANS="$(find doc -name '*.1' -o -name '*.3' -o -name '*.5' -o -name '*.7')"
+[ -n "$MANS" ] || { echo >&2 "No man pages given, and none found in doc/"; exit 1;}
+
+for page in $MANS;do
+	if [ "${page/rpmem/}" != "$page" ] && [ "$BUILD_RPMEM" != "y" ]; then
+		continue
+	fi
+	echo $page
+done | xargs -P `nproc` -n1 -- utils/check-manpage
diff --git a/src/pmdk/utils/check-os.sh b/src/pmdk/utils/check-os.sh
new file mode 100755
index 000000000..64d371c13
--- /dev/null
+++ b/src/pmdk/utils/check-os.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2019, Intel Corporation

+#
+# Used to check that there are no banned functions in a .o file
+#
+# usage: ./check-os.sh [os.h path] [.o file] [.c file]

+EXCLUDE="os_posix|os_thread_posix"
+if [[ $2 =~ $EXCLUDE ]]; then
+	echo "skip $2"
+	exit 0
+fi
+
+symbols=$(nm --demangle --undefined-only --format=posix $2 | sed 's/ U *//g')
+functions=$(cat $1 | tr '\n' '|')
+functions=${functions%?} # remove trailing | character
+out=$(
+	for sym in $symbols
+	do
+		grep -wE $functions <<<"$sym"
+	done | sed 's/$/\(\)/g')
+
+[[ ! -z $out ]] &&
+	echo -e "`pwd`/$3:1: non-wrapped function(s):\n$out\nplease use os wrappers" &&
+	rm -f $2 && # remove .o file as it doesn't match requirements
+	exit 1
+
+exit 0
diff --git a/src/pmdk/utils/check-shebang.sh b/src/pmdk/utils/check-shebang.sh
new file mode 100755
index 000000000..1051d178b
--- /dev/null
+++ b/src/pmdk/utils/check-shebang.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2019, Intel Corporation
+#
+# utils/check-shebang.sh -- interpreter directive check script
+#
+set -e
+
+err_count=0
+
+for file in $@ ; do
+	[ ! -f $file ] && continue
+	SHEBANG=`head -n1 $file | cut -d" " -f1`
+	[ "${SHEBANG:0:2}" != "#!" ] && continue
+	if [ "$SHEBANG" != "#!/usr/bin/env" -a $SHEBANG != "#!/bin/sh" ]; then
+		INTERP=`echo $SHEBANG | rev | cut -d"/" -f1 | rev`
+		echo "$file:1: error: invalid interpreter directive:" >&2
+		echo "	(is: \"$SHEBANG\", should be: \"#!/usr/bin/env $INTERP\")" >&2
+		((err_count+=1))
+	fi
+done
+
+if [ "$err_count" == "0" ]; then
+	echo "Interpreter directives are OK."
+else
+	echo "Found $err_count errors in interpreter directives!"
>&2 + err_count=1 +fi + +exit $err_count diff --git a/src/pmdk/utils/check_license/.gitignore b/src/pmdk/utils/check_license/.gitignore new file mode 100644 index 000000000..8891a7f15 --- /dev/null +++ b/src/pmdk/utils/check_license/.gitignore @@ -0,0 +1 @@ +check-license diff --git a/src/pmdk/utils/check_license/check-headers.sh b/src/pmdk/utils/check_license/check-headers.sh new file mode 100755 index 000000000..f79453bd3 --- /dev/null +++ b/src/pmdk/utils/check_license/check-headers.sh @@ -0,0 +1,192 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# check-headers.sh - check copyright and license in source files + +SELF=$0 + +function usage() { + echo "Usage: $SELF [-h|-v|-a]" + echo " -h, --help this help message" + echo " -v, --verbose verbose mode" + echo " -a, --all check all files (only modified files are checked by default)" +} + +if [ "$#" -lt 2 ]; then + usage >&2 + exit 2 +fi + +SOURCE_ROOT=$1 +shift +LICENSE=$1 +shift + +PATTERN=`mktemp` +TMP=`mktemp` +TMP2=`mktemp` +TEMPFILE=`mktemp` +rm -f $PATTERN $TMP $TMP2 + +if [ "$1" == "-h" -o "$1" == "--help" ]; then + usage + exit 0 +fi + +export GIT="git -C ${SOURCE_ROOT}" +$GIT rev-parse || exit 1 + +if [ -f $SOURCE_ROOT/.git/shallow ]; then + SHALLOW_CLONE=1 + echo + echo "Warning: This is a shallow clone. Checking dates in copyright headers" + echo " will be skipped in case of files that have no history." + echo +else + SHALLOW_CLONE=0 +fi + +VERBOSE=0 +CHECK_ALL=0 +while [ "$1" != "" ]; do + case $1 in + -v|--verbose) + VERBOSE=1 + ;; + -a|--all) + CHECK_ALL=1 + ;; + esac + shift +done + +if [ $CHECK_ALL -eq 0 ]; then + CURRENT_COMMIT=$($GIT log --pretty=%H -1) + MERGE_BASE=$($GIT merge-base HEAD origin/master 2>/dev/null) + [ -z $MERGE_BASE ] && \ + MERGE_BASE=$($GIT log --pretty="%cN:%H" | grep GitHub | head -n1 | cut -d: -f2) + [ -z $MERGE_BASE -o "$CURRENT_COMMIT" = "$MERGE_BASE" ] && \ + CHECK_ALL=1 +fi + +if [ $CHECK_ALL -eq 1 ]; then + echo "Checking copyright headers of all files..." + GIT_COMMAND="ls-tree -r --name-only HEAD" +else + if [ $VERBOSE -eq 1 ]; then + echo + echo "Warning: will check copyright headers of modified files only," + echo " in order to check all files issue the following command:" + echo " $ $SELF -a" + echo " (e.g.: $ $SELF $SOURCE_ROOT $LICENSE -a)" + echo + fi + echo "Checking copyright headers of modified files only..." + GIT_COMMAND="diff --name-only $MERGE_BASE $CURRENT_COMMIT" +fi + +FILES=$($GIT $GIT_COMMAND | ${SOURCE_ROOT}/utils/check_license/file-exceptions.sh | \ + grep -E -e '*\.[chs]$' -e '*\.[ch]pp$' -e '*\.sh$' \ + -e '*\.py$' -e '*\.link$' -e 'Makefile*' -e 'TEST*' \ + -e '/common.inc$' -e '/match$' -e '/check_whitespace$' \ + -e 'LICENSE$' -e 'CMakeLists.txt$' -e '*\.cmake$' | \ + xargs) + +RV=0 +for file in $FILES ; do + # The src_path is a path which should be used in every command except git. + # git is called with -C flag so filepaths should be relative to SOURCE_ROOT + src_path="${SOURCE_ROOT}/$file" + [ ! -f $src_path ] && continue + # ensure that file is UTF-8 encoded + ENCODING=`file -b --mime-encoding $src_path` + iconv -f $ENCODING -t "UTF-8" $src_path > $TEMPFILE + + if ! grep -q "SPDX-License-Identifier: $LICENSE" $src_path; then + echo "$src_path:1: no $LICENSE SPDX tag found " >&2 + RV=1 + elif [[ $file == *.c ]] || [[ $file == *.cpp ]]; then + if ! 
grep -q -e "\/\/ SPDX-License-Identifier: $LICENSE" $src_path; then + echo "$src_path:1: wrong format of $LICENSE SPDX tag" >&2 + RV=1 + fi + elif [[ $file == *.h ]] || [[ $file == *.hpp ]]; then + if ! grep -q -e "\/\* SPDX-License-Identifier: $LICENSE \*\/" $src_path; then + echo "$src_path:1: wrong format of $LICENSE SPDX tag" >&2 + RV=1 + fi + elif [[ $file != LICENSE ]]; then + if ! grep -q -e "# SPDX-License-Identifier: $LICENSE" $src_path; then + echo "$src_path:1: wrong format of $LICENSE SPDX tag" >&2 + RV=1 + fi + fi + + if [ $SHALLOW_CLONE -eq 0 ]; then + $GIT log --no-merges --format="%ai %aE" -- $file | sort > $TMP + else + # mark the grafted commits (commits with no parents) + $GIT log --no-merges --format="%ai %aE grafted-%p-commit" -- $file | sort > $TMP + fi + + # skip checking dates for non-Intel commits + [[ ! $(tail -n1 $TMP) =~ "@intel.com" ]] && continue + + # skip checking dates for new files + [ $(cat $TMP | wc -l) -le 1 ] && continue + + # grep out the grafted commits (commits with no parents) + # and skip checking dates for non-Intel commits + grep -v -e "grafted--commit" $TMP | grep -e "@intel.com" > $TMP2 + + [ $(cat $TMP2 | wc -l) -eq 0 ] && continue + + FIRST=`head -n1 $TMP2` + LAST=` tail -n1 $TMP2` + + YEARS=`sed ' +/Copyright [0-9-]\+.*, Intel Corporation/!d +s/.*Copyright \([0-9]\+\)-\([0-9]\+\),.*/\1-\2/ +s/.*Copyright \([0-9]\+\),.*/\1-\1/' $src_path` + if [ -z "$YEARS" ]; then + echo >&2 "$src_path:1: No copyright years found" + RV=1 + continue + fi + + HEADER_FIRST=`echo $YEARS | cut -d"-" -f1` + HEADER_LAST=` echo $YEARS | cut -d"-" -f2` + + COMMIT_FIRST=`echo $FIRST | cut -d"-" -f1` + COMMIT_LAST=` echo $LAST | cut -d"-" -f1` + if [ "$COMMIT_FIRST" != "" -a "$COMMIT_LAST" != "" ]; then + if [ $HEADER_LAST -lt $COMMIT_LAST ]; then + if [ $HEADER_FIRST -lt $COMMIT_FIRST ]; then + COMMIT_FIRST=$HEADER_FIRST + fi + COMMIT_LAST=`date +%G` + if [ $COMMIT_FIRST -eq $COMMIT_LAST ]; then + NEW=$COMMIT_LAST + else + NEW=$COMMIT_FIRST-$COMMIT_LAST + fi + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + else + echo "$file:1: unknown commit dates" >&2 + RV=1 + fi +done +rm -f $TMP $TMP2 $TEMPFILE + +$(dirname "$0")/check-ms-license.pl $FILES + +# check if error found +if [ $RV -eq 0 ]; then + echo "Copyright headers are OK." +else + echo "Error(s) in copyright headers found!" 
>&2
+fi
+exit $RV
diff --git a/src/pmdk/utils/check_license/check-ms-license.pl b/src/pmdk/utils/check_license/check-ms-license.pl
new file mode 100755
index 000000000..fae3c9436
--- /dev/null
+++ b/src/pmdk/utils/check_license/check-ms-license.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020, Intel Corporation
+
+use Digest::MD5 "md5_hex";
+
+my $BSD3 = <<'EOF';
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+EOF
+
+my $err = 0;
+
+for my $f (@ARGV) {
+	open F, '<', $f or die "cannot open $f: $!\n";
+	local $/;
+	$_ = <F>;
+	close F;
+	next unless /Copyright.*(Microsoft Corporation|FUJITSU)/;
+
+	s/^ \*//mg;
+	s/^#//mg;
+	if (index($_, $BSD3) == -1) {
+		$err = 1;
+		print STDERR "Outside copyright but no/wrong license text in $f\n";
+	}
+}
+
+exit $err
diff --git a/src/pmdk/utils/check_license/file-exceptions.sh b/src/pmdk/utils/check_license/file-exceptions.sh
new file mode 100755
index 000000000..a3bdb7425
--- /dev/null
+++ b/src/pmdk/utils/check_license/file-exceptions.sh
@@ -0,0 +1,7 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+# file-exceptions.sh - filter out files not checked for copyright and license
+
+grep -v -E -e '/queue.h$' -e '/getopt.h$' -e '/getopt.c$' -e 'src/core/valgrind/' -e '/testconfig\...$'
diff --git a/src/pmdk/utils/check_sdk_version.py b/src/pmdk/utils/check_sdk_version.py
new file mode 100755
index 000000000..7dc134f01
--- /dev/null
+++ b/src/pmdk/utils/check_sdk_version.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2019-2020, Intel Corporation
+
+import argparse
+import os
+from subprocess import check_output, CalledProcessError
+import sys
+import shlex
+from xml.dom import minidom
+from xml.parsers.expat import ExpatError
+
+VALID_SDK_VERSION = '10.0.17134.0'
+
+
+def get_vcxproj_files(root_dir, ignored):
+    """Get a list of ".vcxproj" files under the PMDK directory."""
+    to_format = []
+    command = 'git ls-files *.vcxproj'
+    try:
+        output = check_output(shlex.split(command),
+                              cwd=root_dir).decode("UTF-8")
+    except CalledProcessError as e:
+        sys.exit('Error: "' + command + '" failed with returncode: ' +
+                 str(e.returncode))
+
+    for line in output.splitlines():
+        if not line:
+            continue
+        file_path = os.path.join(root_dir, line)
+        if os.path.isfile(file_path):
+            to_format.append(file_path)
+
+    return to_format
+
+
+def get_sdk_version(file):
+    """
+    Get the Windows SDK version declared in a single .vcxproj file.
+    """
+    tag = 'WindowsTargetPlatformVersion'
+    try:
+        xml_file = minidom.parse(file)
+    except ExpatError as e:
+        sys.exit('Error: "' + file + '" is not valid XML.\n' + str(e))
+    version_list = xml_file.getElementsByTagName(tag)
+    if len(version_list) != 1:
+        sys.exit('Error: the number of "' + tag + '" tags is not exactly 1.')
+    version = version_list[0].firstChild.data
+
+    return version
+
+
+def main():
+    parser = argparse.ArgumentParser(prog='check_sdk_version.py',
+                                     description='The script checks Windows SDK version in .vcxproj files.')
+    parser.add_argument('-d', '--directory',
+                        help='Directory of PMDK tree.', required=True)
+    args = parser.parse_args()
+    current_directory = args.directory
+    if not os.path.isdir(current_directory):
+        sys.exit('"' + current_directory + '" is not a directory.')
+
+    files = get_vcxproj_files(current_directory, '')
+    if not files:
+        sys.exit(0)
+    for file in files:
+        sdk_version = get_sdk_version(file)
+        if sdk_version != VALID_SDK_VERSION:
+            sys.exit('Wrong Windows SDK version: ' + sdk_version +
+                     ' in file: "' + file + '". 
Please use: ' + VALID_SDK_VERSION) + + +if __name__ == '__main__': + main() diff --git a/src/pmdk/utils/check_whitespace b/src/pmdk/utils/check_whitespace new file mode 100755 index 000000000..083b3e8ff --- /dev/null +++ b/src/pmdk/utils/check_whitespace @@ -0,0 +1,210 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2015-2020, Intel Corporation + +# +# check_whitespace -- scrub source tree for whitespace errors +# + +use strict; +use warnings; + +use File::Basename; +use File::Find; +use Encode; +use v5.16; + +my $Me = $0; +$Me =~ s,.*/,,; + +$SIG{HUP} = $SIG{INT} = $SIG{TERM} = $SIG{__DIE__} = sub { + die @_ if $^S; + + my $errstr = shift; + + die "$Me: ERROR: $errstr"; +}; + +my $Errcount = 0; + +# +# err -- emit error, keep total error count +# +sub err { + warn @_, "\n"; + $Errcount++; +} + +# +# decode_file_as_string -- slurp an entire file into memory and decode +# +sub decode_file_as_string { + my ($full, $file) = @_; + my $fh; + open($fh, '<', $full) or die "$full $!\n"; + + local $/; + $_ = <$fh>; + close $fh; + + # check known encodings or die + my $decoded; + my @encodings = ("UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE"); + + foreach my $enc (@encodings) { + eval { $decoded = decode( $enc, $_, Encode::FB_CROAK ) }; + + if (!$@) { + $decoded =~ s/\R/\n/g; + return $decoded; + } + } + + die "$Me: ERROR: Unknown file encoding"; +} + +# +# check_whitespace -- run the checks on the given file +# +sub check_whitespace { + my ($full, $file) = @_; + + my $line = 0; + my $eol; + my $nf = 0; + my $fstr = decode_file_as_string($full, $file); + my $empty = 0; + my $is_python = $full =~ /\.py$/; + + for (split /^/, $fstr) { + $line++; + if (!$is_python && /^$/) { + $empty++; + if ($empty > 1) { + err("$full:$line: ERROR duplicated empty line"); + } + } else { + $empty = 0; + } + + $eol = /[\n]/s; + if (/^\.nf$/) { + err("$full:$line: ERROR: nested .nf") if $nf; + $nf = 1; + } elsif (/^\.fi$/) { + $nf = 0; + } elsif ($nf == 0) { + chomp; + err("$full:$line: ERROR: trailing whitespace") if /\s$/; + err("$full:$line: ERROR: spaces before tabs") if / \t/; + } + } + + err("$full:$line: .nf without .fi") if $nf; + err("$full:$line: noeol") unless $eol; +} + +sub check_whitespace_with_exc { + my ($full) = @_; + + $_ = $full; + + return 0 if /^[.\/]*src\/common\/queue\.h/; + return 0 if /^[.\/]*src\/core\/valgrind\/.*\.h/; + + $_ = basename($full); + + return 0 unless /^(README.*|LICENSE.*|Makefile.*|CMakeLists.txt|.gitignore|TEST.*|RUNTESTS|check_whitespace|.*\.([chp13s]|sh|map|cpp|hpp|inc|PS1|ps1|py|md|cmake))$/; + return 0 if -z; + + check_whitespace($full, $_); + return 1; +} + +my $verbose = 0; +my $force = 0; +my $recursive = 0; + +sub check { + my ($file) = @_; + my $r; + + if ($force) { + $r = check_whitespace($file, basename($file)); + } else { + $r = check_whitespace_with_exc($file); + } + + if ($verbose) { + if ($r == 0) { + printf("skipped $file\n"); + } else { + printf("checked $file\n"); + } + } +} + +my @files = (); + +foreach my $arg (@ARGV) { + if ($arg eq '-v') { + $verbose = 1; + next; + } + if ($arg eq '-f') { + $force = 1; + next; + } + if ($arg eq '-r') { + $recursive = 1; + next; + } + if ($arg eq '-g') { + @files = `git ls-tree -r --name-only HEAD`; + chomp(@files); + next; + } + if ($arg eq '-h') { + printf "Options: + -g - check all files tracked by git + -r dir - recursively check all files in specified directory + -v verbose - print whether file was checked or not + -f force - disable blacklist\n"; + exit 1; + } + + if ($recursive == 1) { + 
find(sub { + my $full = $File::Find::name; + + if (!$force && + ($full eq './.git' || + $full eq './src/debug' || + $full eq './src/nondebug' || + $full eq './rpmbuild' || + $full eq './dpkgbuild')) { + $File::Find::prune = 1; + return; + } + + return unless -f; + + push @files, $full; + }, $arg); + + $recursive = 0; + next; + } + + push @files, $arg; +} + +if (!@files) { + printf "Empty file list!\n"; +} + +foreach (@files) { + check($_); +} + +exit $Errcount; diff --git a/src/pmdk/utils/copy-source.sh b/src/pmdk/utils/copy-source.sh new file mode 100755 index 000000000..5bb2d589d --- /dev/null +++ b/src/pmdk/utils/copy-source.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +# +# utils/copy-source.sh -- copy source files (from HEAD) to 'path_to_dir/pmdk' +# directory whether in git repository or not. +# +# usage: ./copy-source.sh [path_to_dir] [srcversion] + +set -e + +DESTDIR="$1" +SRCVERSION=$2 + +if [ -d .git ]; then + if [ -n "$(git status --porcelain)" ]; then + echo "Error: Working directory is dirty: $(git status --porcelain)" + exit 1 + fi +else + echo "Warning: You are not in git repository, working directory might be dirty." +fi + +mkdir -p "$DESTDIR"/pmdk +echo -n $SRCVERSION > "$DESTDIR"/pmdk/.version + +if [ -d .git ]; then + git archive HEAD | tar -x -C "$DESTDIR"/pmdk +else + find . \ + -maxdepth 1 \ + -not -name $(basename "$DESTDIR") \ + -not -name . \ + -exec cp -r "{}" "$DESTDIR"/pmdk \; +fi diff --git a/src/pmdk/utils/cstyle b/src/pmdk/utils/cstyle new file mode 100755 index 000000000..5221a2d56 --- /dev/null +++ b/src/pmdk/utils/cstyle @@ -0,0 +1,1037 @@ +#!/usr/bin/env perl +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Portions copyright 2017, Intel Corporation. +# +# @(#)cstyle 1.58 98/09/09 (from shannon) +#ident "%Z%%M% %I% %E% SMI" +# +# cstyle - check for some common stylistic errors. +# +# cstyle is a sort of "lint" for C coding style. +# It attempts to check for the style used in the +# kernel, sometimes known as "Bill Joy Normal Form". +# +# There's a lot this can't check for, like proper indentation +# of code blocks. There's also a lot more this could check for. 
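+#
+# For illustration (an added example, not from the original header):
+# a C line like
+#	if(i==0){
+# trips several of the checks below, e.g. "missing space between
+# keyword and paren", "missing space around relational operator" and
+# "missing space before left brace".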
+# +# A note to the non perl literate: +# +# perl regular expressions are pretty much like egrep +# regular expressions, with the following special symbols +# +# \s any space character +# \S any non-space character +# \w any "word" character [a-zA-Z0-9_] +# \W any non-word character +# \d a digit [0-9] +# \D a non-digit +# \b word boundary (between \w and \W) +# \B non-word boundary +# + +require 5.0; +use IO::File; +use Getopt::Std; +use strict; +use warnings; + +my $usage = +"usage: cstyle [-chpvCP] [-o constructs] file ... + -c check continuation indentation inside functions + -h perform heuristic checks that are sometimes wrong + -p perform some of the more picky checks + -v verbose + -C don't check anything in header block comments + -P check for use of non-POSIX types + -o constructs + allow a comma-separated list of optional constructs: + doxygen allow doxygen-style block comments (/** /*!) + splint allow splint-style lint comments (/*@ ... @*/) +"; + +my %opts; + +if (!getopts("cho:pvCP", \%opts)) { + print $usage; + exit 2; +} + +my $check_continuation = $opts{'c'}; +my $heuristic = $opts{'h'}; +my $picky = $opts{'p'}; +my $verbose = $opts{'v'}; +my $ignore_hdr_comment = $opts{'C'}; +my $check_posix_types = $opts{'P'}; + +my $doxygen_comments = 0; +my $splint_comments = 0; + +if (defined($opts{'o'})) { + for my $x (split /,/, $opts{'o'}) { + if ($x eq "doxygen") { + $doxygen_comments = 1; + } elsif ($x eq "splint") { + $splint_comments = 1; + } else { + print "cstyle: unrecognized construct \"$x\"\n"; + print $usage; + exit 2; + } + } +} + +my ($filename, $line, $prev); # shared globals + +my $fmt; +my $hdr_comment_start; + +if ($verbose) { + $fmt = "%s:%d: %s\n%s\n"; +} else { + $fmt = "%s:%d: %s\n"; +} + +if ($doxygen_comments) { + # doxygen comments look like "/*!" or "/**"; allow them. + $hdr_comment_start = qr/^\s*\/\*[\!\*]?$/; +} else { + $hdr_comment_start = qr/^\s*\/\*$/; +} + +# Note, following must be in single quotes so that \s and \w work right. +my $typename = '(int|char|short|long|unsigned|float|double' . + '|\w+_t|struct\s+\w+|union\s+\w+|FILE|BOOL)'; + +# mapping of old types to POSIX compatible types +my %old2posix = ( + 'unchar' => 'uchar_t', + 'ushort' => 'ushort_t', + 'uint' => 'uint_t', + 'ulong' => 'ulong_t', + 'u_int' => 'uint_t', + 'u_short' => 'ushort_t', + 'u_long' => 'ulong_t', + 'u_char' => 'uchar_t', + 'quad' => 'quad_t' +); + +my $lint_re = qr/\/\*(?: + ARGSUSED[0-9]*|NOTREACHED|LINTLIBRARY|VARARGS[0-9]*| + CONSTCOND|CONSTANTCOND|CONSTANTCONDITION|EMPTY| + FALLTHRU|FALLTHROUGH|LINTED.*?|PRINTFLIKE[0-9]*| + PROTOLIB[0-9]*|SCANFLIKE[0-9]*|CSTYLED.*? 
+ )\*\//x; + +my $splint_re = qr/\/\*@.*?@\*\//x; + +my $warlock_re = qr/\/\*\s*(?: + VARIABLES\ PROTECTED\ BY| + MEMBERS\ PROTECTED\ BY| + ALL\ MEMBERS\ PROTECTED\ BY| + READ-ONLY\ VARIABLES:| + READ-ONLY\ MEMBERS:| + VARIABLES\ READABLE\ WITHOUT\ LOCK:| + MEMBERS\ READABLE\ WITHOUT\ LOCK:| + LOCKS\ COVERED\ BY| + LOCK\ UNNEEDED\ BECAUSE| + LOCK\ NEEDED:| + LOCK\ HELD\ ON\ ENTRY:| + READ\ LOCK\ HELD\ ON\ ENTRY:| + WRITE\ LOCK\ HELD\ ON\ ENTRY:| + LOCK\ ACQUIRED\ AS\ SIDE\ EFFECT:| + READ\ LOCK\ ACQUIRED\ AS\ SIDE\ EFFECT:| + WRITE\ LOCK\ ACQUIRED\ AS\ SIDE\ EFFECT:| + LOCK\ RELEASED\ AS\ SIDE\ EFFECT:| + LOCK\ UPGRADED\ AS\ SIDE\ EFFECT:| + LOCK\ DOWNGRADED\ AS\ SIDE\ EFFECT:| + FUNCTIONS\ CALLED\ THROUGH\ POINTER| + FUNCTIONS\ CALLED\ THROUGH\ MEMBER| + LOCK\ ORDER: + )/x; + +my $err_stat = 0; # exit status + +if ($#ARGV >= 0) { + foreach my $arg (@ARGV) { + my $fh = new IO::File $arg, "r"; + if (!defined($fh)) { + printf "%s: can not open\n", $arg; + } else { + &cstyle($arg, $fh); + close $fh; + } + } +} else { + &cstyle("", *STDIN); +} +exit $err_stat; + +my $no_errs = 0; # set for CSTYLED-protected lines + +sub err($) { + my ($error) = @_; + unless ($no_errs) { + if ($verbose) { + printf $fmt, $filename, $., $error, $line; + } else { + printf $fmt, $filename, $., $error; + } + $err_stat = 1; + } +} + +sub err_prefix($$) { + my ($prevline, $error) = @_; + my $out = $prevline."\n".$line; + unless ($no_errs) { + printf $fmt, $filename, $., $error, $out; + $err_stat = 1; + } +} + +sub err_prev($) { + my ($error) = @_; + unless ($no_errs) { + printf $fmt, $filename, $. - 1, $error, $prev; + $err_stat = 1; + } +} + +sub cstyle($$) { + +my ($fn, $filehandle) = @_; +$filename = $fn; # share it globally + +my $in_cpp = 0; +my $next_in_cpp = 0; + +my $in_comment = 0; +my $in_header_comment = 0; +my $comment_done = 0; +my $in_warlock_comment = 0; +my $in_function = 0; +my $in_function_header = 0; +my $in_declaration = 0; +my $note_level = 0; +my $nextok = 0; +my $nocheck = 0; + +my $in_string = 0; + +my ($okmsg, $comment_prefix); + +$line = ''; +$prev = ''; +reset_indent(); + +line: while (<$filehandle>) { + s/\r?\n$//; # strip return and newline + + # save the original line, then remove all text from within + # double or single quotes, we do not want to check such text. + + $line = $_; + + # + # C allows strings to be continued with a backslash at the end of + # the line. We translate that into a quoted string on the previous + # line followed by an initial quote on the next line. + # + # (we assume that no-one will use backslash-continuation with character + # constants) + # + $_ = '"' . $_ if ($in_string && !$nocheck && !$in_comment); + + # + # normal strings and characters + # + s/'([^\\']|\\[^xX0]|\\0[0-9]*|\\[xX][0-9a-fA-F]*)'/''/g; + s/"([^\\"]|\\.)*"/\"\"/g; + + # + # detect string continuation + # + if ($nocheck || $in_comment) { + $in_string = 0; + } else { + # + # Now that all full strings are replaced with "", we check + # for unfinished strings continuing onto the next line. + # + $in_string = + (s/([^"](?:"")*)"([^\\"]|\\.)*\\$/$1""/ || + s/^("")*"([^\\"]|\\.)*\\$/""/); + } + + # + # figure out if we are in a cpp directive + # + $in_cpp = $next_in_cpp || /^\s*#/; # continued or started + $next_in_cpp = $in_cpp && /\\$/; # only if continued + + # strip off trailing backslashes, which appear in long macros + s/\s*\\$//; + + # an /* END CSTYLED */ comment ends a no-check block. 
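+	# (added illustration, not upstream: in C source the escape hatch is
+	#	/* BEGIN CSTYLED */
+	#	...lines exempt from every check...
+	#	/* END CSTYLED */
+	# while a bare /*CSTYLED*/ exempts only the next line.)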
+ if ($nocheck) { + if (/\/\* *END *CSTYLED *\*\//) { + $nocheck = 0; + } else { + reset_indent(); + next line; + } + } + + # a /*CSTYLED*/ comment indicates that the next line is ok. + if ($nextok) { + if ($okmsg) { + err($okmsg); + } + $nextok = 0; + $okmsg = 0; + if (/\/\* *CSTYLED.*\*\//) { + /^.*\/\* *CSTYLED *(.*) *\*\/.*$/; + $okmsg = $1; + $nextok = 1; + } + $no_errs = 1; + } elsif ($no_errs) { + $no_errs = 0; + } + + # check length of line. + # first, a quick check to see if there is any chance of being too long. + if (($line =~ tr/\t/\t/) * 7 + length($line) > 80) { + # yes, there is a chance. + # replace tabs with spaces and check again. + my $eline = $line; + 1 while $eline =~ + s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; + if (length($eline) > 80) { + # allow long line if it is user visible string + # find if line start from " or L" and ends + # with " + 2 optional characters + # (these characters can be i.e. '");' '" \' or '",' etc...) + if($eline =~ /^ *L?".*"[^"]{0,2}$/) { + # check if entire line is one string literal + $eline =~ s/^ *L?"//; + $eline =~ s/"[^"]{0,2}$//; + + if($eline =~ /[^\\]"|[^\\](\\\\)+"/) { + err("line > 80 characters"); + } + } else { + err("line > 80 characters"); + } + } + } + + # ignore NOTE(...) annotations (assumes NOTE is on lines by itself). + if ($note_level || /\b_?NOTE\s*\(/) { # if in NOTE or this is NOTE + s/[^()]//g; # eliminate all non-parens + $note_level += s/\(//g - length; # update paren nest level + next; + } + + # a /* BEGIN CSTYLED */ comment starts a no-check block. + if (/\/\* *BEGIN *CSTYLED *\*\//) { + $nocheck = 1; + } + + # a /*CSTYLED*/ comment indicates that the next line is ok. + if (/\/\* *CSTYLED.*\*\//) { + /^.*\/\* *CSTYLED *(.*) *\*\/.*$/; + $okmsg = $1; + $nextok = 1; + } + if (/\/\/ *CSTYLED/) { + /^.*\/\/ *CSTYLED *(.*)$/; + $okmsg = $1; + $nextok = 1; + } + + # universal checks; apply to everything + if (/\t +\t/) { + err("spaces between tabs"); + } + if (/ \t+ /) { + err("tabs between spaces"); + } + if (/\s$/) { + err("space or tab at end of line"); + } + if (/[^ \t(]\/\*/ && !/\w\(\/\*.*\*\/\);/) { + err("comment preceded by non-blank"); + } + + # is this the beginning or ending of a function? + # (not if "struct foo\n{\n") + if (/^{$/ && $prev =~ /\)\s*(const\s*)?(\/\*.*\*\/\s*)?\\?$/) { + $in_function = 1; + $in_declaration = 1; + $in_function_header = 0; + $prev = $line; + next line; + } + if (/^}\s*(\/\*.*\*\/\s*)*$/) { + if ($prev =~ /^\s*return\s*;/) { + err_prev("unneeded return at end of function"); + } + $in_function = 0; + reset_indent(); # we don't check between functions + $prev = $line; + next line; + } + if (/^\w*\($/) { + $in_function_header = 1; + } + + if ($in_warlock_comment && /\*\//) { + $in_warlock_comment = 0; + $prev = $line; + next line; + } + + # a blank line terminates the declarations within a function. + # XXX - but still a problem in sub-blocks. + if ($in_declaration && /^$/) { + $in_declaration = 0; + } + + if ($comment_done) { + $in_comment = 0; + $in_header_comment = 0; + $comment_done = 0; + } + # does this looks like the start of a block comment? + if (/$hdr_comment_start/) { + if (!/^\t*\/\*/) { + err("block comment not indented by tabs"); + } + $in_comment = 1; + /^(\s*)\//; + $comment_prefix = $1; + if ($comment_prefix eq "") { + $in_header_comment = 1; + } + $prev = $line; + next line; + } + # are we still in the block comment? 
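+	# (added illustration, not upstream: the shape these checks accept is
+	#	/*
+	#	 * comment text
+	#	 */
+	# i.e. every interior line continues with " *" under the same prefix.)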
+ if ($in_comment) { + if (/^$comment_prefix \*\/$/) { + $comment_done = 1; + } elsif (/\*\//) { + $comment_done = 1; + err("improper block comment close") + unless ($ignore_hdr_comment && $in_header_comment); + } elsif (!/^$comment_prefix \*[ \t]/ && + !/^$comment_prefix \*$/) { + err("improper block comment") + unless ($ignore_hdr_comment && $in_header_comment); + } + } + + if ($in_header_comment && $ignore_hdr_comment) { + $prev = $line; + next line; + } + + # check for errors that might occur in comments and in code. + + # allow spaces to be used to draw pictures in header and block comments. + if (/[^ ] / && !/".* .*"/ && !$in_header_comment && !$in_comment) { + err("spaces instead of tabs"); + } + if (/^ / && !/^ \*[ \t\/]/ && !/^ \*$/ && + (!/^ \w/ || $in_function != 0)) { + err("indent by spaces instead of tabs"); + } + if (/^\t+ [^ \t\*]/ || /^\t+ \S/ || /^\t+ \S/) { + err("continuation line not indented by 4 spaces"); + } + if (/$warlock_re/ && !/\*\//) { + $in_warlock_comment = 1; + $prev = $line; + next line; + } + if (/^\s*\/\*./ && !/^\s*\/\*.*\*\// && !/$hdr_comment_start/) { + err("improper first line of block comment"); + } + + if ($in_comment) { # still in comment, don't do further checks + $prev = $line; + next line; + } + + if ((/[^(]\/\*\S/ || /^\/\*\S/) && + !(/$lint_re/ || ($splint_comments && /$splint_re/))) { + err("missing blank after open comment"); + } + if (/\S\*\/[^)]|\S\*\/$/ && + !(/$lint_re/ || ($splint_comments && /$splint_re/))) { + err("missing blank before close comment"); + } + if (/\/\/\S/) { # C++ comments + err("missing blank after start comment"); + } + # check for unterminated single line comments, but allow them when + # they are used to comment out the argument list of a function + # declaration. + if (/\S.*\/\*/ && !/\S.*\/\*.*\*\// && !/\(\/\*/) { + err("unterminated single line comment"); + } + + if (/^(#else|#endif|#include)(.*)$/) { + $prev = $line; + if ($picky) { + my $directive = $1; + my $clause = $2; + # Enforce ANSI rules for #else and #endif: no noncomment + # identifiers are allowed after #endif or #else. Allow + # C++ comments since they seem to be a fact of life. + if ((($1 eq "#endif") || ($1 eq "#else")) && + ($clause ne "") && + (!($clause =~ /^\s+\/\*.*\*\/$/)) && + (!($clause =~ /^\s+\/\/.*$/))) { + err("non-comment text following " . + "$directive (or malformed $directive " . + "directive)"); + } + } + next line; + } + + # + # delete any comments and check everything else. Note that + # ".*?" is a non-greedy match, so that we don't get confused by + # multiple comments on the same line. + # + s/\/\*.*?\*\//\x01/g; + s/\/\/.*$/\x01/; # C++ comments + + # delete any trailing whitespace; we have already checked for that. + s/\s*$//; + + # following checks do not apply to text in comments. 
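+	# (added example, not upstream: "a+b" and "a +b" both draw
+	# "missing space around + operator", while "a + b" passes; the
+	# operator checks below all follow this pattern.)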
+ + if (/[^ \t\+]\+[^\+=]/ || /[^\+]\+[^ \+=]/) { + err("missing space around + operator"); + } + if (/[^ \t]\+=/ || /\+=[^ ]/) { + err("missing space around += operator"); + } + if (/[^ \t\-]\-[^\->]/ && !/\(\w+\)\-\w/ && !/[\(\[]\-[\w \t]+[\)\],]/) { + err("missing space before - operator"); + } + if (/[^\-]\-[^ \-=>]/ && !/\(\-\w+\)/ && + !/(return|case|=|>|<|\?|:|,|^[ \t]+)[ \t]+\-[\w\(]/ && !/(\([^\)]+\)|\[|\()\-[\w\(\]]/) { + err("missing space after - operator"); + } + if (/(return|case|=|\?|:|,|\[)[ \t]+\-[ \t]/ || /[\(\[]\-[ \t]/) { + err("extra space after - operator"); + } + if (/[ \t]\+\+ /) { + err("extra space before or after ++ operator"); + } + if (/[ \t]\-\- /) { + err("extra space before or after -- operator"); + } + if (/[^ \t]\-=/ || /\-=[^ ]/) { + err("missing space around -= operator"); + } + if (/[^ \t][\%\/]/ || /[\%\/][^ =]/ || /[\%\/]=[^ ]/) { + err("missing space around one of operators: % %= / /="); + } + if (/[^ \t]\*=/ || /\*=[^ ]/) { + err("missing space around *= operator"); + } + if (/[^ \t\(\)\*\[]\*/) { + err("missing space before * operator"); + } + if (/\*[^ =\*\w\(,]/ && !/\(.+ \*+\)/ && !/\*\[\]/ && + !/\*\-\-\w/ && !/\*\+\+\w/ && !/\*\)/) { + err("missing space after * operator"); + } + if (/[^<>\s][!<>=]=/ || /[^<>][!<>=]=[^\s,]/ || + (/[^->]>[^,=>\s]/ && !/[^->]>$/) || + (/[^<]<[^,=<\s]/ && !/[^<]<$/) || + /[^<\s]<[^<]/ || /[^->\s]>[^>]/) { + err("missing space around relational operator"); + } + if (/\S>>=/ || /\S<<=/ || />>=\S/ || /<<=\S/ || /\S[-+*\/&|^%]=/ || + (/[^-+*\/&|^%!<>=\s]=[^=]/ && !/[^-+*\/&|^%!<>=\s]=$/) || + (/[^!<>=]=[^=\s]/ && !/[^!<>=]=$/)) { + # XXX - should only check this for C++ code + # XXX - there are probably other forms that should be allowed + if (!/\soperator=/) { + err("missing space around assignment operator"); + } + } + if (/[,;]\S/ && !/\bfor \(;;\)/) { + err("comma or semicolon followed by non-blank"); + } + # allow "for" statements to have empty "while" clauses + if (/\s[,;]/ && !/^[\t]+;$/ && !/^\s*for \([^;]*; ;[^;]*\)/) { + err("comma or semicolon preceded by blank"); + } + if (/^\s*(&&|\|\|)/) { + err("improper boolean continuation"); + } + if (/\S *(&&|\|\|)/ || /(&&|\|\|) *\S/) { + err("more than one space around boolean operator"); + } + if (/\b(for|if|while|switch|return|case)\(/) { + err("missing space between keyword and paren"); + } + if (/(\b(for|if|while|switch|return)\b.*){2,}/ && !/^#define/) { + # multiple "case" and "sizeof" allowed + err("more than one keyword on line"); + } + if (/\b(for|if|while|switch|return|case)\s\s+\(/ && + !/^#if\s+\(/) { + err("extra space between keyword and paren"); + } + # try to detect "func (x)" but not "if (x)" or + # "#define foo (x)" or "int (*func)();" + if (/\w\s\(/) { + my $s = $_; + # strip off all keywords on the line + s/\b(for|if|while|switch|return|case)\s\(/XXX(/g; + s/\b(sizeof|typeof|__typeof__)\s*\(/XXX(/g; + s/#elif\s\(/XXX(/g; + s/^#define\s+\w+\s+\(/XXX(/; + # do not match things like "void (*f)();" + # or "typedef void (func_t)();" + s/\w\s\(+\*/XXX(*/g; + s/\b($typename|void)\s+\(+/XXX(/og; + s/\btypedef\s($typename|void)\s+\(+/XXX(/og; + # do not match "__attribute__ ((format (...)))" + s/\b__attribute__\s*\(\(format\s*\(/__attribute__((XXX(/g; + if (/\w\s\(/) { + err("extra space between function name and left paren"); + } + $_ = $s; + } + # try to detect "int foo(x)", but not "extern int foo(x);" + # XXX - this still trips over too many legitimate things, + # like "int foo(x,\n\ty);" +# if (/^(\w+(\s|\*)+)+\w+\(/ && !/\)[;,](\s|\x01)*$/ && +# 
!/^(extern|static)\b/) { +# err("return type of function not on separate line"); +# } + # this is a close approximation + if (/^(\w+(\s|\*)+)+\w+\(.*\)(\s|\x01)*$/ && + !/^(extern|static)\b/) { + err("return type of function not on separate line"); + } + if (/^#define\t/ || /^#ifdef\t/ || /^#ifndef\t/) { + err("#define/ifdef/ifndef followed by tab instead of space"); + } + if (/^#define\s\s+/ || /^#ifdef\s\s+/ || /^#ifndef\s\s+/) { + err("#define/ifdef/ifndef followed by more than one space"); + } + # AON C-style doesn't require this. + #if (/^\s*return\W[^;]*;/ && !/^\s*return\s*\(.*\);/) { + # err("unparenthesized return expression"); + #} + if (/\bsizeof\b/ && !/\bsizeof\s*\(.*\)/) { + err("unparenthesized sizeof expression"); + } + if (/\b(sizeof|typeof)\b/ && /\b(sizeof|typeof)\s+\(.*\)/) { + err("spaces between sizeof/typeof expression and paren"); + } + if (/\(\s/) { + err("whitespace after left paren"); + } + # allow "for" statements to have empty "continue" clauses + if (/\s\)/ && !/^\s*for \([^;]*;[^;]*; \)/) { + err("whitespace before right paren"); + } + if (/^\s*\(void\)[^ ]/) { + err("missing space after (void) cast"); + } + if (/\S\{/ && !/\{\{/ && !/\(struct \w+\)\{/) { + err("missing space before left brace"); + } + if ($in_function && /^\s+{/ && + ($prev =~ /\)\s*$/ || $prev =~ /\bstruct\s+\w+$/)) { + err("left brace starting a line"); + } + if (/}(else|while)/) { + err("missing space after right brace"); + } + if (/}\s\s+(else|while)/) { + err("extra space after right brace"); + } + if (/\b_VOID\b|\bVOID\b|\bSTATIC\b/) { + err("obsolete use of VOID or STATIC"); + } + if (/\b($typename|void)\*/o) { + err("missing space between type name and *"); + } + if (/^\s+#/) { + err("preprocessor statement not in column 1"); + } + if (/^#\s/) { + err("blank after preprocessor #"); + } + if (/!\s*(strcmp|strncmp|bcmp)\s*\(/) { + err("don't use boolean ! with comparison functions"); + } + if (/^\S+\([\S\s]*\)\s*{/) { + err("brace of function definition not at beginning of line"); + } + if (/static\s+\S+\s*=\s*(0|NULL)\s*;/) { + err("static variable initialized with 0 or NULL"); + } + if (/typedef[\S\s]+\*\s*\w+\s*;/) { + err("typedefed pointer type"); + } + if (/unsigned\s+int\s/) { + err("'unsigned int' instead of just 'unsigned'"); + } + if (/long\s+long\s+int\s/) { + err("'long long int' instead of just 'long long'"); + } elsif (/long\s+int\s/) { + err("'long int' instead of just 'long'"); + } + + # + # We completely ignore, for purposes of indentation: + # * lines outside of functions + # * preprocessor lines + # + if ($check_continuation && $in_function && !$in_cpp) { + process_indent($_); + } + if ($picky) { + # try to detect spaces after casts, but allow (e.g.) + # "sizeof (int) + 1", "void (*funcptr)(int) = foo;", and + # "int foo(int) __NORETURN;" + if ((/^\($typename( \*+)?\)\s/o || + /\W\($typename( \*+)?\)\s/o) && + !/sizeof\($typename( \*)?\)\s/o && + !/\($typename( \*+)?\)\s+=[^=]/o) { + err("space after cast"); + } + if (/\b($typename|void)\s*\*\s/o && + !/\b($typename|void)\s*\*\s+const\b/o) { + err("unary * followed by space"); + } + } + if ($check_posix_types) { + # try to detect old non-POSIX types. + # POSIX requires all non-standard typedefs to end in _t, + # but historically these have been used. 
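+		# (added example, not upstream: a declaration like "u_long len;"
+		# is reported as "non-POSIX typedef u_long used: use ulong_t
+		# instead", per the %old2posix mapping above.)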
+ if (/\b(unchar|ushort|uint|ulong|u_int|u_short|u_long|u_char|quad)\b/) { + err("non-POSIX typedef $1 used: use $old2posix{$1} instead"); + } + } + if ($heuristic) { + # cannot check this everywhere due to "struct {\n...\n} foo;" + if ($in_function && !$in_declaration && + /}./ && !/}\s+=/ && !/{.*}[;,]$/ && !/}(\s|\x01)*$/ && + !/} (else|while)/ && !/}}/) { + err("possible bad text following right brace"); + } + # cannot check this because sub-blocks in + # the middle of code are ok + if ($in_function && /^\s+{/) { + err("possible left brace starting a line"); + } + } + if (/^\s*else\W/) { + if ($prev =~ /^\s*}$/) { + err_prefix($prev, + "else and right brace should be on same line"); + } + } + $prev = $line; +} + +if ($prev eq "") { + err("last line in file is blank"); +} + +} + +# +# Continuation-line checking +# +# The rest of this file contains the code for the continuation checking +# engine. It's a pretty simple state machine which tracks the expression +# depth (unmatched '('s and '['s). +# +# Keep in mind that the argument to process_indent() has already been heavily +# processed; all comments have been replaced by control-A, and the contents of +# strings and character constants have been elided. +# + +my $cont_in; # currently inside of a continuation +my $cont_off; # skipping an initializer or definition +my $cont_noerr; # suppress cascading errors +my $cont_start; # the line being continued +my $cont_base; # the base indentation +my $cont_first; # this is the first line of a statement +my $cont_multiseg; # this continuation has multiple segments + +my $cont_special; # this is a C statement (if, for, etc.) +my $cont_macro; # this is a macro +my $cont_case; # this is a multi-line case + +my @cont_paren; # the stack of unmatched ( and [s we've seen + +sub +reset_indent() +{ + $cont_in = 0; + $cont_off = 0; +} + +sub +delabel($) +{ + # + # replace labels with tabs. Note that there may be multiple + # labels on a line. + # + local $_ = $_[0]; + + while (/^(\t*)( *(?:(?:\w+\s*)|(?:case\b[^:]*)): *)(.*)$/) { + my ($pre_tabs, $label, $rest) = ($1, $2, $3); + $_ = $pre_tabs; + while ($label =~ s/^([^\t]*)(\t+)//) { + $_ .= "\t" x (length($2) + length($1) / 8); + } + $_ .= ("\t" x (length($label) / 8)).$rest; + } + + return ($_); +} + +sub +process_indent($) +{ + require strict; + local $_ = $_[0]; # preserve the global $_ + + s/\x01//g; # No comments + s/\s+$//; # Strip trailing whitespace + + return if (/^$/); # skip empty lines + + # regexps used below; keywords taking (), macros, and continued cases + my $special = '(?:(?:\}\s*)?else\s+)?(?:if|for|while|switch)\b'; + my $macro = '[A-Z_][A-Z_0-9]*\('; + my $case = 'case\b[^:]*$'; + + # skip over enumerations, array definitions, initializers, etc. 
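+	# (added illustration, not upstream: a line such as
+	#	static int tbl[] = {
+	# matches the "=\s*{" case below, so continuation checking is
+	# suspended until the braces balance again.)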
+ if ($cont_off <= 0 && !/^\s*$special/ && + (/(?:(?:\b(?:enum|struct|union)\s*[^\{]*)|(?:\s+=\s*))\{/ || + (/^\s*{/ && $prev =~ /=\s*(?:\/\*.*\*\/\s*)*$/))) { + $cont_in = 0; + $cont_off = tr/{/{/ - tr/}/}/; + return; + } + if ($cont_off) { + $cont_off += tr/{/{/ - tr/}/}/; + return; + } + + if (!$cont_in) { + $cont_start = $line; + + if (/^\t* /) { + err("non-continuation indented 4 spaces"); + $cont_noerr = 1; # stop reporting + } + $_ = delabel($_); # replace labels with tabs + + # check if the statement is complete + return if (/^\s*\}?$/); + return if (/^\s*\}?\s*else\s*\{?$/); + return if (/^\s*do\s*\{?$/); + return if (/{$/); + return if (/}[,;]?$/); + + # Allow macros on their own lines + return if (/^\s*[A-Z_][A-Z_0-9]*$/); + + # cases we don't deal with, generally non-kosher + if (/{/) { + err("stuff after {"); + return; + } + + # Get the base line, and set up the state machine + /^(\t*)/; + $cont_base = $1; + $cont_in = 1; + @cont_paren = (); + $cont_first = 1; + $cont_multiseg = 0; + + # certain things need special processing + $cont_special = /^\s*$special/? 1 : 0; + $cont_macro = /^\s*$macro/? 1 : 0; + $cont_case = /^\s*$case/? 1 : 0; + } else { + $cont_first = 0; + + # Strings may be pulled back to an earlier (half-)tabstop + unless ($cont_noerr || /^$cont_base / || + (/^\t*(?: )?(?:gettext\()?\"/ && !/^$cont_base\t/)) { + err_prefix($cont_start, + "continuation should be indented 4 spaces"); + } + } + + my $rest = $_; # keeps the remainder of the line + + # + # The split matches 0 characters, so that each 'special' character + # is processed separately. Parens and brackets are pushed and + # popped off the @cont_paren stack. For normal processing, we wait + # until a ; or { terminates the statement. "special" processing + # (if/for/while/switch) is allowed to stop when the stack empties, + # as is macro processing. Case statements are terminated with a : + # and an empty paren stack. + # + foreach $_ (split /[^\(\)\[\]\{\}\;\:]*/) { + next if (length($_) == 0); + + # rest contains the remainder of the line + my $rxp = "[^\Q$_\E]*\Q$_\E"; + $rest =~ s/^$rxp//; + + if (/\(/ || /\[/) { + push @cont_paren, $_; + } elsif (/\)/ || /\]/) { + my $cur = $_; + tr/\)\]/\(\[/; + + my $old = (pop @cont_paren); + if (!defined($old)) { + err("unexpected '$cur'"); + $cont_in = 0; + last; + } elsif ($old ne $_) { + err("'$cur' mismatched with '$old'"); + $cont_in = 0; + last; + } + + # + # If the stack is now empty, do special processing + # for if/for/while/switch and macro statements. + # + next if (@cont_paren != 0); + if ($cont_special) { + if ($rest =~ /^\s*{?$/) { + $cont_in = 0; + last; + } + if ($rest =~ /^\s*;$/) { + err("empty if/for/while body ". + "not on its own line"); + $cont_in = 0; + last; + } + if (!$cont_first && $cont_multiseg == 1) { + err_prefix($cont_start, + "multiple statements continued ". + "over multiple lines"); + $cont_multiseg = 2; + } elsif ($cont_multiseg == 0) { + $cont_multiseg = 1; + } + # We've finished this section, start + # processing the next. + goto section_ended; + } + if ($cont_macro) { + if ($rest =~ /^$/) { + $cont_in = 0; + last; + } + } + } elsif (/\;/) { + if ($cont_case) { + err("unexpected ;"); + } elsif (!$cont_special) { + err("unexpected ;") if (@cont_paren != 0); + if (!$cont_first && $cont_multiseg == 1) { + err_prefix($cont_start, + "multiple statements continued ". 
+ "over multiple lines"); + $cont_multiseg = 2; + } elsif ($cont_multiseg == 0) { + $cont_multiseg = 1; + } + if ($rest =~ /^$/) { + $cont_in = 0; + last; + } + if ($rest =~ /^\s*special/) { + err("if/for/while/switch not started ". + "on its own line"); + } + goto section_ended; + } + } elsif (/\{/) { + err("{ while in parens/brackets") if (@cont_paren != 0); + err("stuff after {") if ($rest =~ /[^\s}]/); + $cont_in = 0; + last; + } elsif (/\}/) { + err("} while in parens/brackets") if (@cont_paren != 0); + if (!$cont_special && $rest !~ /^\s*(while|else)\b/) { + if ($rest =~ /^$/) { + err("unexpected }"); + } else { + err("stuff after }"); + } + $cont_in = 0; + last; + } + } elsif (/\:/ && $cont_case && @cont_paren == 0) { + err("stuff after multi-line case") if ($rest !~ /$^/); + $cont_in = 0; + last; + } + next; +section_ended: + # End of a statement or if/while/for loop. Reset + # cont_special and cont_macro based on the rest of the + # line. + $cont_special = ($rest =~ /^\s*$special/)? 1 : 0; + $cont_macro = ($rest =~ /^\s*$macro/)? 1 : 0; + $cont_case = 0; + next; + } + $cont_noerr = 0 if (!$cont_in); +} diff --git a/src/pmdk/utils/docker/0001-travis-fix-travisci_build_coverity_scan.sh.patch b/src/pmdk/utils/docker/0001-travis-fix-travisci_build_coverity_scan.sh.patch new file mode 100644 index 000000000..9738942aa --- /dev/null +++ b/src/pmdk/utils/docker/0001-travis-fix-travisci_build_coverity_scan.sh.patch @@ -0,0 +1,27 @@ +From b5179dc4822eaab192361da05aa95d98f523960f Mon Sep 17 00:00:00 2001 +From: Lukasz Dorau +Date: Mon, 7 May 2018 12:05:40 +0200 +Subject: [PATCH] travis: fix travisci_build_coverity_scan.sh + +--- + travisci_build_coverity_scan.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/travisci_build_coverity_scan.sh b/travisci_build_coverity_scan.sh +index ad9d4afcf..562b08bcc 100644 +--- a/travisci_build_coverity_scan.sh ++++ b/travisci_build_coverity_scan.sh +@@ -92,8 +92,8 @@ response=$(curl \ + --form description="Travis CI build" \ + $UPLOAD_URL) + status_code=$(echo "$response" | sed -n '$p') +-if [ "$status_code" != "201" ]; then ++if [ "$status_code" != "200" ]; then + TEXT=$(echo "$response" | sed '$d') +- echo -e "\033[33;1mCoverity Scan upload failed: $TEXT.\033[0m" ++ echo -e "\033[33;1mCoverity Scan upload failed: $response.\033[0m" + exit 1 + fi +-- +2.13.6 + diff --git a/src/pmdk/utils/docker/README b/src/pmdk/utils/docker/README new file mode 100644 index 000000000..b4a271359 --- /dev/null +++ b/src/pmdk/utils/docker/README @@ -0,0 +1,19 @@ +Persistent Memory Development Kit + +This is utils/docker/README. + +Scripts in this directory let Travis CI run a Docker container with ubuntu- +or fedora-based environment and build PMDK project inside it. + +'build-local.sh' can be used to build PMDK locally. + +'build-CI.sh' is used for building PMDK on Travis and GitHub Actions CIs + +NOTE: +If you commit changes to any Dockerfile or shell script in the 'images' +subdirectory and then do git-rebase before pushing your commits to the +repository, make sure that you do not squash the commit which is the head in +your repository. This will let Travis and GitHub Actions CIs recreate +Docker images used during the build before the build. Otherwise the not-updated +Docker image will be pulled from the Docker Hub and used during the build on +Travis and GitHub Actions CIs. 
diff --git a/src/pmdk/utils/docker/build-CI.sh b/src/pmdk/utils/docker/build-CI.sh new file mode 100755 index 000000000..1b75d3b94 --- /dev/null +++ b/src/pmdk/utils/docker/build-CI.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# build-CI.sh - runs a Docker container from a Docker image with environment +# prepared for building PMDK project and starts building PMDK. +# +# This script is used for building PMDK on Travis and GitHub Actions CIs. +# + +set -e + +source $(dirname $0)/set-ci-vars.sh +source $(dirname $0)/set-vars.sh +source $(dirname $0)/valid-branches.sh + +if [[ "$CI_EVENT_TYPE" != "cron" && "$CI_BRANCH" != "coverity_scan" \ + && "$COVERITY" -eq 1 ]]; then + echo "INFO: Skip Coverity scan job if build is triggered neither by " \ + "'cron' nor by a push to 'coverity_scan' branch" + exit 0 +fi + +if [[ ( "$CI_EVENT_TYPE" == "cron" || "$CI_BRANCH" == "coverity_scan" )\ + && "$COVERITY" -ne 1 ]]; then + echo "INFO: Skip regular jobs if build is triggered either by 'cron'" \ + " or by a push to 'coverity_scan' branch" + exit 0 +fi + +if [[ -z "$OS" || -z "$OS_VER" ]]; then + echo "ERROR: The variables OS and OS_VER have to be set properly " \ + "(eg. OS=ubuntu, OS_VER=16.04)." + exit 1 +fi + +if [[ -z "$HOST_WORKDIR" ]]; then + echo "ERROR: The variable HOST_WORKDIR has to contain a path to " \ + "the root of the PMDK project on the host machine" + exit 1 +fi + +if [[ -z "$TEST_BUILD" ]]; then + TEST_BUILD=all +fi + +imageName=${DOCKERHUB_REPO}:1.10-${OS}-${OS_VER}-${CI_CPU_ARCH} +containerName=pmdk-${OS}-${OS_VER} + +if [[ $MAKE_PKG -eq 0 ]] ; then command="./run-build.sh"; fi +if [[ $MAKE_PKG -eq 1 ]] ; then command="./run-build-package.sh"; fi +if [[ $COVERAGE -eq 1 ]] ; then command="./run-coverage.sh"; ci_env=`bash <(curl -s https://codecov.io/env)`; fi + +if [[ ( "$CI_EVENT_TYPE" == "cron" || "$CI_BRANCH" == "coverity_scan" )\ + && "$COVERITY" -eq 1 ]]; then + command="./run-coverity.sh" +fi + +if [ -n "$DNS_SERVER" ]; then DNS_SETTING=" --dns=$DNS_SERVER "; fi +if [[ -f $CI_FILE_SKIP_BUILD_PKG_CHECK ]]; then BUILD_PACKAGE_CHECK=n; else BUILD_PACKAGE_CHECK=y; fi +if [ -z "$NDCTL_ENABLE" ]; then ndctl_enable=; else ndctl_enable="--env NDCTL_ENABLE=$NDCTL_ENABLE"; fi +if [[ $UBSAN -eq 1 ]]; then for x in C CPP LD; do declare EXTRA_${x}FLAGS=-fsanitize=undefined; done; fi + +# Only run doc update on $GITHUB_REPO master or stable branch +if [[ -z "${CI_BRANCH}" || -z "${TARGET_BRANCHES[${CI_BRANCH}]}" || "$CI_EVENT_TYPE" == "pull_request" || "$CI_REPO_SLUG" != "${GITHUB_REPO}" ]]; then + AUTO_DOC_UPDATE=0 +fi + +# Check if we are running on a CI (Travis or GitHub Actions) +[ -n "$GITHUB_ACTIONS" -o -n "$TRAVIS" ] && CI_RUN="YES" || CI_RUN="NO" + +# We have a blacklist only for ppc64le arch +if [[ "$CI_CPU_ARCH" == ppc64le ]] ; then BLACKLIST_FILE=../../utils/docker/ppc64le.blacklist; fi + +# docker on travis + ppc64le runs inside an LXD container and for security +# limits what can be done inside it, and as such, `docker run` fails with +# > the input device is not a TTY +# when using -t because of limited permissions to /dev imposed by LXD. 
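+# (added note, not upstream: the net effect of the $TTY variable set below is
+#	docker run ... -i	# Travis on ppc64le, and GitHub Actions
+#	docker run ... -i -t	# everywhere else
+# )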
+if [[ -n "$TRAVIS" && "$CI_CPU_ARCH" == ppc64le ]] || [[ -n "$GITHUB_ACTIONS" ]]; then + TTY='' +else + TTY='-t' +fi + +WORKDIR=/pmdk +SCRIPTSDIR=$WORKDIR/utils/docker + +# Run a container with +# - environment variables set (--env) +# - host directory containing PMDK source mounted (-v) +# - a tmpfs /tmp with the necessary size and permissions (--tmpfs)* +# - working directory set (-w) +# +# * We need a tmpfs /tmp inside docker but we cannot run it with --privileged +# and do it from inside, so we do using this docker-run option. +# By default --tmpfs add nosuid,nodev,noexec to the mount flags, we don't +# want that and just to make sure we add the usually default rw,relatime just +# in case docker change the defaults. +docker run --rm --name=$containerName -i $TTY \ + $DNS_SETTING \ + $ci_env \ + --env http_proxy=$http_proxy \ + --env https_proxy=$https_proxy \ + --env AUTO_DOC_UPDATE=$AUTO_DOC_UPDATE \ + --env CC=$PMDK_CC \ + --env CXX=$PMDK_CXX \ + --env VALGRIND=$VALGRIND \ + --env EXTRA_CFLAGS=$EXTRA_CFLAGS \ + --env EXTRA_CXXFLAGS=$EXTRA_CXXFLAGS \ + --env EXTRA_LDFLAGS=$EXTRA_LDFLAGS \ + --env REMOTE_TESTS=$REMOTE_TESTS \ + --env TEST_BUILD=$TEST_BUILD \ + --env WORKDIR=$WORKDIR \ + --env EXPERIMENTAL=$EXPERIMENTAL \ + --env BUILD_PACKAGE_CHECK=$BUILD_PACKAGE_CHECK \ + --env SCRIPTSDIR=$SCRIPTSDIR \ + --env TRAVIS=$TRAVIS \ + --env CI_COMMIT_RANGE=$CI_COMMIT_RANGE \ + --env CI_COMMIT=$CI_COMMIT \ + --env CI_REPO_SLUG=$CI_REPO_SLUG \ + --env CI_BRANCH=$CI_BRANCH \ + --env CI_EVENT_TYPE=$CI_EVENT_TYPE \ + --env DOC_UPDATE_GITHUB_TOKEN=$DOC_UPDATE_GITHUB_TOKEN \ + --env COVERITY_SCAN_TOKEN=$COVERITY_SCAN_TOKEN \ + --env COVERITY_SCAN_NOTIFICATION_EMAIL=$COVERITY_SCAN_NOTIFICATION_EMAIL \ + --env FAULT_INJECTION=$FAULT_INJECTION \ + --env GITHUB_ACTIONS=$GITHUB_ACTIONS \ + --env GITHUB_HEAD_REF=$GITHUB_HEAD_REF \ + --env GITHUB_REPO=$GITHUB_REPO \ + --env GITHUB_REPOSITORY=$GITHUB_REPOSITORY \ + --env GITHUB_REF=$GITHUB_REF \ + --env GITHUB_RUN_ID=$GITHUB_RUN_ID \ + --env GITHUB_SHA=$GITHUB_SHA \ + --env CI_RUN=$CI_RUN \ + --env SRC_CHECKERS=$SRC_CHECKERS \ + --env BLACKLIST_FILE=$BLACKLIST_FILE \ + $ndctl_enable \ + --tmpfs /tmp:rw,relatime,suid,dev,exec,size=6G \ + -v $HOST_WORKDIR:$WORKDIR \ + -v /etc/localtime:/etc/localtime \ + -w $SCRIPTSDIR \ + $imageName $command diff --git a/src/pmdk/utils/docker/build-local.sh b/src/pmdk/utils/docker/build-local.sh new file mode 100755 index 000000000..0fa5b2107 --- /dev/null +++ b/src/pmdk/utils/docker/build-local.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017-2020, Intel Corporation + +# +# build-local.sh - runs a Docker container from a Docker image with environment +# prepared for building PMDK project and starts building PMDK. +# +# This script is for building PMDK locally (not on CI). +# +# Notes: +# - run this script from its location or set the variable 'HOST_WORKDIR' to +# where the root of the PMDK project is on the host machine. +# - set variables 'OS' and 'OS_VER' properly to a system you want to build PMDK +# on (for proper values take a look on the list of Dockerfiles at the +# utils/docker/images directory), eg. OS=ubuntu, OS_VER=16.04. +# - set 'KEEP_TEST_CONFIG' variable to 1 if you do not want the tests to be +# reconfigured (your current test configuration will be preserved and used). +# - tests with Device Dax are not supported by pcheck yet, so do not provide +# these devices in your configuration. 
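+# - illustrative invocation (an added example, not from the original notes):
+#	OS=ubuntu OS_VER=19.10 MAKE_PKG=0 ./build-local.sh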
+# + +set -e + +# Environment variables that can be customized (default values are after dash): +export KEEP_CONTAINER=${KEEP_CONTAINER:-0} +export KEEP_TEST_CONFIG=${KEEP_TEST_CONFIG:-0} +export TEST_BUILD=${TEST_BUILD:-all} +export REMOTE_TESTS=${REMOTE_TESTS:-1} +export MAKE_PKG=${MAKE_PKG:-0} +export EXTRA_CFLAGS=${EXTRA_CFLAGS} +export EXTRA_CXXFLAGS=${EXTRA_CXXFLAGS:-} +export PMDK_CC=${PMDK_CC:-gcc} +export PMDK_CXX=${PMDK_CXX:-g++} +export EXPERIMENTAL=${EXPERIMENTAL:-n} +export VALGRIND=${VALGRIND:-1} +export DOCKERHUB_REPO=${DOCKERHUB_REPO:-pmem/pmdk} +export GITHUB_REPO=${GITHUB_REPO:-pmem/pmdk} + +if [[ -z "$OS" || -z "$OS_VER" ]]; then + echo "ERROR: The variables OS and OS_VER have to be set " \ + "(eg. OS=ubuntu, OS_VER=16.04)." + exit 1 +fi + +if [[ -z "$HOST_WORKDIR" ]]; then + HOST_WORKDIR=$(readlink -f ../..) +fi + +if [[ "$KEEP_CONTAINER" != "1" ]]; then + RM_SETTING=" --rm" +fi + +imageName=${DOCKERHUB_REPO}:1.10-${OS}-${OS_VER}-${CI_CPU_ARCH} +containerName=pmdk-${OS}-${OS_VER} + +if [[ $MAKE_PKG -eq 1 ]] ; then + command="./run-build-package.sh" +else + command="./run-build.sh" +fi + +if [ -n "$DNS_SERVER" ]; then DNS_SETTING=" --dns=$DNS_SERVER "; fi +if [ -z "$NDCTL_ENABLE" ]; then ndctl_enable=; else ndctl_enable="--env NDCTL_ENABLE=$NDCTL_ENABLE"; fi + +WORKDIR=/pmdk +SCRIPTSDIR=$WORKDIR/utils/docker + +# Check if we are running on a CI (Travis or GitHub Actions) +[ -n "$GITHUB_ACTIONS" -o -n "$TRAVIS" ] && CI_RUN="YES" || CI_RUN="NO" + +echo Building ${OS}-${OS_VER} + +# Run a container with +# - environment variables set (--env) +# - host directory containing PMDK source mounted (-v) +# - a tmpfs /tmp with the necessary size and permissions (--tmpfs)* +# - working directory set (-w) +# +# * We need a tmpfs /tmp inside docker but we cannot run it with --privileged +# and do it from inside, so we do using this docker-run option. +# By default --tmpfs add nosuid,nodev,noexec to the mount flags, we don't +# want that and just to make sure we add the usually default rw,relatime just +# in case docker change the defaults. +docker run --name=$containerName -ti \ + $RM_SETTING \ + $DNS_SETTING \ + --env http_proxy=$http_proxy \ + --env https_proxy=$https_proxy \ + --env CC=$PMDK_CC \ + --env CXX=$PMDK_CXX \ + --env VALGRIND=$VALGRIND \ + --env EXTRA_CFLAGS=$EXTRA_CFLAGS \ + --env EXTRA_CXXFLAGS=$EXTRA_CXXFLAGS \ + --env EXTRA_LDFLAGS=$EXTRA_LDFLAGS \ + --env REMOTE_TESTS=$REMOTE_TESTS \ + --env CONFIGURE_TESTS=$CONFIGURE_TESTS \ + --env TEST_BUILD=$TEST_BUILD \ + --env WORKDIR=$WORKDIR \ + --env EXPERIMENTAL=$EXPERIMENTAL \ + --env SCRIPTSDIR=$SCRIPTSDIR \ + --env KEEP_TEST_CONFIG=$KEEP_TEST_CONFIG \ + --env CI_RUN=$CI_RUN \ + --env BLACKLIST_FILE=$BLACKLIST_FILE \ + $ndctl_enable \ + --tmpfs /tmp:rw,relatime,suid,dev,exec,size=6G \ + -v $HOST_WORKDIR:$WORKDIR \ + -v /etc/localtime:/etc/localtime \ + $DAX_SETTING \ + -w $SCRIPTSDIR \ + $imageName $command diff --git a/src/pmdk/utils/docker/configure-tests.sh b/src/pmdk/utils/docker/configure-tests.sh new file mode 100755 index 000000000..23148ed4f --- /dev/null +++ b/src/pmdk/utils/docker/configure-tests.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# configure-tests.sh - is called inside a Docker container; configures tests +# and ssh server for use during build of PMDK project. 
+# + +set -e + +# Configure tests +cat << EOF > $WORKDIR/src/test/testconfig.sh +LONGDIR=LoremipsumdolorsitametconsecteturadipiscingelitVivamuslacinianibhattortordictumsollicitudinNullamvariusvestibulumligulaetegestaselitsemperidMaurisultriciesligulaeuipsumtinciduntluctusMorbimaximusvariusdolorid +# this path is ~3000 characters long +DIRSUFFIX="$LONGDIR/$LONGDIR/$LONGDIR/$LONGDIR/$LONGDIR" +NON_PMEM_FS_DIR=/tmp +PMEM_FS_DIR=/tmp +PMEM_FS_DIR_FORCE_PMEM=1 +TEST_BUILD="debug nondebug" +ENABLE_SUDO_TESTS=y +TM=1 +EOF + +# Configure remote tests +if [[ $REMOTE_TESTS -eq 1 ]]; then + echo "Configuring remote tests" + cat << EOF >> $WORKDIR/src/test/testconfig.sh +NODE[0]=127.0.0.1 +NODE_WORKING_DIR[0]=/tmp/node0 +NODE_ADDR[0]=127.0.0.1 +NODE_ENV[0]="PMEM_IS_PMEM_FORCE=1" +NODE[1]=127.0.0.1 +NODE_WORKING_DIR[1]=/tmp/node1 +NODE_ADDR[1]=127.0.0.1 +NODE_ENV[1]="PMEM_IS_PMEM_FORCE=1" +NODE[2]=127.0.0.1 +NODE_WORKING_DIR[2]=/tmp/node2 +NODE_ADDR[2]=127.0.0.1 +NODE_ENV[2]="PMEM_IS_PMEM_FORCE=1" +NODE[3]=127.0.0.1 +NODE_WORKING_DIR[3]=/tmp/node3 +NODE_ADDR[3]=127.0.0.1 +NODE_ENV[3]="PMEM_IS_PMEM_FORCE=1" +TEST_BUILD="debug nondebug" +TEST_PROVIDERS=sockets +EOF + + mkdir -p ~/.ssh/cm + + cat << EOF >> ~/.ssh/config +Host 127.0.0.1 + StrictHostKeyChecking no + ControlPath ~/.ssh/cm/%r@%h:%p + ControlMaster auto + ControlPersist 10m +EOF + + if [ ! -f /etc/ssh/ssh_host_rsa_key ] + then + (echo $USERPASS | sudo -S ssh-keygen -t rsa -C $USER@$HOSTNAME -P '' -f /etc/ssh/ssh_host_rsa_key) + fi + echo $USERPASS | sudo -S sh -c 'cat /etc/ssh/ssh_host_rsa_key.pub >> /etc/ssh/authorized_keys' + ssh-keygen -t rsa -C $USER@$HOSTNAME -P '' -f ~/.ssh/id_rsa + cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys + chmod -R 700 ~/.ssh + chmod 640 ~/.ssh/authorized_keys + chmod 600 ~/.ssh/config + + # Start ssh service + echo $USERPASS | sudo -S $START_SSH_COMMAND + + ssh 127.0.0.1 exit 0 +else + echo "Skipping remote tests" + echo + echo "Removing all libfabric.pc files in order to simulate that libfabric is not installed:" + find /usr -name "libfabric.pc" 2>/dev/null || true + echo $USERPASS | sudo -S sh -c 'find /usr -name "libfabric.pc" -exec rm -f {} + 2>/dev/null' +fi + +# Configure python tests + cat << EOF >> $WORKDIR/src/test/testconfig.py +config = { + 'unittest_log_level': 1, + 'cacheline_fs_dir': '/tmp', + 'force_cacheline': True, + 'page_fs_dir': '/tmp', + 'force_page': False, + 'byte_fs_dir': '/tmp', + 'force_byte': True, + 'tm': True, + 'test_type': 'check', + 'granularity': 'all', + 'fs_dir_force_pmem': 0, + 'keep_going': False, + 'timeout': '3m', + 'build': ['debug', 'release'], + 'force_enable': None, + 'device_dax_path': [], + 'fail_on_skip': False, + 'enable_admin_tests': True + } +EOF + diff --git a/src/pmdk/utils/docker/images/0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch b/src/pmdk/utils/docker/images/0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch new file mode 100644 index 000000000..7377d07f6 --- /dev/null +++ b/src/pmdk/utils/docker/images/0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch @@ -0,0 +1,37 @@ +From d633d3b0a5f03be280efb80a69b9d5ed4e9c4d56 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= +Date: Tue, 14 Jul 2020 13:58:34 +0200 +Subject: [PATCH] fix generating gcov files and turn-off verbose log + +--- + codecov | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/codecov b/codecov +index e702ecd..0a2f4d8 100755 +--- a/codecov ++++ b/codecov +@@ -1108,9 +1108,9 @@ then + if [ "$ft_gcovout" = "0" ]; + 
then + # suppress gcov output +- bash -c "find $proj_root -type f -name '*.gcno' $gcov_include $gcov_ignore -exec $gcov_exe -pb $gcov_arg {} +" >/dev/null 2>&1 || true ++ bash -c "find $proj_root -type f -name '*.gcno' $gcov_include $gcov_ignore -execdir $gcov_exe -pb $gcov_arg {} \;" >/dev/null 2>&1 || true + else +- bash -c "find $proj_root -type f -name '*.gcno' $gcov_include $gcov_ignore -exec $gcov_exe -pb $gcov_arg {} +" || true ++ bash -c "find $proj_root -type f -name '*.gcno' $gcov_include $gcov_ignore -execdir $gcov_exe -pb $gcov_arg {} \;" || true + fi + else + say "${e}==>${x} gcov disabled" +@@ -1425,7 +1425,7 @@ do + report_len=$(wc -c < "$file") + if [ "$report_len" -ne 0 ]; + then +- say " ${g}+${x} $file ${e}bytes=$(echo "$report_len" | tr -d ' ')${x}" ++ #say " ${g}+${x} $file ${e}bytes=$(echo "$report_len" | tr -d ' ')${x}" + # append to to upload + _filename=$(basename "$file") + if [ "${_filename##*.}" = 'gcov' ]; +-- +2.25.1 + diff --git a/src/pmdk/utils/docker/images/Dockerfile.fedora-31 b/src/pmdk/utils/docker/images/Dockerfile.fedora-31 new file mode 100644 index 000000000..a699fee85 --- /dev/null +++ b/src/pmdk/utils/docker/images/Dockerfile.fedora-31 @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# Dockerfile - a 'recipe' for Docker to build an image of fedora-based +# environment for building the PMDK project. +# + +# Pull base image +FROM fedora:31 +MAINTAINER piotr.balcer@intel.com + +# libfabric (optional if libfabric-dev >= 1.4.2 is installed) +ENV FABRIC_DEPS "\ + autoconf \ + automake \ + libtool \ + wget" + +ENV VALGRIND_DEPS "\ + autoconf \ + automake \ + file \ + findutils \ + git" + +# pmdk base +ENV BASE_DEPS "\ + git \ + daxctl-devel \ + make \ + ndctl-devel \ + pkgconfig" + +# benchmarks (optional) +ENV BENCH_DEPS "\ + glib2-devel" + +# examples (optional) +ENV EXAMPLES_DEPS "\ + fuse \ + fuse-devel \ + ncurses-devel \ + libuv-devel" + +# documentation (optional) +ENV DOC_DEPS "\ + pandoc" + +# tests +ENV TESTS_DEPS "\ + bc \ + gdb \ + libunwind-devel \ + ndctl \ + openssh-server \ + strace" + +# packaging +ENV PACKAGING_DEPS "\ + rpm-build \ + rpm-build-libs \ + rpmdevtools" + +# Coverity +ENV COVERITY_DEPS "\ + gcc \ + wget" + +# misc +ENV MISC_DEPS "\ + clang \ + hub \ + lbzip2 \ + man \ + python3-flake8 \ + rsync \ + shadow-utils \ + sudo \ + tar \ + which \ + xmlto" + +# Copy install valgrind script +COPY install-valgrind.sh install-valgrind.sh + +# Copy install libfabric script +COPY install-libfabric.sh install-libfabric.sh + +RUN dnf update -y && dnf install -y \ + $FABRIC_DEPS \ + $VALGRIND_DEPS \ + $BASE_DEPS \ + $BENCH_DEPS \ + $EXAMPLES_DEPS \ + $DOC_DEPS \ + $TESTS_DEPS \ + $PACKAGING_DEPS \ + $COVERITY_DEPS \ + $MISC_DEPS \ + $TESTS_DEPS \ + && ./install-valgrind.sh fedora \ + && ./install-libfabric.sh fedora \ + && dnf clean all + +# Add user +ENV USER pmdkuser +ENV USERPASS pmdkpass +RUN useradd -m $USER +RUN echo "$USER:$USERPASS" | chpasswd +RUN gpasswd wheel -a $USER +RUN echo "%wheel ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER $USER + +# Set required environment variables +ENV OS fedora +ENV OS_VER 31 +ENV START_SSH_COMMAND /usr/sbin/sshd +ENV PACKAGE_MANAGER rpm +ENV NOTTY 1 diff --git a/src/pmdk/utils/docker/images/Dockerfile.ubuntu-19.10 b/src/pmdk/utils/docker/images/Dockerfile.ubuntu-19.10 new file mode 100644 index 000000000..9b61f8953 --- /dev/null +++ b/src/pmdk/utils/docker/images/Dockerfile.ubuntu-19.10 @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: 
BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# Dockerfile - a 'recipe' for Docker to build an image of ubuntu-based
+#              environment for building the PMDK project.
+#
+
+# Pull base image
+FROM ubuntu:19.10
+MAINTAINER piotr.balcer@intel.com
+
+ENV DEBIAN_FRONTEND noninteractive
+
+# Additional parameters to build docker without building components
+ARG SKIP_SCRIPTS_DOWNLOAD
+
+# libfabric (optional if libfabric-dev >= 1.4.2 is installed)
+ENV FABRIC_DEPS "autoconf \
+	automake \
+	build-essential \
+	libtool \
+	unzip \
+	wget"
+
+ENV VALGRIND_DEPS "autoconf \
+	automake \
+	build-essential \
+	git"
+
+# pmdk base
+ENV BASE_DEPS "build-essential \
+	git \
+	libdaxctl-dev \
+	libndctl-dev \
+	pkg-config"
+
+# benchmarks (optional)
+ENV BENCH_DEPS libglib2.0-dev
+
+# examples (optional)
+ENV EXAMPLES_DEPS "libfuse-dev \
+	libncurses5-dev \
+	libuv1-dev"
+
+# documentation (optional)
+ENV DOC_DEPS pandoc
+
+# tests
+ENV TESTS_DEPS "bc \
+	gdb \
+	libc6-dbg \
+	libunwind-dev \
+	ndctl \
+	python3 \
+	ssh \
+	strace"
+
+# packaging
+ENV PACKAGING_DEPS "debhelper \
+	devscripts \
+	fakeroot"
+
+# CodeCov
+ENV CODECOV_DEPS curl
+
+# Coverity
+ENV COVERITY_DEPS ruby gcc g++ wget
+
+# misc
+ENV MISC_DEPS "clang \
+	clang-format \
+	flake8 \
+	sudo \
+	whois"
+
+# Copy install valgrind script
+COPY install-valgrind.sh install-valgrind.sh
+
+# Copy install libfabric script
+COPY install-libfabric.sh install-libfabric.sh
+
+# Copy codecov patch and script to download scripts required in run-*.sh
+COPY download-scripts.sh download-scripts.sh
+COPY 0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch \
+	0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch
+
+# Update the Apt cache and install basic tools
+RUN apt-get update && apt-get dist-upgrade -y \
+	&& apt-get install -y --no-install-recommends \
+	$FABRIC_DEPS \
+	$VALGRIND_DEPS \
+	$BASE_DEPS \
+	$BENCH_DEPS \
+	$EXAMPLES_DEPS \
+	$DOC_DEPS \
+	$TESTS_DEPS \
+	$PACKAGING_DEPS \
+	$CODECOV_DEPS \
+	$COVERITY_DEPS \
+	$MISC_DEPS \
+	&& ./install-valgrind.sh ubuntu \
+	&& ./install-libfabric.sh \
+	&& ./download-scripts.sh \
+	&& rm -rf /var/lib/apt/lists/*
+
+# Add user
+ENV USER pmdkuser
+ENV USERPASS pmdkpass
+RUN useradd -m $USER -g sudo -p `mkpasswd $USERPASS`
+RUN echo "%sudo ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+
+# switch user
+USER $USER
+
+# Set required environment variables
+ENV OS ubuntu
+ENV OS_VER 19.10
+ENV START_SSH_COMMAND service ssh start
+ENV PACKAGE_MANAGER dpkg
+ENV NOTTY 1
diff --git a/src/pmdk/utils/docker/images/README b/src/pmdk/utils/docker/images/README
new file mode 100644
index 000000000..0b8c551e4
--- /dev/null
+++ b/src/pmdk/utils/docker/images/README
@@ -0,0 +1,6 @@
+Persistent Memory Development Kit
+
+This is utils/docker/images/README.
+
+Scripts in this directory let you prepare Docker images for building
+the PMDK project under a specified OS (ubuntu, fedora).
diff --git a/src/pmdk/utils/docker/images/build-image.sh b/src/pmdk/utils/docker/images/build-image.sh
new file mode 100755
index 000000000..b9e7a2a4b
--- /dev/null
+++ b/src/pmdk/utils/docker/images/build-image.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# build-image.sh - prepares a Docker image with an <OS_VER>-based
+#                  environment intended for the <CPU_ARCH> CPU architecture,
+#                  designed for building the PMDK project according to
+#                  the Dockerfile.<OS_VER> file located in the same directory.
+#
+# The script can be run locally.
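+#
+# Example invocation (values are illustrative; DOCKERHUB_REPO must be
+# exported, see the check below):
+#
+#	DOCKERHUB_REPO=pmem/pmdk ./build-image.sh ubuntu-19.10 x86_64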
+#
+
+set -e
+
+OS_VER=$1
+CPU_ARCH=$2
+
+function usage {
+	echo "Usage:"
+	echo "    build-image.sh <OS_VER> <CPU_ARCH>"
+	echo "where:"
+	echo "  <OS_VER> - can be for example 'ubuntu-19.10' provided "\
+		"a Dockerfile named 'Dockerfile.ubuntu-19.10' "\
+		"exists in the current directory and"
+	echo "  <CPU_ARCH> - is a CPU architecture, for example 'x86_64'"
+}
+
+# Check if the first two arguments are not empty
+if [[ -z "$2" ]]; then
+	usage
+	exit 1
+fi
+
+# Check if the file Dockerfile.OS-VER exists
+if [[ ! -f "Dockerfile.$OS_VER" ]]; then
+	echo "Error: Dockerfile.$OS_VER does not exist."
+	echo
+	usage
+	exit 1
+fi
+
+if [[ -z "${DOCKERHUB_REPO}" ]]; then
+	echo "Error: DOCKERHUB_REPO environment variable is not set"
+	exit 1
+fi
+
+# Build a Docker image tagged with ${DOCKERHUB_REPO}:OS-VER-ARCH
+tag=${DOCKERHUB_REPO}:1.10-${OS_VER}-${CPU_ARCH}
+docker build -t $tag \
+	--build-arg http_proxy=$http_proxy \
+	--build-arg https_proxy=$https_proxy \
+	-f Dockerfile.$OS_VER .
diff --git a/src/pmdk/utils/docker/images/download-scripts.sh b/src/pmdk/utils/docker/images/download-scripts.sh
new file mode 100755
index 000000000..fae37b464
--- /dev/null
+++ b/src/pmdk/utils/docker/images/download-scripts.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020, Intel Corporation
+
+#
+# download-scripts.sh - downloads a specific version of codecov's bash
+#		script, used to generate and upload reports. It's useful,
+#		since an unverified version may break coverage results.
+#
+
+set -e
+
+# master: Merge pull request #342 from codecov/revert-proj-name-..., 18.08.2020
+CODECOV_VERSION="e877c1280cc6e902101fb5df2981ed1c962da7f0"
+
+if [ "${SKIP_SCRIPTS_DOWNLOAD}" ]; then
+	echo "Variable 'SKIP_SCRIPTS_DOWNLOAD' is set; skipping scripts' download"
+	exit
+fi
+
+mkdir -p /opt/scripts
+
+# Download codecov's bash script
+git clone https://github.com/codecov/codecov-bash
+cd codecov-bash
+git checkout $CODECOV_VERSION
+
+git apply ../0001-fix-generating-gcov-files-and-turn-off-verbose-log.patch
+mv -v codecov /opt/scripts/codecov
+
+cd ..
+rm -rf codecov-bash
diff --git a/src/pmdk/utils/docker/images/install-libfabric.sh b/src/pmdk/utils/docker/images/install-libfabric.sh
new file mode 100755
index 000000000..355f24525
--- /dev/null
+++ b/src/pmdk/utils/docker/images/install-libfabric.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# install-libfabric.sh - installs a customized version of libfabric
+#
+
+set -e
+
+OS=$1
+
+# Keep in sync with requirements in src/common.inc.
+libfabric_ver=1.4.2
+libfabric_url=https://github.com/ofiwg/libfabric/archive
+libfabric_dir=libfabric-$libfabric_ver
+libfabric_tarball=v${libfabric_ver}.zip
+wget "${libfabric_url}/${libfabric_tarball}"
+unzip $libfabric_tarball
+
+cd $libfabric_dir
+
+# XXX HACK HACK HACK
+# Disable use of spin locks in libfabric.
+#
+# Spinlocks do not play well (IOW at all) with cpu-constrained environments,
+# like GitHub Actions, and this leads to timeouts of some of PMDK's tests.
+# This change speeds up pmempool_sync_remote/TEST28-31 by a factor of 20-30.
+#
+perl -pi -e 's/have_spinlock=1/have_spinlock=0/' configure.ac
+# XXX HACK HACK HACK
+
+./autogen.sh
+./configure --prefix=/usr --enable-sockets
+make -j$(nproc)
+make -j$(nproc) install
+
+cd ..
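+# clean up: remove the downloaded archive and the extracted sources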
+rm -f ${libfabric_tarball} +rm -rf ${libfabric_dir} diff --git a/src/pmdk/utils/docker/images/install-libndctl.sh b/src/pmdk/utils/docker/images/install-libndctl.sh new file mode 100755 index 000000000..fc8cee3fb --- /dev/null +++ b/src/pmdk/utils/docker/images/install-libndctl.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017-2019, Intel Corporation + +# +# install-libndctl.sh - installs libndctl +# + +set -e + +OS=$2 + +echo "==== clone ndctl repo ====" +git clone https://github.com/pmem/ndctl.git +cd ndctl +git checkout $1 + +if [ "$OS" = "fedora" ]; then + +echo "==== setup rpmbuild tree ====" +rpmdev-setuptree + +RPMDIR=$HOME/rpmbuild/ +VERSION=$(./git-version) +SPEC=./rhel/ndctl.spec + +echo "==== create source tarball =====" +git archive --format=tar --prefix="ndctl-${VERSION}/" HEAD | gzip > "$RPMDIR/SOURCES/ndctl-${VERSION}.tar.gz" + +echo "==== build ndctl ====" +./autogen.sh +./configure --disable-docs +make -j$(nproc) + +echo "==== build ndctl packages ====" +rpmbuild -ba $SPEC + +echo "==== install ndctl packages ====" +RPM_ARCH=$(uname -m) +rpm -i $RPMDIR/RPMS/$RPM_ARCH/*.rpm + +echo "==== cleanup ====" +rm -rf $RPMDIR + +else + +echo "==== build ndctl ====" +./autogen.sh +./configure --disable-docs +make -j$(nproc) + +echo "==== install ndctl ====" +make -j$(nproc) install + +echo "==== cleanup ====" + +fi + +cd .. +rm -rf ndctl diff --git a/src/pmdk/utils/docker/images/install-valgrind.sh b/src/pmdk/utils/docker/images/install-valgrind.sh new file mode 100755 index 000000000..97babcb67 --- /dev/null +++ b/src/pmdk/utils/docker/images/install-valgrind.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# install-valgrind.sh - installs valgrind for persistent memory +# + +set -e + +OS=$1 + +install_upstream_from_distro() { + case "$OS" in + fedora) dnf install -y valgrind ;; + ubuntu) apt-get install -y --no-install-recommends valgrind ;; + *) return 1 ;; + esac +} + +install_upstream_3_16_1() { + git clone git://sourceware.org/git/valgrind.git + cd valgrind + # valgrind v3.16.1 upstream + git checkout VALGRIND_3_16_BRANCH + ./autogen.sh + ./configure + make -j$(nproc) + make -j$(nproc) install + cd .. + rm -rf valgrind +} + +install_custom-pmem_from_source() { + git clone https://github.com/pmem/valgrind.git + cd valgrind + # valgrind v3.15 with pmemcheck + # 2020.04.01 Merge pull request #78 from marcinslusarz/opt3 + git checkout 759686fd66cc0105df8311cfe676b0b2f9e89196 + ./autogen.sh + ./configure + make -j$(nproc) + make -j$(nproc) install + cd .. + rm -rf valgrind +} + +ARCH=$(uname -m) +case "$ARCH" in + ppc64le) install_upstream_3_16_1 ;; + *) install_custom-pmem_from_source ;; +esac diff --git a/src/pmdk/utils/docker/images/push-image.sh b/src/pmdk/utils/docker/images/push-image.sh new file mode 100755 index 000000000..03b935213 --- /dev/null +++ b/src/pmdk/utils/docker/images/push-image.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation + +# +# push-image.sh - pushes the Docker image to the Docker Hub. +# +# The script utilizes $DOCKERHUB_USER and $DOCKERHUB_PASSWORD variables +# to log in to Docker Hub. The variables can be set in the Travis project's +# configuration for automated builds. 
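+#
+# Example invocation (values are illustrative; on CI these variables are
+# already set by the environment and by set-ci-vars.sh):
+#
+#	DOCKERHUB_USER=user DOCKERHUB_PASSWORD=secret DOCKERHUB_REPO=pmem/pmdk \
+#	OS=ubuntu OS_VER=19.10 CI_CPU_ARCH=x86_64 ./push-image.sh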
+#
+
+set -e
+
+source $(dirname $0)/../set-ci-vars.sh
+
+if [[ -z "$OS" ]]; then
+	echo "OS environment variable is not set"
+	exit 1
+fi
+
+if [[ -z "$OS_VER" ]]; then
+	echo "OS_VER environment variable is not set"
+	exit 1
+fi
+
+if [[ -z "$CI_CPU_ARCH" ]]; then
+	echo "CI_CPU_ARCH environment variable is not set"
+	exit 1
+fi
+
+if [[ -z "${DOCKERHUB_REPO}" ]]; then
+	echo "DOCKERHUB_REPO environment variable is not set"
+	exit 1
+fi
+
+TAG="1.10-${OS}-${OS_VER}-${CI_CPU_ARCH}"
+
+# Check if the image tagged with pmdk/OS-VER exists locally
+if [[ ! $(docker images -a | awk -v pattern="^${DOCKERHUB_REPO}:${TAG}\$" \
+	'$1":"$2 ~ pattern') ]]
+then
+	echo "ERROR: Docker image tagged ${DOCKERHUB_REPO}:${TAG} does not exist locally."
+	exit 1
+fi
+
+# Log in to the Docker Hub
+docker login -u="$DOCKERHUB_USER" -p="$DOCKERHUB_PASSWORD"
+
+# Push the image to the repository
+docker push ${DOCKERHUB_REPO}:${TAG}
diff --git a/src/pmdk/utils/docker/ppc64le.blacklist b/src/pmdk/utils/docker/ppc64le.blacklist
new file mode 100644
index 000000000..6002ad91f
--- /dev/null
+++ b/src/pmdk/utils/docker/ppc64le.blacklist
@@ -0,0 +1,19 @@
+ex_librpmem_basic
+ex_librpmem_hello
+ex_librpmem_manpage
+libpmempool_rm_remote
+obj_basic_integration
+obj_check_remote
+obj_ctl_debug
+obj_mem
+obj_memcheck_register
+obj_pmalloc_mt
+obj_rpmem_basic_integration
+obj_rpmem_heap_interrupt
+obj_rpmem_heap_state
+obj_ulog_size
+pmempool_create
+pmempool_sync_remote
+pmempool_transform_remote
+rpmem_basic
+rpmem_fip
diff --git a/src/pmdk/utils/docker/prepare-for-build.sh b/src/pmdk/utils/docker/prepare-for-build.sh
new file mode 100755
index 000000000..f18c18c68
--- /dev/null
+++ b/src/pmdk/utils/docker/prepare-for-build.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# prepare-for-build.sh - is called inside a Docker container; prepares
+#                        the environment inside a Docker container for
+#                        running a build of the PMDK project.
+#
+
+set -e
+
+# This should be run only on CIs
+if [ "$CI_RUN" == "YES" ]; then
+	# Make sure $WORKDIR has correct access rights
+	# - set them to the current UID and GID
+	echo $USERPASS | sudo -S chown -R $(id -u):$(id -g) $WORKDIR
+fi
+
+# Configure tests (e.g. ssh for remote tests) unless the current configuration
+# should be preserved
+KEEP_TEST_CONFIG=${KEEP_TEST_CONFIG:-0}
+if [[ "$KEEP_TEST_CONFIG" == 0 ]]; then
+	./configure-tests.sh
+fi
diff --git a/src/pmdk/utils/docker/pull-or-rebuild-image.sh b/src/pmdk/utils/docker/pull-or-rebuild-image.sh
new file mode 100755
index 000000000..a3a23cdd1
--- /dev/null
+++ b/src/pmdk/utils/docker/pull-or-rebuild-image.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# pull-or-rebuild-image.sh - rebuilds the Docker image used in the
+#                            current Travis build if necessary.
+#
+# The script rebuilds the Docker image if the Dockerfile for the current
+# OS version (Dockerfile.${OS}-${OS_VER}) or any .sh script from the directory
+# with Dockerfiles were modified and committed.
+#
+# If the Travis build is not of the "pull_request" type (i.e. in case of
+# merge after pull_request) and it succeeds, the Docker image should be pushed
+# to the Docker Hub repository. An empty file is created to signal that to
+# further scripts.
+#
+# If the Docker image does not have to be rebuilt, it will be pulled from
+# Docker Hub.
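+#
+# Example invocation (values are illustrative; the remaining CI variables
+# are filled in by the sourced set-ci-vars.sh and set-vars.sh):
+#
+#	OS=ubuntu OS_VER=19.10 HOST_WORKDIR=/path/to/pmdk ./pull-or-rebuild-image.sh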
+# + +set -e + +source $(dirname $0)/set-ci-vars.sh +source $(dirname $0)/set-vars.sh + +if [[ "$CI_EVENT_TYPE" != "cron" && "$CI_BRANCH" != "coverity_scan" \ + && "$COVERITY" -eq 1 ]]; then + echo "INFO: Skip Coverity scan job if build is triggered neither by " \ + "'cron' nor by a push to 'coverity_scan' branch" + exit 0 +fi + +if [[ ( "$CI_EVENT_TYPE" == "cron" || "$CI_BRANCH" == "coverity_scan" )\ + && "$COVERITY" -ne 1 ]]; then + echo "INFO: Skip regular jobs if build is triggered either by 'cron'" \ + " or by a push to 'coverity_scan' branch" + exit 0 +fi + +if [[ -z "$OS" || -z "$OS_VER" ]]; then + echo "ERROR: The variables OS and OS_VER have to be set properly " \ + "(eg. OS=ubuntu, OS_VER=16.04)." + exit 1 +fi + +if [[ -z "$HOST_WORKDIR" ]]; then + echo "ERROR: The variable HOST_WORKDIR has to contain a path to " \ + "the root of the PMDK project on the host machine" + exit 1 +fi + +# Find all the commits for the current build +if [ -n "$CI_COMMIT_RANGE" ]; then + commits=$(git rev-list $CI_COMMIT_RANGE) +else + commits=$CI_COMMIT +fi + +echo "Commits in the commit range:" +for commit in $commits; do echo $commit; done + +# Get the list of files modified by the commits +files=$(for commit in $commits; do git diff-tree --no-commit-id --name-only \ + -r $commit; done | sort -u) +echo "Files modified within the commit range:" +for file in $files; do echo $file; done + +# Path to directory with Dockerfiles and image building scripts +images_dir_name=images +base_dir=utils/docker/$images_dir_name + +# Check if committed file modifications require the Docker image to be rebuilt +for file in $files; do + # Check if modified files are relevant to the current build + if [[ $file =~ ^($base_dir)\/Dockerfile\.($OS)-($OS_VER)$ ]] \ + || [[ $file =~ ^($base_dir)\/.*\.sh$ ]] + then + # Rebuild Docker image for the current OS version + echo "Rebuilding the Docker image for the Dockerfile.$OS-$OS_VER" + pushd $images_dir_name + ./build-image.sh ${OS}-${OS_VER} ${CI_CPU_ARCH} + popd + + # Check if the image has to be pushed to Docker Hub + # (i.e. the build is triggered by commits to the $GITHUB_REPO + # repository's stable-* or master branch, and the Travis build is not + # of the "pull_request" type). In that case, create the empty + # file. + if [[ "$CI_REPO_SLUG" == "$GITHUB_REPO" \ + && ($CI_BRANCH == stable-* || $CI_BRANCH == devel-* || $CI_BRANCH == master) \ + && $CI_EVENT_TYPE != "pull_request" \ + && $PUSH_IMAGE == "1" ]] + then + echo "The image will be pushed to Docker Hub" + touch $CI_FILE_PUSH_IMAGE_TO_REPO + else + echo "Skip pushing the image to Docker Hub" + fi + + if [[ $PUSH_IMAGE == "1" ]] + then + echo "Skip build package check if image has to be pushed" + touch $CI_FILE_SKIP_BUILD_PKG_CHECK + fi + exit 0 + fi +done + +# Getting here means rebuilding the Docker image is not required. +# Pull the image from Docker Hub. +docker pull ${DOCKERHUB_REPO}:1.10-${OS}-${OS_VER}-${CI_CPU_ARCH} diff --git a/src/pmdk/utils/docker/run-build-package.sh b/src/pmdk/utils/docker/run-build-package.sh new file mode 100755 index 000000000..5141904aa --- /dev/null +++ b/src/pmdk/utils/docker/run-build-package.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2019, Intel Corporation + +# +# run-build-package.sh - is called inside a Docker container; prepares +# the environment and starts a build of PMDK project. 
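+#
+# Example invocation (illustrative; inside the container WORKDIR and
+# PACKAGE_MANAGER are normally already set, cf. the Dockerfiles above):
+#
+#	WORKDIR=/pmdk PACKAGE_MANAGER=dpkg ./run-build-package.sh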
+#
+
+set -e
+
+# Prepare build environment
+./prepare-for-build.sh
+
+# Create fake tag, so that the package has a proper 'version' field
+git config user.email "test@package.com"
+git config user.name "test package"
+git tag -a 1.4.99 -m "1.4" HEAD~1 || true
+
+# Build all and run tests
+cd $WORKDIR
+export PCHECK_OPTS="-j2 BLACKLIST_FILE=${BLACKLIST_FILE}"
+make -j$(nproc) $PACKAGE_MANAGER
+
+# Install packages
+if [[ "$PACKAGE_MANAGER" == "dpkg" ]]; then
+	cd $PACKAGE_MANAGER
+	echo $USERPASS | sudo -S dpkg --install *.deb
+else
+	RPM_ARCH=$(uname -m)
+	cd $PACKAGE_MANAGER/$RPM_ARCH
+	echo $USERPASS | sudo -S rpm --install *.rpm
+fi
+
+# Compile and run standalone test
+cd $WORKDIR/utils/docker/test_package
+make -j$(nproc) LIBPMEMOBJ_MIN_VERSION=1.4
+./test_package testfile1
+
+# Use pmreorder installed in the system
+pmreorder_version="$(pmreorder -v)"
+pmreorder_pattern="pmreorder\.py .+$"
+(echo "$pmreorder_version" | grep -Ev "$pmreorder_pattern") && echo "pmreorder version failed" && exit 1
+
+touch testfile2
+touch logfile1
+pmreorder -p testfile2 -l logfile1
diff --git a/src/pmdk/utils/docker/run-build.sh b/src/pmdk/utils/docker/run-build.sh
new file mode 100755
index 000000000..78edf09ec
--- /dev/null
+++ b/src/pmdk/utils/docker/run-build.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+
+#
+# run-build.sh - is called inside a Docker container; prepares the environment
+#                and starts a build of the PMDK project.
+#
+
+set -e
+
+# Prepare build environment
+./prepare-for-build.sh
+
+# Build all and run tests
+cd $WORKDIR
+if [ "$SRC_CHECKERS" != "0" ]; then
+	make -j$(nproc) check-license
+	make -j$(nproc) cstyle
+fi
+
+make -j$(nproc)
+make -j$(nproc) test
+# do not change -j2 to -j$(nproc) in case of tests (make check/pycheck)
+make -j2 pcheck TEST_BUILD=$TEST_BUILD
+# do not change -j2 to -j$(nproc) in case of tests (make check/pycheck)
+make -j2 pycheck
+make -j$(nproc) DESTDIR=/tmp source
+
+# Create PR with generated docs
+if [[ "$AUTO_DOC_UPDATE" == "1" ]]; then
+	echo "Running auto doc update"
+	./utils/docker/run-doc-update.sh
+fi
diff --git a/src/pmdk/utils/docker/run-coverage.sh b/src/pmdk/utils/docker/run-coverage.sh
new file mode 100755
index 000000000..b6f7ed0a8
--- /dev/null
+++ b/src/pmdk/utils/docker/run-coverage.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2020, Intel Corporation
+
+#
+# run-coverage.sh - is called inside a Docker container; runs tests
+#                   to measure code coverage and sends the report to codecov.io
+#
+
+set -e
+
+# Get and prepare PMDK source
+./prepare-for-build.sh
+
+# Hush error messages, mainly from Valgrind
+export UT_DUMP_LINES=0
+
+# Skip printing mismatched files for tests with Valgrind
+export UT_VALGRIND_SKIP_PRINT_MISMATCHED=1
+
+# Build all and run tests
+cd $WORKDIR
+make -j$(nproc) COVERAGE=1
+make -j$(nproc) test COVERAGE=1
+
+# XXX: unfortunately valgrind reports issues in the coverage instrumentation,
+# which we have to ignore (-k flag); also, there is a dependency between
+# local and remote tests (which cannot be easily removed), so we have to
+# run local and remote tests separately
+cd src/test
+# do not change -j2 to -j$(nproc) in case of tests (make check/pycheck)
+make -kj2 pcheck-local-quiet TEST_BUILD=debug || true
+make check-remote-quiet TEST_BUILD=debug || true
+# do not change -j2 to -j$(nproc) in case of tests (make check/pycheck)
+make -j2 pycheck TEST_BUILD=debug || true
+cd ../..
+
+# prepare flag for codecov report to differentiate builds
+flag=tests
+[ -n "$GITHUB_ACTIONS" ] && flag=GHA
+[ -n "$TRAVIS" ] && flag=Travis
+
+# run gcov exe, using codecov's bash (remove parsed coverage files, set flag and exit 1 if not successful)
+/opt/scripts/codecov -c -F ${flag} -Z
+
+printf "check for any leftover gcov files\n"
+leftover_files=$(find . -name "*.gcov" | wc -l)
+if [[ $leftover_files -gt 0 ]]; then
+	# display found files and exit with error (they all should be parsed)
+	find . -name "*.gcov"
+	exit 1
+fi
diff --git a/src/pmdk/utils/docker/run-coverity.sh b/src/pmdk/utils/docker/run-coverity.sh
new file mode 100755
index 000000000..140fa753d
--- /dev/null
+++ b/src/pmdk/utils/docker/run-coverity.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2020, Intel Corporation
+
+#
+# run-coverity.sh - runs the Coverity scan build
+#
+
+set -e
+
+if [[ "$CI_REPO_SLUG" != "$GITHUB_REPO" \
+	&& ( "$COVERITY_SCAN_NOTIFICATION_EMAIL" == "" \
+		|| "$COVERITY_SCAN_TOKEN" == "" ) ]]; then
+	echo
+	echo "Skipping Coverity build:"\
+		"COVERITY_SCAN_TOKEN=\"$COVERITY_SCAN_TOKEN\" or"\
+		"COVERITY_SCAN_NOTIFICATION_EMAIL="\
+		"\"$COVERITY_SCAN_NOTIFICATION_EMAIL\" is not set"
+	exit 0
+fi
+
+# Prepare build environment
+./prepare-for-build.sh
+
+CERT_FILE=/etc/ssl/certs/ca-certificates.crt
+TEMP_CF=$(mktemp)
+cp $CERT_FILE $TEMP_CF
+
+# Download Coverity certificate
+echo -n | openssl s_client -connect scan.coverity.com:443 | \
+	sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | \
+	tee -a $TEMP_CF
+
+echo $USERPASS | sudo -S mv $TEMP_CF $CERT_FILE
+
+export COVERITY_SCAN_PROJECT_NAME="$CI_REPO_SLUG"
+[[ "$CI_EVENT_TYPE" == "cron" ]] \
+	&& export COVERITY_SCAN_BRANCH_PATTERN="master" \
+	|| export COVERITY_SCAN_BRANCH_PATTERN="coverity_scan"
+export COVERITY_SCAN_BUILD_COMMAND="make -j$(nproc) all"
+
+cd $WORKDIR
+
+#
+# Run the Coverity scan
+#
+
+# The 'travisci_build_coverity_scan.sh' script requires the following
+# environment variables to be set:
+# - TRAVIS_BRANCH - has to contain the name of the current branch
+# - TRAVIS_PULL_REQUEST - has to be set to 'true' in case of pull requests
+#
+export TRAVIS_BRANCH=${CI_BRANCH}
+[ "${CI_EVENT_TYPE}" == "pull_request" ] && export TRAVIS_PULL_REQUEST="true"
+
+# XXX: Patch the Coverity script.
+# Recently, this script regularly exits with an error, even though
+# the build is successfully submitted. Probably because the status code
+# is missing in the response, or it's not 201.
+# Changes:
+# 1) change the expected status code to 200 and
+# 2) print the full response string.
+#
+# This change should be reverted when the Coverity script is fixed.
+# +# The previous version was: +# curl -s https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh | bash + +wget https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh +patch < utils/docker/0001-travis-fix-travisci_build_coverity_scan.sh.patch +bash ./travisci_build_coverity_scan.sh diff --git a/src/pmdk/utils/docker/run-doc-update.sh b/src/pmdk/utils/docker/run-doc-update.sh new file mode 100755 index 000000000..132315eb8 --- /dev/null +++ b/src/pmdk/utils/docker/run-doc-update.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019-2020, Intel Corporation + +set -e + +source `dirname $0`/valid-branches.sh + +BOT_NAME="pmem-bot" +USER_NAME="pmem" +REPO_NAME="pmdk" + +ORIGIN="https://${DOC_UPDATE_GITHUB_TOKEN}@github.com/${BOT_NAME}/${REPO_NAME}" +UPSTREAM="https://github.com/${USER_NAME}/${REPO_NAME}" +# master or stable-* branch +TARGET_BRANCH=${CI_BRANCH} +VERSION=${TARGET_BRANCHES[$TARGET_BRANCH]} + +if [ -z $VERSION ]; then + echo "Target location for branch $TARGET_BRANCH is not defined." + exit 1 +fi + +# Clone bot repo +git clone ${ORIGIN} +cd ${REPO_NAME} +git remote add upstream ${UPSTREAM} + +git config --local user.name ${BOT_NAME} +git config --local user.email "pmem-bot@intel.com" + +git remote update +git checkout -B ${TARGET_BRANCH} upstream/${TARGET_BRANCH} + +# Copy man & PR web md +cd ./doc +make -j$(nproc) web +cd .. + +mv ./doc/web_linux ../ +mv ./doc/web_windows ../ +mv ./doc/generated/libs_map.yml ../ + +# Checkout gh-pages and copy docs +GH_PAGES_NAME="gh-pages-for-${TARGET_BRANCH}" +git checkout -B $GH_PAGES_NAME upstream/gh-pages +git clean -dfx + +rsync -a ../web_linux/ ./manpages/linux/${VERSION}/ +rsync -a ../web_windows/ ./manpages/windows/${VERSION}/ \ + --exclude='librpmem' \ + --exclude='rpmemd' --exclude='pmreorder' \ + --exclude='daxio' + +rm -r ../web_linux +rm -r ../web_windows + +if [ $TARGET_BRANCH = "master" ]; then + [ ! -d _data ] && mkdir _data + cp ../libs_map.yml _data +fi + +# Add and push changes. +# git commit command may fail if there is nothing to commit. +# In that case we want to force push anyway (there might be open pull request +# with changes which were reverted). +git add -A +git commit -m "doc: automatic gh-pages docs update" && true +git push -f ${ORIGIN} $GH_PAGES_NAME + +GITHUB_TOKEN=${DOC_UPDATE_GITHUB_TOKEN} hub pull-request -f \ + -b ${USER_NAME}:gh-pages \ + -h ${BOT_NAME}:${GH_PAGES_NAME} \ + -m "doc: automatic gh-pages docs update" && true + +exit 0 diff --git a/src/pmdk/utils/docker/set-ci-vars.sh b/src/pmdk/utils/docker/set-ci-vars.sh new file mode 100755 index 000000000..af41a753e --- /dev/null +++ b/src/pmdk/utils/docker/set-ci-vars.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +# +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2020, Intel Corporation + +# +# set-ci-vars.sh -- set CI variables common for both: +# Travis and GitHub Actions CIs +# + +set -e + +function get_commit_range_from_last_merge { + # get commit id of the last merge + LAST_MERGE=$(git log --merges --pretty=%H -1) + LAST_COMMIT=$(git log --pretty=%H -1) + if [ "$LAST_MERGE" == "$LAST_COMMIT" ]; then + # GitHub Actions commits its own merge in case of pull requests + # so the first merge commit has to be skipped. 
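+		# i.e. take the second-newest merge commit ('git log --merges -2'
+		# lists the two newest merges, newest first, so 'tail -n1' picks
+		# the older of the two)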
+ LAST_MERGE=$(git log --merges --pretty=%H -2 | tail -n1) + fi + if [ "$LAST_MERGE" == "" ]; then + # possible in case of shallow clones + # or new repos with no merge commits yet + # - pick up the first commit + LAST_MERGE=$(git log --pretty=%H | tail -n1) + fi + COMMIT_RANGE="$LAST_MERGE..HEAD" + # make sure it works now + if ! git rev-list $COMMIT_RANGE >/dev/null; then + COMMIT_RANGE="" + fi + echo $COMMIT_RANGE +} + +COMMIT_RANGE_FROM_LAST_MERGE=$(get_commit_range_from_last_merge) + +if [ -n "$TRAVIS" ]; then + CI_COMMIT=$TRAVIS_COMMIT + CI_COMMIT_RANGE="${TRAVIS_COMMIT_RANGE/.../..}" + CI_BRANCH=$TRAVIS_BRANCH + CI_EVENT_TYPE=$TRAVIS_EVENT_TYPE + CI_REPO_SLUG=$TRAVIS_REPO_SLUG + + # CI_COMMIT_RANGE is usually invalid for force pushes - fix it when used + # with non-upstream repository + if [ -n "$CI_COMMIT_RANGE" -a "$CI_REPO_SLUG" != "$GITHUB_REPO" ]; then + if ! git rev-list $CI_COMMIT_RANGE; then + CI_COMMIT_RANGE=$COMMIT_RANGE_FROM_LAST_MERGE + fi + fi + + case "$TRAVIS_CPU_ARCH" in + "amd64") + CI_CPU_ARCH="x86_64" + ;; + *) + CI_CPU_ARCH=$TRAVIS_CPU_ARCH + ;; + esac + +elif [ -n "$GITHUB_ACTIONS" ]; then + CI_COMMIT=$GITHUB_SHA + CI_COMMIT_RANGE=$COMMIT_RANGE_FROM_LAST_MERGE + CI_BRANCH=$(echo $GITHUB_REF | cut -d'/' -f3) + CI_REPO_SLUG=$GITHUB_REPOSITORY + CI_CPU_ARCH="x86_64" # GitHub Actions supports only x86_64 + + case "$GITHUB_EVENT_NAME" in + "schedule") + CI_EVENT_TYPE="cron" + ;; + *) + CI_EVENT_TYPE=$GITHUB_EVENT_NAME + ;; + esac + +else + CI_COMMIT=$(git log --pretty=%H -1) + CI_COMMIT_RANGE=$COMMIT_RANGE_FROM_LAST_MERGE + CI_CPU_ARCH="x86_64" +fi + +export CI_COMMIT=$CI_COMMIT +export CI_COMMIT_RANGE=$CI_COMMIT_RANGE +export CI_BRANCH=$CI_BRANCH +export CI_EVENT_TYPE=$CI_EVENT_TYPE +export CI_REPO_SLUG=$CI_REPO_SLUG +export CI_CPU_ARCH=$CI_CPU_ARCH + +echo CI_COMMIT=$CI_COMMIT +echo CI_COMMIT_RANGE=$CI_COMMIT_RANGE +echo CI_BRANCH=$CI_BRANCH +echo CI_EVENT_TYPE=$CI_EVENT_TYPE +echo CI_REPO_SLUG=$CI_REPO_SLUG +echo CI_CPU_ARCH=$CI_CPU_ARCH diff --git a/src/pmdk/utils/docker/set-vars.sh b/src/pmdk/utils/docker/set-vars.sh new file mode 100755 index 000000000..9a531a840 --- /dev/null +++ b/src/pmdk/utils/docker/set-vars.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019, Intel Corporation + +# +# set-vars.sh - set required environment variables +# + +set -e + +export CI_FILE_PUSH_IMAGE_TO_REPO=/tmp/push_image_to_repo_flag +export CI_FILE_SKIP_BUILD_PKG_CHECK=/tmp/skip_build_package_check diff --git a/src/pmdk/utils/docker/test_package/.gitignore b/src/pmdk/utils/docker/test_package/.gitignore new file mode 100644 index 000000000..7e564dc4e --- /dev/null +++ b/src/pmdk/utils/docker/test_package/.gitignore @@ -0,0 +1 @@ +test_package diff --git a/src/pmdk/utils/docker/test_package/Makefile b/src/pmdk/utils/docker/test_package/Makefile new file mode 100644 index 000000000..f5970aac3 --- /dev/null +++ b/src/pmdk/utils/docker/test_package/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018, Intel Corporation + +check_package = $(shell pkg-config $(1) && echo y || echo n) + +HAS_LIBPMEMOBJ := $(call check_package, libpmemobj --atleast-version $(LIBPMEMOBJ_MIN_VERSION) --print-errors) + +LIBS += $(shell pkg-config --libs libpmemobj) +CFLAGS += $(shell pkg-config --cflags libpmemobj) + +ifeq ($(HAS_LIBPMEMOBJ),n) +$(error libpmemobj(version >= $(LIBPMEMOBJ_MIN_VERSION)) is missing) +endif + +test_package: test_package.c + $(CC) test_package.c $(LIBS) $(CFLAGS) -o $@ + +clean: + rm -f 
test_package
+
+.PHONY: clean
diff --git a/src/pmdk/utils/docker/test_package/README b/src/pmdk/utils/docker/test_package/README
new file mode 100644
index 000000000..de81a6a5b
--- /dev/null
+++ b/src/pmdk/utils/docker/test_package/README
@@ -0,0 +1,6 @@
+Persistent Memory Development Kit
+
+This is utils/docker/test_package/README.
+
+This directory contains a simple application which uses libpmemobj.
+It can be used to test whether libpmemobj was installed properly.
diff --git a/src/pmdk/utils/docker/test_package/test_package.c b/src/pmdk/utils/docker/test_package/test_package.c
new file mode 100644
index 000000000..cf1e7eb8b
--- /dev/null
+++ b/src/pmdk/utils/docker/test_package/test_package.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2018, Intel Corporation */
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <libpmemobj.h>
+
+#define LAYOUT_NAME "test"
+
+struct my_root {
+	int foo;
+};
+
+int
+main(int argc, char *argv[])
+{
+	if (argc < 2) {
+		printf("usage: %s file-name\n", argv[0]);
+		return 1;
+	}
+
+	const char *path = argv[1];
+
+	PMEMobjpool *pop = pmemobj_create(path, LAYOUT_NAME,
+		PMEMOBJ_MIN_POOL, S_IWUSR | S_IRUSR);
+
+	if (pop == NULL) {
+		printf("failed to create pool\n");
+		return 1;
+	}
+
+	PMEMoid root = pmemobj_root(pop, sizeof(struct my_root));
+	struct my_root *rootp = pmemobj_direct(root);
+
+	rootp->foo = 10;
+	pmemobj_persist(pop, &rootp->foo, sizeof(rootp->foo));
+
+	pmemobj_close(pop);
+
+	return 0;
+}
diff --git a/src/pmdk/utils/docker/valid-branches.sh b/src/pmdk/utils/docker/valid-branches.sh
new file mode 100755
index 000000000..b111c2a3d
--- /dev/null
+++ b/src/pmdk/utils/docker/valid-branches.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2020, Intel Corporation
+
+declare -A TARGET_BRANCHES=( \
+		["master"]="master" \
+		["stable-1.5"]="v1.5" \
+		["stable-1.6"]="v1.6" \
+		["stable-1.7"]="v1.7" \
+		["stable-1.8"]="v1.8" \
+		["stable-1.9"]="v1.9" \
+		)
diff --git a/src/pmdk/utils/get_aliases.sh b/src/pmdk/utils/get_aliases.sh
new file mode 100755
index 000000000..87978e2bc
--- /dev/null
+++ b/src/pmdk/utils/get_aliases.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2020, Intel Corporation
+#
+
+#
+# get_aliases.sh -- generate a map of manual functions and libraries
+#
+# usage: run from the /pmdk/doc/generated location without parameters:
+# ./../../utils/get_aliases.sh
+#
+# This script searches man pages from section 7, then
+# takes all functions from each section using a specified pattern,
+# and at the end assigns to every function its real markdown file
+# representation, based on the *.gz file content
+#
+# The generated libs_map.yml file is used on gh-pages
+# to handle functions and their aliases
+#
+
+list=("$@")
+man_child=("$@")
+
+function search_aliases {
+children=$1
+parent=$2
+for i in ${children[@]}
+do
+	if [ -e ../$parent/$i ]
+	then
+		echo "Man: $i"
+		content=$(head -c 150 ../$parent/$i)
+		if [[ "$content" == ".so "* ]] ;
+		then
+			content=$(basename ${content#".so"})
+			i="${i%.*}"
+			echo "  $i: $content" >> $map_file
+		else
+			r="${i%.*}"
+			echo "  $r: $i" >> $map_file
+		fi
+	fi
+done
+}
+
+function list_pages {
+	parent="${1%.*}"
+	list=("$@")
+	man_child=("$@")
+
+	if [ "$parent" == "libpmem" ]; then
+		man_child=($(ls -1 ../libpmem | grep -e ".*\.3$"))
+		echo -n "- $parent: " >> $map_file
+		echo "${man_child[@]}" >> $map_file
+	fi
+
+	if [ "$parent" == "libpmem2" ]; then
+		man_child=($(ls -1 ../libpmem2 | grep -e ".*\.3$"))
+		echo -n "- $parent: " >>
$map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ "$parent" == "libpmemblk" ]; then + man_child=($(ls -1 ../libpmemblk | grep -e ".*\.3$")) + echo -n "- $parent: " >> $map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ "$parent" == "libpmemlog" ]; then + man_child=($(ls -1 ../libpmemlog | grep -e ".*\.3$")) + echo -n "- $parent: " >> $map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ "$parent" == "libpmemobj" ]; then + man_child=($(ls -1 ../libpmemobj | grep -e ".*\.3$")) + echo -n "- $parent: " >> $map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ "$parent" == "libpmempool" ]; then + man_child=($(ls -1 ../libpmempool | grep -e ".*\.3$")) + echo -n "- $parent: " >> $map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ "$parent" == "librpmem" ]; then + man_child=($(ls -1 ../librpmem | grep -e ".*\.3$")) + echo -n "- $parent: " >> $map_file + echo "${man_child[@]}" >> $map_file + fi + + if [ ${#man_child[@]} -ne 0 ] + then + list=${man_child[@]} + search_aliases "${list[@]}" "$parent" + fi +} + +man7=($(ls -1 ../*/ | grep -e ".*\.7$")) + +map_file=libs_map.yml +[ -e $map_file ] && rm $map_file +touch $map_file + +for i in "${man7[@]}" +do +echo "Library: $i" + list_pages $i +done diff --git a/src/pmdk/utils/git-years b/src/pmdk/utils/git-years new file mode 100755 index 000000000..d54018ddf --- /dev/null +++ b/src/pmdk/utils/git-years @@ -0,0 +1,8 @@ +#!/bin/sh +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2019, Intel Corporation + +# git-years -- calculate the range of years for a given file from git + +git log --pretty='%aI %aE' "$@"|grep '@intel\.com'|cut -d- -f1|sort| + sed '$p;2,$d'|uniq|tr '\n' -|sed 's/-$//' diff --git a/src/pmdk/utils/libpmem.pc.in b/src/pmdk/utils/libpmem.pc.in new file mode 100644 index 000000000..850b28198 --- /dev/null +++ b/src/pmdk/utils/libpmem.pc.in @@ -0,0 +1,9 @@ +includedir=${prefix}/include + +Name: libpmem +Description: libpmem library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires: +Libs: -L${libdir} -lpmem +Cflags: -I${includedir} diff --git a/src/pmdk/utils/libpmem2.pc.in b/src/pmdk/utils/libpmem2.pc.in new file mode 100644 index 000000000..d4ddc907c --- /dev/null +++ b/src/pmdk/utils/libpmem2.pc.in @@ -0,0 +1,9 @@ +includedir=${prefix}/include + +Name: libpmem2 +Description: libpmem2 library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires: +Libs: -L${libdir} -lpmem2 +Cflags: -I${includedir} diff --git a/src/pmdk/utils/libpmemblk.pc.in b/src/pmdk/utils/libpmemblk.pc.in new file mode 100644 index 000000000..5b0ffbf3e --- /dev/null +++ b/src/pmdk/utils/libpmemblk.pc.in @@ -0,0 +1,9 @@ +includedir=${prefix}/include + +Name: libpmemblk +Description: libpmemblk library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires.private: libpmem${rasdeps} +Libs: -L${libdir} -lpmemblk +Cflags: -I${includedir} diff --git a/src/pmdk/utils/libpmemlog.pc.in b/src/pmdk/utils/libpmemlog.pc.in new file mode 100644 index 000000000..f1658438b --- /dev/null +++ b/src/pmdk/utils/libpmemlog.pc.in @@ -0,0 +1,9 @@ +includedir=${prefix}/include + +Name: libpmemlog +Description: libpmemlog library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires.private: libpmem${rasdeps} +Libs: -L${libdir} -lpmemlog +Cflags: -I${includedir} diff --git a/src/pmdk/utils/libpmemobj.pc.in b/src/pmdk/utils/libpmemobj.pc.in new file mode 100644 index 000000000..12c396e9e --- /dev/null +++ b/src/pmdk/utils/libpmemobj.pc.in @@ -0,0 +1,10 @@ 
+includedir=${prefix}/include + +Name: libpmemobj +Description: libpmemobj library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires.private: libpmem${rasdeps} +Libs: -L${libdir} -lpmemobj +Libs.private: -ldl +Cflags: -I${includedir} diff --git a/src/pmdk/utils/libpmempool.pc.in b/src/pmdk/utils/libpmempool.pc.in new file mode 100644 index 000000000..660938a27 --- /dev/null +++ b/src/pmdk/utils/libpmempool.pc.in @@ -0,0 +1,10 @@ +includedir=${prefix}/include + +Name: libpmempool +Description: libpmempool library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires.private: libpmem${rasdeps} +Libs: -L${libdir} -lpmempool +Libs.private: -ldl +Cflags: -I${includedir} diff --git a/src/pmdk/utils/librpmem.pc.in b/src/pmdk/utils/librpmem.pc.in new file mode 100644 index 000000000..50bf1bb91 --- /dev/null +++ b/src/pmdk/utils/librpmem.pc.in @@ -0,0 +1,9 @@ +includedir=${prefix}/include + +Name: librpmem +Description: librpmem library from PMDK project +Version: ${version} +URL: https://pmem.io/pmdk +Requires: +Libs: -L${libdir} -lrpmem +Cflags: -I${includedir} diff --git a/src/pmdk/utils/magic-install.sh b/src/pmdk/utils/magic-install.sh new file mode 100644 index 000000000..3c65bab49 --- /dev/null +++ b/src/pmdk/utils/magic-install.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2017, Intel Corporation +# +# magic-install.sh -- Script for installing magic script +# +set -e + +if ! grep -q "File: pmdk" /etc/magic +then + echo "Appending PMDK magic to /etc/magic" + cat /usr/share/pmdk/pmdk.magic >> /etc/magic +else + echo "PMDK magic already exists" +fi diff --git a/src/pmdk/utils/magic-uninstall.sh b/src/pmdk/utils/magic-uninstall.sh new file mode 100644 index 000000000..6733c7e72 --- /dev/null +++ b/src/pmdk/utils/magic-uninstall.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2017, Intel Corporation +# +# magic-uninstall.sh -- Script for uninstalling magic script +# +set -e + +HDR_LOCAL=$(grep "File: pmdk" /etc/magic) +HDR_PKG=$(grep "File: pmdk" /usr/share/pmdk/pmdk.magic) + +if [[ $HDR_LOCAL == $HDR_PKG ]] +then + echo "Removing PMDK magic from /etc/magic" + HDR_LINE=$(grep -n "File: pmdk" /etc/magic | cut -f1 -d:) + HDR_PKG_LINE=$(grep -n "File: pmdk" /usr/share/pmdk/pmdk.magic | cut -f1 -d:) + HDR_LINES=$(cat /usr/share/pmdk/pmdk.magic | wc -l) + HDR_FIRST=$(($HDR_LINE - $HDR_PKG_LINE + 1)) + HDR_LAST=$(($HDR_FIRST + $HDR_LINES)) + sed -i "${HDR_FIRST},${HDR_LAST}d" /etc/magic +fi diff --git a/src/pmdk/utils/md2man.sh b/src/pmdk/utils/md2man.sh new file mode 100755 index 000000000..941bd3c96 --- /dev/null +++ b/src/pmdk/utils/md2man.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2016-2020, Intel Corporation +# + +# +# md2man.sh -- convert markdown to groff man pages +# +# usage: md2man.sh file template outfile +# +# This script converts markdown file into groff man page using pandoc. +# It performs some pre- and post-processing for better results: +# - uses m4 to preprocess OS-specific directives. See doc/macros.man. +# - parse input file for YAML metadata block and read man page title, +# section and version +# - cut-off metadata block and license +# - unindent code blocks +# - cut-off windows and web specific parts of documentation +# +# If the TESTOPTS variable is set, generates a preprocessed markdown file +# with the header stripped off for testing purposes. 
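+#
+# Example invocation (file names are illustrative):
+#
+#	./md2man.sh libpmem.7.md template.man libpmem.7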
+# + +set -e +set -o pipefail + +filename=$1 +template=$2 +outfile=$3 +title=`sed -n 's/^title:\ _MP(*\([A-Za-z0-9_-]*\).*$/\1/p' $filename` +section=`sed -n 's/^title:.*\([0-9]\))$/\1/p' $filename` +version=`sed -n 's/^date:\ *\(.*\)$/\1/p' $filename` + +if [ "$TESTOPTS" != "" ]; then + m4 $TESTOPTS macros.man $filename | sed -n -e '/# NAME #/,$p' > $outfile +else + OPTS= + +if [ "$WIN32" == 1 ]; then + OPTS="$OPTS -DWIN32" +else + OPTS="$OPTS -UWIN32" +fi + +if [ "$(uname -s)" == "FreeBSD" ]; then + OPTS="$OPTS -DFREEBSD" +else + OPTS="$OPTS -UFREEBSD" +fi + +if [ "$WEB" == 1 ]; then + OPTS="$OPTS -DWEB" + mkdir -p "$(dirname $outfile)" + m4 $OPTS macros.man $filename | sed -n -e '/---/,$p' > $outfile +else + SOURCE_DATE_EPOCH="${SOURCE_DATE_EPOCH:-$(date +%s)}" + COPYRIGHT=$(grep -rwI "\[comment]: <> (Copyright" $filename |\ + sed "s/\[comment\]: <> (\([^)]*\))/\1/") + dt=$(date -u -d "@$SOURCE_DATE_EPOCH" +%F 2>/dev/null || + date -u -r "$SOURCE_DATE_EPOCH" +%F 2>/dev/null || date -u +%F) + m4 $OPTS macros.man $filename | sed -n -e '/# NAME #/,$p' |\ + pandoc -s -t man -o $outfile --template=$template \ + -V title=$title -V section=$section \ + -V date="$dt" -V version="$version" \ + -V copyright="$COPYRIGHT" +fi +fi diff --git a/src/pmdk/utils/os-banned b/src/pmdk/utils/os-banned new file mode 100644 index 000000000..57d162551 --- /dev/null +++ b/src/pmdk/utils/os-banned @@ -0,0 +1,63 @@ +pthread_once +pthread_key_create +pthread_key_delete +pthread_setspecific +pthread_getspecific +pthread_mutex_init +pthread_mutex_destroy +pthread_mutex_lock +pthread_mutex_trylock +pthread_mutex_unlock +pthread_mutex_timedlock +pthread_rwlock_init +pthread_rwlock_destroy +pthread_rwlock_rdlock +pthread_rwlock_wrlock +pthread_rwlock_tryrdlock +pthread_rwlock_trywrlock +pthread_rwlock_unlock +pthread_rwlock_timedrdlock +pthread_rwlock_timedwrlock +pthread_spin_init +pthread_spin_destroy +pthread_spin_lock +pthread_spin_unlock +pthread_spin_trylock +pthread_cond_init +pthread_cond_destroy +pthread_cond_broadcast +pthread_cond_signal +pthread_cond_timedwait +pthread_cond_wait +pthread_create +pthread_join +cpu_zero +cpu_set +pthread_setaffinity_np +pthread_atfork +sem_init +sem_destroy +sem_wait +sem_trywait +sem_post +fsync +fsync_dir +open +stat +unlink +access +fopen +fdopen +chmod +mkstemp +posix_fallocate +ftruncate +flock +writev +clock_gettime +rand_r +unsetenv +setenv +getenv +strsignal +execv diff --git a/src/pmdk/utils/pkg-common.sh b/src/pmdk/utils/pkg-common.sh new file mode 100644 index 000000000..f3711bc59 --- /dev/null +++ b/src/pmdk/utils/pkg-common.sh @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2014-2019, Intel Corporation + +# +# pkg-common.sh - common functions and variables for building packages +# + +export LC_ALL="C" + +function error() { + echo -e "error: $@" +} + +function check_dir() { + if [ ! -d $1 ] + then + error "Directory '$1' does not exist." + exit 1 + fi +} + +function check_file() { + if [ ! -f $1 ] + then + error "File '$1' does not exist." 
+		exit 1
+	fi
+}
+
+function check_tool() {
+	local tool=$1
+	if [ -z "$(which $tool 2>/dev/null)" ]
+	then
+		error "'${tool}' not installed or not in PATH"
+		exit 1
+	fi
+}
+
+function get_version() {
+	echo -n $1 | sed "s/-rc/~rc/"
+}
+
+function get_os() {
+	if [ -f /etc/os-release ]
+	then
+		local OS=$(cat /etc/os-release | grep -m1 -o -P '(?<=NAME=).*($)')
+		[[ "$OS" =~ SLES|openSUSE ]] && echo -n "SLES_like" ||
+		([[ "$OS" =~ "Fedora"|"Red Hat"|"CentOS" ]] && echo -n "RHEL_like" || echo 1)
+	else
+		echo 1
+	fi
+}
+
+REGEX_DATE_AUTHOR="([a-zA-Z]{3} [a-zA-Z]{3} [0-9]{2} [0-9]{4})\s*(.*)"
+REGEX_MESSAGE_START="\s*\*\s*(.*)"
+REGEX_MESSAGE="\s*(\S.*)"
diff --git a/src/pmdk/utils/pkg-config.sh b/src/pmdk/utils/pkg-config.sh
new file mode 100644
index 000000000..7186fa2ee
--- /dev/null
+++ b/src/pmdk/utils/pkg-config.sh
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2014-2020, Intel Corporation
+
+# Name of package
+PACKAGE_NAME="pmdk"
+
+# Name and email of package maintainer
+PACKAGE_MAINTAINER="Piotr Balcer <piotr.balcer@intel.com>"
+
+# Brief description of the package
+PACKAGE_SUMMARY="Persistent Memory Development Kit"
+
+# Full description of the package
+PACKAGE_DESCRIPTION="The collection of libraries and utilities for Persistent Memory Programming"
+
+# Website
+PACKAGE_URL="https://pmem.io/pmdk"
diff --git a/src/pmdk/utils/pmdk.magic b/src/pmdk/utils/pmdk.magic
new file mode 100644
index 000000000..7409ba1ef
--- /dev/null
+++ b/src/pmdk/utils/pmdk.magic
@@ -0,0 +1,15 @@
+
+#------------------------------------------------------------------------------
+# $File: pmdk,v 1.2 2017/12/11 20:00:00
+# pmdk: file(1) magic for Persistent Memory Development Kit pool files
+#
+# The PMDK specific format of pool files.
+#
+# PMEM signature
+0	string	PMEM
+>4	string	POOLSET	Persistent Memory Poolset file
+>>11	search	REPLICA	with replica
+# Pool type signature
+>4	regex	LOG|BLK|OBJ	Persistent Memory Pool file, type: %s,
+# Major version number
+>>8	lelong	>0	version 0x%x
diff --git a/src/pmdk/utils/pmdk.spec.in b/src/pmdk/utils/pmdk.spec.in
new file mode 100644
index 000000000..094e9d0b4
--- /dev/null
+++ b/src/pmdk/utils/pmdk.spec.in
@@ -0,0 +1,710 @@
+
+# rpmbuild options:
+#   --with | --without fabric
+#   --with | --without ndctl
+#   --define _testconfig <path to custom testconfig.sh>
+#   --define _skip_check 1
+
+# do not terminate build if files in the $RPM_BUILD_ROOT
+# directory are not found in the %files (without rpmem case)
+%define _unpackaged_files_terminate_build 0
+
+# disable 'make check' on suse
+%if %{defined suse_version}
+	%define _skip_check 1
+	%define dist .suse%{suse_version}
+%endif
+
+# libfabric v1.4.2 is available on:
+# openSUSE Tumbleweed, Leap 15.0, Leap 42.3; SLE 12 SP3, 15
+# Fedora >=27; RHEL >=7.5
+%if (0%{?suse_version} > 1315) || (0%{?fedora} >= 27) || (0%{?rhel} >= 7)
+%bcond_without fabric
+%else
+%bcond_with fabric
+%endif
+
+%bcond_without ndctl
+
+%define min_libfabric_ver __LIBFABRIC_MIN_VER__
+%define min_ndctl_ver __NDCTL_MIN_VER__
+
+Name:		pmdk
+Version:	__VERSION__
+Release:	1%{?dist}
+Summary:	__PACKAGE_SUMMARY__
+Packager:	__PACKAGE_MAINTAINER__
+Group:		__GROUP_SYS_LIBS__
+License:	__LICENSE__
+URL:		https://pmem.io/pmdk
+
+Source0:	%{name}-%{version}.tar.gz
+
+BuildRequires:	gcc
+BuildRequires:	make
+BuildRequires:	glibc-devel
+BuildRequires:	autoconf
+BuildRequires:	automake
+BuildRequires:	man
+BuildRequires:	pkgconfig
+BuildRequires:	gdb
+
+# fdupes package is available only on 'openSUSE Tumbleweed' and 'openSUSE Leap 15.1'
+%if (0%{?suse_version} > 1500) || (0%{?sles_version} >= 150100 && 0%{?is_opensuse})
+BuildRequires:	fdupes
+%endif
+
+%if %{with ndctl}
+%if %{defined suse_version}
+BuildRequires:	libndctl-devel >= %{min_ndctl_ver}
+%else
+BuildRequires:	ndctl-devel >= %{min_ndctl_ver}
+BuildRequires:	daxctl-devel >= %{min_ndctl_ver}
+%endif
+%endif
+
+%if %{with fabric}
+BuildRequires:	libfabric-devel >= %{min_libfabric_ver}
+%endif
+
+
+# Debug variants of the libraries should be filtered out of the provides.
+%global __provides_exclude_from ^%{_libdir}/pmdk_debug/.*\\.so.*$
+
+# By design, PMDK does not support any 32-bit architecture.
+# Due to dependency on xmmintrin.h and some inline assembly, it can be
+# compiled only for x86_64 at the moment.
+# Other 64-bit architectures could also be supported, if only there is
+# a request for that, and if somebody provides the arch-specific
+# implementation of the low-level routines for flushing to persistent
+# memory.
+
+# https://bugzilla.redhat.com/show_bug.cgi?id=1340634
+# https://bugzilla.redhat.com/show_bug.cgi?id=1340635
+# https://bugzilla.redhat.com/show_bug.cgi?id=1340636
+# https://bugzilla.redhat.com/show_bug.cgi?id=1340637
+
+ExclusiveArch: x86_64 ppc64le
+
+%description
+The Persistent Memory Development Kit is a collection of libraries for
+using memory-mapped persistence, optimized specifically for persistent memory.
+
+
+%package -n libpmem2__PKG_NAME_SUFFIX__
+Summary: Low-level persistent memory support library
+Group: __GROUP_SYS_LIBS__
+%description -n libpmem2__PKG_NAME_SUFFIX__
+The libpmem2 provides low level persistent memory support. In particular,
+support for the persistent memory instructions for flushing changes
+to pmem is provided.
+
+%files -n libpmem2__PKG_NAME_SUFFIX__
+%defattr(-,root,root,-)
+%dir %{_datadir}/pmdk
+%{_libdir}/libpmem2.so.*
+%{_datadir}/pmdk/pmdk.magic
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmem2-devel
+Summary: Development files for the low-level persistent memory library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmem2__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmem2-devel
+The libpmem2 provides low level persistent memory support. In particular,
+support for the persistent memory instructions for flushing changes
+to pmem is provided.
+
+This library is provided for software which tracks every store to
+pmem and needs to flush those changes to durability. Most developers
+will find higher level libraries like libpmemobj to be much more
+convenient.
+
+%files -n libpmem2-devel
+%defattr(-,root,root,-)
+%{_libdir}/libpmem2.so
+%{_libdir}/pkgconfig/libpmem2.pc
+%{_includedir}/libpmem2.h
+%{_mandir}/man7/libpmem2.7.gz
+%{_mandir}/man3/pmem2_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmem2-debug
+Summary: Debug variant of the low-level persistent memory library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmem2__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmem2-debug
+The libpmem2 provides low level persistent memory support. In particular,
+support for the persistent memory instructions for flushing changes
+to pmem is provided.
+
+This sub-package contains debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
+ +%files -n libpmem2-debug +%defattr(-,root,root,-) +%dir %{_libdir}/pmdk_debug +%{_libdir}/pmdk_debug/libpmem2.so +%{_libdir}/pmdk_debug/libpmem2.so.* +%license LICENSE +%doc ChangeLog CONTRIBUTING.md README.md + + + +%package -n libpmem__PKG_NAME_SUFFIX__ +Summary: Low-level persistent memory support library +Group: __GROUP_SYS_LIBS__ +%description -n libpmem__PKG_NAME_SUFFIX__ +The libpmem provides low level persistent memory support. In particular, +support for the persistent memory instructions for flushing changes +to pmem is provided. + +%files -n libpmem__PKG_NAME_SUFFIX__ +%defattr(-,root,root,-) +%dir %{_datadir}/pmdk +%{_libdir}/libpmem.so.* +%{_datadir}/pmdk/pmdk.magic +%license LICENSE +%doc ChangeLog CONTRIBUTING.md README.md + + +%package -n libpmem-devel +Summary: Development files for the low-level persistent memory library +Group: __GROUP_DEV_LIBS__ +Requires: libpmem__PKG_NAME_SUFFIX__ = %{version}-%{release} +%description -n libpmem-devel +The libpmem provides low level persistent memory support. In particular, +support for the persistent memory instructions for flushing changes +to pmem is provided. + +This library is provided for software which tracks every store to +pmem and needs to flush those changes to durability. Most developers +will find higher level libraries like libpmemobj to be much more +convenient. + +%files -n libpmem-devel +%defattr(-,root,root,-) +%{_libdir}/libpmem.so +%{_libdir}/pkgconfig/libpmem.pc +%{_includedir}/libpmem.h +%{_mandir}/man7/libpmem.7.gz +%{_mandir}/man3/pmem_*.3.gz +%license LICENSE +%doc ChangeLog CONTRIBUTING.md README.md + + +%package -n libpmem-debug +Summary: Debug variant of the low-level persistent memory library +Group: __GROUP_DEV_LIBS__ +Requires: libpmem__PKG_NAME_SUFFIX__ = %{version}-%{release} +%description -n libpmem-debug +The libpmem provides low level persistent memory support. In particular, +support for the persistent memory instructions for flushing changes +to pmem is provided. + +This sub-package contains debug variant of the library, providing +run-time assertions and trace points. The typical way to access the +debug version is to set the environment variable LD_LIBRARY_PATH to +/usr/lib64/pmdk_debug. + +%files -n libpmem-debug +%defattr(-,root,root,-) +%dir %{_libdir}/pmdk_debug +%{_libdir}/pmdk_debug/libpmem.so +%{_libdir}/pmdk_debug/libpmem.so.* +%license LICENSE +%doc ChangeLog CONTRIBUTING.md README.md + + +%package -n libpmemblk__PKG_NAME_SUFFIX__ +Summary: Persistent Memory Resident Array of Blocks library +Group: __GROUP_SYS_LIBS__ +Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release} +%description -n libpmemblk__PKG_NAME_SUFFIX__ +The libpmemblk implements a pmem-resident array of blocks, all the same +size, where a block is updated atomically with respect to power +failure or program interruption (no torn blocks). + +%files -n libpmemblk__PKG_NAME_SUFFIX__ +%defattr(-,root,root,-) +%{_libdir}/libpmemblk.so.* +%license LICENSE +%doc ChangeLog CONTRIBUTING.md README.md + + +%package -n libpmemblk-devel +Summary: Development files for the Persistent Memory Resident Array of Blocks library +Group: __GROUP_DEV_LIBS__ +Requires: libpmemblk__PKG_NAME_SUFFIX__ = %{version}-%{release} +Requires: libpmem-devel = %{version}-%{release} +%description -n libpmemblk-devel +The libpmemblk implements a pmem-resident array of blocks, all the same +size, where a block is updated atomically with respect to power +failure or program interruption (no torn blocks). 
+
+%files -n libpmemblk-devel
+%defattr(-,root,root,-)
+%{_libdir}/libpmemblk.so
+%{_libdir}/pkgconfig/libpmemblk.pc
+%{_includedir}/libpmemblk.h
+%{_mandir}/man7/libpmemblk.7.gz
+%{_mandir}/man5/poolset.5.gz
+%{_mandir}/man3/pmemblk_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemblk-debug
+Summary: Debug variant of the Persistent Memory Resident Array of Blocks library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmemblk__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmemblk-debug
+The libpmemblk library implements a pmem-resident array of blocks, all
+the same size, where a block is updated atomically with respect to power
+failure or program interruption (no torn blocks).
+
+This sub-package contains the debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
+
+%files -n libpmemblk-debug
+%defattr(-,root,root,-)
+%dir %{_libdir}/pmdk_debug
+%{_libdir}/pmdk_debug/libpmemblk.so
+%{_libdir}/pmdk_debug/libpmemblk.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemlog__PKG_NAME_SUFFIX__
+Summary: Persistent Memory Resident Log File library
+Group: __GROUP_SYS_LIBS__
+Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+%description -n libpmemlog__PKG_NAME_SUFFIX__
+The libpmemlog library provides a pmem-resident log file. This is
+useful for programs like databases that append frequently to a log
+file.
+
+%files -n libpmemlog__PKG_NAME_SUFFIX__
+%defattr(-,root,root,-)
+%{_libdir}/libpmemlog.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemlog-devel
+Summary: Development files for the Persistent Memory Resident Log File library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmemlog__PKG_NAME_SUFFIX__ = %{version}-%{release}
+Requires: libpmem-devel = %{version}-%{release}
+%description -n libpmemlog-devel
+The libpmemlog library provides a pmem-resident log file. This
+library is provided for cases requiring an append-mostly file to
+record variable-length entries. Most developers will find higher-level
+libraries like libpmemobj to be more generally useful.
+
+%files -n libpmemlog-devel
+%defattr(-,root,root,-)
+%{_libdir}/libpmemlog.so
+%{_libdir}/pkgconfig/libpmemlog.pc
+%{_includedir}/libpmemlog.h
+%{_mandir}/man7/libpmemlog.7.gz
+%{_mandir}/man5/poolset.5.gz
+%{_mandir}/man3/pmemlog_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemlog-debug
+Summary: Debug variant of the Persistent Memory Resident Log File library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmemlog__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmemlog-debug
+The libpmemlog library provides a pmem-resident log file. This
+library is provided for cases requiring an append-mostly file to
+record variable-length entries. Most developers will find higher-level
+libraries like libpmemobj to be more generally useful.
+
+This sub-package contains the debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
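
To ground the libpmemlog descriptions above, a sketch of the append/walk cycle; the pool path is a placeholder and error handling is omitted:

/* sketch: append to a pmem-resident log and iterate over it */
#include <stdio.h>
#include <string.h>
#include <libpmemlog.h>

static int
print_chunk(const void *buf, size_t len, void *arg)
{
	fwrite(buf, 1, len, stdout);
	return 0;	/* returning 0 ends the walk; non-zero continues */
}

int
main(void)
{
	PMEMlogpool *plp = pmemlog_create("/pmem/log-example",
		PMEMLOG_MIN_POOL, 0666);	/* placeholder path */
	const char *entry = "appended atomically\n";

	pmemlog_append(plp, entry, strlen(entry));
	pmemlog_walk(plp, 0, print_chunk, NULL);	/* chunksize 0: all data */
	pmemlog_close(plp);
	return 0;
}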
+
+%files -n libpmemlog-debug
+%defattr(-,root,root,-)
+%dir %{_libdir}/pmdk_debug
+%{_libdir}/pmdk_debug/libpmemlog.so
+%{_libdir}/pmdk_debug/libpmemlog.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemobj__PKG_NAME_SUFFIX__
+Summary: Persistent Memory Transactional Object Store library
+Group: __GROUP_SYS_LIBS__
+Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+%description -n libpmemobj__PKG_NAME_SUFFIX__
+The libpmemobj library provides a transactional object store,
+providing memory allocation, transactions, and general facilities for
+persistent memory programming.
+
+%files -n libpmemobj__PKG_NAME_SUFFIX__
+%defattr(-,root,root,-)
+%{_libdir}/libpmemobj.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemobj-devel
+Summary: Development files for the Persistent Memory Transactional Object Store library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmemobj__PKG_NAME_SUFFIX__ = %{version}-%{release}
+Requires: libpmem-devel = %{version}-%{release}
+%description -n libpmemobj-devel
+The libpmemobj library provides a transactional object store,
+providing memory allocation, transactions, and general facilities for
+persistent memory programming. Developers new to persistent memory
+probably want to start with this library.
+
+%files -n libpmemobj-devel
+%defattr(-,root,root,-)
+%{_libdir}/libpmemobj.so
+%{_libdir}/pkgconfig/libpmemobj.pc
+%{_includedir}/libpmemobj.h
+%{_includedir}/libpmemobj/*.h
+%{_mandir}/man7/libpmemobj.7.gz
+%{_mandir}/man5/poolset.5.gz
+%{_mandir}/man3/pmemobj_*.3.gz
+%{_mandir}/man3/pobj_*.3.gz
+%{_mandir}/man3/oid_*.3.gz
+%{_mandir}/man3/toid*.3.gz
+%{_mandir}/man3/direct_*.3.gz
+%{_mandir}/man3/d_r*.3.gz
+%{_mandir}/man3/tx_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmemobj-debug
+Summary: Debug variant of the Persistent Memory Transactional Object Store library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmemobj__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmemobj-debug
+The libpmemobj library provides a transactional object store,
+providing memory allocation, transactions, and general facilities for
+persistent memory programming. Developers new to persistent memory
+probably want to start with this library.
+
+This sub-package contains the debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
+
+%files -n libpmemobj-debug
+%defattr(-,root,root,-)
+%dir %{_libdir}/pmdk_debug
+%{_libdir}/pmdk_debug/libpmemobj.so
+%{_libdir}/pmdk_debug/libpmemobj.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmempool__PKG_NAME_SUFFIX__
+Summary: Persistent Memory pool management library
+Group: __GROUP_SYS_LIBS__
+Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+%description -n libpmempool__PKG_NAME_SUFFIX__
+The libpmempool library provides a set of utilities for off-line
+administration, analysis, diagnostics and repair of persistent memory
+pools created by the libpmemlog, libpmemblk and libpmemobj libraries.
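
Before the libpmempool file lists, here is the kind of usage the libpmemobj packages above have in mind; a minimal sketch of a transactionally updated root object, with a placeholder path and layout name and no error handling:

/* sketch: transactional update of a root object with libpmemobj */
#include <stdint.h>
#include <libpmemobj.h>

struct my_root {
	uint64_t counter;
};

int
main(void)
{
	PMEMobjpool *pop = pmemobj_create("/pmem/obj-example",
		"example_layout", PMEMOBJ_MIN_POOL, 0666);
	PMEMoid root = pmemobj_root(pop, sizeof(struct my_root));
	struct my_root *rootp = pmemobj_direct(root);

	TX_BEGIN(pop) {
		/* snapshot the range so an abort or crash rolls it back */
		pmemobj_tx_add_range(root, 0, sizeof(struct my_root));
		rootp->counter += 1;
	} TX_END

	pmemobj_close(pop);
	return 0;
}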
+
+%files -n libpmempool__PKG_NAME_SUFFIX__
+%defattr(-,root,root,-)
+%{_libdir}/libpmempool.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmempool-devel
+Summary: Development files for the Persistent Memory pool management library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmempool__PKG_NAME_SUFFIX__ = %{version}-%{release}
+Requires: libpmem-devel = %{version}-%{release}
+%description -n libpmempool-devel
+The libpmempool library provides a set of utilities for off-line
+administration, analysis, diagnostics and repair of persistent memory
+pools created by the libpmemlog, libpmemblk and libpmemobj libraries.
+
+%files -n libpmempool-devel
+%defattr(-,root,root,-)
+%{_libdir}/libpmempool.so
+%{_libdir}/pkgconfig/libpmempool.pc
+%{_includedir}/libpmempool.h
+%{_mandir}/man7/libpmempool.7.gz
+%{_mandir}/man5/poolset.5.gz
+%{_mandir}/man3/pmempool_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n libpmempool-debug
+Summary: Debug variant of the Persistent Memory pool management library
+Group: __GROUP_DEV_LIBS__
+Requires: libpmempool__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n libpmempool-debug
+The libpmempool library provides a set of utilities for off-line
+administration, analysis, diagnostics and repair of persistent memory
+pools created by the libpmemlog, libpmemblk and libpmemobj libraries.
+
+This sub-package contains the debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
+
+%files -n libpmempool-debug
+%defattr(-,root,root,-)
+%dir %{_libdir}/pmdk_debug
+%{_libdir}/pmdk_debug/libpmempool.so
+%{_libdir}/pmdk_debug/libpmempool.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%if %{with fabric}
+
+%package -n librpmem__PKG_NAME_SUFFIX__
+Summary: Remote Access to Persistent Memory library
+Group: __GROUP_SYS_LIBS__
+Requires: libfabric >= %{min_libfabric_ver}
+%if %{defined suse_version}
+Requires: openssh
+%else
+Requires: openssh-clients
+%endif
+%description -n librpmem__PKG_NAME_SUFFIX__
+The librpmem library provides low-level support for remote access
+to persistent memory utilizing RDMA-capable NICs. It can be used
+to replicate persistent memory regions over the RDMA protocol.
+
+%files -n librpmem__PKG_NAME_SUFFIX__
+%defattr(-,root,root,-)
+%{_libdir}/librpmem.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n librpmem-devel
+Summary: Development files for the Remote Access to Persistent Memory library
+Group: __GROUP_DEV_LIBS__
+Requires: librpmem__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n librpmem-devel
+The librpmem library provides low-level support for remote access
+to persistent memory utilizing RDMA-capable NICs. It can be used
+to replicate persistent memory regions over the RDMA protocol.
+
+This sub-package contains libraries and header files for developing
+applications that want to specifically make use of librpmem.
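
Circling back to the off-line analysis the libpmempool packages above describe, a hedged sketch of the check flow from pmempool_check(3); the path and pool type are placeholders and error handling is trimmed:

/* sketch: off-line consistency check of an obj pool with libpmempool */
#include <stddef.h>
#include <stdio.h>
#include <libpmempool.h>

int
main(void)
{
	struct pmempool_check_args args = {
		.path = "/pmem/obj-example",	/* placeholder path */
		.backup_path = NULL,
		.pool_type = PMEMPOOL_POOL_TYPE_OBJ,
		.flags = PMEMPOOL_CHECK_FORMAT_STR | PMEMPOOL_CHECK_VERBOSE
	};
	PMEMpoolcheck *ppc = pmempool_check_init(&args, sizeof(args));
	struct pmempool_check_status *status;

	/* iterate over the status messages the checker emits */
	while ((status = pmempool_check(ppc)) != NULL)
		if (status->str.msg != NULL)
			printf("%s\n", status->str.msg);

	return pmempool_check_end(ppc) == PMEMPOOL_CHECK_RESULT_CONSISTENT
		? 0 : 1;
}

Adding PMEMPOOL_CHECK_REPAIR (optionally with PMEMPOOL_CHECK_DRY_RUN) to the flags would ask the library to fix what it finds rather than only report it.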
+
+%files -n librpmem-devel
+%defattr(-,root,root,-)
+%{_libdir}/librpmem.so
+%{_libdir}/pkgconfig/librpmem.pc
+%{_includedir}/librpmem.h
+%{_mandir}/man7/librpmem.7.gz
+%{_mandir}/man3/rpmem_*.3.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n librpmem-debug
+Summary: Debug variant of the Remote Access to Persistent Memory library
+Group: __GROUP_DEV_LIBS__
+Requires: librpmem__PKG_NAME_SUFFIX__ = %{version}-%{release}
+%description -n librpmem-debug
+The librpmem library provides low-level support for remote access
+to persistent memory utilizing RDMA-capable NICs. It can be used
+to replicate persistent memory regions over the RDMA protocol.
+
+This sub-package contains the debug variant of the library, providing
+run-time assertions and trace points. The typical way to access the
+debug version is to set the environment variable LD_LIBRARY_PATH to
+/usr/lib64/pmdk_debug.
+
+%files -n librpmem-debug
+%defattr(-,root,root,-)
+%dir %{_libdir}/pmdk_debug
+%{_libdir}/pmdk_debug/librpmem.so
+%{_libdir}/pmdk_debug/librpmem.so.*
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%package -n rpmemd
+Group: __GROUP_SYS_BASE__
+Summary: Target node process executed by librpmem
+Requires: libfabric >= %{min_libfabric_ver}
+%description -n rpmemd
+The rpmemd process is executed on a target node by the librpmem library
+and facilitates access to persistent memory over RDMA.
+
+%files -n rpmemd
+%{_bindir}/rpmemd
+%{_mandir}/man1/rpmemd.1.gz
+
+# end of "if _with_fabric"
+%endif
+
+%package -n pmempool
+Summary: Utilities for Persistent Memory
+Group: __GROUP_SYS_BASE__
+Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+Requires: libpmemlog__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+Requires: libpmemblk__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+Requires: libpmemobj__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+Requires: libpmempool__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+Obsoletes: nvml-tools < %{version}-%{release}
+%description -n pmempool
+The pmempool utility is a standalone tool for management and off-line
+analysis of Persistent Memory pools created by the PMDK libraries. It
+provides a set of commands for administration and diagnostics of
+Persistent Memory pools, and may be useful for troubleshooting by
+system administrators and by users of applications based on the PMDK
+libraries.
+
+%files -n pmempool
+%{_bindir}/pmempool
+%{_mandir}/man1/pmempool.1.gz
+%{_mandir}/man1/pmempool-*.1.gz
+%config(noreplace) %{_sysconfdir}/bash_completion.d/pmempool
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+%package -n pmreorder
+Summary: Consistency Checker for Persistent Memory
+Group: __GROUP_SYS_BASE__
+%description -n pmreorder
+The pmreorder tool is a collection of Python scripts designed to parse
+and replay operations logged by pmemcheck, a persistent memory checking
+tool. pmreorder performs store reordering between persistent memory
+barriers (sequences of flush-fence operations). It uses a consistency
+checking routine, provided in the command-line options, to check whether
+files are in a consistent state.
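
Since that workflow hinges on the user-supplied consistency checking routine, here is a sketch of one written in C. The invocation convention assumed here (pool file path as the first argument, exit code 0 meaning consistent) and the on-media "magic" layout are illustrative assumptions; the authoritative convention is in pmreorder(1):

/* sketch: a trivial consistency checker a pmreorder run could invoke */
#include <stdio.h>
#include <string.h>

int
main(int argc, char *argv[])
{
	if (argc < 2)
		return 2;	/* usage error */

	FILE *f = fopen(argv[1], "rb");
	if (f == NULL)
		return 2;

	/* invented layout: a consistent file starts with the magic "OK01" */
	char magic[4];
	size_t n = fread(magic, 1, sizeof(magic), f);
	fclose(f);

	return (n == sizeof(magic) && memcmp(magic, "OK01", 4) == 0)
		? 0	/* consistent */
		: 1;	/* inconsistent */
}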
+
+%files -n pmreorder
+%{_bindir}/pmreorder
+%{_datadir}/pmreorder/*.py
+%{_mandir}/man1/pmreorder.1.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+
+%if %{with ndctl}
+
+%package -n daxio
+Summary: Perform I/O on Device DAX devices or zero a Device DAX device
+Group: __GROUP_SYS_BASE__
+Requires: libpmem__PKG_NAME_SUFFIX__ >= %{version}-%{release}
+%description -n daxio
+The daxio utility performs I/O on Device DAX devices and can zero
+a Device DAX device. Since the standard I/O APIs (read/write) cannot be
+used with Device DAX, data transfer is performed on a memory-mapped
+device. daxio may be used to dump Device DAX data to a file, restore
+data from a backup copy, move/copy data to another device, or erase
+data from a device.
+
+%files -n daxio
+%{_bindir}/daxio
+%{_mandir}/man1/daxio.1.gz
+%license LICENSE
+%doc ChangeLog CONTRIBUTING.md README.md
+
+# end of "if _with_ndctl"
+%endif
+
+%prep
+%setup -q -n %{name}-%{version}
+
+
+%build
+# For a debug build, the default flags may be overridden to disable
+# compiler optimizations.
+CFLAGS="%{optflags}" \
+LDFLAGS="%{?__global_ldflags}" \
+make %{?_smp_mflags} \
+%if %{without ndctl}
+	NDCTL_ENABLE=n \
+%endif
+	__MAKE_FLAGS__
+
+
+# Override LIB_AR with an empty string to skip installation of static libraries
+%install
+make install DESTDIR=%{buildroot} \
+%if %{without ndctl}
+	NDCTL_ENABLE=n \
+%endif
+	LIB_AR= \
+	prefix=%{_prefix} \
+	libdir=%{_libdir} \
+	includedir=%{_includedir} \
+	mandir=%{_mandir} \
+	bindir=%{_bindir} \
+	sysconfdir=%{_sysconfdir} \
+	docdir=%{_docdir}
+mkdir -p %{buildroot}%{_datadir}/pmdk
+cp utils/pmdk.magic %{buildroot}%{_datadir}/pmdk/
+__MAKE_INSTALL_FDUPES__
+
+
+%check
+%if 0%{?_skip_check} == 1
+	echo "Check skipped"
+%else
+	%if %{defined _testconfig}
+		cp %{_testconfig} src/test/testconfig.sh
+	%else
+		echo "PMEM_FS_DIR=/tmp" > src/test/testconfig.sh
+		echo "PMEM_FS_DIR_FORCE_PMEM=1" >> src/test/testconfig.sh
+		echo 'TEST_BUILD="debug nondebug"' >> src/test/testconfig.sh
+		echo 'TEST_FS="pmem any none"' >> src/test/testconfig.sh
+	%endif
+	make \
+%if %{without ndctl}
+		NDCTL_ENABLE=n \
+%endif
+		check
+%endif
+
+%post -n libpmem__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n libpmem__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%post -n libpmemblk__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n libpmemblk__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%post -n libpmemlog__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n libpmemlog__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%post -n libpmemobj__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n libpmemobj__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%post -n libpmempool__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n libpmempool__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+
+%if %{with fabric}
+%post -n librpmem__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%postun -n librpmem__PKG_NAME_SUFFIX__ -p /sbin/ldconfig
+%endif
+
+%if 0%{?__debug_package} == 0
+%debug_package
+%endif
+
+
+%changelog
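
With the spec file complete, one last sketch for orientation: the classic mapping path through the libpmem-devel package described earlier, with a placeholder path and no error handling:

/* sketch: map, store and persist with the original libpmem API */
#include <string.h>
#include <libpmem.h>

int
main(void)
{
	size_t mapped_len;
	int is_pmem;
	char *addr = pmem_map_file("/pmem/pmem-example", 4096,
		PMEM_FILE_CREATE, 0666, &mapped_len, &is_pmem);

	strcpy(addr, "hello");
	if (is_pmem)
		pmem_persist(addr, strlen(addr) + 1);	/* real pmem: CPU flush */
	else
		pmem_msync(addr, strlen(addr) + 1);	/* fallback: msync(2) */

	pmem_unmap(addr, mapped_len);
	return 0;
}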
diff --git a/src/pmdk/utils/ps_analyze.ps1 b/src/pmdk/utils/ps_analyze.ps1
new file mode 100644
index 000000000..b0e6bfe9e
--- /dev/null
+++ b/src/pmdk/utils/ps_analyze.ps1
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017, Intel Corporation
+#
+# ps_analyze -- script to analyze ps1 files
+#
+
+Write-Output "Starting PSScriptAnalyzer ..."
+
+$scriptdir = Split-Path -Parent $PSCommandPath
+$rootdir = $scriptdir + "\.."
+$detected = 0
+
+$include = @("*.ps1")
+Get-ChildItem -Path $rootdir -Recurse -Include $include | `
+	Where-Object { $_.FullName -notlike "*test*" } | `
+	ForEach-Object {
+		$analyze_result = Invoke-ScriptAnalyzer -Path $_.FullName
+		if ($analyze_result) {
+			$detected = $detected + $analyze_result.Count
+			Write-Output $_.FullName
+			Write-Output $analyze_result
+		}
+	}
+
+if ($detected) {
+	Write-Output "PSScriptAnalyzer FAILED. Issues detected: $detected"
+	Exit 1
+} else {
+	Write-Output "PSScriptAnalyzer PASSED. No issues detected."
+	Exit 0
+}
diff --git a/src/pmdk/utils/sort_solution b/src/pmdk/utils/sort_solution
new file mode 100755
index 000000000..c14f57ece
--- /dev/null
+++ b/src/pmdk/utils/sort_solution
@@ -0,0 +1,128 @@
+#!/usr/bin/perl
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016, Intel Corporation
+
+#
+# sort_solution -- sort Visual Studio solution project lists
+#
+
+use strict;
+use warnings;
+# install libtext-diff-perl or perl-Text-Diff
+use Text::Diff;
+
+use Cwd 'abs_path';
+use File::Basename;
+use File::Compare;
+
+
+sub help {
+	print "Usage: sort_solution [check|sort]\n";
+	exit;
+}
+
+sub sort_global_section {
+	my ($solution_fh, $temp_fh, $section_name) = @_;
+	my $line = "";
+	my @array;
+
+	while (defined($line = <$solution_fh>) && ($line !~ $section_name)) {
+		print $temp_fh $line;
+	}
+	print $temp_fh $line;
+
+	while (defined($line = <$solution_fh>) && ($line !~ "EndGlobalSection")) {
+		push @array, $line;
+	}
+
+	@array = sort @array;
+
+	foreach (@array) {
+		print $temp_fh $_;
+	}
+
+	print $temp_fh $line; # print the EndGlobalSection line
+}
+
+my $num_args = $#ARGV + 1;
+if ($num_args != 1) {
+	help;
+}
+my $arg = $ARGV[0];
+
+if($arg ne "check" && $arg ne "sort") {
+	help;
+}
+my $filename = dirname(abs_path($0)).'/../src/PMDK.sln';
+my $tempfile = dirname(abs_path($0)).'/../src/temp.sln';
+
+open(my $temp_fh, '>', $tempfile)
+	or die "Could not open file '$tempfile' $!";
+open(my $solution_fh, '<:crlf', $filename)
+	or die "Could not open file '$filename' $!";
+
+my $line;
+
+# Read the file header
+while (defined($line = <$solution_fh>) && ($line !~ "^Project")) {
+	print $temp_fh $line;
+}
+
+my @part1;
+my $buff;
+my $guid;
+
+# Read the projects list with project dependencies
+do {
+	if($line =~ "^Project") {
+		$buff = $line;
+		$guid = (split(/\,/, $line))[2];
+	} elsif($line =~ "^EndProject") {
+		$buff .= $line;
+		my %table = (
+			guid => $guid,
+			buff => $buff,
+		);
+		push @part1, \%table;
+	} else {
+		$buff .= $line;
+	}
+
+} while (defined($line = <$solution_fh>) && $line ne "Global\n");
+
+# sort the project list by project GUID and write it to the tempfile
+@part1 = sort { $a->{guid} cmp $b->{guid} } @part1;
+
+foreach (@part1) {
+	my %hash = %$_;
+	print $temp_fh $hash{"buff"};
+}
+print $temp_fh $line; # EndProject line
+
+sort_global_section $solution_fh, $temp_fh, "ProjectConfigurationPlatforms";
+
+sort_global_section $solution_fh, $temp_fh, "NestedProjects";
+
+# read the solution file to the end and copy it to the temp file
+while (defined($line = <$solution_fh>)){
+	print $temp_fh $line;
+}
+
+close($temp_fh);
+close($solution_fh);
+
+if($arg eq "check") {
+	my $diff = diff $filename => $tempfile;
+	if ($diff eq "") {
+		unlink $tempfile;
+		exit;
+	}
+
+	print "PMDK solution file is not sorted, " .
+		"please use the sort_solution script before pushing your changes\n";
+	unlink $tempfile;
+	exit 1;
+} else {
+	unlink $filename or die "Cannot replace solution file $!";
+	rename $tempfile, $filename;
+}
diff --git a/src/pmdk/utils/style_check.sh b/src/pmdk/utils/style_check.sh
new file mode 100755
index 000000000..c9d44e2a7
--- /dev/null
+++ b/src/pmdk/utils/style_check.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2016-2020, Intel Corporation
+#
+# utils/style_check.sh -- common style checking script
+#
+set -e
+
+ARGS=("$@")
+CSTYLE_ARGS=()
+CLANG_ARGS=()
+FLAKE8_ARGS=()
+CHECK_TYPE=$1
+
+[ -z "$clang_format_bin" ] && which clang-format-9 >/dev/null &&
+	clang_format_bin=clang-format-9
+[ -z "$clang_format_bin" ] && which clang-format >/dev/null &&
+	clang_format_bin=clang-format
+[ -z "$clang_format_bin" ] && clang_format_bin=clang-format
+
+#
+# print script usage
+#
+function usage() {
+	echo "$0 [C/C++ files]"
+}
+
+#
+# require clang-format version 9.0
+#
+function check_clang_version() {
+	set +e
+	which ${clang_format_bin} &> /dev/null && ${clang_format_bin} --version |\
+		grep "version 9\.0"\
+		&> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: requires clang-format version 9.0"
+		exit 0
+	fi
+	set -e
+}
+
+#
+# run the old cstyle check
+#
+function run_cstyle() {
+	if [ $# -eq 0 ]; then
+		return
+	fi
+
+	${cstyle_bin} -pP $@
+}
+
+#
+# generate a diff against the clang-format rules
+#
+function run_clang_check() {
+	if [ $# -eq 0 ]; then
+		return
+	fi
+	check_clang_version
+
+	for file in $@
+	do
+		LINES=$(${clang_format_bin} -style=file $file |\
+			git diff --no-index $file - | wc -l)
+		if [ $LINES -ne 0 ]; then
+			${clang_format_bin} -style=file $file | git diff --no-index $file -
+		fi
+	done
+}
+
+#
+# format in place according to the clang-format rules
+#
+function run_clang_format() {
+	if [ $# -eq 0 ]; then
+		return
+	fi
+	check_clang_version
+
+	${clang_format_bin} -style=file -i $@
+}
+
+function run_flake8() {
+	if [ $# -eq 0 ]; then
+		return
+	fi
+	${flake8_bin} --exclude=testconfig.py,envconfig.py $@
+}
+
+for ((i=1; i<$#; i++)) {
+
+	IGNORE="$(dirname ${ARGS[$i]})/.cstyleignore"
+	if [ -e $IGNORE ]; then
+		if grep -q ${ARGS[$i]} $IGNORE ; then
+			echo "SKIP ${ARGS[$i]}"
+			continue
+		fi
+	fi
+	case ${ARGS[$i]} in
+		*.[ch]pp)
+			CLANG_ARGS+="${ARGS[$i]} "
+			;;
+
+		*.[ch])
+			CSTYLE_ARGS+="${ARGS[$i]} "
+			;;
+
+		*.py)
+			FLAKE8_ARGS+="${ARGS[$i]} "
+			;;
+
+		*)
+			echo "Unknown argument"
+			exit 1
+			;;
+	esac
+}
+
+case $CHECK_TYPE in
+	check)
+		run_cstyle ${CSTYLE_ARGS}
+		run_clang_check ${CLANG_ARGS}
+		run_flake8 ${FLAKE8_ARGS}
+		;;
+
+	format)
+		run_clang_format ${CLANG_ARGS}
+		;;
+
+	*)
+		echo "Invalid parameters"
+		usage
+		exit 1
+		;;
esac
diff --git a/src/pmdk/utils/version.sh b/src/pmdk/utils/version.sh
new file mode 100755
index 000000000..fd751c9d9
--- /dev/null
+++ b/src/pmdk/utils/version.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017-2020, Intel Corporation
+#
+# utils/version.sh -- determine project's version
+#
+set -e
+
+if [ -f "$1/VERSION" ]; then
+	cat "$1/VERSION"
+	exit 0
+fi
+
+if [ -f $1/GIT_VERSION ]; then
+	echo -n "\$Format:%h\$" | cmp -s $1/GIT_VERSION - && true
+	if [ $? -eq 0 ]; then
+		PARSE_GIT_VERSION=0
+	else
+		PARSE_GIT_VERSION=1
+	fi
+else
+	PARSE_GIT_VERSION=0
+fi
+
+LATEST_RELEASE=$(cat $1/ChangeLog | grep "* Version" | cut -d " " -f 3 | sort -rd | head -n1)
+
+if [ $PARSE_GIT_VERSION -eq 1 ]; then
+	GIT_VERSION_HASH=$(cat $1/GIT_VERSION)
+
+	if [ -n "$GIT_VERSION_HASH" ]; then
+		echo "$LATEST_RELEASE+git.$GIT_VERSION_HASH"
+		exit 0
+	fi
+fi
+
+cd "$1"
+
+GIT_DESCRIBE=$(git describe 2>/dev/null) && true
+if [ -n "$GIT_DESCRIBE" ]; then
+	# 1.5-19-gb8f78a329 -> 1.5+git19.gb8f78a329
+	# 1.5-rc1-19-gb8f78a329 -> 1.5-rc1+git19.gb8f78a329
+	echo "$GIT_DESCRIBE" | sed "s/\([0-9.]*\)-rc\([0-9]*\)-\([0-9]*\)-\([0-9a-g]*\)/\1-rc\2+git\3.\4/" | sed "s/\([0-9.]*\)-\([0-9]*\)-\([0-9a-g]*\)/\1+git\2.\3/"
+	exit 0
+fi
+
+# try the commit id; git describe can fail when there are no tags (e.g. with a shallow clone, like on Travis)
+GIT_COMMIT=$(git log -1 --format=%h) && true
+if [ -n "$GIT_COMMIT" ]; then
+	echo "$LATEST_RELEASE+git.$GIT_COMMIT"
+	exit 0
+fi
+
+cd - >/dev/null
+
+# If nothing works, try to get the version from the directory name
+VER=$(basename `realpath "$1"` | sed 's/pmdk[-]*\([0-9a-z.+-]*\).*/\1/')
+if [ -n "$VER" ]; then
+	echo "$VER"
+	exit 0
+fi
+
+exit 1
-- 
cgit v1.2.3