From 9f0fc191371843c4fc000a226b0a26b6c059aacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:40:19 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- tools/testing/selftests/mm/.gitignore | 3 + tools/testing/selftests/mm/Makefile | 6 +- .../selftests/mm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/mm/config | 1 + tools/testing/selftests/mm/gup_longterm.c | 3 +- tools/testing/selftests/mm/hugetlb-madvise.c | 19 - .../selftests/mm/hugetlb_fault_after_madv.c | 73 + tools/testing/selftests/mm/ksm_functional_tests.c | 66 +- tools/testing/selftests/mm/ksm_tests.c | 2 +- tools/testing/selftests/mm/map_hugetlb.c | 7 + tools/testing/selftests/mm/mdwe_test.c | 128 +- tools/testing/selftests/mm/mremap_test.c | 303 +++- tools/testing/selftests/mm/pagemap_ioctl.c | 1664 ++++++++++++++++++++ tools/testing/selftests/mm/pkey-helpers.h | 2 +- tools/testing/selftests/mm/run_vmtests.sh | 12 + tools/testing/selftests/mm/uffd-unit-tests.c | 6 + tools/testing/selftests/mm/va_high_addr_switch.sh | 6 + tools/testing/selftests/mm/vm_util.c | 19 + tools/testing/selftests/mm/vm_util.h | 1 + tools/testing/selftests/mm/write_hugetlb_memory.sh | 2 +- 20 files changed, 2244 insertions(+), 81 deletions(-) create mode 100644 tools/testing/selftests/mm/hugetlb_fault_after_madv.c create mode 100644 tools/testing/selftests/mm/pagemap_ioctl.c (limited to 'tools/testing/selftests/mm') diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index cdc9ce4426..4ff10ea614 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -18,6 +18,8 @@ mremap_dontunmap mremap_test on-fault-limit transhuge-stress +pagemap_ioctl +*.tmp* protection_keys protection_keys_32 protection_keys_64 @@ -43,3 +45,4 @@ mdwe_test gup_longterm mkdirty va_high_addr_switch +hugetlb_fault_after_madv diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 6a9fc56931..dede0bcf97 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -33,7 +33,7 @@ endif MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) -LDLIBS = -lrt -lpthread +LDLIBS = -lrt -lpthread -lm TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test @@ -60,6 +60,7 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit +TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -68,9 +69,10 @@ TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests TEST_GEN_FILES += ksm_functional_tests TEST_GEN_FILES += mdwe_test +TEST_GEN_FILES += hugetlb_fault_after_madv ifneq ($(ARCH),arm64) -TEST_GEN_PROGS += soft-dirty +TEST_GEN_FILES += soft-dirty endif ifeq ($(ARCH),x86_64) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 0899019a7f..e14bdd4455 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index be087c4bc3..4309916f62 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_USERFAULTFD=y +CONFIG_PTE_MARKER_UFFD_WP=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index d33d3e68ff..ad168d35b2 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -265,10 +265,11 @@ static void run_with_tmpfile(test_fn fn, const char *desc) fd = fileno(file); if (fd < 0) { ksft_test_result_fail("fileno() failed\n"); - return; + goto close; } fn(fd, pagesize); +close: fclose(file); } diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index d55322df4b..f32d99565c 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -36,25 +36,6 @@ unsigned long huge_page_size; unsigned long base_page_size; -unsigned long get_free_hugepages(void) -{ - unsigned long fhp = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return fhp; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) - break; - } - - free(line); - fclose(f); - return fhp; -} - void write_fault_pages(void *addr, unsigned long nr_pages) { unsigned long i; diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c new file mode 100644 index 0000000000..73b81c6323 --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include "vm_util.h" +#include "../kselftest.h" + +#define MMAP_SIZE (1 << 21) +#define INLOOP_ITER 100 + +char *huge_ptr; + +/* Touch the memory while it is being madvised() */ +void *touch(void *unused) +{ + char *ptr = (char *)huge_ptr; + + for (int i = 0; i < INLOOP_ITER; i++) + ptr[0] = '.'; + + return NULL; +} + +void *madv(void *unused) +{ + usleep(rand() % 10); + + for (int i = 0; i < INLOOP_ITER; i++) + madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + + return NULL; +} + +int main(void) +{ + unsigned long free_hugepages; + pthread_t thread1, thread2; + /* + * On kernel 6.4, we are able to reproduce the problem with ~1000 + * interactions + */ + int max = 10000; + + srand(getpid()); + + free_hugepages = get_free_hugepages(); + if (free_hugepages != 1) { + ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", + free_hugepages); + } + + while (max--) { + huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + + if ((unsigned long)huge_ptr == -1) + ksft_exit_skip("Failed to allocated huge page\n"); + + pthread_create(&thread1, NULL, madv, NULL); + pthread_create(&thread2, NULL, touch, NULL); + + pthread_join(thread1, NULL); + pthread_join(thread2, NULL); + munmap(huge_ptr, MMAP_SIZE); + } + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 901e950f91..fbff0dd091 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -26,6 +26,7 @@ #define KiB 1024u #define MiB (1024 * KiB) +#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child" static int mem_fd; static int ksm_fd; @@ -479,6 +480,64 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); } +static int ksm_fork_exec_child(void) +{ + /* Test if KSM is enabled for the process. */ + return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1; +} + +static void test_prctl_fork_exec(void) +{ + int ret, status; + pid_t child_pid; + + ksft_print_msg("[RUN] %s\n", __func__); + + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) { + ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + return; + } else if (ret) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + return; + } + + child_pid = fork(); + if (child_pid == -1) { + ksft_test_result_skip("fork() failed\n"); + return; + } else if (child_pid == 0) { + char *prg_name = "./ksm_functional_tests"; + char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME }; + + execv(prg_name, argv_for_program); + return; + } + + if (waitpid(child_pid, &status, 0) > 0) { + if (WIFEXITED(status)) { + status = WEXITSTATUS(status); + if (status) { + ksft_test_result_fail("KSM not enabled\n"); + return; + } + } else { + ksft_test_result_fail("program didn't terminate normally\n"); + return; + } + } else { + ksft_test_result_fail("waitpid() failed\n"); + return; + } + + if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n"); + return; + } + + ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); +} + static void test_prctl_unmerge(void) { const unsigned int size = 2 * MiB; @@ -536,9 +595,13 @@ unmap: int main(int argc, char **argv) { - unsigned int tests = 7; + unsigned int tests = 8; int err; + if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + exit(ksm_fork_exec_child() == 1 ? 0 : 1); + } + #ifdef __NR_userfaultfd tests++; #endif @@ -576,6 +639,7 @@ int main(int argc, char **argv) test_prctl(); test_prctl_fork(); + test_prctl_fork_exec(); test_prctl_unmerge(); err = ksft_get_fail_cnt(); diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 380b691d3e..b748c48908 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 193281560b..86e8f2048a 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -58,10 +59,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index 0c5e469ae3..200bedcdc3 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -22,15 +22,104 @@ TEST(prctl_flags) { + EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); + EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); +} + +FIXTURE(consecutive_prctl_flags) {}; +FIXTURE_SETUP(consecutive_prctl_flags) {} +FIXTURE_TEARDOWN(consecutive_prctl_flags) {} + +FIXTURE_VARIANT(consecutive_prctl_flags) +{ + unsigned long first_flags; + unsigned long second_flags; + bool should_work; +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags) +{ + .first_flags = 0, + .second_flags = 0, + .should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = 0, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = 0, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .should_work = false, +}; + +TEST_F(consecutive_prctl_flags, two_prctls) +{ + int ret; + + EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0); + + ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L); + if (variant->should_work) { + EXPECT_EQ(ret, 0); + + ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L); + ASSERT_EQ(ret, variant->second_flags); + } else { + EXPECT_NE(ret, 0); + ASSERT_EQ(errno, EPERM); + } } FIXTURE(mdwe) @@ -45,28 +134,45 @@ FIXTURE_VARIANT(mdwe) { bool enabled; bool forked; + bool inherit; }; FIXTURE_VARIANT_ADD(mdwe, stock) { - .enabled = false, + .enabled = false, .forked = false, + .inherit = false, }; FIXTURE_VARIANT_ADD(mdwe, enabled) { - .enabled = true, + .enabled = true, .forked = false, + .inherit = true, }; -FIXTURE_VARIANT_ADD(mdwe, forked) +FIXTURE_VARIANT_ADD(mdwe, inherited) { - .enabled = true, + .enabled = true, .forked = true, + .inherit = true, }; +FIXTURE_VARIANT_ADD(mdwe, not_inherited) +{ + .enabled = true, + .forked = true, + .inherit = false, +}; + +static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant) +{ + return variant->enabled && (!variant->forked || variant->inherit); +} + FIXTURE_SETUP(mdwe) { + unsigned long mdwe_flags; int ret, status; self->p = NULL; @@ -76,13 +182,17 @@ FIXTURE_SETUP(mdwe) if (!variant->enabled) return; - ret = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0L, 0L, 0L); + mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN; + if (!variant->inherit) + mdwe_flags |= PR_MDWE_NO_INHERIT; + + ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L); ASSERT_EQ(ret, 0) { TH_LOG("PR_SET_MDWE failed or unsupported"); } ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L); - ASSERT_EQ(ret, 1); + ASSERT_EQ(ret, mdwe_flags); if (variant->forked) { self->pid = fork(); @@ -113,7 +223,7 @@ TEST_F(mdwe, mmap_READ_EXEC) TEST_F(mdwe, mmap_WRITE_EXEC) { self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_EQ(self->p, MAP_FAILED); } else { EXPECT_NE(self->p, MAP_FAILED); @@ -139,7 +249,7 @@ TEST_F(mdwe, mprotect_add_EXEC) ASSERT_NE(self->p, MAP_FAILED); ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_LT(ret, 0); } else { EXPECT_EQ(ret, 0); @@ -154,7 +264,7 @@ TEST_F(mdwe, mprotect_WRITE_EXEC) ASSERT_NE(self->p, MAP_FAILED); ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_LT(ret, 0); } else { EXPECT_EQ(ret, 0); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5c3773de9f..1d4c1589c3 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -23,12 +23,15 @@ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define SIZE_MB(m) ((size_t)m * (1024 * 1024)) +#define SIZE_KB(k) ((size_t)k * 1024) struct config { unsigned long long src_alignment; unsigned long long dest_alignment; unsigned long long region_size; int overlapping; + int dest_preamble_size; }; struct test { @@ -44,6 +47,7 @@ enum { _1MB = 1ULL << 20, _2MB = 2ULL << 20, _4MB = 4ULL << 20, + _5MB = 5ULL << 20, _1GB = 1ULL << 30, _2GB = 2ULL << 30, PMD = _2MB, @@ -145,6 +149,60 @@ static bool is_range_mapped(FILE *maps_fp, void *start, void *end) return success; } +/* + * Returns the start address of the mapping on success, else returns + * NULL on failure. + */ +static void *get_source_mapping(struct config c) +{ + unsigned long long addr = 0ULL; + void *src_addr = NULL; + unsigned long long mmap_min_addr; + + mmap_min_addr = get_mmap_min_addr(); + /* + * For some tests, we need to not have any mappings below the + * source mapping. Add some headroom to mmap_min_addr for this. + */ + mmap_min_addr += 10 * _4MB; + +retry: + addr += c.src_alignment; + if (addr < mmap_min_addr) + goto retry; + + src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); + if (src_addr == MAP_FAILED) { + if (errno == EPERM || errno == EEXIST) + goto retry; + goto error; + } + /* + * Check that the address is aligned to the specified alignment. + * Addresses which have alignments that are multiples of that + * specified are not considered valid. For instance, 1GB address is + * 2MB-aligned, however it will not be considered valid for a + * requested alignment of 2MB. This is done to reduce coincidental + * alignment in the tests. + */ + if (((unsigned long long) src_addr & (c.src_alignment - 1)) || + !((unsigned long long) src_addr & c.src_alignment)) { + munmap(src_addr, c.region_size); + goto retry; + } + + if (!src_addr) + goto error; + + return src_addr; +error: + ksft_print_msg("Failed to map source region: %s\n", + strerror(errno)); + return NULL; +} + /* * This test validates that merge is called when expanding a mapping. * Mapping containing three pages is created, middle page is unmapped @@ -225,59 +283,83 @@ out: } /* - * Returns the start address of the mapping on success, else returns - * NULL on failure. + * Verify that an mremap within a range does not cause corruption + * of unrelated part of range. + * + * Consider the following range which is 2MB aligned and is + * a part of a larger 20MB range which is not shown. Each + * character is 256KB below making the source and destination + * 2MB each. The lower case letters are moved (s to d) and the + * upper case letters are not moved. The below test verifies + * that the upper case S letters are not corrupted by the + * adjacent mremap. + * + * |DDDDddddSSSSssss| */ -static void *get_source_mapping(struct config c) +static void mremap_move_within_range(char pattern_seed) { - unsigned long long addr = 0ULL; - void *src_addr = NULL; - unsigned long long mmap_min_addr; + char *test_name = "mremap mremap move within range"; + void *src, *dest; + int i, success = 1; + + size_t size = SIZE_MB(20); + void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = 0; + goto out; + } + memset(ptr, 0, size); - mmap_min_addr = get_mmap_min_addr(); + src = ptr + SIZE_MB(6); + src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1)); -retry: - addr += c.src_alignment; - if (addr < mmap_min_addr) - goto retry; + /* Set byte pattern for source block. */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(2); i++) { + ((char *)src)[i] = (char) rand(); + } - src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, - -1, 0); - if (src_addr == MAP_FAILED) { - if (errno == EPERM || errno == EEXIST) - goto retry; - goto error; + dest = src - SIZE_MB(2); + + void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1), + MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1)); + if (new_ptr == MAP_FAILED) { + perror("mremap"); + success = 0; + goto out; } - /* - * Check that the address is aligned to the specified alignment. - * Addresses which have alignments that are multiples of that - * specified are not considered valid. For instance, 1GB address is - * 2MB-aligned, however it will not be considered valid for a - * requested alignment of 2MB. This is done to reduce coincidental - * alignment in the tests. - */ - if (((unsigned long long) src_addr & (c.src_alignment - 1)) || - !((unsigned long long) src_addr & c.src_alignment)) { - munmap(src_addr, c.region_size); - goto retry; + + /* Verify byte pattern after remapping */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(1); i++) { + char c = (char) rand(); + + if (((char *)src)[i] != c) { + ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) src)[i] & 0xff); + success = 0; + } } - if (!src_addr) - goto error; +out: + if (munmap(ptr, size) == -1) + perror("munmap"); - return src_addr; -error: - ksft_print_msg("Failed to map source region: %s\n", - strerror(errno)); - return NULL; + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); } /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { - void *addr, *src_addr, *dest_addr; + void *addr, *src_addr, *dest_addr, *dest_preamble_addr; unsigned long long i; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; @@ -294,7 +376,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, goto out; } - /* Set byte pattern */ + /* Set byte pattern for source block. */ srand(pattern_seed); for (i = 0; i < threshold; i++) memset((char *) src_addr + i, (char) rand(), 1); @@ -306,6 +388,9 @@ static long long remap_region(struct config c, unsigned int threshold_mb, addr = (void *) (((unsigned long long) src_addr + c.region_size + offset) & align_mask); + /* Remap after the destination block preamble. */ + addr += c.dest_preamble_size; + /* See comment in get_source_mapping() */ if (!((unsigned long long) addr & c.dest_alignment)) addr = (void *) ((unsigned long long) addr | c.dest_alignment); @@ -316,11 +401,29 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (addr + c.dest_alignment < addr) { ksft_print_msg("Couldn't find a valid region to remap to\n"); ret = -1; - goto out; + goto clean_up_src; } addr += c.dest_alignment; } + if (c.dest_preamble_size) { + dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size, + PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); + if (dest_preamble_addr == MAP_FAILED) { + ksft_print_msg("Failed to map dest preamble region: %s\n", + strerror(errno)); + ret = -1; + goto clean_up_src; + } + + /* Set byte pattern for the dest preamble block. */ + srand(pattern_seed); + for (i = 0; i < c.dest_preamble_size; i++) + memset((char *) dest_preamble_addr + i, (char) rand(), 1); + } + clock_gettime(CLOCK_MONOTONIC, &t_start); dest_addr = mremap(src_addr, c.region_size, c.region_size, MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr); @@ -329,7 +432,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (dest_addr == MAP_FAILED) { ksft_print_msg("mremap failed: %s\n", strerror(errno)); ret = -1; - goto clean_up_src; + goto clean_up_dest_preamble; } /* Verify byte pattern after remapping */ @@ -338,7 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char c = (char) rand(); if (((char *) dest_addr)[i] != c) { - ksft_print_msg("Data after remap doesn't match at offset %d\n", + ksft_print_msg("Data after remap doesn't match at offset %llu\n", i); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, ((char *) dest_addr)[i] & 0xff); @@ -347,6 +450,23 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } } + /* Verify the dest preamble byte pattern after remapping */ + if (c.dest_preamble_size) { + srand(pattern_seed); + for (i = 0; i < c.dest_preamble_size; i++) { + char c = (char) rand(); + + if (((char *) dest_preamble_addr)[i] != c) { + ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) dest_preamble_addr)[i] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + } + start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec; end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec; ret = end_ns - start_ns; @@ -359,12 +479,92 @@ static long long remap_region(struct config c, unsigned int threshold_mb, */ clean_up_dest: munmap(dest_addr, c.region_size); +clean_up_dest_preamble: + if (c.dest_preamble_size && dest_preamble_addr) + munmap(dest_preamble_addr, c.dest_preamble_size); clean_up_src: munmap(src_addr, c.region_size); out: return ret; } +/* + * Verify that an mremap aligning down does not destroy + * the beginning of the mapping just because the aligned + * down address landed on a mapping that maybe does not exist. + */ +static void mremap_move_1mb_from_start(char pattern_seed) +{ + char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; + void *src = NULL, *dest = NULL; + int i, success = 1; + + /* Config to reuse get_source_mapping() to do an aligned mmap. */ + struct config c = { + .src_alignment = SIZE_MB(1) + SIZE_KB(256), + .region_size = SIZE_MB(6) + }; + + src = get_source_mapping(c); + if (!src) { + success = 0; + goto out; + } + + c.src_alignment = SIZE_MB(1) + SIZE_KB(256); + dest = get_source_mapping(c); + if (!dest) { + success = 0; + goto out; + } + + /* Set byte pattern for source block. */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(2); i++) { + ((char *)src)[i] = (char) rand(); + } + + /* + * Unmap the beginning of dest so that the aligned address + * falls on no mapping. + */ + munmap(dest, SIZE_MB(1)); + + void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1), + MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1)); + if (new_ptr == MAP_FAILED) { + perror("mremap"); + success = 0; + goto out; + } + + /* Verify byte pattern after remapping */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(1); i++) { + char c = (char) rand(); + + if (((char *)src)[i] != c) { + ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) src)[i] & 0xff); + success = 0; + } + } + +out: + if (src && munmap(src, c.region_size) == -1) + perror("munmap src"); + + if (dest && munmap(dest, c.region_size) == -1) + perror("munmap dest"); + + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} + static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, unsigned int pattern_seed) @@ -434,7 +634,7 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, return 0; } -#define MAX_TEST 13 +#define MAX_TEST 15 #define MAX_PERF_TEST 3 int main(int argc, char **argv) { @@ -443,7 +643,8 @@ int main(int argc, char **argv) unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; unsigned int pattern_seed; int num_expand_tests = 2; - struct test test_cases[MAX_TEST]; + int num_misc_tests = 2; + struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; time_t t; @@ -500,6 +701,15 @@ int main(int argc, char **argv) test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, "2GB mremap - Source PUD-aligned, Destination PUD-aligned"); + /* Src and Dest addr 1MB aligned. 5MB mremap. */ + test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned"); + + /* Src and Dest addr 1MB aligned. 5MB mremap. */ + test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble"); + test_cases[14].config.dest_preamble_size = 10 * _4MB; + perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, "1GB mremap - Source PTE-aligned, Destination PTE-aligned"); /* @@ -515,7 +725,7 @@ int main(int argc, char **argv) (threshold_mb * _1MB >= _1GB); ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ? - ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests); + ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests); for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, @@ -533,6 +743,9 @@ int main(int argc, char **argv) fclose(maps_fp); + mremap_move_within_range(pattern_seed); + mremap_move_1mb_from_start(pattern_seed); + if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c new file mode 100644 index 0000000000..d59517ed3d --- /dev/null +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -0,0 +1,1664 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "vm_util.h" +#include "../kselftest.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ + PAGE_IS_FILE | PAGE_IS_PRESENT | \ + PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ + PAGE_IS_HUGE) +#define PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_WPALLOWED | PAGE_IS_FILE | \ + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | \ + PAGE_IS_PFNZERO | PAGE_IS_HUGE) + +#define TEST_ITERATIONS 100 +#define PAGEMAP "/proc/self/pagemap" +int pagemap_fd; +int uffd; +int page_size; +int hpage_size; +const char *progname; + +#define LEN(region) ((region.end - region.start)/page_size) + +static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag, + int max_pages, long required_mask, long anyof_mask, long excluded_mask, + long return_mask) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + len); + arg.vec = (uintptr_t)vec; + arg.vec_len = vec_len; + arg.flags = flag; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = max_pages; + arg.category_mask = required_mask; + arg.category_anyof_mask = anyof_mask; + arg.category_inverted = excluded_mask; + arg.return_mask = return_mask; + + return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); +} + +static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag, + int max_pages, long required_mask, long anyof_mask, long excluded_mask, + long return_mask, long *walk_end) +{ + struct pm_scan_arg arg; + int ret; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + len); + arg.vec = (uintptr_t)vec; + arg.vec_len = vec_len; + arg.flags = flag; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = max_pages; + arg.category_mask = required_mask; + arg.category_anyof_mask = anyof_mask; + arg.category_inverted = excluded_mask; + arg.return_mask = return_mask; + + ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); + + if (walk_end) + *walk_end = arg.walk_end; + + return ret; +} + + +int init_uffd(void) +{ + struct uffdio_api uffdio_api; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); + if (uffd == -1) + return uffd; + + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | + UFFD_FEATURE_WP_HUGETLBFS_SHMEM; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + return -1; + + if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || + !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || + !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || + !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + return -1; + + return 0; +} + +int wp_init(void *lpBaseAddress, int dwRegionSize) +{ + struct uffdio_register uffdio_register; + struct uffdio_writeprotect wp; + + uffdio_register.range.start = (unsigned long)lpBaseAddress; + uffdio_register.range.len = dwRegionSize; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno)); + + if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT)) + ksft_exit_fail_msg("ioctl set is incorrect\n"); + + wp.range.start = (unsigned long)lpBaseAddress; + wp.range.len = dwRegionSize; + wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; + + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) + ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n"); + + return 0; +} + +int wp_free(void *lpBaseAddress, int dwRegionSize) +{ + struct uffdio_register uffdio_register; + + uffdio_register.range.start = (unsigned long)lpBaseAddress; + uffdio_register.range.len = dwRegionSize; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) + ksft_exit_fail_msg("ioctl unregister failure\n"); + return 0; +} + +int wp_addr_range(void *lpBaseAddress, int dwRegionSize) +{ + if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0) + ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno)); + + return 0; +} + +void *gethugetlb_mem(int size, int *shmid) +{ + char *mem; + + if (shmid) { + *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (*shmid < 0) + return NULL; + + mem = shmat(*shmid, 0, 0); + if (mem == (char *)-1) { + shmctl(*shmid, IPC_RMID, NULL); + ksft_exit_fail_msg("Shared memory attach failure\n"); + } + } else { + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0); + if (mem == MAP_FAILED) + return NULL; + } + + return mem; +} + +int userfaultfd_tests(void) +{ + int mem_size, vec_size, written, num_pages = 16; + char *mem, *vec; + + mem_size = num_pages * page_size; + mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + + /* Change protection of pages differently */ + mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ); + mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ); + mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE); + mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ); + + wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8)); + wp_addr_range(mem, mem_size); + + vec_size = mem_size/page_size; + vec = malloc(sizeof(struct page_region) * vec_size); + + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + free(vec); + return 0; +} + +int get_reads(struct page_region *vec, int vec_size) +{ + int i, sum = 0; + + for (i = 0; i < vec_size; i++) + sum += LEN(vec[i]); + + return sum; +} + +int sanity_tests_sd(void) +{ + int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0; + int total_writes, total_reads, reads, count; + struct page_region *vec, *vec2; + char *mem, *m[2]; + long walk_end; + + vec_size = num_pages/2; + mem_size = num_pages * page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + vec2 = malloc(sizeof(struct page_region) * vec_size); + if (!vec2) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + /* 1. wrong operation */ + ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s Zero range size is valid\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0, + "%s output buffer must be specified with size\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s output buffer can be 0\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s output buffer can be 0\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, + "%s wrong flag specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, + "%s flag has extra bits specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0, + "%s no selection mask is specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0, + "%s no return mask is specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0, + "%s wrong return mask specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0, + "%s mixture of correct and wrong flag\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0, + "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", + __func__); + + /* 2. Clear area with larger vec size */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__); + + /* 3. Repeated pattern of written and non-written pages */ + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, + 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == mem_size/(page_size * 2), + "%s Repeated pattern of written and non-written pages\n", __func__); + + /* 4. Repeated pattern of written and non-written pages in parts */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret3 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); + + ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, + "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", + __func__, ret, ret3, ret2); + + /* 5. Repeated pattern of written and non-written pages max_pages */ + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + mem[(mem_size/page_size - 1) * page_size]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret == num_pages/2 && ret2 == 1, + "%s Repeated pattern of written and non-written pages max_pages\n", + __func__); + + /* 6. only get 2 dirty pages and clear them as well */ + vec_size = mem_size/page_size; + memset(mem, -1, mem_size); + + /* get and clear second and third pages */ + ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == 2 && + vec[0].start == (uintptr_t)(mem + page_size) && + ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem && + LEN(vec2[1]) == vec_size - 3 && + vec2[1].start == (uintptr_t)(mem + 3 * page_size), + "%s only get 2 written pages and clear them as well\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 7. Two regions */ + m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (m[0] == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (m[1] == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(m[0], mem_size); + wp_init(m[1], mem_size); + wp_addr_range(m[0], mem_size); + wp_addr_range(m[1], mem_size); + + memset(m[0], 'a', mem_size); + memset(m[1], 'b', mem_size); + + wp_addr_range(m[0], mem_size); + + ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size, + "%s Two regions\n", __func__); + + wp_free(m[0], mem_size); + wp_free(m[1], mem_size); + munmap(m[0], mem_size); + munmap(m[1], mem_size); + + free(vec); + free(vec2); + + /* 8. Smaller vec */ + mem_size = 1050 * page_size; + vec_size = mem_size/(page_size*2); + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + for (i = 0; i < mem_size/page_size; i += 2) + mem[i * page_size]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + total_pages = 0; + + /* 9. Smaller vec */ + mem_size = 10000 * page_size; + vec_size = 50; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + for (count = 0; count < TEST_ITERATIONS; count++) { + total_writes = total_reads = 0; + walk_end = (long)mem; + + for (i = 0; i < mem_size; i += page_size) { + if (rand() % 2) { + mem[i]++; + total_writes++; + } + } + + while (total_reads < total_writes) { + ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec, + vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + if (ret > vec_size) + break; + + reads = get_reads(vec, ret); + total_reads += reads; + } + + if (total_reads != total_writes) + break; + } + + ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n"); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 10. Walk_end tester */ + vec_size = 1000; + mem_size = vec_size * page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioc(mem, 0, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end address\n"); + + ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end with WP\n"); + + ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end with 0 output buffer\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: Big vec\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: vec of minimum length\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2), + "Walk_end: Half max pages\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size), + "Walk_end: 1 max page\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: max pages\n"); + + wp_addr_range(mem, mem_size); + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Big vec\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end sparse: vec of minimum length\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_endsparse : Half max pages\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end: 1 max page\n"); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + return 0; +} + +int base_tests(char *prefix, char *mem, int mem_size, int skip) +{ + int vec_size, written; + struct page_region *vec, *vec2; + + if (skip) { + ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix); + ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix); + ksft_test_result_skip("%s all pages dirty other than first and the last one\n", + prefix); + ksft_test_result_skip("%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix); + ksft_test_result_skip("%s only middle page dirty\n", prefix); + ksft_test_result_skip("%s only two middle pages dirty\n", prefix); + return 0; + } + + vec_size = mem_size/page_size; + vec = malloc(sizeof(struct page_region) * vec_size); + vec2 = malloc(sizeof(struct page_region) * vec_size); + + /* 1. all new pages must be not be written (dirty) */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix); + + /* 2. all pages must be written */ + memset(mem, -1, mem_size); + + written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size, + "%s all pages must be written (dirty)\n", prefix); + + /* 3. all pages dirty other than first and the last one */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + memset(mem + page_size, 0, mem_size - (2 * page_size)); + + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size, + "%s all pages dirty other than first and the last one\n", prefix); + + written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, + "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix); + + /* 4. only middle page dirty */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + mem[vec_size/2 * page_size]++; + + written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) >= 1, + "%s only middle page dirty\n", prefix); + + /* 5. only two middle pages dirty and walk over only middle pages */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + mem[vec_size/2 * page_size]++; + mem[(vec_size/2 + 1) * page_size]++; + + written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size]) + && LEN(vec[0]) == 2, + "%s only two middle pages dirty\n", prefix); + + free(vec); + free(vec2); + return 0; +} + +void *gethugepage(int map_size) +{ + int ret; + char *map; + + map = memalign(hpage_size, map_size); + if (!map) + ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno)); + + ret = madvise(map, map_size, MADV_HUGEPAGE); + if (ret) + return NULL; + + memset(map, 0, map_size); + + return map; +} + +int hpage_unit_tests(void) +{ + char *map; + int ret, ret2; + size_t num_pages = 10; + int map_size = hpage_size * num_pages; + int vec_size = map_size/page_size; + struct page_region *vec, *vec2; + + vec = malloc(sizeof(struct page_region) * vec_size); + vec2 = malloc(sizeof(struct page_region) * vec_size); + if (!vec || !vec2) + ksft_exit_fail_msg("malloc failed\n"); + + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + /* 1. all new huge page must not be written (dirty) */ + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n", + __func__); + + /* 2. all the huge page must not be written */ + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__); + + /* 3. all the huge page must be written and clear dirty as well */ + memset(map, -1, map_size); + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map && + LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN, + "%s all the huge page must be written and clear\n", __func__); + + /* 4. only middle page written */ + wp_free(map, map_size); + free(map); + map = gethugepage(map_size); + wp_init(map, map_size); + wp_addr_range(map, map_size); + map[vec_size/2 * page_size]++; + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) > 0, + "%s only middle page written\n", __func__); + + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s all new huge page must be written\n", __func__); + ksft_test_result_skip("%s all the huge page must not be written\n", __func__); + ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__); + ksft_test_result_skip("%s only middle page written\n", __func__); + } + + /* 5. clear first half of huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, 0, map_size); + + wp_addr_range(map, map_size/2); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 && + vec[0].start == (uintptr_t)(map + map_size/2), + "%s clear first half of huge page\n", __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear first half of huge page\n", __func__); + } + + /* 6. clear first half of huge page with limited buffer */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, 0, map_size); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 && + vec[0].start == (uintptr_t)(map + map_size/2), + "%s clear first half of huge page with limited buffer\n", + __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear first half of huge page with limited buffer\n", + __func__); + } + + /* 7. clear second half of huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, -1, map_size); + + ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2, + "%s clear second half huge page\n", __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear second half huge page\n", __func__); + } + + /* 8. get half huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, -1, map_size); + usleep(100); + + ret = pagemap_ioctl(map, map_size, vec, 1, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size), + "%s get half huge page\n", __func__); + + ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size, + "%s get half huge page\n", __func__); + + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s get half huge page\n", __func__); + ksft_test_result_skip("%s get half huge page\n", __func__); + } + + free(vec); + free(vec2); + return 0; +} + +int unmapped_region_tests(void) +{ + void *start = (void *)0x10000000; + int written, len = 0x00040000; + int vec_size = len / page_size; + struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); + + /* 1. Get written pages */ + written = pagemap_ioctl(start, len, vec, vec_size, 0, 0, + PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written >= 0, "%s Get status of pages\n", __func__); + + free(vec); + return 0; +} + +static void test_simple(void) +{ + int i; + char *map; + struct page_region vec; + + map = aligned_alloc(page_size, page_size); + if (!map) + ksft_exit_fail_msg("aligned_alloc failed\n"); + + wp_init(map, page_size); + wp_addr_range(map, page_size); + + for (i = 0 ; i < TEST_ITERATIONS; i++) { + if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) { + ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + wp_addr_range(map, page_size); + /* Write something to the page to get the written bit enabled on the page */ + map[0]++; + + if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) { + ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i); + break; + } + + wp_addr_range(map, page_size); + } + wp_free(map, page_size); + free(map); + + ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__); +} + +int sanity_tests(void) +{ + int mem_size, vec_size, ret, fd, i, buf_size; + struct page_region *vec; + char *mem, *fmem; + struct stat sbuf; + char *tmp_buf; + + /* 1. wrong operation */ + mem_size = 10 * page_size; + vec_size = mem_size / page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED || vec == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0, + "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0, + "%s required_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0, + "%s anyof_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0, + "%s excluded_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0, + PAGEMAP_BITS_ALL) >= 0, + "%s required_mask and anyof_mask specified\n", __func__); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 2. Get sd and present pages with anyof_mask */ + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get sd and present pages with anyof_mask\n", __func__); + + /* 3. Get sd and present pages with required_mask */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get all the pages with required_mask\n", __func__); + + /* 4. Get sd and present pages with required_mask and anyof_mask */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get sd and present pages with required_mask and anyof_mask\n", + __func__); + + /* 5. Don't get sd pages */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL); + ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__); + + /* 6. Don't get present pages */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL); + ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 8. Find written present pages with return mask */ + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + vec[0].categories == PAGE_IS_WRITTEN, + "%s Find written present pages with return mask\n", __func__); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 9. Memory mapped file */ + fd = open(progname, O_RDONLY); + if (fd < 0) + ksft_exit_fail_msg("%s Memory mapped file\n", __func__); + + ret = stat(progname, &sbuf); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + tmp_buf = malloc(sbuf.st_size); + memcpy(tmp_buf, fmem, sbuf.st_size); + + ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0, + 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS); + + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem && + LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) && + (vec[0].categories & PAGE_IS_FILE), + "%s Memory mapped file\n", __func__); + + munmap(fmem, sbuf.st_size); + close(fd); + + /* 10. Create and read/write to a memory mapped file */ + buf_size = page_size * 10; + + fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666); + if (fd < 0) + ksft_exit_fail_msg("Read/write to memory: %s\n", + strerror(errno)); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, buf_size); + wp_addr_range(fmem, buf_size); + + for (i = 0; i < buf_size; i++) + fmem[i] = 'z'; + + msync(fmem, buf_size, MS_SYNC); + + ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0, + PAGEMAP_BITS_ALL); + + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem && + LEN(vec[0]) == (buf_size/page_size) && + (vec[0].categories & PAGE_IS_WRITTEN), + "%s Read/write to memory\n", __func__); + + wp_free(fmem, buf_size); + munmap(fmem, buf_size); + close(fd); + + free(vec); + return 0; +} + +int mprotect_tests(void) +{ + int ret; + char *mem, *mem2; + struct page_region vec; + int pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + + if (pagemap_fd < 0) { + fprintf(stderr, "open() failed\n"); + exit(1); + } + + /* 1. Map two pages */ + mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, 2 * page_size); + wp_addr_range(mem, 2 * page_size); + + /* Populate both pages. */ + memset(mem, 1, 2 * page_size); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__); + + /* 2. Start tracking */ + wp_addr_range(mem, 2 * page_size); + + ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0, + "%s Both pages are not written (dirty)\n", __func__); + + /* 3. Remap the second page */ + mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0); + if (mem2 == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem2, page_size); + wp_addr_range(mem2, page_size); + + /* Protect + unprotect. */ + mprotect(mem, page_size, PROT_NONE); + mprotect(mem, 2 * page_size, PROT_READ); + mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE); + + /* Modify both pages. */ + memset(mem, 2, 2 * page_size); + + /* Protect + unprotect. */ + mprotect(mem, page_size, PROT_NONE); + mprotect(mem, page_size, PROT_READ); + mprotect(mem, page_size, PROT_READ|PROT_WRITE); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, + "%s Both pages written after remap and mprotect\n", __func__); + + /* 4. Clear and make the pages written */ + wp_addr_range(mem, 2 * page_size); + + memset(mem, 'A', 2 * page_size); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, + "%s Clear and make the pages written\n", __func__); + + wp_free(mem, 2 * page_size); + munmap(mem, 2 * page_size); + return 0; +} + +/* transact test */ +static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8; +static pthread_barrier_t start_barrier, end_barrier; +static unsigned int extra_thread_faults; +static unsigned int iter_count = 1000; +static volatile int finish; + +static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, + int reset, int page_size) +{ + struct pm_scan_arg arg = {0}; + struct page_region rgns[256]; + int i, j, cnt, ret; + + arg.size = sizeof(struct pm_scan_arg); + arg.start = (uintptr_t)mem; + arg.max_pages = count; + arg.end = (uintptr_t)(mem + count * page_size); + arg.vec = (uintptr_t)rgns; + arg.vec_len = sizeof(rgns) / sizeof(*rgns); + if (reset) + arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC; + arg.category_mask = PAGE_IS_WRITTEN; + arg.return_mask = PAGE_IS_WRITTEN; + + ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); + if (ret < 0) + ksft_exit_fail_msg("ioctl failed\n"); + + cnt = 0; + for (i = 0; i < ret; ++i) { + if (rgns[i].categories != PAGE_IS_WRITTEN) + ksft_exit_fail_msg("wrong flags\n"); + + for (j = 0; j < LEN(rgns[i]); ++j) + cnt++; + } + + return cnt; +} + +void *thread_proc(void *mem) +{ + int *m = mem; + long curr_faults, faults; + struct rusage r; + unsigned int i; + int ret; + + if (getrusage(RUSAGE_THREAD, &r)) + ksft_exit_fail_msg("getrusage\n"); + + curr_faults = r.ru_minflt; + + while (!finish) { + ret = pthread_barrier_wait(&start_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + for (i = 0; i < access_per_thread; ++i) + __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST); + + ret = pthread_barrier_wait(&end_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + if (getrusage(RUSAGE_THREAD, &r)) + ksft_exit_fail_msg("getrusage\n"); + + faults = r.ru_minflt - curr_faults; + if (faults < access_per_thread) + ksft_exit_fail_msg("faults < access_per_thread"); + + __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread, + __ATOMIC_SEQ_CST); + curr_faults = r.ru_minflt; + } + + return NULL; +} + +static void transact_test(int page_size) +{ + unsigned int i, count, extra_pages; + pthread_t th; + char *mem; + int ret, c; + + if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) + ksft_exit_fail_msg("pthread_barrier_init\n"); + + if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1)) + ksft_exit_fail_msg("pthread_barrier_init\n"); + + mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno)); + + wp_init(mem, 0x1000 * nthreads * pages_per_thread); + wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread); + + memset(mem, 0, 0x1000 * nthreads * pages_per_thread); + + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + ksft_test_result(count > 0, "%s count %d\n", __func__, count); + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + ksft_test_result(count == 0, "%s count %d\n", __func__, count); + + finish = 0; + for (i = 0; i < nthreads; ++i) + pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread); + + extra_pages = 0; + for (i = 0; i < iter_count; ++i) { + count = 0; + + ret = pthread_barrier_wait(&start_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, + page_size); + + ret = pthread_barrier_wait(&end_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + if (count > nthreads * access_per_thread) + ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n", + count, nthreads * access_per_thread, i); + + c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + count += c; + + if (c > nthreads * access_per_thread) { + ksft_test_result_fail(" %s count > nthreads\n", __func__); + return; + } + + if (count != nthreads * access_per_thread) { + /* + * The purpose of the test is to make sure that no page updates are lost + * when the page updates and read-resetting soft dirty flags are performed + * in parallel. However, it is possible that the application will get the + * soft dirty flags twice on the two consecutive read-resets. This seems + * unavoidable as soft dirty flag is handled in software through page faults + * in kernel. While the updating the flags is supposed to be synchronized + * between page fault handling and read-reset, it is possible that + * read-reset happens after page fault PTE update but before the application + * re-executes write instruction. So read-reset gets the flag, clears write + * access and application gets page fault again for the same write. + */ + if (count < nthreads * access_per_thread) { + ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, + nthreads * access_per_thread); + return; + } + + extra_pages += count - nthreads * access_per_thread; + } + } + + pthread_barrier_wait(&start_barrier); + finish = 1; + pthread_barrier_wait(&end_barrier); + + ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, + extra_pages, + 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), + extra_thread_faults); +} + +int main(int argc, char *argv[]) +{ + int mem_size, shmid, buf_size, fd, i, ret; + char *mem, *map, *fmem; + struct stat sbuf; + + progname = argv[0]; + + ksft_print_header(); + + if (init_uffd()) + return ksft_exit_pass(); + + ksft_set_plan(115); + + page_size = getpagesize(); + hpage_size = read_pmd_pagesize(); + + pagemap_fd = open(PAGEMAP, O_RDONLY); + if (pagemap_fd < 0) + return -EINVAL; + + /* 1. Sanity testing */ + sanity_tests_sd(); + + /* 2. Normal page testing */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Page testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 3. Large page testing */ + mem_size = 512 * 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Large Page testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 4. Huge page testing */ + map = gethugepage(hpage_size); + if (map) { + wp_init(map, hpage_size); + wp_addr_range(map, hpage_size); + base_tests("Huge page testing:", map, hpage_size, 0); + wp_free(map, hpage_size); + free(map); + } else { + base_tests("Huge page testing:", NULL, 0, 1); + } + + /* 5. SHM Hugetlb page testing */ + mem_size = 2*1024*1024; + mem = gethugetlb_mem(mem_size, &shmid); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb shmem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + shmctl(shmid, IPC_RMID, NULL); + } else { + base_tests("Hugetlb shmem testing:", NULL, 0, 1); + } + + /* 6. Hugetlb page testing */ + mem = gethugetlb_mem(mem_size, NULL); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb mem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + } else { + base_tests("Hugetlb mem testing:", NULL, 0, 1); + } + + /* 7. File Hugetlb testing */ + mem_size = 2*1024*1024; + fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL); + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb shmem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + shmctl(shmid, IPC_RMID, NULL); + } else { + base_tests("Hugetlb shmem testing:", NULL, 0, 1); + } + close(fd); + + /* 8. File memory testing */ + buf_size = page_size * 10; + + fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777); + if (fd < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n", + strerror(errno)); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + ret = stat(__FILE__".tmp0", &sbuf); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, sbuf.st_size); + wp_addr_range(fmem, sbuf.st_size); + + base_tests("File memory testing:", fmem, sbuf.st_size, 0); + + wp_free(fmem, sbuf.st_size); + munmap(fmem, sbuf.st_size); + close(fd); + + /* 9. File memory testing */ + buf_size = page_size * 10; + + fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL); + if (fd < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n", + strerror(errno)); + + if (ftruncate(fd, buf_size)) + ksft_exit_fail_msg("Error ftruncate\n"); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, buf_size); + wp_addr_range(fmem, buf_size); + + base_tests("File anonymous memory testing:", fmem, buf_size, 0); + + wp_free(fmem, buf_size); + munmap(fmem, buf_size); + close(fd); + + /* 10. Huge page tests */ + hpage_unit_tests(); + + /* 11. Iterative test */ + test_simple(); + + /* 12. Mprotect test */ + mprotect_tests(); + + /* 13. Transact test */ + transact_test(page_size); + + /* 14. Sanity testing */ + sanity_tests(); + + /*15. Unmapped address test */ + unmapped_region_tests(); + + /* 16. Userfaultfd tests */ + userfaultfd_tests(); + + close(pagemap_fd); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 92f3be3dd8..1af3156a9d 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -34,7 +34,7 @@ extern int test_nr; extern int iteration_nr; #ifdef __GNUC__ -__attribute__((format(printf, 1, 2))) +__printf(1, 2) #endif static inline void sigsafe_printf(const char *format, ...) { diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3e2bc818d5..0075744527 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -56,6 +56,8 @@ separated by spaces: memory protection key tests - soft_dirty test soft dirty page bit semantics +- pagemap + test pagemap_scan IOCTL - cow test copy-on-write semantics - thp @@ -221,6 +223,13 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) +# For this test, we need one and just one huge page +echo 1 > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv +# Restore the previous number of huge pages, since further tests rely on it +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages + if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" @@ -303,6 +312,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope CATEGORY="memfd_secret" run_test ./memfd_secret # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 @@ -342,6 +352,8 @@ then CATEGORY="soft_dirty" run_test ./soft-dirty fi +CATEGORY="pagemap" run_test ./pagemap_ioctl + # COW tests CATEGORY="cow" run_test ./cow diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2709a34a39..2c68709062 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1309,6 +1309,12 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); + if ((mem_type->mem_flag == MEM_HUGETLB || + mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && + (default_huge_page_size() == 0)) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab2..a0a75f3029 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 558c9cd890..3082b40492 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -269,3 +269,22 @@ int uffd_unregister(int uffd, void *addr, uint64_t len) return ret; } + +unsigned long get_free_hugepages(void) +{ + unsigned long fhp = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return fhp; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) + break; + } + + free(line); + fclose(f); + return fhp; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c7fa61f0df..c02990bbd5 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -51,6 +51,7 @@ int uffd_register(int uffd, void *addr, uint64_t len, int uffd_unregister(int uffd, void *addr, uint64_t len); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); +unsigned long get_free_hugepages(void); /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4..3d2d2eb9d6 100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e -- cgit v1.2.3