author:     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
committer:  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
commit:     76cb841cb886eef6b3bee341a2266c76578724ad
tree:       f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /tools/testing/selftests/vm
parent:     Initial commit.
download:   linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz, linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
Adding upstream version 4.19.249. (tag: upstream/4.19.249)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/testing/selftests/vm')
19 files changed, 4074 insertions, 0 deletions
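The new Makefile below hooks these tests into the kselftest framework through "include ../lib.mk", so they build and run with the standard selftest targets. As a minimal sketch of the usual invocation from the top of the kernel tree (the run_vmtests wrapper added by this patch needs root for its hugetlb setup; the target names assume the stock kselftest tooling of this kernel version):

    # build and run only the vm selftests
    make -C tools/testing/selftests TARGETS=vm
    make -C tools/testing/selftests TARGETS=vm run_tests

    # or invoke the wrapper script added by this patch directly
    cd tools/testing/selftests/vm && sudo ./run_vmtests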
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore new file mode 100644 index 000000000..af5ff83f6 --- /dev/null +++ b/tools/testing/selftests/vm/.gitignore @@ -0,0 +1,15 @@ +hugepage-mmap +hugepage-shm +map_hugetlb +map_populate +thuge-gen +compaction_test +mlock2-tests +on-fault-limit +transhuge-stress +userfaultfd +mlock-intersect-test +mlock-random-test +virtual_address_range +gup_benchmark +va_128TBswitch diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile new file mode 100644 index 000000000..2cf3dc49b --- /dev/null +++ b/tools/testing/selftests/vm/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm selftests + +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + +CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) +LDLIBS = -lrt +TEST_GEN_FILES = compaction_test +TEST_GEN_FILES += gup_benchmark +TEST_GEN_FILES += hugepage-mmap +TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_populate +TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += on-fault-limit +TEST_GEN_FILES += thuge-gen +TEST_GEN_FILES += transhuge-stress +TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += va_128TBswitch +TEST_GEN_FILES += virtual_address_range + +TEST_PROGS := run_vmtests + +TEST_FILES := test_vmalloc.sh + +KSFT_KHDR_INSTALL := 1 +include ../lib.mk + +$(OUTPUT)/userfaultfd: LDLIBS += -lpthread + +$(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c new file mode 100644 index 000000000..bcec71250 --- /dev/null +++ b/tools/testing/selftests/vm/compaction_test.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * A test for the patch "Allow compaction of unevictable pages". + * With this patch we should be able to allocate at least 1/4 + * of RAM in huge pages. Without the patch much less is + * allocated. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#include "../kselftest.h" + +#define MAP_SIZE 1048576 + +struct map_list { + void *map; + struct map_list *next; +}; + +int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) +{ + char buffer[256] = {0}; + char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + + *memfree = atoll(buffer); + cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + *hugepagesize = atoll(buffer); + + return 0; +} + +int prereq(void) +{ + char allowed; + int fd; + + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { + perror("Failed to read from\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + close(fd); + return -1; + } + + close(fd); + if (allowed == '1') + return 0; + + return -1; +} + +int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +{ + int fd; + int compaction_index = 0; + char initial_nr_hugepages[10] = {0}; + char nr_hugepages[10] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ + mem_free = mem_free * 0.8; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open /proc/sys/vm/nr_hugepages"); + return -1; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages"); + goto close_fd; + } + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Request a large number of huge pages. The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { + perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ + compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + + if (compaction_index > 3) { + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + + lseek(fd, 0, SEEK_SET); + + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { + perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + close(fd); + return 0; + + close_fd: + close(fd); + printf("Not OK. 
Compaction test failed."); + return -1; +} + + +int main(int argc, char **argv) +{ + struct rlimit lim; + struct map_list *list, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + unsigned long mem_fragmentable = 0; + + if (prereq() != 0) { + printf("Either the sysctl compact_unevictable_allowed is not\n" + "set to 1 or couldn't read the proc file.\n" + "Skipping the test\n"); + return KSFT_SKIP; + } + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &lim)) { + perror("Failed to set rlimit:\n"); + return -1; + } + + page_size = getpagesize(); + + list = NULL; + + if (read_memory_info(&mem_free, &hugepage_size) != 0) { + printf("ERROR: Cannot read meminfo\n"); + return -1; + } + + mem_fragmentable = mem_free * 0.8 / 1024; + + while (mem_fragmentable > 0) { + map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + if (map == MAP_FAILED) + break; + + entry = malloc(sizeof(struct map_list)); + if (!entry) { + munmap(map, MAP_SIZE); + break; + } + entry->map = map; + entry->next = list; + list = entry; + + /* Write something (in this case the address of the map) to + * ensure that KSM can't merge the mapped pages + */ + for (i = 0; i < MAP_SIZE; i += page_size) + *(unsigned long *)(map + i) = (unsigned long)map + i; + + mem_fragmentable--; + } + + for (entry = list; entry != NULL; entry = entry->next) { + munmap(entry->map, MAP_SIZE); + if (!entry->next) + break; + entry = entry->next; + } + + if (check_compaction(mem_free, hugepage_size) == 0) + return 0; + + return -1; +} diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config new file mode 100644 index 000000000..1c0d76cb5 --- /dev/null +++ b/tools/testing/selftests/vm/config @@ -0,0 +1,2 @@ +CONFIG_SYSVIPC=y +CONFIG_USERFAULTFD=y diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c new file mode 100644 index 000000000..17da711f2 --- /dev/null +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -0,0 +1,93 @@ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <linux/types.h> + +#define MB (1UL << 20) +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) + +struct gup_benchmark { + __u64 delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; + __u64 expansion[10]; /* For future use */ +}; + +int main(int argc, char **argv) +{ + struct gup_benchmark gup; + unsigned long size = 128 * MB; + int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + char *p; + + while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) { + switch (opt) { + case 'm': + size = atoi(optarg) * MB; + break; + case 'r': + repeats = atoi(optarg); + break; + case 'n': + nr_pages = atoi(optarg); + break; + case 't': + thp = 1; + break; + case 'T': + thp = 0; + break; + case 'w': + write = 1; + break; + default: + return -1; + } + } + + gup.nr_pages_per_call = nr_pages; + gup.flags = write; + + fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + + p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (p == MAP_FAILED) + perror("mmap"), exit(1); + gup.addr = (unsigned long)p; + + if (thp == 1) + madvise(p, size, 
MADV_HUGEPAGE); + else if (thp == 0) + madvise(p, size, MADV_NOHUGEPAGE); + + for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) + p[0] = 0; + + for (i = 0; i < repeats; i++) { + gup.size = size; + if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) + perror("ioctl"), exit(1); + + printf("Time: %lld us", gup.delta_usec); + if (gup.size != size) + printf(", truncated (size: %lld)", gup.size); + printf("\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c new file mode 100644 index 000000000..93f9e7b81 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * hugepage-mmap: + * + * Example of using huge page memory in a user application using the mmap + * system call. Before running this application, make sure that the + * administrator has mounted the hugetlbfs filesystem (on some directory + * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this + * example, the app is requesting memory of size 256MB that is backed by + * huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define FILE_NAME "huge/hugepagefile" +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_SHARED | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_SHARED) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int fd, ret; + + fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + unlink(FILE_NAME); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + close(fd); + unlink(FILE_NAME); + + return ret; +} diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c new file mode 100644 index 000000000..e2527f320 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-shm.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * hugepage-shm: + * + * Example of using huge page memory in a user application using Sys V shared + * memory system calls. In this example the app is requesting 256MB of + * memory that is backed by huge pages. The application uses the flag + * SHM_HUGETLB in the shmget system call to inform the kernel that it is + * requesting huge pages. 
+ * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + * + * Note: The default shared memory limit is quite low on many kernels, + * you may need to increase it via: + * + * echo 268435456 > /proc/sys/kernel/shmmax + * + * This will increase the maximum size per shared memory segment to 256MB. + * The other limit that you will hit eventually is shmall which is the + * total amount of shared memory in pages. To set it to 16GB on a system + * with a 4kB pagesize do: + * + * echo 4194304 > /proc/sys/kernel/shmall + */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/mman.h> + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + +#define LENGTH (256UL*1024*1024) + +#define dprintf(x) printf(x) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define SHMAT_FLAGS (SHM_RND) +#else +#define ADDR (void *)(0x0UL) +#define SHMAT_FLAGS (0) +#endif + +int main(void) +{ + int shmid; + unsigned long i; + char *shmaddr; + + shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) { + perror("shmget"); + exit(1); + } + printf("shmid: 0x%x\n", shmid); + + shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + if (shmaddr == (char *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(2); + } + printf("shmaddr: %p\n", shmaddr); + + dprintf("Starting the writes:\n"); + for (i = 0; i < LENGTH; i++) { + shmaddr[i] = (char)(i); + if (!(i % (1024 * 1024))) + dprintf("."); + } + dprintf("\n"); + + dprintf("Starting the Check..."); + for (i = 0; i < LENGTH; i++) + if (shmaddr[i] != (char)i) { + printf("\nIndex %lu mismatched\n", i); + exit(3); + } + dprintf("Done.\n"); + + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + + shmctl(shmid, IPC_RMID, NULL); + + return 0; +} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c new file mode 100644 index 000000000..9b777fa95 --- /dev/null +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Example of using hugepage memory in a user application using the mmap + * system call with MAP_HUGETLB flag. Before running this program make + * sure the administrator has allocated enough default sized huge pages + * to cover the 256 MB allocation. + * + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. 
+ */ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int ret; + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, LENGTH)) { + perror("munmap"); + exit(1); + } + + return ret; +} diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c new file mode 100644 index 000000000..6b8aeaa0b --- /dev/null +++ b/tools/testing/selftests/vm/map_populate.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Dmitry Safonov, Arista Networks + * + * MAP_POPULATE | MAP_PRIVATE should COW VMA pages. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#ifndef MMAP_SZ +#define MMAP_SZ 4096 +#endif + +#define BUG_ON(condition, description) \ + do { \ + if (condition) { \ + fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \ + __LINE__, (description), strerror(errno)); \ + exit(1); \ + } \ + } while (0) + +static int parent_f(int sock, unsigned long *smap, int child) +{ + int status, ret; + + ret = read(sock, &status, sizeof(int)); + BUG_ON(ret <= 0, "read(sock)"); + + *smap = 0x22222BAD; + ret = msync(smap, MMAP_SZ, MS_SYNC); + BUG_ON(ret, "msync()"); + + ret = write(sock, &status, sizeof(int)); + BUG_ON(ret <= 0, "write(sock)"); + + waitpid(child, &status, 0); + BUG_ON(!WIFEXITED(status), "child in unexpected state"); + + return WEXITSTATUS(status); +} + +static int child_f(int sock, unsigned long *smap, int fd) +{ + int ret, buf = 0; + + smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_POPULATE, fd, 0); + BUG_ON(smap == MAP_FAILED, "mmap()"); + + BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file"); + + ret = write(sock, &buf, sizeof(int)); + BUG_ON(ret <= 0, "write(sock)"); + + ret = read(sock, &buf, sizeof(int)); + BUG_ON(ret <= 0, "read(sock)"); + + BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page"); + BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted"); + + return 0; +} + +int main(int argc, char **argv) +{ + int sock[2], child, ret; + FILE *ftmp; + unsigned long *smap; + + ftmp = tmpfile(); + BUG_ON(ftmp == 0, "tmpfile()"); + + ret = ftruncate(fileno(ftmp), MMAP_SZ); + BUG_ON(ret, "ftruncate()"); + + smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, + MAP_SHARED, fileno(ftmp), 0); + BUG_ON(smap == MAP_FAILED, "mmap()"); + + *smap = 0xdeadbabe; + /* Probably unnecessary, but let it be. */ + ret = msync(smap, MMAP_SZ, MS_SYNC); + BUG_ON(ret, "msync()"); + + ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock); + BUG_ON(ret, "socketpair()"); + + child = fork(); + BUG_ON(child == -1, "fork()"); + + if (child) { + ret = close(sock[0]); + BUG_ON(ret, "close()"); + + return parent_f(sock[1], smap, child); + } + + ret = close(sock[1]); + BUG_ON(ret, "close()"); + + return child_f(sock[0], smap, fileno(ftmp)); +} diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c new file mode 100644 index 000000000..ff4d72eb7 --- /dev/null +++ b/tools/testing/selftests/vm/mlock-random-test.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * It tests the mlock/mlock2() when they are invoked + * on randomly memory region. 
+ */ +#include <unistd.h> +#include <sys/resource.h> +#include <sys/capability.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <time.h> +#include "mlock2.h" + +#define CHUNK_UNIT (128 * 1024) +#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2) +#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT +#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3) + +#define TEST_LOOP 100 +#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1)) + +int set_cap_limits(rlim_t max) +{ + struct rlimit new; + cap_t cap = cap_init(); + + new.rlim_cur = max; + new.rlim_max = max; + if (setrlimit(RLIMIT_MEMLOCK, &new)) { + perror("setrlimit() returns error\n"); + return -1; + } + + /* drop capabilities including CAP_IPC_LOCK */ + if (cap_set_proc(cap)) { + perror("cap_set_proc() returns error\n"); + return -2; + } + + return 0; +} + +int get_proc_locked_vm_size(void) +{ + FILE *f; + int ret = -1; + char line[1024] = {0}; + unsigned long lock_size = 0; + + f = fopen("/proc/self/status", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(line, 1024, f)) { + if (strstr(line, "VmLck")) { + ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size); + if (ret <= 0) { + printf("sscanf() on VmLck error: %s: %d\n", + line, ret); + fclose(f); + return -1; + } + fclose(f); + return (int)(lock_size << 10); + } + } + + perror("cann't parse VmLck in /proc/self/status\n"); + fclose(f); + return -1; +} + +/* + * Get the MMUPageSize of the memory region including input + * address from proc file. + * + * return value: on error case, 0 will be returned. + * Otherwise the page size(in bytes) is returned. + */ +int get_proc_page_size(unsigned long addr) +{ + FILE *smaps; + char *line; + unsigned long mmupage_size = 0; + size_t size; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + return 0; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, "MMUPageSize")) { + free(line); + line = NULL; + size = 0; + continue; + } + + /* found the MMUPageSize of this section */ + if (sscanf(line, "MMUPageSize: %8lu kB", + &mmupage_size) < 1) { + printf("Unable to parse smaps entry for Size:%s\n", + line); + break; + } + + } + free(line); + if (smaps) + fclose(smaps); + return mmupage_size << 10; +} + +/* + * Test mlock/mlock2() on provided memory chunk. + * It expects the mlock/mlock2() to be successful (within rlimit) + * + * With allocated memory chunk [p, p + alloc_size), this + * test will choose start/len randomly to perform mlock/mlock2 + * [start, start + len] memory range. The range is within range + * of the allocated chunk. + * + * The memory region size alloc_size is within the rlimit. + * So we always expect a success of mlock/mlock2. + * + * VmLck is assumed to be 0 before this test. 
+ * + * return value: 0 - success + * else: failure + */ +int test_mlock_within_limit(char *p, int alloc_size) +{ + int i; + int ret = 0; + int locked_vm_size = 0; + struct rlimit cur; + int page_size = 0; + + getrlimit(RLIMIT_MEMLOCK, &cur); + if (cur.rlim_cur < alloc_size) { + printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n", + alloc_size, (unsigned int)cur.rlim_cur); + return -1; + } + + srand(time(NULL)); + for (i = 0; i < TEST_LOOP; i++) { + /* + * - choose mlock/mlock2 randomly + * - choose lock_size randomly but lock_size < alloc_size + * - choose start_offset randomly but p+start_offset+lock_size + * < p+alloc_size + */ + int is_mlock = !!(rand() % 2); + int lock_size = rand() % alloc_size; + int start_offset = rand() % (alloc_size - lock_size); + + if (is_mlock) + ret = mlock(p + start_offset, lock_size); + else + ret = mlock2_(p + start_offset, lock_size, + MLOCK_ONFAULT); + + if (ret) { + printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + is_mlock ? "mlock" : "mlock2", + p, alloc_size, + p + start_offset, lock_size); + return ret; + } + } + + /* + * Check VmLck left by the tests. + */ + locked_vm_size = get_proc_locked_vm_size(); + page_size = get_proc_page_size((unsigned long)p); + if (page_size == 0) { + printf("cannot get proc MMUPageSize\n"); + return -1; + } + + if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) { + printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n", + locked_vm_size, alloc_size); + return -1; + } + + return 0; +} + + +/* + * We expect the mlock/mlock2() to be fail (outof limitation) + * + * With allocated memory chunk [p, p + alloc_size), this + * test will randomly choose start/len and perform mlock/mlock2 + * on [start, start+len] range. + * + * The memory region size alloc_size is above the rlimit. + * And the len to be locked is higher than rlimit. + * So we always expect a failure of mlock/mlock2. + * No locked page number should be increased as a side effect. + * + * return value: 0 - success + * else: failure + */ +int test_mlock_outof_limit(char *p, int alloc_size) +{ + int i; + int ret = 0; + int locked_vm_size = 0, old_locked_vm_size = 0; + struct rlimit cur; + + getrlimit(RLIMIT_MEMLOCK, &cur); + if (cur.rlim_cur >= alloc_size) { + printf("alloc_size[%d] >%u rlimit, violates test condition\n", + alloc_size, (unsigned int)cur.rlim_cur); + return -1; + } + + old_locked_vm_size = get_proc_locked_vm_size(); + srand(time(NULL)); + for (i = 0; i < TEST_LOOP; i++) { + int is_mlock = !!(rand() % 2); + int lock_size = (rand() % (alloc_size - cur.rlim_cur)) + + cur.rlim_cur; + int start_offset = rand() % (alloc_size - lock_size); + + if (is_mlock) + ret = mlock(p + start_offset, lock_size); + else + ret = mlock2_(p + start_offset, lock_size, + MLOCK_ONFAULT); + if (ret == 0) { + printf("%s() succeeds? on %p(%d) mlock%p(%d)\n", + is_mlock ? 
"mlock" : "mlock2", + p, alloc_size, + p + start_offset, lock_size); + return -1; + } + } + + locked_vm_size = get_proc_locked_vm_size(); + if (locked_vm_size != old_locked_vm_size) { + printf("tests leads to new mlocked page: old[%d], new[%d]\n", + old_locked_vm_size, + locked_vm_size); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + char *p = NULL; + int ret = 0; + + if (set_cap_limits(MLOCK_RLIMIT_SIZE)) + return -1; + + p = malloc(MLOCK_WITHIN_LIMIT_SIZE); + if (p == NULL) { + perror("malloc() failure\n"); + return -1; + } + ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE); + if (ret) + return ret; + munlock(p, MLOCK_WITHIN_LIMIT_SIZE); + free(p); + + + p = malloc(MLOCK_OUTOF_LIMIT_SIZE); + if (p == NULL) { + perror("malloc() failure\n"); + return -1; + } + ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE); + if (ret) + return ret; + munlock(p, MLOCK_OUTOF_LIMIT_SIZE); + free(p); + + return 0; +} diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c new file mode 100644 index 000000000..11b2301f3 --- /dev/null +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/mman.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <stdbool.h> +#include "mlock2.h" + +#include "../kselftest.h" + +struct vm_boundaries { + unsigned long start; + unsigned long end; +}; + +static int get_vm_area(unsigned long addr, struct vm_boundaries *area) +{ + FILE *file; + int ret = 1; + char line[1024] = {0}; + char *end_addr; + char *stop; + unsigned long start; + unsigned long end; + + if (!area) + return ret; + + file = fopen("/proc/self/maps", "r"); + if (!file) { + perror("fopen"); + return ret; + } + + memset(area, 0, sizeof(struct vm_boundaries)); + + while(fgets(line, 1024, file)) { + end_addr = strchr(line, '-'); + if (!end_addr) { + printf("cannot parse /proc/self/maps\n"); + goto out; + } + *end_addr = '\0'; + end_addr++; + stop = strchr(end_addr, ' '); + if (!stop) { + printf("cannot parse /proc/self/maps\n"); + goto out; + } + stop = '\0'; + + sscanf(line, "%lx", &start); + sscanf(end_addr, "%lx", &end); + + if (start <= addr && end > addr) { + area->start = start; + area->end = end; + ret = 0; + goto out; + } + } +out: + fclose(file); + return ret; +} + +#define VMFLAGS "VmFlags:" + +static bool is_vmflag_set(unsigned long addr, const char *vmflag) +{ + char *line = NULL; + char *flags; + size_t size = 0; + bool ret = false; + FILE *smaps; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + goto out; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, VMFLAGS)) { + free(line); + line = NULL; + size = 0; + continue; + } + + flags = line + strlen(VMFLAGS); + ret = (strstr(flags, vmflag) != NULL); + goto out; + } + +out: + free(line); + fclose(smaps); + return ret; +} + +#define SIZE "Size:" +#define RSS "Rss:" +#define LOCKED "lo" + +static unsigned long get_value_for_name(unsigned long addr, const char *name) +{ + char *line = NULL; + size_t size = 0; + char *value_ptr; + FILE *smaps = NULL; + unsigned long value = -1UL; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + goto out; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, name)) { + free(line); + line = NULL; + size = 0; + continue; + } + + value_ptr = line + 
strlen(name); + if (sscanf(value_ptr, "%lu kB", &value) < 1) { + printf("Unable to parse smaps entry for Size\n"); + goto out; + } + break; + } + +out: + if (smaps) + fclose(smaps); + free(line); + return value; +} + +static bool is_vma_lock_on_fault(unsigned long addr) +{ + bool locked; + unsigned long vma_size, vma_rss; + + locked = is_vmflag_set(addr, LOCKED); + if (!locked) + return false; + + vma_size = get_value_for_name(addr, SIZE); + vma_rss = get_value_for_name(addr, RSS); + + /* only one page is faulted in */ + return (vma_rss < vma_size); +} + +#define PRESENT_BIT 0x8000000000000000ULL +#define PFN_MASK 0x007FFFFFFFFFFFFFULL +#define UNEVICTABLE_BIT (1UL << 18) + +static int lock_check(unsigned long addr) +{ + bool locked; + unsigned long vma_size, vma_rss; + + locked = is_vmflag_set(addr, LOCKED); + if (!locked) + return false; + + vma_size = get_value_for_name(addr, SIZE); + vma_rss = get_value_for_name(addr, RSS); + + return (vma_rss == vma_size); +} + +static int unlock_lock_check(char *map) +{ + if (is_vmflag_set((unsigned long)map, LOCKED)) { + printf("VMA flag %s is present on page 1 after unlock\n", LOCKED); + return 1; + } + + return 0; +} + +static int test_mlock_lock() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked mmap"); + goto out; + } + + if (mlock2_(map, 2 * page_size, 0)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(KSFT_SKIP); + } + perror("mlock2(0)"); + goto unmap; + } + + if (!lock_check((unsigned long)map)) + goto unmap; + + /* Now unlock and recheck attributes */ + if (munlock(map, 2 * page_size)) { + perror("munlock()"); + goto unmap; + } + + ret = unlock_lock_check(map); + +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int onfault_check(char *map) +{ + *map = 'a'; + if (!is_vma_lock_on_fault((unsigned long)map)) { + printf("VMA is not marked for lock on fault\n"); + return 1; + } + + return 0; +} + +static int unlock_onfault_check(char *map) +{ + unsigned long page_size = getpagesize(); + + if (is_vma_lock_on_fault((unsigned long)map) || + is_vma_lock_on_fault((unsigned long)map + page_size)) { + printf("VMA is still lock on fault after unlock\n"); + return 1; + } + + return 0; +} + +static int test_mlock_onfault() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked mmap"); + goto out; + } + + if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(KSFT_SKIP); + } + perror("mlock2(MLOCK_ONFAULT)"); + goto unmap; + } + + if (onfault_check(map)) + goto unmap; + + /* Now unlock and recheck attributes */ + if (munlock(map, 2 * page_size)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(KSFT_SKIP); + } + perror("munlock()"); + goto unmap; + } + + ret = unlock_onfault_check(map); +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int test_lock_onfault_of_present() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked 
mmap"); + goto out; + } + + *map = 'a'; + + if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(KSFT_SKIP); + } + perror("mlock2(MLOCK_ONFAULT)"); + goto unmap; + } + + if (!is_vma_lock_on_fault((unsigned long)map) || + !is_vma_lock_on_fault((unsigned long)map + page_size)) { + printf("VMA with present pages is not marked lock on fault\n"); + goto unmap; + } + ret = 0; +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int test_munlockall() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + if (map == MAP_FAILED) { + perror("test_munlockall mmap"); + goto out; + } + + if (mlockall(MCL_CURRENT)) { + perror("mlockall(MCL_CURRENT)"); + goto out; + } + + if (!lock_check((unsigned long)map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + if (unlock_lock_check(map)) + goto unmap; + + munmap(map, 2 * page_size); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + if (map == MAP_FAILED) { + perror("test_munlockall second mmap"); + goto out; + } + + if (mlockall(MCL_CURRENT | MCL_ONFAULT)) { + perror("mlockall(MCL_CURRENT | MCL_ONFAULT)"); + goto unmap; + } + + if (onfault_check(map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + if (unlock_onfault_check(map)) + goto unmap; + + if (mlockall(MCL_CURRENT | MCL_FUTURE)) { + perror("mlockall(MCL_CURRENT | MCL_FUTURE)"); + goto out; + } + + if (!lock_check((unsigned long)map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + ret = unlock_lock_check(map); + +unmap: + munmap(map, 2 * page_size); +out: + munlockall(); + return ret; +} + +static int test_vma_management(bool call_mlock) +{ + int ret = 1; + void *map; + unsigned long page_size = getpagesize(); + struct vm_boundaries page1; + struct vm_boundaries page2; + struct vm_boundaries page3; + + map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (map == MAP_FAILED) { + perror("mmap()"); + return ret; + } + + if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(KSFT_SKIP); + } + perror("mlock(ONFAULT)\n"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* + * Before we unlock a portion, we need to that all three pages are in + * the same VMA. 
If they are not we abort this test (Note that this is + * not a failure) + */ + if (page1.start != page2.start || page2.start != page3.start) { + printf("VMAs are not merged to start, aborting test\n"); + ret = 0; + goto out; + } + + if (munlock(map + page_size, page_size)) { + perror("munlock()"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* All three VMAs should be different */ + if (page1.start == page2.start || page2.start == page3.start) { + printf("failed to split VMA for munlock\n"); + goto out; + } + + /* Now unlock the first and third page and check the VMAs again */ + if (munlock(map, page_size * 3)) { + perror("munlock()"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* Now all three VMAs should be the same */ + if (page1.start != page2.start || page2.start != page3.start) { + printf("failed to merge VMAs after munlock\n"); + goto out; + } + + ret = 0; +out: + munmap(map, 3 * page_size); + return ret; +} + +static int test_mlockall(int (test_function)(bool call_mlock)) +{ + int ret = 1; + + if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) { + perror("mlockall"); + return ret; + } + + ret = test_function(false); + munlockall(); + return ret; +} + +int main(int argc, char **argv) +{ + int ret = 0; + ret += test_mlock_lock(); + ret += test_mlock_onfault(); + ret += test_munlockall(); + ret += test_lock_onfault_of_present(); + ret += test_vma_management(true); + ret += test_mlockall(test_vma_management); + return ret; +} diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h new file mode 100644 index 000000000..2a6e76c22 --- /dev/null +++ b/tools/testing/selftests/vm/mlock2.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <syscall.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#ifndef MLOCK_ONFAULT +#define MLOCK_ONFAULT 1 +#endif + +#ifndef MCL_ONFAULT +#define MCL_ONFAULT (MCL_FUTURE << 1) +#endif + +static int mlock2_(void *start, size_t len, int flags) +{ +#ifdef __NR_mlock2 + return syscall(__NR_mlock2, start, len, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +static FILE *seek_to_smaps_entry(unsigned long addr) +{ + FILE *file; + char *line = NULL; + size_t size = 0; + unsigned long start, end; + char perms[5]; + unsigned long offset; + char dev[32]; + unsigned long inode; + char path[BUFSIZ]; + + file = fopen("/proc/self/smaps", "r"); + if (!file) { + perror("fopen smaps"); + _exit(1); + } + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n", + &start, &end, perms, &offset, dev, &inode, path) < 6) + goto next; + + if (start <= addr && addr < end) + goto out; + +next: + free(line); + line = NULL; + size = 0; + } + + fclose(file); + file = NULL; + +out: + free(line); + return file; +} diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c new file mode 100644 index 000000000..634d87dfb --- /dev/null +++ b/tools/testing/selftests/vm/on-fault-limit.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/mman.h> +#include <stdio.h> +#include <unistd.h> 
+#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> + +#ifndef MCL_ONFAULT +#define MCL_ONFAULT (MCL_FUTURE << 1) +#endif + +static int test_limit(void) +{ + int ret = 1; + struct rlimit lims; + void *map; + + if (getrlimit(RLIMIT_MEMLOCK, &lims)) { + perror("getrlimit"); + return ret; + } + + if (mlockall(MCL_ONFAULT | MCL_FUTURE)) { + perror("mlockall"); + return ret; + } + + map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); + if (map != MAP_FAILED) + printf("mmap should have failed, but didn't\n"); + else { + ret = 0; + munmap(map, 2 * lims.rlim_max); + } + + munlockall(); + return ret; +} + +int main(int argc, char **argv) +{ + int ret = 0; + + ret += test_limit(); + return ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests new file mode 100755 index 000000000..584a91ae4 --- /dev/null +++ b/tools/testing/selftests/vm/run_vmtests @@ -0,0 +1,214 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +#please run as root + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +mnt=./huge +exitcode=0 + +#get huge pagesize and freepages from /proc/meminfo +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + if [ "$name" = "Hugepagesize:" ]; then + hpgsize_KB=$size + fi +done < /proc/meminfo + +# Simple hugetlbfs tests have a hardcoded minimum requirement of +# huge pages totaling 256MB (262144KB) in size. The userfaultfd +# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take +# both of these requirements into account and attempt to increase +# number of huge pages available. +nr_cpus=$(nproc) +hpgsize_MB=$((hpgsize_KB / 1024)) +half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) +needmem_KB=$((half_ufd_size_MB * 2 * 1024)) + +#set proper nr_hugepages +if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + needpgs=$((needmem_KB / hpgsize_KB)) + tries=2 + while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do + lackpgs=$(( $needpgs - $freepgs )) + echo 3 > /proc/sys/vm/drop_caches + echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit $ksft_skip + fi + while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + done < /proc/meminfo + tries=$((tries - 1)) + done + if [ $freepgs -lt $needpgs ]; then + printf "Not enough huge pages available (%d < %d)\n" \ + $freepgs $needpgs + exit 1 + fi +else + echo "no hugetlbfs support in kernel?" + exit 1 +fi + +mkdir $mnt +mount -t hugetlbfs none $mnt + +echo "---------------------" +echo "running hugepage-mmap" +echo "---------------------" +./hugepage-mmap +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +shmmax=`cat /proc/sys/kernel/shmmax` +shmall=`cat /proc/sys/kernel/shmall` +echo 268435456 > /proc/sys/kernel/shmmax +echo 4194304 > /proc/sys/kernel/shmall +echo "--------------------" +echo "running hugepage-shm" +echo "--------------------" +./hugepage-shm +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +echo $shmmax > /proc/sys/kernel/shmmax +echo $shmall > /proc/sys/kernel/shmall + +echo "-------------------" +echo "running map_hugetlb" +echo "-------------------" +./map_hugetlb +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "NOTE: The above hugetlb tests provide minimal coverage. 
Use" +echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" +echo " hugetlb regression testing." + +echo "-------------------" +echo "running userfaultfd" +echo "-------------------" +./userfaultfd anon 128 32 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "---------------------------" +echo "running userfaultfd_hugetlb" +echo "---------------------------" +# Test requires source and destination huge pages. Size of source +# (half_ufd_size_MB) is passed as argument to test. +./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +rm -f $mnt/ufd_test_file + +echo "-------------------------" +echo "running userfaultfd_shmem" +echo "-------------------------" +./userfaultfd shmem 128 32 +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +#cleanup +umount $mnt +rm -rf $mnt +echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "----------------------" +echo "running on-fault-limit" +echo "----------------------" +sudo -u nobody ./on-fault-limit +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running map_populate" +echo "--------------------" +./map_populate +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running mlock2-tests" +echo "--------------------" +./mlock2-tests +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-----------------------------" +echo "running virtual_address_range" +echo "-----------------------------" +./virtual_address_range +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "-----------------------------" +echo "running virtual address 128TB switch test" +echo "-----------------------------" +./va_128TBswitch +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +exit $exitcode diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c new file mode 100644 index 000000000..361ef7192 --- /dev/null +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Test selecting other page sizes for mmap/shmget. + + Before running this huge pages for each huge page size must have been + reserved. + For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + Also shmmax must be increased. + And you need to run as root to work around some weird permissions in shm. + And nothing using huge pages should run in parallel. 
+ When the program aborts you may need to clean up the shm segments with + ipcrm -m by hand, like this + sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m + (warning this will remove all if someone else uses them) */ + +#define _GNU_SOURCE 1 +#include <sys/mman.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <glob.h> +#include <assert.h> +#include <unistd.h> +#include <stdarg.h> +#include <string.h> + +#define err(x) perror(x), exit(1) + +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f +#if !defined(MAP_HUGETLB) +#define MAP_HUGETLB 0x40000 +#endif + +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + +#define NUM_PAGESIZES 5 + +#define NUM_PAGES 4 + +#define Dprintf(fmt...) // printf(fmt) + +unsigned long page_sizes[NUM_PAGESIZES]; +int num_page_sizes; + +int ilog2(unsigned long v) +{ + int l = 0; + while ((1UL << l) < v) + l++; + return l; +} + +void find_pagesizes(void) +{ + glob_t g; + int i; + glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g); + assert(g.gl_pathc <= NUM_PAGESIZES); + for (i = 0; i < g.gl_pathc; i++) { + sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB", + &page_sizes[i]); + page_sizes[i] <<= 10; + printf("Found %luMB\n", page_sizes[i] >> 20); + } + num_page_sizes = g.gl_pathc; + globfree(&g); +} + +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + free(line); + return hps; +} + +void show(unsigned long ps) +{ + char buf[100]; + if (ps == getpagesize()) + return; + printf("%luMB: ", ps >> 20); + fflush(stdout); + snprintf(buf, sizeof buf, + "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + system(buf); +} + +unsigned long read_sysfs(int warn, char *fmt, ...) 
+{ + char *line = NULL; + size_t linelen = 0; + char buf[100]; + FILE *f; + va_list ap; + unsigned long val = 0; + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + + f = fopen(buf, "r"); + if (!f) { + if (warn) + printf("missing %s\n", buf); + return 0; + } + if (getline(&line, &linelen, f) > 0) { + sscanf(line, "%lu", &val); + } + fclose(f); + free(line); + return val; +} + +unsigned long read_free(unsigned long ps) +{ + return read_sysfs(ps != getpagesize(), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); +} + +void test_mmap(unsigned long size, unsigned flags) +{ + char *map; + unsigned long before, after; + int err; + + before = read_free(size); + map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0); + + if (map == (char *)-1) err("mmap"); + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = munmap(map, size); + assert(!err); +} + +void test_shmget(unsigned long size, unsigned flags) +{ + int id; + unsigned long before, after; + int err; + + before = read_free(size); + id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags); + if (id < 0) err("shmget"); + + struct shm_info i; + if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl"); + Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss); + + + Dprintf("id %d\n", id); + char *map = shmat(id, NULL, 0600); + if (map == (char*)-1) err("shmat"); + + shmctl(id, IPC_RMID, NULL); + + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = shmdt(map); + assert(!err); +} + +void sanity_checks(void) +{ + int i; + unsigned long largest = getpagesize(); + + for (i = 0; i < num_page_sizes; i++) { + if (page_sizes[i] > largest) + largest = page_sizes[i]; + + if (read_free(page_sizes[i]) < NUM_PAGES) { + printf("Not enough huge pages for page size %lu MB, need %u\n", + page_sizes[i] >> 20, + NUM_PAGES); + exit(0); + } + } + + if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) { + printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); + exit(0); + } + +#if defined(__x86_64__) + if (largest != 1U<<30) { + printf("No GB pages available on x86-64\n" + "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); + exit(0); + } +#endif +} + +int main(void) +{ + int i; + unsigned default_hps = default_huge_page_size(); + + find_pagesizes(); + + sanity_checks(); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << MAP_HUGE_SHIFT; + printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg); + test_mmap(ps, MAP_HUGETLB | arg); + } + printf("Testing default huge mmap\n"); + test_mmap(default_hps, SHM_HUGETLB); + + puts("Testing non-huge shmget"); + test_shmget(getpagesize(), 0); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << SHM_HUGE_SHIFT; + printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg); + test_shmget(ps, SHM_HUGETLB | arg); + } + puts("default huge shmget"); + test_shmget(default_hps, SHM_HUGETLB); + + return 0; +} diff --git a/tools/testing/selftests/vm/transhuge-stress.c 
b/tools/testing/selftests/vm/transhuge-stress.c new file mode 100644 index 000000000..fd7f1b4a9 --- /dev/null +++ b/tools/testing/selftests/vm/transhuge-stress.c @@ -0,0 +1,144 @@ +/* + * Stress test for transparent huge pages, memory compaction and migration. + * + * Authors: Konstantin Khlebnikov <koct9i@gmail.com> + * + * This is free and unencumbered software released into the public domain. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <err.h> +#include <time.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> + +#define PAGE_SHIFT 12 +#define HPAGE_SHIFT 21 + +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define HPAGE_SIZE (1 << HPAGE_SHIFT) + +#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0) +#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1)) + +int pagemap_fd; + +int64_t allocate_transhuge(void *ptr) +{ + uint64_t ent[2]; + + /* drop pmd */ + if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | + MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) + errx(2, "mmap transhuge"); + + if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + /* allocate transparent huge page */ + *(volatile void **)ptr = ptr; + + if (pread(pagemap_fd, ent, sizeof(ent), + (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent)) + err(2, "read pagemap"); + + if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && + PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && + !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1))) + return PAGEMAP_PFN(ent[0]); + + return -1; +} + +int main(int argc, char **argv) +{ + size_t ram, len; + void *ptr, *p; + struct timespec a, b; + double s; + uint8_t *map; + size_t map_len; + + ram = sysconf(_SC_PHYS_PAGES); + if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4) + ram = SIZE_MAX / 4; + else + ram *= sysconf(_SC_PAGESIZE); + + if (argc == 1) + len = ram; + else if (!strcmp(argv[1], "-h")) + errx(1, "usage: %s [size in MiB]", argv[0]); + else + len = atoll(argv[1]) << 20; + + warnx("allocate %zd transhuge pages, using %zd MiB virtual memory" + " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20, + len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + err(2, "open pagemap"); + + len -= len % HPAGE_SIZE; + ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0); + if (ptr == MAP_FAILED) + err(2, "initial mmap"); + ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE; + + if (madvise(ptr, len, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + map_len = ram >> (HPAGE_SHIFT - 1); + map = malloc(map_len); + if (!map) + errx(2, "map malloc"); + + while (1) { + int nr_succeed = 0, nr_failed = 0, nr_pages = 0; + + memset(map, 0, map_len); + + clock_gettime(CLOCK_MONOTONIC, &a); + for (p = ptr; p < ptr + len; p += HPAGE_SIZE) { + int64_t pfn; + + pfn = allocate_transhuge(p); + + if (pfn < 0) { + nr_failed++; + } else { + size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT); + + nr_succeed++; + if (idx >= map_len) { + map = realloc(map, idx + 1); + if (!map) + errx(2, "map realloc"); + memset(map + map_len, 0, idx + 1 - map_len); + map_len = idx + 1; + } + if (!map[idx]) + nr_pages++; + map[idx] = 1; + } + + /* split transhuge page, keep last page */ + if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED)) + err(2, "MADV_DONTNEED"); + } + clock_gettime(CLOCK_MONOTONIC, &b); + s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.; + + warnx("%.3f 
s/loop, %.3f ms/page, %10.3f MiB/s\t" + "%4d succeed, %4d failed, %4d different pages", + s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20), + nr_succeed, nr_failed, nr_pages); + } +} diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c new file mode 100644 index 000000000..b2c7043c0 --- /dev/null +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -0,0 +1,1333 @@ +/* + * Stress userfaultfd syscall. + * + * Copyright (C) 2015 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * This test allocates two virtual areas and bounces the physical + * memory across the two virtual areas (from area_src to area_dst) + * using userfaultfd. + * + * There are three threads running per CPU: + * + * 1) one per-CPU thread takes a per-page pthread_mutex in a random + * page of the area_dst (while the physical page may still be in + * area_src), and increments a per-page counter in the same page, + * and checks its value against a verification region. + * + * 2) another per-CPU thread handles the userfaults generated by + * thread 1 above. userfaultfd blocking reads or poll() modes are + * exercised interleaved. + * + * 3) one last per-CPU thread transfers the memory in the background + * at maximum bandwidth (if not already transferred by thread + * 2). Each cpu thread takes cares of transferring a portion of the + * area. + * + * When all threads of type 3 completed the transfer, one bounce is + * complete. area_src and area_dst are then swapped. All threads are + * respawned and so the bounce is immediately restarted in the + * opposite direction. + * + * per-CPU threads 1 by triggering userfaults inside + * pthread_mutex_lock will also verify the atomicity of the memory + * transfer (UFFDIO_COPY). + * + * The program takes two parameters: the amounts of physical memory in + * megabytes (MiB) of the area and the number of bounces to execute. 
+ * + * # 100MiB 99999 bounces + * ./userfaultfd 100 99999 + * + * # 1GiB 99 bounces + * ./userfaultfd 1000 99 + * + * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers + * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <signal.h> +#include <poll.h> +#include <string.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <pthread.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <stdbool.h> + +#include "../kselftest.h" + +#ifdef __NR_userfaultfd + +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; + +#define BOUNCE_RANDOM (1<<0) +#define BOUNCE_RACINGFAULTS (1<<1) +#define BOUNCE_VERIFY (1<<2) +#define BOUNCE_POLL (1<<3) +static int bounces; + +#define TEST_ANON 1 +#define TEST_HUGETLB 2 +#define TEST_SHMEM 3 +static int test_type; + +/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ +#define ALARM_INTERVAL_SECS 10 +static volatile bool test_uffdio_copy_eexist = true; +static volatile bool test_uffdio_zeropage_eexist = true; + +static bool map_shared; +static int huge_fd; +static char *huge_fd_off0; +static unsigned long long *count_verify; +static int uffd, uffd_flags, finished, *pipefd; +static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; +static char *zeropage; +pthread_attr_t attr; + +/* pthread_mutex_t starts at page offset 0 */ +#define area_mutex(___area, ___nr) \ + ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +#define area_count(___area, ___nr) \ + ((volatile unsigned long long *) ((unsigned long) \ + ((___area) + (___nr)*page_size + \ + sizeof(pthread_mutex_t) + \ + sizeof(unsigned long long) - 1) & \ + ~(unsigned long)(sizeof(unsigned long long) \ + - 1))) + +static int anon_release_pages(char *rel_area) +{ + int ret = 0; + + if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise"); + ret = 1; + } + + return ret; +} + +static void anon_allocate_area(void **alloc_area) +{ + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) { + fprintf(stderr, "mmap of anonymous memory failed"); + *alloc_area = NULL; + } +} + +static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) +{ +} + +/* HugeTLB memory */ +static int hugetlb_release_pages(char *rel_area) +{ + int ret = 0; + + if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + rel_area == huge_fd_off0 ? 0 : + nr_pages * page_size, + nr_pages * page_size)) { + perror("fallocate"); + ret = 1; + } + + return ret; +} + + +static void hugetlb_allocate_area(void **alloc_area) +{ + void *area_alias = NULL; + char **alloc_area_alias; + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + MAP_HUGETLB, + huge_fd, *alloc_area == area_src ? 
0 : + nr_pages * page_size); + if (*alloc_area == MAP_FAILED) { + fprintf(stderr, "mmap of hugetlbfs file failed\n"); + *alloc_area = NULL; + } + + if (map_shared) { + area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_HUGETLB, + huge_fd, *alloc_area == area_src ? 0 : + nr_pages * page_size); + if (area_alias == MAP_FAILED) { + if (munmap(*alloc_area, nr_pages * page_size) < 0) + perror("hugetlb munmap"), exit(1); + *alloc_area = NULL; + return; + } + } + if (*alloc_area == area_src) { + huge_fd_off0 = *alloc_area; + alloc_area_alias = &area_src_alias; + } else { + alloc_area_alias = &area_dst_alias; + } + if (area_alias) + *alloc_area_alias = area_alias; +} + +static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +{ + if (!map_shared) + return; + /* + * We can't zap just the pagetable with hugetlbfs because + * MADV_DONTEED won't work. So exercise -EEXIST on a alias + * mapping where the pagetables are not established initially, + * this way we'll exercise the -EEXEC at the fs level. + */ + *start = (unsigned long) area_dst_alias + offset; +} + +/* Shared memory */ +static int shmem_release_pages(char *rel_area) +{ + int ret = 0; + + if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) { + perror("madvise"); + ret = 1; + } + + return ret; +} + +static void shmem_allocate_area(void **alloc_area) +{ + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (*alloc_area == MAP_FAILED) { + fprintf(stderr, "shared memory mmap failed\n"); + *alloc_area = NULL; + } +} + +struct uffd_test_ops { + unsigned long expected_ioctls; + void (*allocate_area)(void **alloc_area); + int (*release_pages)(char *rel_area); + void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); +}; + +#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ + (1 << _UFFDIO_COPY) | \ + (1 << _UFFDIO_ZEROPAGE)) + +static struct uffd_test_ops anon_uffd_test_ops = { + .expected_ioctls = ANON_EXPECTED_IOCTLS, + .allocate_area = anon_allocate_area, + .release_pages = anon_release_pages, + .alias_mapping = noop_alias_mapping, +}; + +static struct uffd_test_ops shmem_uffd_test_ops = { + .expected_ioctls = ANON_EXPECTED_IOCTLS, + .allocate_area = shmem_allocate_area, + .release_pages = shmem_release_pages, + .alias_mapping = noop_alias_mapping, +}; + +static struct uffd_test_ops hugetlb_uffd_test_ops = { + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .allocate_area = hugetlb_allocate_area, + .release_pages = hugetlb_release_pages, + .alias_mapping = hugetlb_alias_mapping, +}; + +static struct uffd_test_ops *uffd_test_ops; + +static int my_bcmp(char *str1, char *str2, size_t n) +{ + unsigned long i; + for (i = 0; i < n; i++) + if (str1[i] != str2[i]) + return 1; + return 0; +} + +static void *locking_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct random_data rand; + unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ + int32_t rand_nr; + unsigned long long count; + char randstate[64]; + unsigned int seed; + time_t start; + + if (bounces & BOUNCE_RANDOM) { + seed = (unsigned int) time(NULL) - bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + seed += cpu; + bzero(&rand, sizeof(rand)); + bzero(&randstate, sizeof(randstate)); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) + fprintf(stderr, "srandom_r error\n"), exit(1); + } else { + page_nr = -bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + page_nr += cpu * nr_pages_per_cpu; + } + + 
while (!finished) { + if (bounces & BOUNCE_RANDOM) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 1 error\n"), exit(1); + page_nr = rand_nr; + if (sizeof(page_nr) > sizeof(rand_nr)) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 2 error\n"), exit(1); + page_nr |= (((unsigned long) rand_nr) << 16) << + 16; + } + } else + page_nr += 1; + page_nr %= nr_pages; + + start = time(NULL); + if (bounces & BOUNCE_VERIFY) { + count = *area_count(area_dst, page_nr); + if (!count) + fprintf(stderr, + "page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + + + /* + * We can't use bcmp (or memcmp) because that + * returns 0 erroneously if the memory is + * changing under it (even if the end of the + * page is never changing and always + * different). + */ +#if 1 + if (!my_bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) + fprintf(stderr, + "my_bcmp page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); +#else + unsigned long loops; + + loops = 0; + /* uncomment the below line to test with mutex */ + /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ + while (!bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) { + loops += 1; + if (loops > 10) + break; + } + /* uncomment below line to test with mutex */ + /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ + if (loops) { + fprintf(stderr, + "page_nr %lu all zero thread %lu %p %lu\n", + page_nr, cpu, area_dst + page_nr * page_size, + loops); + if (loops > 10) + exit(1); + } +#endif + } + + pthread_mutex_lock(area_mutex(area_dst, page_nr)); + count = *area_count(area_dst, page_nr); + if (count != count_verify[page_nr]) { + fprintf(stderr, + "page_nr %lu memory corruption %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + } + count++; + *area_count(area_dst, page_nr) = count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + + if (time(NULL) - start > 1) + fprintf(stderr, + "userfault too slow %ld " + "possible false positive with overcommit\n", + time(NULL) - start); + } + + return NULL; +} + +static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, + unsigned long offset) +{ + uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffdio_copy->len, + offset); + if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy->copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", + uffdio_copy->copy), exit(1); + } else { + fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", + uffdio_copy->copy), exit(1); + } +} + +static int __copy_page(int ufd, unsigned long offset, bool retry) +{ + struct uffdio_copy uffdio_copy; + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_copy.dst = (unsigned long) area_dst + offset; + uffdio_copy.src = (unsigned long) area_src + offset; + uffdio_copy.len = page_size; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy.copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY error %Ld\n", + uffdio_copy.copy), exit(1); + } else if (uffdio_copy.copy != page_size) { + fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", + uffdio_copy.copy), exit(1); + } else { + if (test_uffdio_copy_eexist && retry) { + test_uffdio_copy_eexist = false; + retry_copy_page(ufd, &uffdio_copy, offset); + } + return 1; + } + return 0; +} + +static int 
copy_page_retry(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, true); +} + +static int copy_page(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, false); +} + +static void *uffd_poll_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct pollfd pollfd[2]; + struct uffd_msg msg; + struct uffdio_register uffd_reg; + int ret; + unsigned long offset; + char tmp_chr; + unsigned long userfaults = 0; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + + for (;;) { + ret = poll(pollfd, 2, -1); + if (!ret) + fprintf(stderr, "poll error %d\n", ret), exit(1); + if (ret < 0) + perror("poll"), exit(1); + if (pollfd[1].revents & POLLIN) { + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) + fprintf(stderr, "read pipefd error\n"), + exit(1); + break; + } + if (!(pollfd[0].revents & POLLIN)) + fprintf(stderr, "pollfd[0].revents %d\n", + pollfd[0].revents), exit(1); + ret = read(uffd, &msg, sizeof(msg)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + perror("nonblocking read error"), exit(1); + } + switch (msg.event) { + default: + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + break; + case UFFD_EVENT_PAGEFAULT: + if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(uffd, offset)) + userfaults++; + break; + case UFFD_EVENT_FORK: + close(uffd); + uffd = msg.arg.fork.ufd; + pollfd[0].fd = uffd; + break; + case UFFD_EVENT_REMOVE: + uffd_reg.range.start = msg.arg.remove.start; + uffd_reg.range.len = msg.arg.remove.end - + msg.arg.remove.start; + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + fprintf(stderr, "remove failure\n"), exit(1); + break; + case UFFD_EVENT_REMAP: + area_dst = (char *)(unsigned long)msg.arg.remap.to; + break; + } + } + return (void *)userfaults; +} + +pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *uffd_read_thread(void *arg) +{ + unsigned long *this_cpu_userfaults; + struct uffd_msg msg; + unsigned long offset; + int ret; + + this_cpu_userfaults = (unsigned long *) arg; + *this_cpu_userfaults = 0; + + pthread_mutex_unlock(&uffd_read_mutex); + /* from here cancellation is ok */ + + for (;;) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (ret < 0) + perror("blocking read error"), exit(1); + else + fprintf(stderr, "short read\n"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (bounces & BOUNCE_VERIFY && + msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(uffd, offset)) + (*this_cpu_userfaults)++; + } + return (void *)NULL; +} + +static void *background_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + unsigned long page_nr; + + for (page_nr = cpu * nr_pages_per_cpu; + page_nr < (cpu+1) * nr_pages_per_cpu; + page_nr++) + copy_page_retry(uffd, page_nr * page_size); + + return NULL; +} + +static int stress(unsigned long *userfaults) +{ + unsigned long cpu; + pthread_t locking_threads[nr_cpus]; + pthread_t uffd_threads[nr_cpus]; + pthread_t background_threads[nr_cpus]; + void **_userfaults = (void **) userfaults; + + finished = 0; + for (cpu 
= 0; cpu < nr_cpus; cpu++) { + if (pthread_create(&locking_threads[cpu], &attr, + locking_thread, (void *)cpu)) + return 1; + if (bounces & BOUNCE_POLL) { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_poll_thread, (void *)cpu)) + return 1; + } else { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_read_thread, + &_userfaults[cpu])) + return 1; + pthread_mutex_lock(&uffd_read_mutex); + } + if (pthread_create(&background_threads[cpu], &attr, + background_thread, (void *)cpu)) + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(background_threads[cpu], NULL)) + return 1; + + /* + * Be strict and immediately zap area_src, the whole area has + * been transferred already by the background treads. The + * area_src could then be faulted in in a racy way by still + * running uffdio_threads reading zeropages after we zapped + * area_src (but they're guaranteed to get -EEXIST from + * UFFDIO_COPY without writing zero pages into area_dst + * because the background threads already completed). + */ + if (uffd_test_ops->release_pages(area_src)) + return 1; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + char c; + if (bounces & BOUNCE_POLL) { + if (write(pipefd[cpu*2+1], &c, 1) != 1) { + fprintf(stderr, "pipefd write error\n"); + return 1; + } + if (pthread_join(uffd_threads[cpu], &_userfaults[cpu])) + return 1; + } else { + if (pthread_cancel(uffd_threads[cpu])) + return 1; + if (pthread_join(uffd_threads[cpu], NULL)) + return 1; + } + } + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + + return 0; +} + +static int userfaultfd_open(int features) +{ + struct uffdio_api uffdio_api; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + fprintf(stderr, + "userfaultfd syscall not available in this kernel\n"); + return 1; + } + uffd_flags = fcntl(uffd, F_GETFD, NULL); + + uffdio_api.api = UFFD_API; + uffdio_api.features = features; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + fprintf(stderr, "UFFDIO_API\n"); + return 1; + } + if (uffdio_api.api != UFFD_API) { + fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); + return 1; + } + + return 0; +} + +sigjmp_buf jbuf, *sigbuf; + +static void sighndl(int sig, siginfo_t *siginfo, void *ptr) +{ + if (sig == SIGBUS) { + if (sigbuf) + siglongjmp(*sigbuf, 1); + abort(); + } +} + +/* + * For non-cooperative userfaultfd test we fork() a process that will + * generate pagefaults, will mremap the area monitored by the + * userfaultfd and at last this process will release the monitored + * area. + * For the anonymous and shared memory the area is divided into two + * parts, the first part is accessed before mremap, and the second + * part is accessed after mremap. Since hugetlbfs does not support + * mremap, the entire monitored area is accessed in a single pass for + * HUGETLB_TEST. + * The release of the pages currently generates event for shmem and + * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked + * for hugetlb. + * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register + * monitored area, generate pagefaults and test that signal is delivered. + * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 + * test robustness use case - we release monitored area, fork a process + * that will generate pagefaults and verify signal is generated. + * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal + * feature. Using monitor thread, verify no userfault events are generated. 
+ */ +static int faulting_process(int signal_test) +{ + unsigned long nr; + unsigned long long count; + unsigned long split_nr_pages; + unsigned long lastnr; + struct sigaction act; + unsigned long signalled = 0; + + if (test_type != TEST_HUGETLB) + split_nr_pages = (nr_pages + 1) / 2; + else + split_nr_pages = nr_pages; + + if (signal_test) { + sigbuf = &jbuf; + memset(&act, 0, sizeof(act)); + act.sa_sigaction = sighndl; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &act, 0)) { + perror("sigaction"); + return 1; + } + lastnr = (unsigned long)-1; + } + + for (nr = 0; nr < split_nr_pages; nr++) { + if (signal_test) { + if (sigsetjmp(*sigbuf, 1) != 0) { + if (nr == lastnr) { + fprintf(stderr, "Signal repeated\n"); + return 1; + } + + lastnr = nr; + if (signal_test == 1) { + if (copy_page(uffd, nr * page_size)) + signalled++; + } else { + signalled++; + continue; + } + } + } + + count = *area_count(area_dst, nr); + if (count != count_verify[nr]) { + fprintf(stderr, + "nr %lu memory corruption %Lu %Lu\n", + nr, count, + count_verify[nr]), exit(1); + } + } + + if (signal_test) + return signalled != split_nr_pages; + + if (test_type == TEST_HUGETLB) + return 0; + + area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, area_src); + if (area_dst == MAP_FAILED) + perror("mremap"), exit(1); + + for (; nr < nr_pages; nr++) { + count = *area_count(area_dst, nr); + if (count != count_verify[nr]) { + fprintf(stderr, + "nr %lu memory corruption %Lu %Lu\n", + nr, count, + count_verify[nr]), exit(1); + } + } + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + for (nr = 0; nr < nr_pages; nr++) { + if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) + fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); + } + + return 0; +} + +static void retry_uffdio_zeropage(int ufd, + struct uffdio_zeropage *uffdio_zeropage, + unsigned long offset) +{ + uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffdio_zeropage->range.len, + offset); + if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (uffdio_zeropage->zeropage != -EEXIST) + fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", + uffdio_zeropage->zeropage), exit(1); + } else { + fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", + uffdio_zeropage->zeropage), exit(1); + } +} + +static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) +{ + struct uffdio_zeropage uffdio_zeropage; + int ret; + unsigned long has_zeropage; + + has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_zeropage.range.start = (unsigned long) area_dst + offset; + uffdio_zeropage.range.len = page_size; + uffdio_zeropage.mode = 0; + ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + if (ret) { + /* real retval in ufdio_zeropage.zeropage */ + if (has_zeropage) { + if (uffdio_zeropage.zeropage == -EEXIST) + fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), + exit(1); + else + fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } else { + if (uffdio_zeropage.zeropage != -EINVAL) + fprintf(stderr, + "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } + } else if (has_zeropage) { + if (uffdio_zeropage.zeropage != page_size) { + fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } else { + if (test_uffdio_zeropage_eexist && retry) { + 
test_uffdio_zeropage_eexist = false; + retry_uffdio_zeropage(ufd, &uffdio_zeropage, + offset); + } + return 1; + } + } else { + fprintf(stderr, + "UFFDIO_ZEROPAGE succeeded %Ld\n", + uffdio_zeropage.zeropage), exit(1); + } + + return 0; +} + +static int uffdio_zeropage(int ufd, unsigned long offset) +{ + return __uffdio_zeropage(ufd, offset, false); +} + +/* exercise UFFDIO_ZEROPAGE */ +static int userfaultfd_zeropage_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + + printf("testing UFFDIO_ZEROPAGE: "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + if (userfaultfd_open(0) < 0) + return 1; + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = uffd_test_ops->expected_ioctls; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (uffdio_zeropage(uffd, 0)) { + if (my_bcmp(area_dst, zeropage, page_size)) + fprintf(stderr, "zeropage is not zero\n"), exit(1); + } + + close(uffd); + printf("done.\n"); + return 0; +} + +static int userfaultfd_events_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long userfaults; + pthread_t uffd_mon; + int err, features; + pid_t pid; + char c; + + printf("testing events (fork, remap, remove): "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | + UFFD_FEATURE_EVENT_REMOVE; + if (userfaultfd_open(features) < 0) + return 1; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = uffd_test_ops->expected_ioctls; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL)) + perror("uffd_poll_thread create"), exit(1); + + pid = fork(); + if (pid < 0) + perror("fork"), exit(1); + + if (!pid) + return faulting_process(0); + + waitpid(pid, &err, 0); + if (err) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + perror("pipe write"), exit(1); + if (pthread_join(uffd_mon, (void **)&userfaults)) + return 1; + + close(uffd); + printf("userfaults: %ld\n", userfaults); + + return userfaults != nr_pages; +} + +static int userfaultfd_sig_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long userfaults; + pthread_t uffd_mon; + int err, features; + pid_t pid; + char c; + + printf("testing signal delivery: "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS; + if (userfaultfd_open(features) < 0) + return 1; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = 
UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = uffd_test_ops->expected_ioctls; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (faulting_process(1)) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL)) + perror("uffd_poll_thread create"), exit(1); + + pid = fork(); + if (pid < 0) + perror("fork"), exit(1); + + if (!pid) + exit(faulting_process(2)); + + waitpid(pid, &err, 0); + if (err) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + perror("pipe write"), exit(1); + if (pthread_join(uffd_mon, (void **)&userfaults)) + return 1; + + printf("done.\n"); + if (userfaults) + fprintf(stderr, "Signal test failed, userfaults: %ld\n", + userfaults); + close(uffd); + return userfaults != 0; +} +static int userfaultfd_stress(void) +{ + void *area; + char *tmp_area; + unsigned long nr; + struct uffdio_register uffdio_register; + unsigned long cpu; + int err; + unsigned long userfaults[nr_cpus]; + + uffd_test_ops->allocate_area((void **)&area_src); + if (!area_src) + return 1; + uffd_test_ops->allocate_area((void **)&area_dst); + if (!area_dst) + return 1; + + if (userfaultfd_open(0) < 0) + return 1; + + count_verify = malloc(nr_pages * sizeof(unsigned long long)); + if (!count_verify) { + perror("count_verify"); + return 1; + } + + for (nr = 0; nr < nr_pages; nr++) { + *area_mutex(area_src, nr) = (pthread_mutex_t) + PTHREAD_MUTEX_INITIALIZER; + count_verify[nr] = *area_count(area_src, nr) = 1; + /* + * In the transition between 255 to 256, powerpc will + * read out of order in my_bcmp and see both bytes as + * zero, so leave a placeholder below always non-zero + * after the count, to avoid my_bcmp to trigger false + * positives. 
+ */ + *(area_count(area_src, nr) + 1) = 1; + } + + pipefd = malloc(sizeof(int) * nr_cpus * 2); + if (!pipefd) { + perror("pipefd"); + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { + perror("pipe"); + return 1; + } + } + + if (posix_memalign(&area, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + zeropage = area; + bzero(zeropage, page_size); + + pthread_mutex_lock(&uffd_read_mutex); + + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 16*1024*1024); + + err = 0; + while (bounces--) { + unsigned long expected_ioctls; + + printf("bounces: %d, mode:", bounces); + if (bounces & BOUNCE_RANDOM) + printf(" rnd"); + if (bounces & BOUNCE_RACINGFAULTS) + printf(" racing"); + if (bounces & BOUNCE_VERIFY) + printf(" ver"); + if (bounces & BOUNCE_POLL) + printf(" poll"); + printf(", "); + fflush(stdout); + + if (bounces & BOUNCE_POLL) + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + else + fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + + /* register */ + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + return 1; + } + expected_ioctls = uffd_test_ops->expected_ioctls; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) { + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"); + return 1; + } + + if (area_dst_alias) { + uffdio_register.range.start = (unsigned long) + area_dst_alias; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure alias\n"); + return 1; + } + } + + /* + * The madvise done previously isn't enough: some + * uffd_thread could have read userfaults (one of + * those already resolved by the background thread) + * and it may be in the process of calling + * UFFDIO_COPY. UFFDIO_COPY will read the zapped + * area_src and it would map a zero page in it (of + * course such a UFFDIO_COPY is perfectly safe as it'd + * return -EEXIST). The problem comes at the next + * bounce though: that racing UFFDIO_COPY would + * generate zeropages in the area_src, so invalidating + * the previous MADV_DONTNEED. Without this additional + * MADV_DONTNEED those zeropages leftovers in the + * area_src would lead to -EEXIST failure during the + * next bounce, effectively leaving a zeropage in the + * area_dst. + * + * Try to comment this out madvise to see the memory + * corruption being caught pretty quick. + * + * khugepaged is also inhibited to collapse THP after + * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's + * required to MADV_DONTNEED here. 
+ */ + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + /* bounce pass */ + if (stress(userfaults)) + return 1; + + /* unregister */ + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { + fprintf(stderr, "unregister failure\n"); + return 1; + } + if (area_dst_alias) { + uffdio_register.range.start = (unsigned long) area_dst; + if (ioctl(uffd, UFFDIO_UNREGISTER, + &uffdio_register.range)) { + fprintf(stderr, "unregister failure alias\n"); + return 1; + } + } + + /* verification */ + if (bounces & BOUNCE_VERIFY) { + for (nr = 0; nr < nr_pages; nr++) { + if (*area_count(area_dst, nr) != count_verify[nr]) { + fprintf(stderr, + "error area_count %Lu %Lu %lu\n", + *area_count(area_src, nr), + count_verify[nr], + nr); + err = 1; + bounces = 0; + } + } + } + + /* prepare next bounce */ + tmp_area = area_src; + area_src = area_dst; + area_dst = tmp_area; + + tmp_area = area_src_alias; + area_src_alias = area_dst_alias; + area_dst_alias = tmp_area; + + printf("userfaults:"); + for (cpu = 0; cpu < nr_cpus; cpu++) + printf(" %lu", userfaults[cpu]); + printf("\n"); + } + + if (err) + return err; + + close(uffd); + return userfaultfd_zeropage_test() || userfaultfd_sig_test() + || userfaultfd_events_test(); +} + +/* + * Copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +static void set_test_type(const char *type) +{ + if (!strcmp(type, "anon")) { + test_type = TEST_ANON; + uffd_test_ops = &anon_uffd_test_ops; + } else if (!strcmp(type, "hugetlb")) { + test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "hugetlb_shared")) { + map_shared = true; + test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "shmem")) { + map_shared = true; + test_type = TEST_SHMEM; + uffd_test_ops = &shmem_uffd_test_ops; + } else { + fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + } + + if (test_type == TEST_HUGETLB) + page_size = default_huge_page_size(); + else + page_size = sysconf(_SC_PAGE_SIZE); + + if (!page_size) + fprintf(stderr, "Unable to determine page size\n"), + exit(2); + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 + > page_size) + fprintf(stderr, "Impossible to run this test\n"), exit(2); +} + +static void sigalrm(int sig) +{ + if (sig != SIGALRM) + abort(); + test_uffdio_copy_eexist = true; + test_uffdio_zeropage_eexist = true; + alarm(ALARM_INTERVAL_SECS); +} + +int main(int argc, char **argv) +{ + if (argc < 4) + fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"), + exit(1); + + if (signal(SIGALRM, sigalrm) == SIG_ERR) + fprintf(stderr, "failed to arm SIGALRM"), exit(1); + alarm(ALARM_INTERVAL_SECS); + + set_test_type(argv[1]); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / + nr_cpus; + if (!nr_pages_per_cpu) { + fprintf(stderr, "invalid MiB\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + + bounces = atoi(argv[3]); + if (bounces <= 0) { + fprintf(stderr, "invalid bounces\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + nr_pages = nr_pages_per_cpu * nr_cpus; + + if (test_type == TEST_HUGETLB) { + if (argc < 5) + 
fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"), + exit(1); + huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); + if (huge_fd < 0) { + fprintf(stderr, "Open of %s failed", argv[3]); + perror("open"); + exit(1); + } + if (ftruncate(huge_fd, 0)) { + fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); + perror("ftruncate"); + exit(1); + } + } + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + nr_pages, nr_pages_per_cpu); + return userfaultfd_stress(); +} + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c new file mode 100644 index 000000000..e7fe734c3 --- /dev/null +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -0,0 +1,297 @@ +/* + * + * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> + * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#ifdef __powerpc64__ +#define PAGE_SIZE (64 << 10) +/* + * This will work with 16M and 2M hugepage size + */ +#define HUGETLB_SIZE (16 << 20) +#else +#define PAGE_SIZE (4 << 10) +#define HUGETLB_SIZE (2 << 20) +#endif + +/* + * >= 128TB is the hint addr value we used to select + * large address space. + */ +#define ADDR_SWITCH_HINT (1UL << 47) +#define LOW_ADDR ((void *) (1UL << 30)) +#define HIGH_ADDR ((void *) (1UL << 48)) + +struct testcase { + void *addr; + unsigned long size; + unsigned long flags; + const char *msg; + unsigned int low_addr_required:1; + unsigned int keep_mapped:1; +}; + +static struct testcase testcases[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + /* + * We should never allocate at the requested address or above it + * The len cross the 128TB boundary. Without MAP_FIXED + * we will always search in the lower address space. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at 128TB, the area is free we should get that + * even without MAP_FIXED. 
+ */ + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, +}; + +static struct testcase hugetlb_testcases[] = { + { + .addr = NULL, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = 
"mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", + }, +}; + +static int run_test(struct testcase *test, int count) +{ + void *p; + int i, ret = 0; + + for (i = 0; i < count; i++) { + struct testcase *t = test + i; + + p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0); + + printf("%s: %p - ", t->msg, p); + + if (p == MAP_FAILED) { + printf("FAILED\n"); + ret = 1; + continue; + } + + if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + printf("FAILED\n"); + ret = 1; + } else { + /* + * Do a dereference of the address returned so that we catch + * bugs in page fault handling + */ + memset(p, 0, t->size); + printf("OK\n"); + } + if (!t->keep_mapped) + munmap(p, t->size); + } + + return ret; +} + +static int supported_arch(void) +{ +#if defined(__powerpc64__) + return 1; +#elif defined(__x86_64__) + return 1; +#else + return 0; +#endif +} + +int main(int argc, char **argv) +{ + int ret; + + if (!supported_arch()) + return 0; + + ret = run_test(testcases, ARRAY_SIZE(testcases)); + if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) + ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + return ret; +} diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c new file mode 100644 index 000000000..1830d66a6 --- /dev/null +++ b/tools/testing/selftests/vm/virtual_address_range.c @@ -0,0 +1,139 @@ +/* + * Copyright 2017, Anshuman Khandual, IBM Corp. + * Licensed under GPLv2. + * + * Works on architectures which support 128TB virtual + * address range and beyond. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/mman.h> +#include <sys/time.h> + +/* + * Maximum address range mapped with a single mmap() + * call is little bit more than 16GB. Hence 16GB is + * chosen as the single chunk size for address space + * mapping. + */ +#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */ + +/* + * Address space till 128TB is mapped without any hint + * and is enabled by default. Address space beyond 128TB + * till 512TB is obtained by passing hint address as the + * first argument into mmap() system call. + * + * The process heap address space is divided into two + * different areas one below 128TB and one above 128TB + * till it reaches 512TB. One with size 128TB and the + * other being 384TB. + * + * On Arm64 the address space is 256TB and no high mappings + * are supported so far. 
+ */ + +#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */ +#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL) +#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL) + +#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ +#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */ + +#ifdef __aarch64__ +#define HIGH_ADDR_MARK ADDR_MARK_256TB +#define HIGH_ADDR_SHIFT 49 +#define NR_CHUNKS_LOW NR_CHUNKS_256TB +#define NR_CHUNKS_HIGH 0 +#else +#define HIGH_ADDR_MARK ADDR_MARK_128TB +#define HIGH_ADDR_SHIFT 48 +#define NR_CHUNKS_LOW NR_CHUNKS_128TB +#define NR_CHUNKS_HIGH NR_CHUNKS_384TB +#endif + +static char *hind_addr(void) +{ + int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); + + return (char *) (1UL << bits); +} + +static int validate_addr(char *ptr, int high_addr) +{ + unsigned long addr = (unsigned long) ptr; + + if (high_addr) { + if (addr < HIGH_ADDR_MARK) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; + } + + if (addr > HIGH_ADDR_MARK) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; +} + +static int validate_lower_address_hint(void) +{ + char *ptr; + + ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | + PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr == MAP_FAILED) + return 0; + + return 1; +} + +int main(int argc, char *argv[]) +{ + char *ptr[NR_CHUNKS_LOW]; + char *hptr[NR_CHUNKS_HIGH]; + char *hint; + unsigned long i, lchunks, hchunks; + + for (i = 0; i < NR_CHUNKS_LOW; i++) { + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr[i] == MAP_FAILED) { + if (validate_lower_address_hint()) + return 1; + break; + } + + if (validate_addr(ptr[i], 0)) + return 1; + } + lchunks = i; + + for (i = 0; i < NR_CHUNKS_HIGH; i++) { + hint = hind_addr(); + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (hptr[i] == MAP_FAILED) + break; + + if (validate_addr(hptr[i], 1)) + return 1; + } + hchunks = i; + + for (i = 0; i < lchunks; i++) + munmap(ptr[i], MAP_CHUNK_SIZE); + + for (i = 0; i < hchunks; i++) + munmap(hptr[i], MAP_CHUNK_SIZE); + + return 0; +} |
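For anyone running these new selftests by hand, the usage strings in the sources above translate to invocations along these lines (a sketch only; the hugetlbfs path and the sizes are illustrative, not required values):

    ./transhuge-stress 512                          # optional argument is the size in MiB
    ./userfaultfd anon 100 99999                    # <test type> <MiB> <bounces>
    ./userfaultfd hugetlb 256 50 /mnt/huge/ufdtest  # hugetlb variants also need a hugetlbfs file
    ./va_128TBswitch --run-hugetlb                  # runs the normal cases, then the hugetlb cases
    ./virtual_address_range

Note that the two-argument examples in the userfaultfd header comment ("./userfaultfd 100 99999") do not match the argument parsing in its main(), which expects the test type (anon, hugetlb, hugetlb_shared or shmem) as the first argument.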