diff options
Diffstat (limited to 'man2/userfaultfd.2')
-rw-r--r-- | man2/userfaultfd.2 | 951 |
1 files changed, 0 insertions, 951 deletions
diff --git a/man2/userfaultfd.2 b/man2/userfaultfd.2 deleted file mode 100644 index 27f4b69..0000000 --- a/man2/userfaultfd.2 +++ /dev/null @@ -1,951 +0,0 @@ -.\" Copyright (c) 2016, IBM Corporation. -.\" Written by Mike Rapoport <rppt@linux.vnet.ibm.com> -.\" and Copyright (C) 2017 Michael Kerrisk <mtk.manpages@gmail.com> -.\" -.\" SPDX-License-Identifier: Linux-man-pages-copyleft -.\" -.TH userfaultfd 2 2024-02-12 "Linux man-pages 6.7" -.SH NAME -userfaultfd \- create a file descriptor for handling page faults in user space -.SH LIBRARY -Standard C library -.RI ( libc ", " \-lc ) -.SH SYNOPSIS -.nf -.BR "#include <fcntl.h>" " /* Definition of " O_* " constants */" -.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */" -.BR "#include <linux/userfaultfd.h>" " /* Definition of " UFFD_* " constants */" -.B #include <unistd.h> -.P -.BI "int syscall(SYS_userfaultfd, int " flags ); -.fi -.P -.IR Note : -glibc provides no wrapper for -.BR userfaultfd (), -necessitating the use of -.BR syscall (2). -.SH DESCRIPTION -.BR userfaultfd () -creates a new userfaultfd object that can be used for delegation of page-fault -handling to a user-space application, -and returns a file descriptor that refers to the new object. -The new userfaultfd object is configured using -.BR ioctl (2). -.P -Once the userfaultfd object is configured, the application can use -.BR read (2) -to receive userfaultfd notifications. -The reads from userfaultfd may be blocking or non-blocking, -depending on the value of -.I flags -used for the creation of the userfaultfd or subsequent calls to -.BR fcntl (2). -.P -The following values may be bitwise ORed in -.I flags -to change the behavior of -.BR userfaultfd (): -.TP -.B O_CLOEXEC -Enable the close-on-exec flag for the new userfaultfd file descriptor. -See the description of the -.B O_CLOEXEC -flag in -.BR open (2). -.TP -.B O_NONBLOCK -Enables non-blocking operation for the userfaultfd object. -See the description of the -.B O_NONBLOCK -flag in -.BR open (2). -.TP -.B UFFD_USER_MODE_ONLY -This is an userfaultfd-specific flag that was introduced in Linux 5.11. -When set, the userfaultfd object will only be able to handle -page faults originated from the user space on the registered regions. -When a kernel-originated fault was triggered -on the registered range with this userfaultfd, a -.B SIGBUS -signal will be delivered. -.P -When the last file descriptor referring to a userfaultfd object is closed, -all memory ranges that were registered with the object are unregistered -and unread events are flushed. -.\" -.P -Userfaultfd supports three modes of registration: -.TP -.BR UFFDIO_REGISTER_MODE_MISSING " (since Linux 4.10)" -When registered with -.B UFFDIO_REGISTER_MODE_MISSING -mode, user-space will receive a page-fault notification -when a missing page is accessed. -The faulted thread will be stopped from execution until the page fault is -resolved from user-space by either an -.B UFFDIO_COPY -or an -.B UFFDIO_ZEROPAGE -ioctl. -.TP -.BR UFFDIO_REGISTER_MODE_MINOR " (since Linux 5.13)" -When registered with -.B UFFDIO_REGISTER_MODE_MINOR -mode, user-space will receive a page-fault notification -when a minor page fault occurs. -That is, -when a backing page is in the page cache, -but page table entries don't yet exist. -The faulted thread will be stopped from execution -until the page fault is resolved from user-space by an -.B UFFDIO_CONTINUE -ioctl. -.TP -.BR UFFDIO_REGISTER_MODE_WP " (since Linux 5.7)" -When registered with -.B UFFDIO_REGISTER_MODE_WP -mode, user-space will receive a page-fault notification -when a write-protected page is written. -The faulted thread will be stopped from execution -until user-space write-unprotects the page using an -.B UFFDIO_WRITEPROTECT -ioctl. -.P -Multiple modes can be enabled at the same time for the same memory range. -.P -Since Linux 4.14, a userfaultfd page-fault notification can selectively embed -faulting thread ID information into the notification. -One needs to enable this feature explicitly using the -.B UFFD_FEATURE_THREAD_ID -feature bit when initializing the userfaultfd context. -By default, thread ID reporting is disabled. -.SS Usage -The userfaultfd mechanism is designed to allow a thread in a multithreaded -program to perform user-space paging for the other threads in the process. -When a page fault occurs for one of the regions registered -to the userfaultfd object, -the faulting thread is put to sleep and -an event is generated that can be read via the userfaultfd file descriptor. -The fault-handling thread reads events from this file descriptor and services -them using the operations described in -.BR ioctl_userfaultfd (2). -When servicing the page fault events, -the fault-handling thread can trigger a wake-up for the sleeping thread. -.P -It is possible for the faulting threads and the fault-handling threads -to run in the context of different processes. -In this case, these threads may belong to different programs, -and the program that executes the faulting threads -will not necessarily cooperate with the program that handles the page faults. -In such non-cooperative mode, -the process that monitors userfaultfd and handles page faults -needs to be aware of the changes in the virtual memory layout -of the faulting process to avoid memory corruption. -.P -Since Linux 4.11, -userfaultfd can also notify the fault-handling threads about changes -in the virtual memory layout of the faulting process. -In addition, if the faulting process invokes -.BR fork (2), -the userfaultfd objects associated with the parent may be duplicated -into the child process and the userfaultfd monitor will be notified -(via the -.B UFFD_EVENT_FORK -described below) -about the file descriptor associated with the userfault objects -created for the child process, -which allows the userfaultfd monitor to perform user-space paging -for the child process. -Unlike page faults which have to be synchronous and require an -explicit or implicit wakeup, -all other events are delivered asynchronously and -the non-cooperative process resumes execution as -soon as the userfaultfd manager executes -.BR read (2). -The userfaultfd manager should carefully synchronize calls to -.B UFFDIO_COPY -with the processing of events. -.P -The current asynchronous model of the event delivery is optimal for -single threaded non-cooperative userfaultfd manager implementations. -.\" Regarding the preceding sentence, Mike Rapoport says: -.\" The major point here is that current events delivery model could be -.\" problematic for multi-threaded monitor. I even suspect that it would be -.\" impossible to ensure synchronization between page faults and non-page -.\" fault events in multi-threaded monitor. -.\" .P -.\" FIXME elaborate about non-cooperating mode, describe its limitations -.\" for kernels before Linux 4.11, features added in Linux 4.11 -.\" and limitations remaining in Linux 4.11 -.\" Maybe it's worth adding a dedicated sub-section... -.\" -.P -Since Linux 5.7, userfaultfd is able to do -synchronous page dirty tracking using the new write-protect register mode. -One should check against the feature bit -.B UFFD_FEATURE_PAGEFAULT_FLAG_WP -before using this feature. -Similar to the original userfaultfd missing mode, the write-protect mode will -generate a userfaultfd notification when the protected page is written. -The user needs to resolve the page fault by unprotecting the faulted page and -kicking the faulted thread to continue. -For more information, -please refer to the "Userfaultfd write-protect mode" section. -.\" -.SS Userfaultfd operation -After the userfaultfd object is created with -.BR userfaultfd (), -the application must enable it using the -.B UFFDIO_API -.BR ioctl (2) -operation. -This operation allows a two-step handshake between the kernel and user space -to determine what API version and features the kernel supports, -and then to enable those features user space wants. -This operation must be performed before any of the other -.BR ioctl (2) -operations described below (or those operations fail with the -.B EINVAL -error). -.P -After a successful -.B UFFDIO_API -operation, -the application then registers memory address ranges using the -.B UFFDIO_REGISTER -.BR ioctl (2) -operation. -After successful completion of a -.B UFFDIO_REGISTER -operation, -a page fault occurring in the requested memory range, and satisfying -the mode defined at the registration time, will be forwarded by the kernel to -the user-space application. -The application can then use various (e.g., -.BR UFFDIO_COPY , -.BR UFFDIO_ZEROPAGE , -or -.BR UFFDIO_CONTINUE ) -.BR ioctl (2) -operations to resolve the page fault. -.P -Since Linux 4.14, if the application sets the -.B UFFD_FEATURE_SIGBUS -feature bit using the -.B UFFDIO_API -.BR ioctl (2), -no page-fault notification will be forwarded to user space. -Instead a -.B SIGBUS -signal is delivered to the faulting process. -With this feature, -userfaultfd can be used for robustness purposes to simply catch -any access to areas within the registered address range that do not -have pages allocated, without having to listen to userfaultfd events. -No userfaultfd monitor will be required for dealing with such memory -accesses. -For example, this feature can be useful for applications that -want to prevent the kernel from automatically allocating pages and filling -holes in sparse files when the hole is accessed through a memory mapping. -.P -The -.B UFFD_FEATURE_SIGBUS -feature is implicitly inherited through -.BR fork (2) -if used in combination with -.BR UFFD_FEATURE_FORK . -.P -Details of the various -.BR ioctl (2) -operations can be found in -.BR ioctl_userfaultfd (2). -.P -Since Linux 4.11, events other than page-fault may enabled during -.B UFFDIO_API -operation. -.P -Up to Linux 4.11, -userfaultfd can be used only with anonymous private memory mappings. -Since Linux 4.11, -userfaultfd can be also used with hugetlbfs and shared memory mappings. -.\" -.SS Userfaultfd write-protect mode (since Linux 5.7) -Since Linux 5.7, userfaultfd supports write-protect mode for anonymous memory. -The user needs to first check availability of this feature using -.B UFFDIO_API -ioctl against the feature bit -.B UFFD_FEATURE_PAGEFAULT_FLAG_WP -before using this feature. -.P -Since Linux 5.19, -the write-protection mode was also supported on -shmem and hugetlbfs memory types. -It can be detected with the feature bit -.BR UFFD_FEATURE_WP_HUGETLBFS_SHMEM . -.P -To register with userfaultfd write-protect mode, the user needs to initiate the -.B UFFDIO_REGISTER -ioctl with mode -.B UFFDIO_REGISTER_MODE_WP -set. -Note that it is legal to monitor the same memory range with multiple modes. -For example, the user can do -.B UFFDIO_REGISTER -with the mode set to -.BR "UFFDIO_REGISTER_MODE_MISSING | UFFDIO_REGISTER_MODE_WP" . -When there is only -.B UFFDIO_REGISTER_MODE_WP -registered, user-space will -.I not -receive any notification when a missing page is written. -Instead, user-space will receive a write-protect page-fault notification -only when an existing but write-protected page got written. -.P -After the -.B UFFDIO_REGISTER -ioctl completed with -.B UFFDIO_REGISTER_MODE_WP -mode set, -the user can write-protect any existing memory within the range using the ioctl -.B UFFDIO_WRITEPROTECT -where -.I uffdio_writeprotect.mode -should be set to -.BR UFFDIO_WRITEPROTECT_MODE_WP . -.P -When a write-protect event happens, -user-space will receive a page-fault notification whose -.I uffd_msg.pagefault.flags -will be with -.B UFFD_PAGEFAULT_FLAG_WP -flag set. -Note: since only writes can trigger this kind of fault, -write-protect notifications will always have the -.B UFFD_PAGEFAULT_FLAG_WRITE -bit set along with the -.B UFFD_PAGEFAULT_FLAG_WP -bit. -.P -To resolve a write-protection page fault, the user should initiate another -.B UFFDIO_WRITEPROTECT -ioctl, whose -.I uffd_msg.pagefault.flags -should have the flag -.B UFFDIO_WRITEPROTECT_MODE_WP -cleared upon the faulted page or range. -.\" -.SS Userfaultfd minor fault mode (since Linux 5.13) -Since Linux 5.13, -userfaultfd supports minor fault mode. -In this mode, -fault messages are produced not for major faults -(where the page was missing), -but rather for minor faults, -where a page exists in the page cache, -but the page table entries are not yet present. -The user needs to first check availability of this feature using the -.B UFFDIO_API -ioctl with the appropriate feature bits set before using this feature: -.B UFFD_FEATURE_MINOR_HUGETLBFS -since Linux 5.13, -or -.B UFFD_FEATURE_MINOR_SHMEM -since Linux 5.14. -.P -To register with userfaultfd minor fault mode, -the user needs to initiate the -.B UFFDIO_REGISTER -ioctl with mode -.B UFFD_REGISTER_MODE_MINOR -set. -.P -When a minor fault occurs, -user-space will receive a page-fault notification -whose -.I uffd_msg.pagefault.flags -will have the -.B UFFD_PAGEFAULT_FLAG_MINOR -flag set. -.P -To resolve a minor page fault, -the handler should decide whether or not -the existing page contents need to be modified first. -If so, -this should be done in-place via a second, -non-userfaultfd-registered mapping -to the same backing page -(e.g., by mapping the shmem or hugetlbfs file twice). -Once the page is considered "up to date", -the fault can be resolved by initiating an -.B UFFDIO_CONTINUE -ioctl, -which installs the page table entries and -(by default) -wakes up the faulting thread(s). -.P -Minor fault mode supports only hugetlbfs-backed (since Linux 5.13) -and shmem-backed (since Linux 5.14) memory. -.\" -.SS Reading from the userfaultfd structure -Each -.BR read (2) -from the userfaultfd file descriptor returns one or more -.I uffd_msg -structures, each of which describes a page-fault event -or an event required for the non-cooperative userfaultfd usage: -.P -.in +4n -.EX -struct uffd_msg { - __u8 event; /* Type of event */ - ... - union { - struct { - __u64 flags; /* Flags describing fault */ - __u64 address; /* Faulting address */ - union { - __u32 ptid; /* Thread ID of the fault */ - } feat; - } pagefault; -\& - struct { /* Since Linux 4.11 */ - __u32 ufd; /* Userfault file descriptor - of the child process */ - } fork; -\& - struct { /* Since Linux 4.11 */ - __u64 from; /* Old address of remapped area */ - __u64 to; /* New address of remapped area */ - __u64 len; /* Original mapping length */ - } remap; -\& - struct { /* Since Linux 4.11 */ - __u64 start; /* Start address of removed area */ - __u64 end; /* End address of removed area */ - } remove; - ... - } arg; -\& - /* Padding fields omitted */ -} __packed; -.EE -.in -.P -If multiple events are available and the supplied buffer is large enough, -.BR read (2) -returns as many events as will fit in the supplied buffer. -If the buffer supplied to -.BR read (2) -is smaller than the size of the -.I uffd_msg -structure, the -.BR read (2) -fails with the error -.BR EINVAL . -.P -The fields set in the -.I uffd_msg -structure are as follows: -.TP -.I event -The type of event. -Depending of the event type, -different fields of the -.I arg -union represent details required for the event processing. -The non-page-fault events are generated only when appropriate feature -is enabled during API handshake with -.B UFFDIO_API -.BR ioctl (2). -.IP -The following values can appear in the -.I event -field: -.RS -.TP -.BR UFFD_EVENT_PAGEFAULT " (since Linux 4.3)" -A page-fault event. -The page-fault details are available in the -.I pagefault -field. -.TP -.BR UFFD_EVENT_FORK " (since Linux 4.11)" -Generated when the faulting process invokes -.BR fork (2) -(or -.BR clone (2) -without the -.B CLONE_VM -flag). -The event details are available in the -.I fork -field. -.\" FIXME describe duplication of userfault file descriptor during fork -.TP -.BR UFFD_EVENT_REMAP " (since Linux 4.11)" -Generated when the faulting process invokes -.BR mremap (2). -The event details are available in the -.I remap -field. -.TP -.BR UFFD_EVENT_REMOVE " (since Linux 4.11)" -Generated when the faulting process invokes -.BR madvise (2) -with -.B MADV_DONTNEED -or -.B MADV_REMOVE -advice. -The event details are available in the -.I remove -field. -.TP -.BR UFFD_EVENT_UNMAP " (since Linux 4.11)" -Generated when the faulting process unmaps a memory range, -either explicitly using -.BR munmap (2) -or implicitly during -.BR mmap (2) -or -.BR mremap (2). -The event details are available in the -.I remove -field. -.RE -.TP -.I pagefault.address -The address that triggered the page fault. -.TP -.I pagefault.flags -A bit mask of flags that describe the event. -For -.BR UFFD_EVENT_PAGEFAULT , -the following flag may appear: -.RS -.TP -.B UFFD_PAGEFAULT_FLAG_WP -If this flag is set, then the fault was a write-protect fault. -.TP -.B UFFD_PAGEFAULT_FLAG_MINOR -If this flag is set, then the fault was a minor fault. -.TP -.B UFFD_PAGEFAULT_FLAG_WRITE -If this flag is set, then the fault was a write fault. -.P -If neither -.B UFFD_PAGEFAULT_FLAG_WP -nor -.B UFFD_PAGEFAULT_FLAG_MINOR -are set, then the fault was a missing fault. -.RE -.TP -.I pagefault.feat.pid -The thread ID that triggered the page fault. -.TP -.I fork.ufd -The file descriptor associated with the userfault object -created for the child created by -.BR fork (2). -.TP -.I remap.from -The original address of the memory range that was remapped using -.BR mremap (2). -.TP -.I remap.to -The new address of the memory range that was remapped using -.BR mremap (2). -.TP -.I remap.len -The original length of the memory range that was remapped using -.BR mremap (2). -.TP -.I remove.start -The start address of the memory range that was freed using -.BR madvise (2) -or unmapped -.TP -.I remove.end -The end address of the memory range that was freed using -.BR madvise (2) -or unmapped -.P -A -.BR read (2) -on a userfaultfd file descriptor can fail with the following errors: -.TP -.B EINVAL -The userfaultfd object has not yet been enabled using the -.B UFFDIO_API -.BR ioctl (2) -operation -.P -If the -.B O_NONBLOCK -flag is enabled in the associated open file description, -the userfaultfd file descriptor can be monitored with -.BR poll (2), -.BR select (2), -and -.BR epoll (7). -When events are available, the file descriptor indicates as readable. -If the -.B O_NONBLOCK -flag is not enabled, then -.BR poll (2) -(always) indicates the file as having a -.B POLLERR -condition, and -.BR select (2) -indicates the file descriptor as both readable and writable. -.\" FIXME What is the reason for this seemingly odd behavior with respect -.\" to the O_NONBLOCK flag? (see userfaultfd_poll() in fs/userfaultfd.c). -.\" Something needs to be said about this. -.SH RETURN VALUE -On success, -.BR userfaultfd () -returns a new file descriptor that refers to the userfaultfd object. -On error, \-1 is returned, and -.I errno -is set to indicate the error. -.SH ERRORS -.TP -.B EINVAL -An unsupported value was specified in -.IR flags . -.TP -.B EMFILE -The per-process limit on the number of open file descriptors has been -reached -.TP -.B ENFILE -The system-wide limit on the total number of open files has been -reached. -.TP -.B ENOMEM -Insufficient kernel memory was available. -.TP -.BR EPERM " (since Linux 5.2)" -.\" cefdca0a86be517bc390fc4541e3674b8e7803b0 -The caller is not privileged (does not have the -.B CAP_SYS_PTRACE -capability in the initial user namespace), and -.I /proc/sys/vm/unprivileged_userfaultfd -has the value 0. -.SH STANDARDS -Linux. -.SH HISTORY -Linux 4.3. -.P -Support for hugetlbfs and shared memory areas and -non-page-fault events was added in Linux 4.11 -.SH NOTES -The userfaultfd mechanism can be used as an alternative to -traditional user-space paging techniques based on the use of the -.B SIGSEGV -signal and -.BR mmap (2). -It can also be used to implement lazy restore -for checkpoint/restore mechanisms, -as well as post-copy migration to allow (nearly) uninterrupted execution -when transferring virtual machines and Linux containers -from one host to another. -.SH BUGS -If the -.B UFFD_FEATURE_EVENT_FORK -is enabled and a system call from the -.BR fork (2) -family is interrupted by a signal or failed, a stale userfaultfd descriptor -might be created. -In this case, a spurious -.B UFFD_EVENT_FORK -will be delivered to the userfaultfd monitor. -.SH EXAMPLES -The program below demonstrates the use of the userfaultfd mechanism. -The program creates two threads, one of which acts as the -page-fault handler for the process, for the pages in a demand-page zero -region created using -.BR mmap (2). -.P -The program takes one command-line argument, -which is the number of pages that will be created in a mapping -whose page faults will be handled via userfaultfd. -After creating a userfaultfd object, -the program then creates an anonymous private mapping of the specified size -and registers the address range of that mapping using the -.B UFFDIO_REGISTER -.BR ioctl (2) -operation. -The program then creates a second thread that will perform the -task of handling page faults. -.P -The main thread then walks through the pages of the mapping fetching -bytes from successive pages. -Because the pages have not yet been accessed, -the first access of a byte in each page will trigger a page-fault event -on the userfaultfd file descriptor. -.P -Each of the page-fault events is handled by the second thread, -which sits in a loop processing input from the userfaultfd file descriptor. -In each loop iteration, the second thread first calls -.BR poll (2) -to check the state of the file descriptor, -and then reads an event from the file descriptor. -All such events should be -.B UFFD_EVENT_PAGEFAULT -events, -which the thread handles by copying a page of data into -the faulting region using the -.B UFFDIO_COPY -.BR ioctl (2) -operation. -.P -The following is an example of what we see when running the program: -.P -.in +4n -.EX -$ \fB./userfaultfd_demo 3\fP -Address returned by mmap() = 0x7fd30106c000 -\& -fault_handler_thread(): - poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 - UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106c00f - (uffdio_copy.copy returned 4096) -Read address 0x7fd30106c00f in main(): A -Read address 0x7fd30106c40f in main(): A -Read address 0x7fd30106c80f in main(): A -Read address 0x7fd30106cc0f in main(): A -\& -fault_handler_thread(): - poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 - UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106d00f - (uffdio_copy.copy returned 4096) -Read address 0x7fd30106d00f in main(): B -Read address 0x7fd30106d40f in main(): B -Read address 0x7fd30106d80f in main(): B -Read address 0x7fd30106dc0f in main(): B -\& -fault_handler_thread(): - poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 - UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fd30106e00f - (uffdio_copy.copy returned 4096) -Read address 0x7fd30106e00f in main(): C -Read address 0x7fd30106e40f in main(): C -Read address 0x7fd30106e80f in main(): C -Read address 0x7fd30106ec0f in main(): C -.EE -.in -.SS Program source -\& -.\" SRC BEGIN (userfaultfd.c) -.EX -/* userfaultfd_demo.c -\& - Licensed under the GNU General Public License version 2 or later. -*/ -#define _GNU_SOURCE -#include <err.h> -#include <errno.h> -#include <fcntl.h> -#include <inttypes.h> -#include <linux/userfaultfd.h> -#include <poll.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/syscall.h> -#include <unistd.h> -\& -static int page_size; -\& -static void * -fault_handler_thread(void *arg) -{ - int nready; - long uffd; /* userfaultfd file descriptor */ - ssize_t nread; - struct pollfd pollfd; - struct uffdio_copy uffdio_copy; -\& - static int fault_cnt = 0; /* Number of faults so far handled */ - static char *page = NULL; - static struct uffd_msg msg; /* Data read from userfaultfd */ -\& - uffd = (long) arg; -\& - /* Create a page that will be copied into the faulting region. */ -\& - if (page == NULL) { - page = mmap(NULL, page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0); - if (page == MAP_FAILED) - err(EXIT_FAILURE, "mmap"); - } -\& - /* Loop, handling incoming events on the userfaultfd - file descriptor. */ -\& - for (;;) { -\& - /* See what poll() tells us about the userfaultfd. */ -\& - pollfd.fd = uffd; - pollfd.events = POLLIN; - nready = poll(&pollfd, 1, \-1); - if (nready == \-1) - err(EXIT_FAILURE, "poll"); -\& - printf("\enfault_handler_thread():\en"); - printf(" poll() returns: nready = %d; " - "POLLIN = %d; POLLERR = %d\en", nready, - (pollfd.revents & POLLIN) != 0, - (pollfd.revents & POLLERR) != 0); -\& - /* Read an event from the userfaultfd. */ -\& - nread = read(uffd, &msg, sizeof(msg)); - if (nread == 0) { - printf("EOF on userfaultfd!\en"); - exit(EXIT_FAILURE); - } -\& - if (nread == \-1) - err(EXIT_FAILURE, "read"); -\& - /* We expect only one kind of event; verify that assumption. */ -\& - if (msg.event != UFFD_EVENT_PAGEFAULT) { - fprintf(stderr, "Unexpected event on userfaultfd\en"); - exit(EXIT_FAILURE); - } -\& - /* Display info about the page\-fault event. */ -\& - printf(" UFFD_EVENT_PAGEFAULT event: "); - printf("flags = %"PRIx64"; ", msg.arg.pagefault.flags); - printf("address = %"PRIx64"\en", msg.arg.pagefault.address); -\& - /* Copy the page pointed to by \[aq]page\[aq] into the faulting - region. Vary the contents that are copied in, so that it - is more obvious that each fault is handled separately. */ -\& - memset(page, \[aq]A\[aq] + fault_cnt % 20, page_size); - fault_cnt++; -\& - uffdio_copy.src = (unsigned long) page; -\& - /* We need to handle page faults in units of pages(!). - So, round faulting address down to page boundary. */ -\& - uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address & - \[ti](page_size \- 1); - uffdio_copy.len = page_size; - uffdio_copy.mode = 0; - uffdio_copy.copy = 0; - if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == \-1) - err(EXIT_FAILURE, "ioctl\-UFFDIO_COPY"); -\& - printf(" (uffdio_copy.copy returned %"PRId64")\en", - uffdio_copy.copy); - } -} -\& -int -main(int argc, char *argv[]) -{ - int s; - char c; - char *addr; /* Start of region handled by userfaultfd */ - long uffd; /* userfaultfd file descriptor */ - size_t len, l; /* Length of region handled by userfaultfd */ - pthread_t thr; /* ID of thread that handles page faults */ - struct uffdio_api uffdio_api; - struct uffdio_register uffdio_register; -\& - if (argc != 2) { - fprintf(stderr, "Usage: %s num\-pages\en", argv[0]); - exit(EXIT_FAILURE); - } -\& - page_size = sysconf(_SC_PAGE_SIZE); - len = strtoull(argv[1], NULL, 0) * page_size; -\& - /* Create and enable userfaultfd object. */ -\& - uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == \-1) - err(EXIT_FAILURE, "userfaultfd"); -\& - /* NOTE: Two-step feature handshake is not needed here, since this - example doesn't require any specific features. -\& - Programs that *do* should call UFFDIO_API twice: once with - `features = 0` to detect features supported by this kernel, and - again with the subset of features the program actually wants to - enable. */ - uffdio_api.api = UFFD_API; - uffdio_api.features = 0; - if (ioctl(uffd, UFFDIO_API, &uffdio_api) == \-1) - err(EXIT_FAILURE, "ioctl\-UFFDIO_API"); -\& - /* Create a private anonymous mapping. The memory will be - demand\-zero paged\-\-that is, not yet allocated. When we - actually touch the memory, it will be allocated via - the userfaultfd. */ -\& - addr = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, \-1, 0); - if (addr == MAP_FAILED) - err(EXIT_FAILURE, "mmap"); -\& - printf("Address returned by mmap() = %p\en", addr); -\& - /* Register the memory range of the mapping we just created for - handling by the userfaultfd object. In mode, we request to track - missing pages (i.e., pages that have not yet been faulted in). */ -\& - uffdio_register.range.start = (unsigned long) addr; - uffdio_register.range.len = len; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == \-1) - err(EXIT_FAILURE, "ioctl\-UFFDIO_REGISTER"); -\& - /* Create a thread that will process the userfaultfd events. */ -\& - s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); - if (s != 0) { - errc(EXIT_FAILURE, s, "pthread_create"); - } -\& - /* Main thread now touches memory in the mapping, touching - locations 1024 bytes apart. This will trigger userfaultfd - events for all pages in the region. */ -\& - l = 0xf; /* Ensure that faulting address is not on a page - boundary, in order to test that we correctly - handle that case in fault_handling_thread(). */ - while (l < len) { - c = addr[l]; - printf("Read address %p in %s(): ", addr + l, __func__); - printf("%c\en", c); - l += 1024; - usleep(100000); /* Slow things down a little */ - } -\& - exit(EXIT_SUCCESS); -} -.EE -.\" SRC END -.SH SEE ALSO -.BR fcntl (2), -.BR ioctl (2), -.BR ioctl_userfaultfd (2), -.BR madvise (2), -.BR mmap (2) -.P -.I Documentation/admin\-guide/mm/userfaultfd.rst -in the Linux kernel source tree |