From 3d08cd331c1adcf0d917392f7e527b3f00511748 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 24 May 2024 06:52:22 +0200 Subject: Merging upstream version 6.8. Signed-off-by: Daniel Baumann --- man2/memfd_create.2 | 549 ---------------------------------------------------- 1 file changed, 549 deletions(-) delete mode 100644 man2/memfd_create.2 (limited to 'man2/memfd_create.2') diff --git a/man2/memfd_create.2 b/man2/memfd_create.2 deleted file mode 100644 index c7cf859..0000000 --- a/man2/memfd_create.2 +++ /dev/null @@ -1,549 +0,0 @@ -.\" Copyright (C) 2014 Michael Kerrisk -.\" and Copyright (C) 2014 David Herrmann -.\" -.\" SPDX-License-Identifier: GPL-2.0-or-later -.\" -.TH memfd_create 2 2023-10-31 "Linux man-pages 6.7" -.SH NAME -memfd_create \- create an anonymous file -.SH LIBRARY -Standard C library -.RI ( libc ", " \-lc ) -.SH SYNOPSIS -.nf -.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */" -.B #include -.P -.BI "int memfd_create(const char *" name ", unsigned int " flags ");" -.fi -.SH DESCRIPTION -.BR memfd_create () -creates an anonymous file and returns a file descriptor that refers to it. -The file behaves like a regular file, and so can be modified, -truncated, memory-mapped, and so on. -However, unlike a regular file, -it lives in RAM and has a volatile backing storage. -Once all references to the file are dropped, it is automatically released. -Anonymous memory is used for all backing pages of the file. -Therefore, files created by -.BR memfd_create () -have the same semantics as other anonymous -.\" David Herrmann: -.\" memfd uses VM_NORESERVE so each page is accounted on first access. -.\" This means, the overcommit-limits (see __vm_enough_memory()) and the -.\" memory-cgroup limits (mem_cgroup_try_charge()) are applied. Note that -.\" those are accounted on "current" and "current->mm", that is, the -.\" process doing the first page access. -memory allocations such as those allocated using -.BR mmap (2) -with the -.B MAP_ANONYMOUS -flag. -.P -The initial size of the file is set to 0. -Following the call, the file size should be set using -.BR ftruncate (2). -(Alternatively, the file may be populated by calls to -.BR write (2) -or similar.) -.P -The name supplied in -.I name -is used as a filename and will be displayed -as the target of the corresponding symbolic link in the directory -.IR /proc/self/fd/ . -The displayed name is always prefixed with -.I memfd: -and serves only for debugging purposes. -Names do not affect the behavior of the file descriptor, -and as such multiple files can have the same name without any side effects. -.P -The following values may be bitwise ORed in -.I flags -to change the behavior of -.BR memfd_create (): -.TP -.B MFD_CLOEXEC -Set the close-on-exec -.RB ( FD_CLOEXEC ) -flag on the new file descriptor. -See the description of the -.B O_CLOEXEC -flag in -.BR open (2) -for reasons why this may be useful. -.TP -.B MFD_ALLOW_SEALING -Allow sealing operations on this file. -See the discussion of the -.B F_ADD_SEALS -and -.B F_GET_SEALS -operations in -.BR fcntl (2), -and also NOTES, below. -The initial set of seals is empty. -If this flag is not set, the initial set of seals will be -.BR F_SEAL_SEAL , -meaning that no other seals can be set on the file. -.\" FIXME Why is the MFD_ALLOW_SEALING behavior not simply the default? -.\" Is it worth adding some text explaining this? -.TP -.BR MFD_HUGETLB " (since Linux 4.14)" -.\" commit 749df87bd7bee5a79cef073f5d032ddb2b211de8 -The anonymous file will be created in the hugetlbfs filesystem using -huge pages. -See the Linux kernel source file -.I Documentation/admin\-guide/mm/hugetlbpage.rst -for more information about hugetlbfs. -.\" commit 47b9012ecdc747f6936395265e677d41e11a31ff -Specifying both -.B MFD_HUGETLB -and -.B MFD_ALLOW_SEALING -in -.I flags -is supported since Linux 4.16. -.TP -.B MFD_HUGE_2MB -.TQ -.B MFD_HUGE_1GB -.TQ -\&.\|.\|. -Used in conjunction with -.B MFD_HUGETLB -to select alternative hugetlb page sizes (respectively, 2\ MB, 1\ GB, ...) -on systems that support multiple hugetlb page sizes. -Definitions for known -huge page sizes are included in the header file -.I . -.IP -For details on encoding huge page sizes not included in the header file, -see the discussion of the similarly named constants in -.BR mmap (2). -.P -Unused bits in -.I flags -must be 0. -.P -As its return value, -.BR memfd_create () -returns a new file descriptor that can be used to refer to the file. -This file descriptor is opened for both reading and writing -.RB ( O_RDWR ) -and -.B O_LARGEFILE -is set for the file descriptor. -.P -With respect to -.BR fork (2) -and -.BR execve (2), -the usual semantics apply for the file descriptor created by -.BR memfd_create (). -A copy of the file descriptor is inherited by the child produced by -.BR fork (2) -and refers to the same file. -The file descriptor is preserved across -.BR execve (2), -unless the close-on-exec flag has been set. -.SH RETURN VALUE -On success, -.BR memfd_create () -returns a new file descriptor. -On error, \-1 is returned and -.I errno -is set to indicate the error. -.SH ERRORS -.TP -.B EFAULT -The address in -.I name -points to invalid memory. -.TP -.B EINVAL -.I flags -included unknown bits. -.TP -.B EINVAL -.I name -was too long. -(The limit is -.\" NAME_MAX - strlen("memfd:") -249 bytes, excluding the terminating null byte.) -.TP -.B EINVAL -Both -.B MFD_HUGETLB -and -.B MFD_ALLOW_SEALING -were specified in -.IR flags . -.TP -.B EMFILE -The per-process limit on the number of open file descriptors has been reached. -.TP -.B ENFILE -The system-wide limit on the total number of open files has been reached. -.TP -.B ENOMEM -There was insufficient memory to create a new anonymous file. -.TP -.B EPERM -The -.B MFD_HUGETLB -flag was specified, but the caller was not privileged (did not have the -.B CAP_IPC_LOCK -capability) -and is not a member of the -.I sysctl_hugetlb_shm_group -group; see the description of -.I /proc/sys/vm/sysctl_hugetlb_shm_group -in -.BR proc (5). -.SH STANDARDS -Linux. -.SH HISTORY -Linux 3.17, -glibc 2.27. -.SH NOTES -.\" See also http://lwn.net/Articles/593918/ -.\" and http://lwn.net/Articles/594919/ and http://lwn.net/Articles/591108/ -The -.BR memfd_create () -system call provides a simple alternative to manually mounting a -.BR tmpfs (5) -filesystem and creating and opening a file in that filesystem. -The primary purpose of -.BR memfd_create () -is to create files and associated file descriptors that are -used with the file-sealing APIs provided by -.BR fcntl (2). -.P -The -.BR memfd_create () -system call also has uses without file sealing -(which is why file-sealing is disabled, unless explicitly requested with the -.B MFD_ALLOW_SEALING -flag). -In particular, it can be used as an alternative to creating files in -.I tmp -or as an alternative to using the -.BR open (2) -.B O_TMPFILE -in cases where there is no intention to actually link the -resulting file into the filesystem. -.SS File sealing -In the absence of file sealing, -processes that communicate via shared memory must either trust each other, -or take measures to deal with the possibility that an untrusted peer -may manipulate the shared memory region in problematic ways. -For example, an untrusted peer might modify the contents of the -shared memory at any time, or shrink the shared memory region. -The former possibility leaves the local process vulnerable to -time-of-check-to-time-of-use race conditions -(typically dealt with by copying data from -the shared memory region before checking and using it). -The latter possibility leaves the local process vulnerable to -.B SIGBUS -signals when an attempt is made to access a now-nonexistent -location in the shared memory region. -(Dealing with this possibility necessitates the use of a handler for the -.B SIGBUS -signal.) -.P -Dealing with untrusted peers imposes extra complexity on -code that employs shared memory. -Memory sealing enables that extra complexity to be eliminated, -by allowing a process to operate secure in the knowledge that -its peer can't modify the shared memory in an undesired fashion. -.P -An example of the usage of the sealing mechanism is as follows: -.IP (1) 5 -The first process creates a -.BR tmpfs (5) -file using -.BR memfd_create (). -The call yields a file descriptor used in subsequent steps. -.IP (2) -The first process -sizes the file created in the previous step using -.BR ftruncate (2), -maps it using -.BR mmap (2), -and populates the shared memory with the desired data. -.IP (3) -The first process uses the -.BR fcntl (2) -.B F_ADD_SEALS -operation to place one or more seals on the file, -in order to restrict further modifications on the file. -(If placing the seal -.BR F_SEAL_WRITE , -then it will be necessary to first unmap the shared writable mapping -created in the previous step. -Otherwise, behavior similar to -.B F_SEAL_WRITE -can be achieved by using -.BR F_SEAL_FUTURE_WRITE , -which will prevent future writes via -.BR mmap (2) -and -.BR write (2) -from succeeding while keeping existing shared writable mappings). -.IP (4) -A second process obtains a file descriptor for the -.BR tmpfs (5) -file and maps it. -Among the possible ways in which this could happen are the following: -.RS -.IP \[bu] 3 -The process that called -.BR memfd_create () -could transfer the resulting file descriptor to the second process -via a UNIX domain socket (see -.BR unix (7) -and -.BR cmsg (3)). -The second process then maps the file using -.BR mmap (2). -.IP \[bu] -The second process is created via -.BR fork (2) -and thus automatically inherits the file descriptor and mapping. -(Note that in this case and the next, -there is a natural trust relationship between the two processes, -since they are running under the same user ID. -Therefore, file sealing would not normally be necessary.) -.IP \[bu] -The second process opens the file -.IR /proc/ pid /fd/ fd, -where -.I -is the PID of the first process (the one that called -.BR memfd_create ()), -and -.I -is the number of the file descriptor returned by the call to -.BR memfd_create () -in that process. -The second process then maps the file using -.BR mmap (2). -.RE -.IP (5) -The second process uses the -.BR fcntl (2) -.B F_GET_SEALS -operation to retrieve the bit mask of seals -that has been applied to the file. -This bit mask can be inspected in order to determine -what kinds of restrictions have been placed on file modifications. -If desired, the second process can apply further seals -to impose additional restrictions (so long as the -.B F_SEAL_SEAL -seal has not yet been applied). -.SH EXAMPLES -Below are shown two example programs that demonstrate the use of -.BR memfd_create () -and the file sealing API. -.P -The first program, -.IR t_memfd_create.c , -creates a -.BR tmpfs (5) -file using -.BR memfd_create (), -sets a size for the file, maps it into memory, -and optionally places some seals on the file. -The program accepts up to three command-line arguments, -of which the first two are required. -The first argument is the name to associate with the file, -the second argument is the size to be set for the file, -and the optional third argument is a string of characters that specify -seals to be set on the file. -.P -The second program, -.IR t_get_seals.c , -can be used to open an existing file that was created via -.BR memfd_create () -and inspect the set of seals that have been applied to that file. -.P -The following shell session demonstrates the use of these programs. -First we create a -.BR tmpfs (5) -file and set some seals on it: -.P -.in +4n -.EX -$ \fB./t_memfd_create my_memfd_file 4096 sw &\fP -[1] 11775 -PID: 11775; fd: 3; /proc/11775/fd/3 -.EE -.in -.P -At this point, the -.I t_memfd_create -program continues to run in the background. -From another program, we can obtain a file descriptor for the -file created by -.BR memfd_create () -by opening the -.IR /proc/ pid /fd -file that corresponds to the file descriptor opened by -.BR memfd_create (). -Using that pathname, we inspect the content of the -.IR /proc/ pid /fd -symbolic link, and use our -.I t_get_seals -program to view the seals that have been placed on the file: -.P -.in +4n -.EX -$ \fBreadlink /proc/11775/fd/3\fP -/memfd:my_memfd_file (deleted) -$ \fB./t_get_seals /proc/11775/fd/3\fP -Existing seals: WRITE SHRINK -.EE -.in -.SS Program source: t_memfd_create.c -\& -.\" SRC BEGIN (t_memfd_create.c) -.EX -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -\& -int -main(int argc, char *argv[]) -{ - int fd; - char *name, *seals_arg; - ssize_t len; - unsigned int seals; -\& - if (argc < 3) { - fprintf(stderr, "%s name size [seals]\en", argv[0]); - fprintf(stderr, "\et\[aq]seals\[aq] can contain any of the " - "following characters:\en"); - fprintf(stderr, "\et\etg \- F_SEAL_GROW\en"); - fprintf(stderr, "\et\ets \- F_SEAL_SHRINK\en"); - fprintf(stderr, "\et\etw \- F_SEAL_WRITE\en"); - fprintf(stderr, "\et\etW \- F_SEAL_FUTURE_WRITE\en"); - fprintf(stderr, "\et\etS \- F_SEAL_SEAL\en"); - exit(EXIT_FAILURE); - } -\& - name = argv[1]; - len = atoi(argv[2]); - seals_arg = argv[3]; -\& - /* Create an anonymous file in tmpfs; allow seals to be - placed on the file. */ -\& - fd = memfd_create(name, MFD_ALLOW_SEALING); - if (fd == \-1) - err(EXIT_FAILURE, "memfd_create"); -\& - /* Size the file as specified on the command line. */ -\& - if (ftruncate(fd, len) == \-1) - err(EXIT_FAILURE, "truncate"); -\& - printf("PID: %jd; fd: %d; /proc/%jd/fd/%d\en", - (intmax_t) getpid(), fd, (intmax_t) getpid(), fd); -\& - /* Code to map the file and populate the mapping with data - omitted. */ -\& - /* If a \[aq]seals\[aq] command\-line argument was supplied, set some - seals on the file. */ -\& - if (seals_arg != NULL) { - seals = 0; -\& - if (strchr(seals_arg, \[aq]g\[aq]) != NULL) - seals |= F_SEAL_GROW; - if (strchr(seals_arg, \[aq]s\[aq]) != NULL) - seals |= F_SEAL_SHRINK; - if (strchr(seals_arg, \[aq]w\[aq]) != NULL) - seals |= F_SEAL_WRITE; - if (strchr(seals_arg, \[aq]W\[aq]) != NULL) - seals |= F_SEAL_FUTURE_WRITE; - if (strchr(seals_arg, \[aq]S\[aq]) != NULL) - seals |= F_SEAL_SEAL; -\& - if (fcntl(fd, F_ADD_SEALS, seals) == \-1) - err(EXIT_FAILURE, "fcntl"); - } -\& - /* Keep running, so that the file created by memfd_create() - continues to exist. */ -\& - pause(); -\& - exit(EXIT_SUCCESS); -} -.EE -.\" SRC END -.SS Program source: t_get_seals.c -\& -.\" SRC BEGIN (t_get_seals.c) -.EX -#define _GNU_SOURCE -#include -#include -#include -#include -\& -int -main(int argc, char *argv[]) -{ - int fd; - unsigned int seals; -\& - if (argc != 2) { - fprintf(stderr, "%s /proc/PID/fd/FD\en", argv[0]); - exit(EXIT_FAILURE); - } -\& - fd = open(argv[1], O_RDWR); - if (fd == \-1) - err(EXIT_FAILURE, "open"); -\& - seals = fcntl(fd, F_GET_SEALS); - if (seals == \-1) - err(EXIT_FAILURE, "fcntl"); -\& - printf("Existing seals:"); - if (seals & F_SEAL_SEAL) - printf(" SEAL"); - if (seals & F_SEAL_GROW) - printf(" GROW"); - if (seals & F_SEAL_WRITE) - printf(" WRITE"); - if (seals & F_SEAL_FUTURE_WRITE) - printf(" FUTURE_WRITE"); - if (seals & F_SEAL_SHRINK) - printf(" SHRINK"); - printf("\en"); -\& - /* Code to map the file and access the contents of the - resulting mapping omitted. */ -\& - exit(EXIT_SUCCESS); -} -.EE -.\" SRC END -.SH SEE ALSO -.BR fcntl (2), -.BR ftruncate (2), -.BR memfd_secret (2), -.BR mmap (2), -.BR shmget (2), -.BR shm_open (3) -- cgit v1.2.3