From 399644e47874bff147afb19c89228901ac39340e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 21:40:15 +0200 Subject: Adding upstream version 6.05.01. Signed-off-by: Daniel Baumann --- man2/mmap.2 | 1035 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1035 insertions(+) create mode 100644 man2/mmap.2 (limited to 'man2/mmap.2') diff --git a/man2/mmap.2 b/man2/mmap.2 new file mode 100644 index 0000000..3d9a887 --- /dev/null +++ b/man2/mmap.2 @@ -0,0 +1,1035 @@ +'\" t +.\" Copyright (C) 1996 Andries Brouwer +.\" and Copyright (C) 2006, 2007 Michael Kerrisk +.\" +.\" SPDX-License-Identifier: Linux-man-pages-copyleft +.\" +.\" Modified 1997-01-31 by Eric S. Raymond +.\" Modified 2000-03-25 by Jim Van Zandt +.\" Modified 2001-10-04 by John Levon +.\" Modified 2003-02-02 by Andi Kleen +.\" Modified 2003-05-21 by Michael Kerrisk +.\" MAP_LOCKED works from Linux 2.5.37 +.\" Modified 2004-06-17 by Michael Kerrisk +.\" Modified 2004-09-11 by aeb +.\" Modified 2004-12-08, from Eric Estievenart +.\" Modified 2004-12-08, mtk, formatting tidy-ups +.\" Modified 2006-12-04, mtk, various parts rewritten +.\" 2007-07-10, mtk, Added an example program. +.\" 2008-11-18, mtk, document MAP_STACK +.\" +.TH mmap 2 2023-07-20 "Linux man-pages 6.05.01" +.SH NAME +mmap, munmap \- map or unmap files or devices into memory +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.B #include +.PP +.BI "void *mmap(void " addr [. length "], size_t " length \ +", int " prot ", int " flags , +.BI " int " fd ", off_t " offset ); +.BI "int munmap(void " addr [. length "], size_t " length ); +.fi +.PP +See NOTES for information on feature test macro requirements. +.SH DESCRIPTION +.BR mmap () +creates a new mapping in the virtual address space of +the calling process. +The starting address for the new mapping is specified in +.IR addr . +The +.I length +argument specifies the length of the mapping (which must be greater than 0). +.PP +If +.I addr +is NULL, +then the kernel chooses the (page-aligned) address +at which to create the mapping; +this is the most portable method of creating a new mapping. +If +.I addr +is not NULL, +then the kernel takes it as a hint about where to place the mapping; +on Linux, the kernel will pick a nearby page boundary (but always above +or equal to the value specified by +.IR /proc/sys/vm/mmap_min_addr ) +and attempt to create the mapping there. +If another mapping already exists there, the kernel picks a new address that +may or may not depend on the hint. +.\" Before Linux 2.6.24, the address was rounded up to the next page +.\" boundary; since Linux 2.6.24, it is rounded down! +The address of the new mapping is returned as the result of the call. +.PP +The contents of a file mapping (as opposed to an anonymous mapping; see +.B MAP_ANONYMOUS +below), are initialized using +.I length +bytes starting at offset +.I offset +in the file (or other object) referred to by the file descriptor +.IR fd . +.I offset +must be a multiple of the page size as returned by +.IR sysconf(_SC_PAGE_SIZE) . +.PP +After the +.BR mmap () +call has returned, the file descriptor, +.IR fd , +can be closed immediately without invalidating the mapping. +.PP +The +.I prot +argument describes the desired memory protection of the mapping +(and must not conflict with the open mode of the file). +It is either +.B PROT_NONE +or the bitwise OR of one or more of the following flags: +.TP 1.1i +.B PROT_EXEC +Pages may be executed. +.TP +.B PROT_READ +Pages may be read. +.TP +.B PROT_WRITE +Pages may be written. +.TP +.B PROT_NONE +Pages may not be accessed. +.\" +.SS The flags argument +The +.I flags +argument determines whether updates to the mapping +are visible to other processes mapping the same region, +and whether updates are carried through to the underlying file. +This behavior is determined by including exactly one +of the following values in +.IR flags : +.TP +.B MAP_SHARED +Share this mapping. +Updates to the mapping are visible to other processes mapping the same region, +and (in the case of file-backed mappings) +are carried through to the underlying file. +(To precisely control when updates are carried through +to the underlying file requires the use of +.BR msync (2).) +.TP +.BR MAP_SHARED_VALIDATE " (since Linux 4.15)" +This flag provides the same behavior as +.B MAP_SHARED +except that +.B MAP_SHARED +mappings ignore unknown flags in +.IR flags . +By contrast, when creating a mapping using +.BR MAP_SHARED_VALIDATE , +the kernel verifies all passed flags are known and fails the +mapping with the error +.B EOPNOTSUPP +for unknown flags. +This mapping type is also required to be able to use some mapping flags +(e.g., +.BR MAP_SYNC ). +.TP +.B MAP_PRIVATE +Create a private copy-on-write mapping. +Updates to the mapping are not visible to other processes +mapping the same file, and are not carried through to +the underlying file. +It is unspecified whether changes made to the file after the +.BR mmap () +call are visible in the mapped region. +.PP +Both +.B MAP_SHARED +and +.B MAP_PRIVATE +are described in POSIX.1-2001 and POSIX.1-2008. +.B MAP_SHARED_VALIDATE +is a Linux extension. +.PP +In addition, zero or more of the following values can be ORed in +.IR flags : +.TP +.BR MAP_32BIT " (since Linux 2.4.20, 2.6)" +Put the mapping into the first 2 Gigabytes of the process address space. +This flag is supported only on x86-64, for 64-bit programs. +It was added to allow thread stacks to be allocated somewhere +in the first 2\ GB of memory, +so as to improve context-switch performance on some early +64-bit processors. +.\" See http://lwn.net/Articles/294642 "Tangled up in threads", 19 Aug 08 +Modern x86-64 processors no longer have this performance problem, +so use of this flag is not required on those systems. +The +.B MAP_32BIT +flag is ignored when +.B MAP_FIXED +is set. +.TP +.B MAP_ANON +Synonym for +.BR MAP_ANONYMOUS ; +provided for compatibility with other implementations. +.TP +.B MAP_ANONYMOUS +The mapping is not backed by any file; +its contents are initialized to zero. +The +.I fd +argument is ignored; +however, some implementations require +.I fd +to be \-1 if +.B MAP_ANONYMOUS +(or +.BR MAP_ANON ) +is specified, +and portable applications should ensure this. +The +.I offset +argument should be zero. +.\" See the pgoff overflow check in do_mmap(). +.\" See the offset check in sys_mmap in arch/x86/kernel/sys_x86_64.c. +Support for +.B MAP_ANONYMOUS +in conjunction with +.B MAP_SHARED +was added in Linux 2.4. +.TP +.B MAP_DENYWRITE +This flag is ignored. +.\" Introduced in 1.1.36, removed in 1.3.24. +(Long ago\[em]Linux 2.0 and earlier\[em]it signaled +that attempts to write to the underlying file should fail with +.BR ETXTBSY . +But this was a source of denial-of-service attacks.) +.TP +.B MAP_EXECUTABLE +This flag is ignored. +.\" Introduced in 1.1.38, removed in 1.3.24. Flag tested in proc_follow_link. +.\" (Long ago, it signaled that the underlying file is an executable. +.\" However, that information was not really used anywhere.) +.\" Linus talked about DOS related to MAP_EXECUTABLE, but he was thinking of +.\" MAP_DENYWRITE? +.TP +.B MAP_FILE +Compatibility flag. +Ignored. +.\" On some systems, this was required as the opposite of +.\" MAP_ANONYMOUS -- mtk, 1 May 2007 +.TP +.B MAP_FIXED +Don't interpret +.I addr +as a hint: place the mapping at exactly that address. +.I addr +must be suitably aligned: for most architectures a multiple of the page +size is sufficient; however, some architectures may impose additional +restrictions. +If the memory region specified by +.I addr +and +.I length +overlaps pages of any existing mapping(s), then the overlapped +part of the existing mapping(s) will be discarded. +If the specified address cannot be used, +.BR mmap () +will fail. +.IP +Software that aspires to be portable should use the +.B MAP_FIXED +flag with care, +keeping in mind that the exact layout of a process's memory mappings +is allowed to change significantly between Linux versions, +C library versions, and operating system releases. +.I Carefully read the discussion of this flag in NOTES! +.TP +.BR MAP_FIXED_NOREPLACE " (since Linux 4.17)" +.\" commit a4ff8e8620d3f4f50ac4b41e8067b7d395056843 +This flag provides behavior that is similar to +.B MAP_FIXED +with respect to the +.I addr +enforcement, but differs in that +.B MAP_FIXED_NOREPLACE +never clobbers a preexisting mapped range. +If the requested range would collide with an existing mapping, +then this call fails with the error +.B EEXIST. +This flag can therefore be used as a way to atomically +(with respect to other threads) attempt to map an address range: +one thread will succeed; all others will report failure. +.IP +Note that older kernels which do not recognize the +.B MAP_FIXED_NOREPLACE +flag will typically (upon detecting a collision with a preexisting mapping) +fall back to a +.RB \[lq]non- MAP_FIXED \[rq] +type of behavior: +they will return an address that is different from the requested address. +Therefore, backward-compatible software +should check the returned address against the requested address. +.TP +.B MAP_GROWSDOWN +This flag is used for stacks. +It indicates to the kernel virtual memory system that the mapping +should extend downward in memory. +The return address is one page lower than the memory area that is +actually created in the process's virtual address space. +Touching an address in the "guard" page below the mapping will cause +the mapping to grow by a page. +This growth can be repeated until the mapping grows to within a +page of the high end of the next lower mapping, +at which point touching the "guard" page will result in a +.B SIGSEGV +signal. +.TP +.BR MAP_HUGETLB " (since Linux 2.6.32)" +Allocate the mapping using "huge" pages. +See the Linux kernel source file +.I Documentation/admin\-guide/mm/hugetlbpage.rst +for further information, as well as NOTES, below. +.TP +.BR MAP_HUGE_2MB ", " MAP_HUGE_1GB " (since Linux 3.8)" +.\" See https://lwn.net/Articles/533499/ +Used in conjunction with +.B MAP_HUGETLB +to select alternative hugetlb page sizes (respectively, 2\ MB and 1\ GB) +on systems that support multiple hugetlb page sizes. +.IP +More generally, the desired huge page size can be configured by encoding +the base-2 logarithm of the desired page size in the six bits at the offset +.BR MAP_HUGE_SHIFT . +(A value of zero in this bit field provides the default huge page size; +the default huge page size can be discovered via the +.I Hugepagesize +field exposed by +.IR /proc/meminfo .) +Thus, the above two constants are defined as: +.IP +.in +4n +.EX +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +.EE +.in +.IP +The range of huge page sizes that are supported by the system +can be discovered by listing the subdirectories in +.IR /sys/kernel/mm/hugepages . +.TP +.BR MAP_LOCKED " (since Linux 2.5.37)" +Mark the mapped region to be locked in the same way as +.BR mlock (2). +This implementation will try to populate (prefault) the whole range but the +.BR mmap () +call doesn't fail with +.B ENOMEM +if this fails. +Therefore major faults might happen later on. +So the semantic is not as strong as +.BR mlock (2). +One should use +.BR mmap () +plus +.BR mlock (2) +when major faults are not acceptable after the initialization of the mapping. +The +.B MAP_LOCKED +flag is ignored in older kernels. +.\" If set, the mapped pages will not be swapped out. +.TP +.BR MAP_NONBLOCK " (since Linux 2.5.46)" +This flag is meaningful only in conjunction with +.BR MAP_POPULATE . +Don't perform read-ahead: +create page tables entries only for pages +that are already present in RAM. +Since Linux 2.6.23, +.\" commit 54cb8821de07f2ffcd28c380ce9b93d5784b40d7 +this flag causes +.B MAP_POPULATE +to do nothing. +One day, the combination of +.B MAP_POPULATE +and +.B MAP_NONBLOCK +may be reimplemented. +.TP +.B MAP_NORESERVE +Do not reserve swap space for this mapping. +When swap space is reserved, one has the guarantee +that it is possible to modify the mapping. +When swap space is not reserved one might get +.B SIGSEGV +upon a write +if no physical memory is available. +See also the discussion of the file +.I /proc/sys/vm/overcommit_memory +in +.BR proc (5). +Before Linux 2.6, this flag had effect only for +private writable mappings. +.TP +.BR MAP_POPULATE " (since Linux 2.5.46)" +Populate (prefault) page tables for a mapping. +For a file mapping, this causes read-ahead on the file. +This will help to reduce blocking on page faults later. +The +.BR mmap () +call doesn't fail if the mapping cannot be populated (for example, due +to limitations on the number of mapped huge pages when using +.BR MAP_HUGETLB ). +Support for +.B MAP_POPULATE +in conjunction with private mappings was added in Linux 2.6.23. +.TP +.BR MAP_STACK " (since Linux 2.6.27)" +Allocate the mapping at an address suitable for a process +or thread stack. +.IP +This flag is currently a no-op on Linux. +However, by employing this flag, applications can ensure that +they transparently obtain support if the flag +is implemented in the future. +Thus, it is used in the glibc threading implementation to allow for +the fact that some architectures may (later) require special treatment +for stack allocations. +.\" See http://lwn.net/Articles/294642 "Tangled up in threads", 19 Aug 08 +.\" commit cd98a04a59e2f94fa64d5bf1e26498d27427d5e7 +.\" http://thread.gmane.org/gmane.linux.kernel/720412 +.\" "pthread_create() slow for many threads; also time to revisit 64b +.\" context switch optimization?" +A further reason to employ this flag is portability: +.B MAP_STACK +exists (and has an effect) on some other systems (e.g., some of the BSDs). +.TP +.BR MAP_SYNC " (since Linux 4.15)" +This flag is available only with the +.B MAP_SHARED_VALIDATE +mapping type; +mappings of type +.B MAP_SHARED +will silently ignore this flag. +This flag is supported only for files supporting DAX +(direct mapping of persistent memory). +For other files, creating a mapping with this flag results in an +.B EOPNOTSUPP +error. +.IP +Shared file mappings with this flag provide the guarantee that while +some memory is mapped writable in the address space of the process, +it will be visible in the same file at the same offset even after +the system crashes or is rebooted. +In conjunction with the use of appropriate CPU instructions, +this provides users of such mappings with a more efficient way +of making data modifications persistent. +.TP +.BR MAP_UNINITIALIZED " (since Linux 2.6.33)" +Don't clear anonymous pages. +This flag is intended to improve performance on embedded devices. +This flag is honored only if the kernel was configured with the +.B CONFIG_MMAP_ALLOW_UNINITIALIZED +option. +Because of the security implications, +that option is normally enabled only on embedded devices +(i.e., devices where one has complete control of the contents of user memory). +.PP +Of the above flags, only +.B MAP_FIXED +is specified in POSIX.1-2001 and POSIX.1-2008. +However, most systems also support +.B MAP_ANONYMOUS +(or its synonym +.BR MAP_ANON ). +.\" FIXME . for later review when Issue 8 is one day released... +.\" POSIX may add MAP_ANON in the future +.\" http://austingroupbugs.net/tag_view_page.php?tag_id=8 +.\" http://austingroupbugs.net/view.php?id=850 +.SS munmap() +The +.BR munmap () +system call deletes the mappings for the specified address range, and +causes further references to addresses within the range to generate +invalid memory references. +The region is also automatically unmapped +when the process is terminated. +On the other hand, closing the file +descriptor does not unmap the region. +.PP +The address +.I addr +must be a multiple of the page size (but +.I length +need not be). +All pages containing a part +of the indicated range are unmapped, and subsequent references +to these pages will generate +.BR SIGSEGV . +It is not an error if the +indicated range does not contain any mapped pages. +.SH RETURN VALUE +On success, +.BR mmap () +returns a pointer to the mapped area. +On error, the value +.B MAP_FAILED +(that is, +.IR "(void\ *)\ \-1" ) +is returned, and +.I errno +is set to indicate the error. +.PP +On success, +.BR munmap () +returns 0. +On failure, it returns \-1, and +.I errno +is set to indicate the error (probably to +.BR EINVAL ). +.SH ERRORS +.TP +.B EACCES +A file descriptor refers to a non-regular file. +Or a file mapping was requested, but +.I fd +is not open for reading. +Or +.B MAP_SHARED +was requested and +.B PROT_WRITE +is set, but +.I fd +is not open in read/write +.RB ( O_RDWR ) +mode. +Or +.B PROT_WRITE +is set, but the file is append-only. +.TP +.B EAGAIN +The file has been locked, or too much memory has been locked (see +.BR setrlimit (2)). +.TP +.B EBADF +.I fd +is not a valid file descriptor (and +.B MAP_ANONYMOUS +was not set). +.TP +.B EEXIST +.B MAP_FIXED_NOREPLACE +was specified in +.IR flags , +and the range covered by +.I addr +and +.I length +clashes with an existing mapping. +.TP +.B EINVAL +We don't like +.IR addr , +.IR length , +or +.I offset +(e.g., they are too large, or not aligned on a page boundary). +.TP +.B EINVAL +(since Linux 2.6.12) +.I length +was 0. +.TP +.B EINVAL +.I flags +contained none of +.BR MAP_PRIVATE , +.BR MAP_SHARED , +or +.BR MAP_SHARED_VALIDATE . +.TP +.B ENFILE +.\" This is for shared anonymous segments +.\" [2.6.7] shmem_zero_setup()-->shmem_file_setup()-->get_empty_filp() +The system-wide limit on the total number of open files has been reached. +.\" .TP +.\" .B ENOEXEC +.\" A file could not be mapped for reading. +.TP +.B ENODEV +The underlying filesystem of the specified file does not support +memory mapping. +.TP +.B ENOMEM +No memory is available. +.TP +.B ENOMEM +The process's maximum number of mappings would have been exceeded. +This error can also occur for +.BR munmap (), +when unmapping a region in the middle of an existing mapping, +since this results in two smaller mappings on either side of +the region being unmapped. +.TP +.B ENOMEM +(since Linux 4.7) +The process's +.B RLIMIT_DATA +limit, described in +.BR getrlimit (2), +would have been exceeded. +.TP +.B ENOMEM +We don't like +.IR addr , +because it exceeds the virtual address space of the CPU. +.TP +.B EOVERFLOW +On 32-bit architecture together with the large file extension +(i.e., using 64-bit +.IR off_t ): +the number of pages used for +.I length +plus number of pages used for +.I offset +would overflow +.I "unsigned long" +(32 bits). +.TP +.B EPERM +The +.I prot +argument asks for +.B PROT_EXEC +but the mapped area belongs to a file on a filesystem that +was mounted no-exec. +.\" (Since Linux 2.4.25 / Linux 2.6.0.) +.TP +.B EPERM +The operation was prevented by a file seal; see +.BR fcntl (2). +.TP +.B EPERM +The +.B MAP_HUGETLB +flag was specified, but the caller was not privileged (did not have the +.B CAP_IPC_LOCK +capability) +and is not a member of the +.I sysctl_hugetlb_shm_group +group; see the description of +.I /proc/sys/vm/sysctl_hugetlb_shm_group +in +.TP +.B ETXTBSY +.B MAP_DENYWRITE +was set but the object specified by +.I fd +is open for writing. +.PP +Use of a mapped region can result in these signals: +.TP +.B SIGSEGV +Attempted write into a region mapped as read-only. +.TP +.B SIGBUS +Attempted access to a page of the buffer that lies beyond the +end of the mapped file. +For an explanation of the treatment of the bytes in the page that +corresponds to the end of a mapped file that is not a multiple +of the page size, see NOTES. +.SH ATTRIBUTES +For an explanation of the terms used in this section, see +.BR attributes (7). +.TS +allbox; +lbx lb lb +l l l. +Interface Attribute Value +T{ +.na +.nh +.BR mmap (), +.BR munmap () +T} Thread safety MT-Safe +.TE +.sp 1 +.SH VERSIONS +On some hardware architectures (e.g., i386), +.B PROT_WRITE +implies +.BR PROT_READ . +It is architecture dependent whether +.B PROT_READ +implies +.B PROT_EXEC +or not. +Portable programs should always set +.B PROT_EXEC +if they intend to execute code in the new mapping. +.PP +The portable way to create a mapping is to specify +.I addr +as 0 (NULL), and omit +.B MAP_FIXED +from +.IR flags . +In this case, the system chooses the address for the mapping; +the address is chosen so as not to conflict with any existing mapping, +and will not be 0. +If the +.B MAP_FIXED +flag is specified, and +.I addr +is 0 (NULL), then the mapped address will be 0 (NULL). +.PP +Certain +.I flags +constants are defined only if suitable feature test macros are defined +(possibly by default): +.B _DEFAULT_SOURCE +with glibc 2.19 or later; +or +.B _BSD_SOURCE +or +.B _SVID_SOURCE +in glibc 2.19 and earlier. +(Employing +.B _GNU_SOURCE +also suffices, +and requiring that macro specifically would have been more logical, +since these flags are all Linux-specific.) +The relevant flags are: +.BR MAP_32BIT , +.B MAP_ANONYMOUS +(and the synonym +.BR MAP_ANON ), +.BR MAP_DENYWRITE , +.BR MAP_EXECUTABLE , +.BR MAP_FILE , +.BR MAP_GROWSDOWN , +.BR MAP_HUGETLB , +.BR MAP_LOCKED , +.BR MAP_NONBLOCK , +.BR MAP_NORESERVE , +.BR MAP_POPULATE , +and +.BR MAP_STACK . +.SS C library/kernel differences +This page describes the interface provided by the glibc +.BR mmap () +wrapper function. +Originally, this function invoked a system call of the same name. +Since Linux 2.4, that system call has been superseded by +.BR mmap2 (2), +and nowadays +.\" Since around glibc 2.1/2.2, depending on the platform. +the glibc +.BR mmap () +wrapper function invokes +.BR mmap2 (2) +with a suitably adjusted value for +.IR offset . +.SH STANDARDS +POSIX.1-2008. +.SH HISTORY +POSIX.1-2001, SVr4, 4.4BSD. +.\" SVr4 documents additional error codes ENXIO and ENODEV. +.\" SUSv2 documents additional error codes EMFILE and EOVERFLOW. +.PP +On POSIX systems on which +.BR mmap (), +.BR msync (2), +and +.BR munmap () +are available, +.B _POSIX_MAPPED_FILES +is defined in \fI\fP to a value greater than 0. +(See also +.BR sysconf (3).) +.\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L. +.\" -1: unavailable, 0: ask using sysconf(). +.\" glibc defines it to 1. +.SH NOTES +Memory mapped by +.BR mmap () +is preserved across +.BR fork (2), +with the same attributes. +.PP +A file is mapped in multiples of the page size. +For a file that is not +a multiple of the page size, +the remaining bytes in the partial page at the end of the mapping +are zeroed when mapped, +and modifications to that region are not written out to the file. +The effect of +changing the size of the underlying file of a mapping on the pages that +correspond to added or removed regions of the file is unspecified. +.PP +An application can determine which pages of a mapping are +currently resident in the buffer/page cache using +.BR mincore (2). +.\" +.SS Using MAP_FIXED safely +The only safe use for +.B MAP_FIXED +is where the address range specified by +.I addr +and +.I length +was previously reserved using another mapping; +otherwise, the use of +.B MAP_FIXED +is hazardous because it forcibly removes preexisting mappings, +making it easy for a multithreaded process to corrupt its own address space. +.PP +For example, suppose that thread A looks through +.IR /proc/ pid /maps +in order to locate an unused address range that it can map using +.BR MAP_FIXED , +while thread B simultaneously acquires part or all of that same +address range. +When thread A subsequently employs +.BR mmap(MAP_FIXED) , +it will effectively clobber the mapping that thread B created. +In this scenario, +thread B need not create a mapping directly; simply making a library call +that, internally, uses +.BR dlopen (3) +to load some other shared library, will suffice. +The +.BR dlopen (3) +call will map the library into the process's address space. +Furthermore, almost any library call may be implemented in a way that +adds memory mappings to the address space, either with this technique, +or by simply allocating memory. +Examples include +.BR brk (2), +.BR malloc (3), +.BR pthread_create (3), +and the PAM libraries +.UR http://www.linux-pam.org +.UE . +.PP +Since Linux 4.17, a multithreaded program can use the +.B MAP_FIXED_NOREPLACE +flag to avoid the hazard described above +when attempting to create a mapping at a fixed address +that has not been reserved by a preexisting mapping. +.\" +.SS Timestamps changes for file-backed mappings +For file-backed mappings, the +.I st_atime +field for the mapped file may be updated at any time between the +.BR mmap () +and the corresponding unmapping; the first reference to a mapped +page will update the field if it has not been already. +.PP +The +.I st_ctime +and +.I st_mtime +field for a file mapped with +.B PROT_WRITE +and +.B MAP_SHARED +will be updated after +a write to the mapped region, and before a subsequent +.BR msync (2) +with the +.B MS_SYNC +or +.B MS_ASYNC +flag, if one occurs. +.\" +.SS Huge page (Huge TLB) mappings +For mappings that employ huge pages, the requirements for the arguments of +.BR mmap () +and +.BR munmap () +differ somewhat from the requirements for mappings +that use the native system page size. +.PP +For +.BR mmap (), +.I offset +must be a multiple of the underlying huge page size. +The system automatically aligns +.I length +to be a multiple of the underlying huge page size. +.PP +For +.BR munmap (), +.IR addr , +and +.I length +must both be a multiple of the underlying huge page size. +.\" +.SH BUGS +On Linux, there are no guarantees like those suggested above under +.BR MAP_NORESERVE . +By default, any process can be killed +at any moment when the system runs out of memory. +.PP +Before Linux 2.6.7, the +.B MAP_POPULATE +flag has effect only if +.I prot +is specified as +.BR PROT_NONE . +.PP +SUSv3 specifies that +.BR mmap () +should fail if +.I length +is 0. +However, before Linux 2.6.12, +.BR mmap () +succeeded in this case: no mapping was created and the call returned +.IR addr . +Since Linux 2.6.12, +.BR mmap () +fails with the error +.B EINVAL +for this case. +.PP +POSIX specifies that the system shall always +zero fill any partial page at the end +of the object and that system will never write any modification of the +object beyond its end. +On Linux, when you write data to such partial page after the end +of the object, the data stays in the page cache even after the file +is closed and unmapped +and even though the data is never written to the file itself, +subsequent mappings may see the modified content. +In some cases, this could be fixed by calling +.BR msync (2) +before the unmap takes place; +however, this doesn't work on +.BR tmpfs (5) +(for example, when using the POSIX shared memory interface documented in +.BR shm_overview (7)). +.SH EXAMPLES +.\" FIXME . Add an example here that uses an anonymous shared region for +.\" IPC between parent and child. +The following program prints part of the file specified in +its first command-line argument to standard output. +The range of bytes to be printed is specified via offset and length +values in the second and third command-line arguments. +The program creates a memory mapping of the required +pages of the file and then uses +.BR write (2) +to output the desired bytes. +.SS Program source +.\" SRC BEGIN (mmap.c) +.EX +#include +#include +#include +#include +#include +#include +\& +#define handle_error(msg) \e + do { perror(msg); exit(EXIT_FAILURE); } while (0) +\& +int +main(int argc, char *argv[]) +{ + int fd; + char *addr; + off_t offset, pa_offset; + size_t length; + ssize_t s; + struct stat sb; +\& + if (argc < 3 || argc > 4) { + fprintf(stderr, "%s file offset [length]\en", argv[0]); + exit(EXIT_FAILURE); + } +\& + fd = open(argv[1], O_RDONLY); + if (fd == \-1) + handle_error("open"); +\& + if (fstat(fd, &sb) == \-1) /* To obtain file size */ + handle_error("fstat"); +\& + offset = atoi(argv[2]); + pa_offset = offset & \[ti](sysconf(_SC_PAGE_SIZE) \- 1); + /* offset for mmap() must be page aligned */ +\& + if (offset >= sb.st_size) { + fprintf(stderr, "offset is past end of file\en"); + exit(EXIT_FAILURE); + } +\& + if (argc == 4) { + length = atoi(argv[3]); + if (offset + length > sb.st_size) + length = sb.st_size \- offset; + /* Can\[aq]t display bytes past end of file */ +\& + } else { /* No length arg ==> display to end of file */ + length = sb.st_size \- offset; + } +\& + addr = mmap(NULL, length + offset \- pa_offset, PROT_READ, + MAP_PRIVATE, fd, pa_offset); + if (addr == MAP_FAILED) + handle_error("mmap"); +\& + s = write(STDOUT_FILENO, addr + offset \- pa_offset, length); + if (s != length) { + if (s == \-1) + handle_error("write"); +\& + fprintf(stderr, "partial write"); + exit(EXIT_FAILURE); + } +\& + munmap(addr, length + offset \- pa_offset); + close(fd); +\& + exit(EXIT_SUCCESS); +} +.EE +.\" SRC END +.SH SEE ALSO +.BR ftruncate (2), +.BR getpagesize (2), +.BR memfd_create (2), +.BR mincore (2), +.BR mlock (2), +.BR mmap2 (2), +.BR mprotect (2), +.BR mremap (2), +.BR msync (2), +.BR remap_file_pages (2), +.BR setrlimit (2), +.BR shmat (2), +.BR userfaultfd (2), +.BR shm_open (3), +.BR shm_overview (7) +.PP +The descriptions of the following files in +.BR proc (5): +.IR /proc/ pid /maps , +.IR /proc/ pid /map_files , +and +.IR /proc/ pid /smaps . +.PP +B.O. Gallmeister, POSIX.4, O'Reilly, pp. 128\[en]129 and 389\[en]391. +.\" +.\" Repeat after me: private read-only mappings are 100% equivalent to +.\" shared read-only mappings. No ifs, buts, or maybes. -- Linus -- cgit v1.2.3