summaryrefslogtreecommitdiffstats
path: root/man2/eventfd.2
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--man2/eventfd.2443
1 files changed, 443 insertions, 0 deletions
diff --git a/man2/eventfd.2 b/man2/eventfd.2
new file mode 100644
index 0000000..7be6a3a
--- /dev/null
+++ b/man2/eventfd.2
@@ -0,0 +1,443 @@
+'\" t
+.\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
+.\" starting from a version by Davide Libenzi <davidel@xmailserver.org>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC
+.\"
+.TH eventfd 2 2023-07-20 "Linux man-pages 6.05.01"
+.SH NAME
+eventfd \- create a file descriptor for event notification
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.B #include <sys/eventfd.h>
+.PP
+.BI "int eventfd(unsigned int " initval ", int " flags );
+.fi
+.SH DESCRIPTION
+.BR eventfd ()
+creates an "eventfd object" that can be used as
+an event wait/notify mechanism by user-space applications,
+and by the kernel to notify user-space applications of events.
+The object contains an unsigned 64-bit integer
+.RI ( uint64_t )
+counter that is maintained by the kernel.
+This counter is initialized with the value specified in the argument
+.IR initval .
+.PP
+As its return value,
+.BR eventfd ()
+returns a new file descriptor that can be used to refer to the
+eventfd object.
+.PP
+The following values may be bitwise ORed in
+.I flags
+to change the behavior of
+.BR eventfd ():
+.TP
+.BR EFD_CLOEXEC " (since Linux 2.6.27)"
+Set the close-on-exec
+.RB ( FD_CLOEXEC )
+flag on the new file descriptor.
+See the description of the
+.B O_CLOEXEC
+flag in
+.BR open (2)
+for reasons why this may be useful.
+.TP
+.BR EFD_NONBLOCK " (since Linux 2.6.27)"
+Set the
+.B O_NONBLOCK
+file status flag on the open file description (see
+.BR open (2))
+referred to by the new file descriptor.
+Using this flag saves extra calls to
+.BR fcntl (2)
+to achieve the same result.
+.TP
+.BR EFD_SEMAPHORE " (since Linux 2.6.30)"
+Provide semaphore-like semantics for reads from the new file descriptor.
+See below.
+.PP
+Up to Linux 2.6.26, the
+.I flags
+argument is unused, and must be specified as zero.
+.PP
+The following operations can be performed on the file descriptor returned by
+.BR eventfd ():
+.TP
+.BR read (2)
+Each successful
+.BR read (2)
+returns an 8-byte integer.
+A
+.BR read (2)
+fails with the error
+.B EINVAL
+if the size of the supplied buffer is less than 8 bytes.
+.IP
+The value returned by
+.BR read (2)
+is in host byte order\[em]that is,
+the native byte order for integers on the host machine.
+.IP
+The semantics of
+.BR read (2)
+depend on whether the eventfd counter currently has a nonzero value
+and whether the
+.B EFD_SEMAPHORE
+flag was specified when creating the eventfd file descriptor:
+.RS
+.IP \[bu] 3
+If
+.B EFD_SEMAPHORE
+was not specified and the eventfd counter has a nonzero value, then a
+.BR read (2)
+returns 8 bytes containing that value,
+and the counter's value is reset to zero.
+.IP \[bu]
+If
+.B EFD_SEMAPHORE
+was specified and the eventfd counter has a nonzero value, then a
+.BR read (2)
+returns 8 bytes containing the value 1,
+and the counter's value is decremented by 1.
+.IP \[bu]
+If the eventfd counter is zero at the time of the call to
+.BR read (2),
+then the call either blocks until the counter becomes nonzero
+(at which time, the
+.BR read (2)
+proceeds as described above)
+or fails with the error
+.B EAGAIN
+if the file descriptor has been made nonblocking.
+.RE
+.TP
+.BR write (2)
+A
+.BR write (2)
+call adds the 8-byte integer value supplied in its
+buffer to the counter.
+The maximum value that may be stored in the counter is the largest
+unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe).
+If the addition would cause the counter's value to exceed
+the maximum, then the
+.BR write (2)
+either blocks until a
+.BR read (2)
+is performed on the file descriptor,
+or fails with the error
+.B EAGAIN
+if the file descriptor has been made nonblocking.
+.IP
+A
+.BR write (2)
+fails with the error
+.B EINVAL
+if the size of the supplied buffer is less than 8 bytes,
+or if an attempt is made to write the value 0xffffffffffffffff.
+.TP
+.BR poll "(2), " select "(2) (and similar)"
+The returned file descriptor supports
+.BR poll (2)
+(and analogously
+.BR epoll (7))
+and
+.BR select (2),
+as follows:
+.RS
+.IP \[bu] 3
+The file descriptor is readable
+(the
+.BR select (2)
+.I readfds
+argument; the
+.BR poll (2)
+.B POLLIN
+flag)
+if the counter has a value greater than 0.
+.IP \[bu]
+The file descriptor is writable
+(the
+.BR select (2)
+.I writefds
+argument; the
+.BR poll (2)
+.B POLLOUT
+flag)
+if it is possible to write a value of at least "1" without blocking.
+.IP \[bu]
+If an overflow of the counter value was detected,
+then
+.BR select (2)
+indicates the file descriptor as being both readable and writable, and
+.BR poll (2)
+returns a
+.B POLLERR
+event.
+As noted above,
+.BR write (2)
+can never overflow the counter.
+However an overflow can occur if 2\[ha]64
+eventfd "signal posts" were performed by the KAIO
+subsystem (theoretically possible, but practically unlikely).
+If an overflow has occurred, then
+.BR read (2)
+will return that maximum
+.I uint64_t
+value (i.e., 0xffffffffffffffff).
+.RE
+.IP
+The eventfd file descriptor also supports the other file-descriptor
+multiplexing APIs:
+.BR pselect (2)
+and
+.BR ppoll (2).
+.TP
+.BR close (2)
+When the file descriptor is no longer required it should be closed.
+When all file descriptors associated with the same eventfd object
+have been closed, the resources for object are freed by the kernel.
+.PP
+A copy of the file descriptor created by
+.BR eventfd ()
+is inherited by the child produced by
+.BR fork (2).
+The duplicate file descriptor is associated with the same
+eventfd object.
+File descriptors created by
+.BR eventfd ()
+are preserved across
+.BR execve (2),
+unless the close-on-exec flag has been set.
+.SH RETURN VALUE
+On success,
+.BR eventfd ()
+returns a new eventfd file descriptor.
+On error, \-1 is returned and
+.I errno
+is set to indicate the error.
+.SH ERRORS
+.TP
+.B EINVAL
+An unsupported value was specified in
+.IR flags .
+.TP
+.B EMFILE
+The per-process limit on the number of open file descriptors has been reached.
+.TP
+.B ENFILE
+The system-wide limit on the total number of open files has been
+reached.
+.TP
+.B ENODEV
+.\" Note from Davide:
+.\" The ENODEV error is basically never going to happen if
+.\" the kernel boots correctly. That error happen only if during
+.\" the kernel initialization, some error occur in the anonymous
+.\" inode source initialization.
+Could not mount (internal) anonymous inode device.
+.TP
+.B ENOMEM
+There was insufficient memory to create a new
+eventfd file descriptor.
+.SH ATTRIBUTES
+For an explanation of the terms used in this section, see
+.BR attributes (7).
+.TS
+allbox;
+lbx lb lb
+l l l.
+Interface Attribute Value
+T{
+.na
+.nh
+.BR eventfd ()
+T} Thread safety MT-Safe
+.TE
+.sp 1
+.SH VERSIONS
+.SS C library/kernel differences
+There are two underlying Linux system calls:
+.BR eventfd ()
+and the more recent
+.BR eventfd2 ().
+The former system call does not implement a
+.I flags
+argument.
+The latter system call implements the
+.I flags
+values described above.
+The glibc wrapper function will use
+.BR eventfd2 ()
+where it is available.
+.SS Additional glibc features
+The GNU C library defines an additional type,
+and two functions that attempt to abstract some of the details of
+reading and writing on an eventfd file descriptor:
+.PP
+.in +4n
+.EX
+typedef uint64_t eventfd_t;
+\&
+int eventfd_read(int fd, eventfd_t *value);
+int eventfd_write(int fd, eventfd_t value);
+.EE
+.in
+.PP
+The functions perform the read and write operations on an
+eventfd file descriptor,
+returning 0 if the correct number of bytes was transferred,
+or \-1 otherwise.
+.SH STANDARDS
+Linux, GNU.
+.SH HISTORY
+.TP
+.BR eventfd ()
+Linux 2.6.22,
+glibc 2.8.
+.\" eventfd() is in glibc 2.7, but reportedly does not build
+.TP
+.BR eventfd2 ()
+Linux 2.6.27 (see VERSIONS).
+Since glibc 2.9, the
+.BR eventfd ()
+wrapper will employ the
+.BR eventfd2 ()
+system call, if it is supported by the kernel.
+.SH NOTES
+Applications can use an eventfd file descriptor instead of a pipe (see
+.BR pipe (2))
+in all cases where a pipe is used simply to signal events.
+The kernel overhead of an eventfd file descriptor
+is much lower than that of a pipe,
+and only one file descriptor is
+required (versus the two required for a pipe).
+.PP
+When used in the kernel, an eventfd
+file descriptor can provide a bridge from kernel to user space, allowing,
+for example, functionalities like KAIO (kernel AIO)
+.\" or eventually syslets/threadlets
+to signal to a file descriptor that some operation is complete.
+.PP
+A key point about an eventfd file descriptor is that it can be
+monitored just like any other file descriptor using
+.BR select (2),
+.BR poll (2),
+or
+.BR epoll (7).
+This means that an application can simultaneously monitor the
+readiness of "traditional" files and the readiness of other
+kernel mechanisms that support the eventfd interface.
+(Without the
+.BR eventfd ()
+interface, these mechanisms could not be multiplexed via
+.BR select (2),
+.BR poll (2),
+or
+.BR epoll (7).)
+.PP
+The current value of an eventfd counter can be viewed
+via the entry for the corresponding file descriptor in the process's
+.IR /proc/ pid /fdinfo
+directory.
+See
+.BR proc (5)
+for further details.
+.SH EXAMPLES
+The following program creates an eventfd file descriptor
+and then forks to create a child process.
+While the parent briefly sleeps,
+the child writes each of the integers supplied in the program's
+command-line arguments to the eventfd file descriptor.
+When the parent has finished sleeping,
+it reads from the eventfd file descriptor.
+.PP
+The following shell session shows a sample run of the program:
+.PP
+.in +4n
+.EX
+.RB "$" " ./a.out 1 2 4 7 14"
+Child writing 1 to efd
+Child writing 2 to efd
+Child writing 4 to efd
+Child writing 7 to efd
+Child writing 14 to efd
+Child completed write loop
+Parent about to read
+Parent read 28 (0x1c) from efd
+.EE
+.in
+.SS Program source
+\&
+.\" SRC BEGIN (eventfd.c)
+.EX
+#include <err.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+\&
+int
+main(int argc, char *argv[])
+{
+ int efd;
+ uint64_t u;
+ ssize_t s;
+\&
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <num>...\en", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+\&
+ efd = eventfd(0, 0);
+ if (efd == \-1)
+ err(EXIT_FAILURE, "eventfd");
+\&
+ switch (fork()) {
+ case 0:
+ for (size_t j = 1; j < argc; j++) {
+ printf("Child writing %s to efd\en", argv[j]);
+ u = strtoull(argv[j], NULL, 0);
+ /* strtoull() allows various bases */
+ s = write(efd, &u, sizeof(uint64_t));
+ if (s != sizeof(uint64_t))
+ err(EXIT_FAILURE, "write");
+ }
+ printf("Child completed write loop\en");
+\&
+ exit(EXIT_SUCCESS);
+\&
+ default:
+ sleep(2);
+\&
+ printf("Parent about to read\en");
+ s = read(efd, &u, sizeof(uint64_t));
+ if (s != sizeof(uint64_t))
+ err(EXIT_FAILURE, "read");
+ printf("Parent read %"PRIu64" (%#"PRIx64") from efd\en", u, u);
+ exit(EXIT_SUCCESS);
+\&
+ case \-1:
+ err(EXIT_FAILURE, "fork");
+ }
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR futex (2),
+.BR pipe (2),
+.BR poll (2),
+.BR read (2),
+.BR select (2),
+.BR signalfd (2),
+.BR timerfd_create (2),
+.BR write (2),
+.BR epoll (7),
+.BR sem_overview (7)